├── .DS_Store ├── .coveragerc ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .readthedocs.yaml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── PCT.qrc ├── README.md ├── bin └── pct_qt_debug.py ├── build_dmg.sh ├── continuous-integration └── travis │ └── install.sh ├── corpustools ├── __init__.py ├── acousticsim │ ├── __init__.py │ ├── distance_functions.py │ ├── io.py │ ├── main.py │ └── representations.py ├── command_line │ ├── __init__.py │ ├── favicon.ico │ ├── favicon.png │ ├── pct.py │ ├── pct.pyw │ ├── pct.spec │ ├── pct_corpus.py │ ├── pct_funcload.py │ ├── pct_kl.py │ ├── pct_mutualinfo.py │ ├── pct_neighdens.py │ ├── pct_search.py │ ├── pct_visualize.py │ ├── resources.py │ └── resources.qrc ├── contextmanagers.py ├── corpus │ ├── __init__.py │ ├── classes │ │ ├── __init__.py │ │ ├── lexicon.py │ │ ├── lexicon.py.orig │ │ └── spontaneous.py │ └── io │ │ ├── __init__.py │ │ ├── binary.py │ │ ├── csv.py │ │ ├── helper.py │ │ ├── multiple_files.py │ │ ├── pct_textgrid.py │ │ ├── text_ilg.py │ │ ├── text_spelling.py │ │ ├── text_transcription.py │ │ └── textgrid11_pct.py ├── decorators.py ├── exceptions.py ├── freqalt │ ├── __init__.py │ ├── freq_of_alt.py │ └── io.py ├── funcload │ ├── New Text Document.py │ ├── __init__.py │ ├── functional_load.py │ ├── functional_load_old_version.py │ └── io.py ├── gui │ ├── __init__.py │ ├── asgui.py │ ├── autogui.py │ ├── config.py │ ├── corpusgui.py │ ├── corpusgui.py.orig │ ├── delegates.py │ ├── environments.py │ ├── fagui.py │ ├── fagui.py.orig │ ├── featuregui.py │ ├── featuregui.py.orig │ ├── flgui.py │ ├── flgui.py.orig │ ├── graph.py │ ├── guitest.py │ ├── helpgui.py │ ├── imports.py │ ├── imports.py.orig │ ├── infogui.py │ ├── inventorygui.py │ ├── iogui.py │ ├── iogui.py.orig │ ├── klgui.py │ ├── luckygui.py │ ├── main.py │ ├── main.py.orig │ ├── main.spec │ ├── migui.py │ ├── models.py │ ├── modernize.py │ ├── multimedia.py │ ├── ndgui.py │ ├── pct_rc.py │ ├── 
pdgui.py │ ├── pdgui.py.orig │ ├── ppgui.py │ ├── psgui.py │ ├── psgui.py.orig │ ├── ssgui.py │ ├── syllables.py │ ├── trpgui.py │ ├── versioning.py │ ├── views.py │ ├── widgets.py │ ├── widgets.py.orig │ └── windows.py ├── informativity │ ├── __init__.py │ └── informativity.py ├── kl │ ├── __init__.py │ └── kl.py ├── lemurian.py ├── lemurian.txt ├── multiproc.py ├── mutualinfo │ ├── __init__.py │ └── mutual_information.py ├── neighdens │ ├── __init__.py │ ├── io.py │ └── neighborhood_density.py ├── phonoprob │ ├── __init__.py │ └── phonotactic_probability.py ├── phonosearch │ ├── __init__.py │ └── phonosearch.py ├── prod │ ├── __init__.py │ └── pred_of_dist.py ├── sphinx │ ├── __init__.py │ └── ipatexescape.py ├── symbolsim │ ├── __init__.py │ ├── edit_distance.py │ ├── io.py │ ├── khorsi.py │ ├── phono_align.py │ ├── phono_edit_distance.py │ └── string_similarity.py ├── transprob │ └── transitional_probability.py ├── utils.py └── visualize.py ├── cx_setup.py ├── docs ├── Makefile ├── images │ ├── favicon.icns │ ├── favicon.png │ ├── icon.icns │ ├── logo.ico │ └── logo.jpg └── source │ ├── about.rst │ ├── acoustic_similarity.rst │ ├── apireference.rst │ ├── bigram_selector.rst │ ├── citing_pct.rst │ ├── conf.py │ ├── downloading_and_installing.rst │ ├── environment_selection.rst │ ├── examplecorpora.rst │ ├── feature_selection.rst │ ├── frequency_of_alternation.rst │ ├── functional_load.rst │ ├── index.rst │ ├── informativity.rst │ ├── introduction.rst │ ├── kullback-leibler.rst │ ├── loading_corpora.rst │ ├── mutual_information.rst │ ├── neighborhood_density.rst │ ├── phonological_search.rst │ ├── phonotactic_probability.rst │ ├── predictability_of_distribution.rst │ ├── pronunciation_variants.rst │ ├── references.rst │ ├── release.rst │ ├── release │ ├── 1.0.1-notes.rst │ ├── 1.1.0-notes.rst │ ├── 1.2.0-notes.rst │ ├── 1.3.0-notes.rst │ ├── 1.4.0-notes.rst │ ├── 1.4.1-notes.rst │ ├── 1.5.0-notes.rst │ └── 1.5.1-notes.rst │ ├── sound_selection.rst │ ├── static │ 
├── PCT_logo.png │ ├── a_matches.png │ ├── about.css │ ├── acousticsimdialog.png │ ├── asresults.png │ ├── bigram.png │ ├── bigram_selector.png │ ├── bigram_warning.png │ ├── bigrams_selected.png │ ├── construct_syllables.png │ ├── corpustranscribed.png │ ├── corpustranscribed_digraphs.png │ ├── createtier.png │ ├── createtierfeatures.png │ ├── cvtier.png │ ├── delta-H_vs_raw_min_pairs.png │ ├── digraph.png │ ├── downloadfeature.png │ ├── duplicated_search_warning.png │ ├── editcategories.png │ ├── editfeatures.png │ ├── environment1.png │ ├── environment1.tiff │ ├── environment2.png │ ├── environment2.tiff │ ├── environment3.png │ ├── environment4.png │ ├── examplecvtier.png │ ├── examplevoweltier.png │ ├── featurefile.png │ ├── featurepairselection.png │ ├── freqaltdialog.png │ ├── freqaltresults.png │ ├── functionalloaddialog.png │ ├── functionalloadresults.png │ ├── funtionalloaddialog.png │ ├── funtionalloadresults.png │ ├── gitksandelimited.png │ ├── gitksanloaded.png │ ├── gitksanoriginal.png │ ├── gitksanorthcorpus1.png │ ├── gitksanparsing.png │ ├── high_front_vowels_only.png │ ├── ilg_loading1.png │ ├── importcsv1.png │ ├── importspontaneous.png │ ├── informativity1.png │ ├── informativity1GUI.png │ ├── informativity2.png │ ├── informativity2GUI.png │ ├── informativity_559tests.png │ ├── kldialog.png │ ├── klresults.png │ ├── lemurian_sorted.png │ ├── lemurian_sorted_cons.png │ ├── lemurian_sorted_vowels.png │ ├── lemurian_unsorted.png │ ├── lemurian_unsorted.png .png │ ├── loadcorpus.png │ ├── loadexample.png │ ├── loadfeature.png │ ├── logo.png │ ├── midialog.png │ ├── miresults.png │ ├── neighdencolumn.png │ ├── neighdendialog.png │ ├── neighdendialogoutput.png │ ├── neighdeninput.png │ ├── neighdeninputresults.png │ ├── neighdenoutput.png │ ├── neighdenresults.png │ ├── non-labial_onset_plus_A.png │ ├── overwrite_file.png │ ├── parsingsettings.png │ ├── pctguilog.png │ ├── phonoprobdialog.png │ ├── phonoprobresults.png │ ├── phonoprobresults_2.png │ 
├── phonosearchenvironment.png │ ├── phonosearchenvironment2.png │ ├── phonosearchenvironment3.png │ ├── phonosearchenvironment4_individ_results.png │ ├── phonosearchenvironment4_not_sep.png │ ├── phonosearchenvironment4_sep.png │ ├── phonosearchenvironmentsyllable.png │ ├── phonosearchenvironmentsyllable2.png │ ├── phonosearchindividual.png │ ├── phonosearchindividualnegative.png │ ├── phonosearchindividualsyllable.png │ ├── phonosearchsaved.png │ ├── phonosearchsaved2.png │ ├── phonosearchsummary.png │ ├── phonosearchsummarynegative.png │ ├── phonosearchsummarysyllable.png │ ├── prod_vs_raw_min_pairs.png │ ├── prod_vs_rel_min_pairs.png │ ├── proddialog.png │ ├── proderror.png │ ├── proderror2.png │ ├── prodfreq.png │ ├── prodresults.png │ ├── pronunciationvariant.png │ ├── savingphonosearch1.png │ ├── savingphonosearch2.png │ ├── segmentpair.png │ ├── segmentpair1.png │ ├── segmentpair2.png │ ├── spontaneouscorpus.png │ ├── stringsimilaritydialog.png │ ├── stringsimilarityresults.png │ ├── superset_search_individuals.png │ ├── superset_search_summary.png │ ├── tierpreview.png │ ├── two_separate_searches_individual_results.png │ ├── two_separate_searches_summary_results.png │ └── two_separate_syllable_searches.png │ ├── string_similarity.rst │ ├── templates │ ├── class.rst │ ├── function.rst │ └── layout.html │ ├── transcriptions_and_feature_systems.rst │ └── transitional_probability.rst ├── esk_setup.py ├── examples ├── example_corpus.txt ├── using_freq_of_alt.py ├── using_freq_of_alt_samples.py ├── using_morph_relatedness.py └── using_string_similarity.py ├── pa_setup.py ├── pct.iss ├── pct.spec ├── requirements.txt ├── requirements_dev.txt ├── resources ├── 48x48_win.ico ├── CORPUS │ ├── example.corpus │ ├── example_syllabified.corpus │ ├── iphod_with_homographs.corpus │ ├── iphod_without_homographs.corpus │ └── lemurian.corpus ├── FEATURE │ ├── arpabet2hayes.feature │ ├── arpabet2spe.feature │ ├── buckeye2hayes.feature │ ├── buckeye2spe.feature │ ├── 
celex2hayes.feature │ ├── celex2spe.feature │ ├── cpa2hayes.feature │ ├── cpa2spe.feature │ ├── disc2hayes.feature │ ├── disc2spe.feature │ ├── ipa2hayes.feature │ ├── ipa2spe.feature │ ├── klatt2hayes.feature │ ├── klatt2spe.feature │ ├── sampa2hayes.feature │ └── sampa2spe.feature ├── favicon.icns ├── favicon.png ├── favicon32x32.ico ├── logo.ico ├── mac_iconBase.png ├── object-flip-horizontal.png ├── object-flip-horizontal.svg └── qt.conf ├── setup.py ├── syllabification.rst └── tests ├── __init__.py ├── conftest.py ├── data ├── Buckeye_sample │ ├── s0101a.log │ ├── s0101a.phones │ ├── s0101a.wav │ └── s0101a.words ├── buckeye │ ├── test.phones │ └── test.words ├── csv │ ├── stressed.txt │ ├── tiered.txt │ └── tonal.txt ├── exported │ ├── test_export_ilg.txt │ ├── test_export_spelling.txt │ └── test_export_transcription.txt ├── features │ ├── basic.txt │ ├── missing_value.txt │ ├── no_symbol_column.txt │ ├── no_unspecified.txt │ ├── overlapping_specified.txt │ ├── overlapping_symbols.txt │ ├── overlapping_symbols_diff_feat.txt │ ├── overlapping_unspecified.txt │ ├── test_feature_matrix.txt │ ├── test_feature_matrix_extra_feature.txt │ ├── test_feature_matrix_missing_segment.txt │ └── test_feature_matrix_missing_value.txt ├── homophones │ └── ND_test_README.rtf ├── ilg │ ├── ilg_example.txt │ ├── test_basic.txt │ ├── test_contains_translations.txt │ └── test_mismatched.txt ├── text │ ├── test_text_spelling.txt │ ├── test_text_transcription.txt │ └── test_text_transcription_morpheme_boundaries.txt └── textgrids │ ├── 2speakers.TextGrid │ ├── WebMAUS_English_story_114.TextGrid │ ├── WebMAUS_English_story_123.TextGrid │ ├── hashtag_BC.TextGrid │ ├── phone_word.TextGrid │ ├── phone_word_notes.TextGrid │ ├── phone_word_silence.TextGrid │ ├── phone_word_sp_phone.TextGrid │ ├── phone_word_sp_word.TextGrid │ ├── pronunc_variants_corpus.TextGrid │ ├── pronunc_variants_corpus2.TextGrid │ ├── pronunc_variants_corpus_remade.TextGrid │ ├── test_directory │ ├── 
pronunc_variants_corpus.TextGrid │ ├── pronunc_variants_corpus2.TextGrid │ └── pronunc_variants_corpus3.TextGrid │ ├── webMAUS_directory │ ├── WebMAUS_English_story_114.TextGrid │ └── WebMAUS_English_story_123.TextGrid │ ├── word_phone.TextGrid │ ├── word_segment.TextGrid │ ├── words_phones.TextGrid │ └── words_segments.TextGrid ├── test_acousticsim_io.py ├── test_cl.py ├── test_edit_distance.py ├── test_freq_of_alt.py ├── test_functional_load.py ├── test_gui_asgui.py ├── test_gui_config.py ├── test_gui_corpusgui.py ├── test_gui_fagui.py ├── test_gui_featuregui.py ├── test_gui_flgui.py ├── test_gui_iogui.py ├── test_gui_klgui.py ├── test_gui_main.py ├── test_gui_migui.py ├── test_gui_models.py ├── test_gui_ndgui.py ├── test_gui_pdgui.py ├── test_gui_ppgui.py ├── test_gui_psgui.py ├── test_gui_ssgui.py ├── test_gui_views.py ├── test_gui_widgets.py ├── test_gui_widgets.py.orig ├── test_gui_windows.py ├── test_io_binary.py ├── test_io_csv.py ├── test_io_ilg.py ├── test_io_multiple_files.py ├── test_io_text.py ├── test_io_textgrids.py ├── test_khorsi.py ├── test_kl.py ├── test_lexicon.py ├── test_lexicon.py.orig ├── test_mutual_information.py ├── test_neighborhood_density.py ├── test_phono_aligner.py ├── test_phono_edit_distance.py ├── test_phonosearch.py ├── test_phonotactic_probability.py ├── test_pred_of_dist.py ├── test_seg_select_dialog.py ├── test_spontaneous_classes.py ├── test_string_similarity.py └── test_utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/.DS_Store -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = corpustools 3 | omit = 4 | *_rc.py 5 | *_ui.py 6 | 7 | [report] 8 | exclude_lines = 9 | if __name__ == .__main__.: 10 | 11 | raise 
AssertionError 12 | raise NotImplementedError 13 | pass 14 | 15 | if sys\.platform 16 | if hasattr\(sys, 'frozen'\): 17 | 18 | except ImportError: 19 | 20 | if call_back 21 | if stop_check 22 | 23 | def run\(self\): 24 | 25 | def calc\(self\): 26 | 27 | def setResults 28 | 29 | if dialog\.exec_(): 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG] Brief description of the bug" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Sample corpus file** 14 | A sample file you created the corpus with. Note GitHub only supports .txt files. Please send the sample file to us at PCTbugs@gmail.com if your file is in a format not supported by GitHub. 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 1. Go to '...' 19 | 2. Click on '....' 20 | 3. Scroll down to '....' 21 | 4. See error 22 | 23 | **Expected behavior** 24 | A clear and concise description of what you expected to happen. 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Operating system and PCT version** 30 | - OS: [e.g. macOS 10.15.7] 31 | - PCT version: [e.g. v1.4.1] 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[ENC] Brief description of the feature" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | !requirements.txt 3 | *.corpus 4 | *.feature 5 | .idea/ 6 | *.iml 7 | *.exe 8 | *.msi 9 | *.app 10 | *.xls 11 | *.xlsx 12 | *.textgrid 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Documentation 18 | docs/build/ 19 | docs/source/generate/ 20 | docs/source/generated/ 21 | 22 | # Generated test files 23 | tests/data/export 24 | 25 | # Packages 26 | *.egg 27 | *.egg-info 28 | dist 29 | build 30 | eggs 31 | parts 32 | var 33 | sdist 34 | develop-eggs 35 | .installed.cfg 36 | lib 37 | lib64 38 | 39 | # Installer logs 40 | pip-log.txt 41 | 42 | # Unit test / coverage reports 43 | .coverage 44 | .tox 45 | nosetests.xml 46 | 47 | # Translations 48 | *.mo 49 | 50 | # Mr Developer 51 | .mr.developer.cfg 52 | .project 53 | .pydevproject 54 | *.bat 55 | *.txt 56 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | # also https://blog.readthedocs.com/migrate-configuration-v2/ 5 | # also https://docs.readthedocs.io/en/stable/config-file/index.html 6 | 7 | # Required 8 | version: 2 9 | 10 | # Set the OS, Python version and other tools 
you might need 11 | build: 12 | os: ubuntu-22.04 13 | tools: 14 | python: "3.9" 15 | #python: "3.12" 16 | # You can also specify other tool versions: 17 | # nodejs: "20" 18 | # rust: "1.70" 19 | # golang: "1.20" 20 | 21 | # Build documentation in the docs/ directory with Sphinx 22 | sphinx: 23 | configuration: docs/source/conf.py 24 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 25 | # builder: "dirhtml" 26 | # Fail on all warnings to avoid broken references 27 | # fail_on_warning: true 28 | 29 | # Optionally build your docs in additional formats such as PDF and ePub 30 | # formats: 31 | # - pdf 32 | # - epub 33 | 34 | # We recommend specifying your dependencies to enable reproducible builds: 35 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 36 | python: 37 | install: 38 | - requirements: requirements.txt 39 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | - "3.5" 5 | branches: 6 | only: 7 | - master 8 | - develop 9 | notifications: 10 | email: false 11 | 12 | sudo: required 13 | 14 | before_install: 15 | - sudo add-apt-repository --yes ppa:beineri/opt-qt542 16 | - sudo apt-get update -qq 17 | - sudo apt-get install qt54base 18 | - source /opt/qt54/bin/qt54-env.sh 19 | - bash continuous-integration/travis/install.sh 20 | 21 | install: 22 | - export PATH="$HOME/cachedir/miniconda/bin:$PATH" 23 | - source activate test-environment 24 | - which python 25 | - conda list 26 | - python setup.py install 27 | 28 | before_script: 29 | - ls $HOME 30 | - export DISPLAY=:99.0 31 | - sh -e /etc/init.d/xvfb start 32 | - sleep 3 33 | 34 | # Run test 35 | script: 36 | - py.test -v --cov corpustools tests/ 37 | 38 | # Calculate coverage 39 | after_success: 40 | - coveralls 41 | 42 | cache: 43 | directories: 44 | - 
$HOME/cachedir 45 | apt: true 46 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | recursive-include docs *.html *.css *.png *.gif *.ico *.jpeg 4 | recursive-include examples *.txt 5 | -------------------------------------------------------------------------------- /PCT.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | resources/object-flip-horizontal.png 5 | 6 | 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Phonological CorpusTools 2 | ======================== 3 | 4 | [![Build Status](https://travis-ci.org/PhonologicalCorpusTools/CorpusTools.svg?branch=master)](https://travis-ci.org/PhonologicalCorpusTools/CorpusTools?branch=master) 5 | [![Coverage Status](https://coveralls.io/repos/PhonologicalCorpusTools/CorpusTools/badge.svg?branch=master)](https://coveralls.io/r/PhonologicalCorpusTools/CorpusTools?branch=master) 6 | 7 | [![Documentation Status](https://readthedocs.org/projects/corpustools/badge/?version=latest)](https://readthedocs.org/projects/corpustools/?badge=latest) 8 | 9 | This document contains installation instructions for Phonological 10 | CorpusTools (PCT). For a description of available functionality, please 11 | refer to the documentation available on ReadTheDocs 12 | (http://corpustools.readthedocs.org/en/master/). 13 | 14 | 15 | ## Standard installation (executable) 16 | 17 | ### Windows 18 | 19 | (NOTE: This method requires that you are running a 64-bit version of 20 | windows. You can check this by in Control Panel -> System and 21 | Security -> System.) 
22 | 23 | Download the latest executable (`.exe`) file from the Phonological CorpusTools 24 | [release page](https://github.com/PhonologicalCorpusTools/CorpusTools/releases). 25 | Double-click this file to run PCT. 26 | 27 | ### Mac OS X 28 | 29 | Download the .dmg for the latest release (e.g. Phonological.CorpusTools-.dmg 30 | from the Phonological CorpusTools [releases page](https://github.com/PhonologicalCorpusTools/CorpusTools/releases). 31 | Install Phonological CorpusTools by dragging the app into the Applications 32 | directory. 33 | 34 | ### Linux 35 | 36 | There is currently no executable option available for Linux operating systems. 37 | Please use the fallback installation method below to install from source. 38 | 39 | 40 | ## Fallback installation 41 | 42 | ### Windows, Mac OS X, or Linux 43 | 44 | 1. Install Python 3.3 or higher (Python 3.10 recommended) if your system does not have Python: https://www.python.org/downloads/ 45 | 46 | 2. Download the latest source code for Phonological CorpusTools by clicking on `Code` > `` (see image below). 47 | ![image](https://github.com/user-attachments/assets/b4bf61c6-87e9-4830-b396-0dc7cc1783bc) 48 | 49 | If you want to download a specific version, go to [releases page](https://github.com/PhonologicalCorpusTools/CorpusTools/releases) 50 | and Download `Source code (zip)` or `Source code (tar.gz)` under `Assets`. 51 | 52 | 53 | 4. Unpack the source code in a directory of your choice. 54 | 55 | 5. Open a terminal (or command prompt for Windows) and `cd` into the directory. 56 | 57 | 6. Use the following command to install the dependencies 58 | ```bash 59 | pip install -r requirements.txt 60 | ``` 61 | 62 | ### Windows and Mac OS X 63 | 6. Once all dependencies are installed, run this command to create an executable. 64 | ```bash 65 | pyinstaller pct.spec 66 | ``` 67 | 7. Now you should be able to run PCT using the icon in the `dist` directory. 68 | 69 | ### Linux 70 | 6. 
import os
import sys

# Put the directory two levels above this file's real path (the parent of
# bin/) first on sys.path before importing the corpustools GUI module.
base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0, base)
import corpustools.gui.main as Main  # supplies MainWindow and QApplicationMessaging

if __name__ == '__main__':
    app = Main.QApplicationMessaging(sys.argv)
    if not app.isRunning():
        # Build the main window, hook its cleanUp to application shutdown,
        # show it and run the event loop until exit.
        main = Main.MainWindow(app)
        app.aboutToQuit.connect(main.cleanUp)
        app.setActiveWindow(main)
        main.show()
        sys.exit(app.exec_())
    elif len(sys.argv) > 1:
        # isRunning() was true: pass the first command-line argument on
        # through sendMessage instead of opening a window.
        app.sendMessage(sys.argv[1])
    else:
        # No argument given: send the fixed 'ARISE' message.
        app.sendMessage('ARISE')
12 | test -f "dist/CorpusTools.dmg" && rm "dist/CorpusTools.dmg" 13 | 14 | create-dmg \ 15 | --volname "Phonological CorpusTools" \ 16 | --volicon "resources/favicon.icns" \ 17 | --window-pos 200 120 \ 18 | --window-size 600 300 \ 19 | --icon-size 100 \ 20 | --icon "Phonological CorpusTools.app" 175 120 \ 21 | --hide-extension "Phonological CorpusTools.app" \ 22 | --app-drop-link 425 120 \ 23 | "dist/CorpusTools.dmg" \ 24 | "dist/dmg/" 25 | -------------------------------------------------------------------------------- /continuous-integration/travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | #check to see if miniconda folder is empty 4 | if [ ! -d "$HOME/cachedir/miniconda/envs/test-environment" ]; then 5 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 6 | chmod +x miniconda.sh 7 | ./miniconda.sh -b -p $HOME/cachedir/miniconda 8 | export PATH="$HOME/cachedir/miniconda/bin:$PATH" 9 | conda config --set always_yes yes --set changeps1 no 10 | conda config --add channels dsdale24 11 | conda update -q conda 12 | conda info -a 13 | conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION atlas numpy scipy pytest 14 | else 15 | echo "Miniconda already installed." 16 | fi 17 | 18 | source activate test-environment 19 | which python 20 | 21 | # check to see if sip folder is empty 22 | if [ ! -d "$HOME/cachedir/sip/bin" ]; then 23 | wget http://sourceforge.net/projects/pyqt/files/sip/sip-4.16.6/sip-4.16.6.tar.gz 24 | tar -xzf sip-4.16.6.tar.gz 25 | cd sip-4.16.6 && python configure.py -b $HOME/cachedir/sip/bin -e $HOME/cachedir/sip/include -v $HOME/cachedir/sip && make && make install 26 | else 27 | echo "Using cached sip directory." 28 | fi 29 | 30 | # check to see if pyqt5 folder is empty 31 | if [ ! 
def load_path_mapping(path):
    """Read a tab-delimited file of file paths and return its rows.

    Parameters
    ----------
    path : str
        Location of the tab-delimited mapping file. Every field on every
        row must be a path that exists on disk.

    Returns
    -------
    list of list of str
        One list of fields per row, in the order the rows appear.

    Raises
    ------
    OSError
        If any field names a path that does not exist.
    """
    mapping = []
    # newline='' leaves newline handling to the csv module, as the csv
    # documentation requires for file objects handed to csv.reader.
    with open(path, 'r', newline='') as handle:
        for row in csv.reader(handle, delimiter='\t'):
            # A separate loop name keeps the file handle from being rebound.
            for file_path in row:
                if not os.path.exists(file_path):
                    raise OSError(
                        'The file path \'{}\' does not exist.'.format(file_path))
            mapping.append(row)
    return mapping
#!/usr/bin/env python
"""Launch the Phonological CorpusTools graphical interface."""
import sys
import os

# This file lives in <repo>/corpustools/command_line/, so the repository root
# is three directory levels up. Putting the root first on sys.path makes the
# in-tree ``corpustools`` package importable; two levels up is the package
# directory itself, which does not. pct_corpus.py and pct_search.py climb
# three levels for the same reason.
base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
sys.path.insert(0,base)
print(base)
from corpustools.gui.main import MainWindow,QApplicationMessaging
from PyQt5.QtGui import QIcon

if sys.platform.startswith('win'):
    if getattr(sys, 'frozen', False):
        os.chdir(os.path.expanduser('~/Documents'))
        # Implementing dummy stdout and stderr for frozen Windows release
        class FakeSTD(object):
            """Stream stand-in whose write() and flush() do nothing."""
            def write(self, string):
                pass
            def flush(self):
                pass
        #This should fix stdout flush errors, but doesn't always
        #Also needs a small tweak to multiprocessing.process._bootstrap
        sys.stdout = FakeSTD()
        sys.stderr = FakeSTD()


def main():
    """Open the main window, or message the existing application instead.

    A ``QApplicationMessaging`` is created from ``sys.argv``. When its
    ``isRunning()`` returns true, the first command-line argument (or the
    fixed string ``'ARISE'`` when there is none) is passed to
    ``sendMessage`` and the function returns. Otherwise a ``MainWindow`` is
    created, given the ``:/favicon.png`` icon, shown, and the process exits
    with the status returned by the event loop.
    """
    app = QApplicationMessaging(sys.argv)
    if app.isRunning():
        if len(sys.argv) > 1:
            app.sendMessage(sys.argv[1])
        else:
            app.sendMessage('ARISE')
    else:
        # Named ``window`` rather than ``main`` so the local does not shadow
        # this function's own name.
        window = MainWindow(app)
        window.setWindowIcon(QIcon(':/favicon.png'))
        app.aboutToQuit.connect(window.cleanUp)

        app.setActiveWindow(window)
        window.show()
        sys.exit(app.exec_())


if __name__ == '__main__':
    main()
def path_leaf(path):
    """Return the last component of *path*, ignoring one trailing separator.

    ``ntpath`` is used, so both forward slashes and backslashes are treated
    as separators whatever platform the script runs on.
    """
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)


def _beside_script(name):
    """Return *name* joined onto this script's directory; '' is left as ''."""
    if not name:
        # Joining '' would yield the script directory itself; keep the
        # "no file given" value exactly as the primary attempt passes it.
        return name
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), name)


def main():
    """Build a corpus from a CSV file and save it as a ``.corpus`` binary.

    Command-line arguments: the CSV file name, an optional feature file name
    (``-f``) and an optional column delimiter (``-d``), which may be written
    with escape sequences such as ``\\t``.

    The CSV path is first used exactly as given, and the result is saved
    under the CSV's base name with a ``.corpus`` extension (no directory
    part). If that attempt raises ``FileNotFoundError``, the CSV and feature
    file names are resolved relative to this script's directory and the
    result is saved next to the resolved CSV file.

    Raises
    ------
    FileNotFoundError
        If the fallback attempt cannot find the input either.
    """
    #### Parse command-line arguments
    parser = argparse.ArgumentParser(description = \
             'Phonological CorpusTools: corpus object creation CL interface')
    parser.add_argument('csv_file_name', help='Name of input CSV file')
    parser.add_argument('-f', '--feature_file_name', default = '', type=str, help='Name of input feature file')
    parser.add_argument('-d', '--delimiter', default=None, type=str, help='Character that delimits columns in the input file')
    # parser.add_argument('-t', '--trans_delimiter', default=None, type=str, help='Character that delimits segments in the input file')

    args = parser.parse_args()

    ####
    if args.delimiter:
        # Turn escape sequences typed on the command line into the real character.
        delimiter = codecs.getdecoder("unicode_escape")(args.delimiter)[0]
    else:
        delimiter = args.delimiter

    try:  # Path usable as given
        filename = path_leaf(os.path.splitext(args.csv_file_name)[0])
        corpus = load_corpus_csv(args.csv_file_name, args.csv_file_name,
                                 delimiter=delimiter,
                                 feature_system_path=args.feature_file_name)
        save_binary(corpus, filename+'.corpus')
    except FileNotFoundError:
        # Fall back to files sitting beside this script. os.path.join picks
        # the platform's separator, replacing the separate '/' and '\\' attempts.
        csv_path = _beside_script(args.csv_file_name)
        filename = os.path.splitext(csv_path)[0]
        corpus = load_corpus_csv(args.csv_file_name, csv_path,
                                 delimiter=delimiter,
                                 feature_system_path=_beside_script(args.feature_file_name))
        save_binary(corpus, filename+'.corpus')
arguments 18 | parser = argparse.ArgumentParser(description = \ 19 | 'Phonological CorpusTools: phonological search CL interface') 20 | parser.add_argument('corpus_file_name', help='Name of corpus file') 21 | parser.add_argument('sequence', 22 | help=('Sequence to search for, with segment positions separated by commas,' 23 | +' and with sets separated by slashes.' 24 | +' E.g. the input i will return all words with the segment [i], while' 25 | +' the input a/o,t/p,i,n will return all words with [atin], [apin],' 26 | +' [otin], or [opin].')) 27 | parser.add_argument('-s', '--sequence_type', default='transcription', 28 | help="The attribute of Words to search within. Normally this will be the transcription, but it can also be the spelling or a user-specified tier.") 29 | parser.add_argument('-o', '--outfile', help='Name of output file') 30 | 31 | args = parser.parse_args() 32 | 33 | #### 34 | 35 | try: 36 | home = os.path.expanduser('~') 37 | corpus = load_binary(os.path.join(home, 'Documents', 'PCT', 'CorpusTools', 'CORPUS', args.corpus_file_name)) 38 | except FileNotFoundError: 39 | corpus = load_binary(args.corpus_file_name) 40 | 41 | split_sequence = [tuple(pos.split('/')) for pos in args.sequence.split(',')] 42 | middle = split_sequence[0] 43 | try: 44 | rhs = split_sequence[1:] 45 | except: 46 | rhs = None 47 | if len(rhs) == 0: 48 | rhs = None 49 | 50 | ef = EnvironmentFilter(middle, None, rhs) 51 | 52 | results = phonological_search(corpus, [ef], sequence_type=args.sequence_type) 53 | 54 | if args.outfile: 55 | with open(args.outfile, 'w') as outfile: 56 | for result in results: 57 | outfile.write(' '.join(getattr(result[0], args.sequence_type))+'\n') 58 | print('Search results written to output file.') 59 | else: 60 | print('No output file name provided.') 61 | print('Your search produced the results below:') 62 | for result in results: 63 | print('{}'.format(result[0])) 64 | print('Total number of results: {}'.format(str(len(results)))) 65 | print('Please 
def path_leaf(path):
    """Return the last component of *path*, ignoring one trailing separator."""
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)


def main():
    """Visualize a segment-distance file named on the command line.

    The distance file is opened first exactly as given and, if that raises
    ``FileNotFoundError``, relative to the directory containing this script.
    Its rows are read with ``csv.DictReader`` and handed to ``visualize``.

    Raises:
        FileNotFoundError: if the file cannot be found in either location.
    """
    #### Parse command-line arguments
    parser = argparse.ArgumentParser(description = \
             'Phonological CorpusTools: visualization of segment inventory')
    parser.add_argument('distance_file_name', help='Name of input distance file')
    parser.add_argument('-m', '--visualization_method', default='pca', help="Method of visualization: any of principal components analysis ('pca'), hierarchical clustering ('hc'), or a heatmap ('hm')")
    parser.add_argument('-v', '--value_column', default='result', type=str, help='header for column containing distance values')
    parser.add_argument('-s', '--segment_column', default='segment(s)', type=str, help='header for column containing segment pairs')
    # The help text here previously duplicated the --segment_column text.
    parser.add_argument('-d', '--column_delimiter', default='\t', type=str, help='Character that delimits columns in the input file')

    args = parser.parse_args()

    ####

    # Allow escapes such as "\t" to be typed literally on the command line.
    delimiter = codecs.getdecoder("unicode_escape")(args.column_delimiter)[0]

    script_dir = os.path.dirname(os.path.realpath(__file__))
    # os.path.join picks the right separator, so one fallback covers both the
    # Unix and the Windows case.  The fallback uses the argument that argparse
    # actually defines (distance_file_name).
    candidates = [
        args.distance_file_name,
        os.path.join(script_dir, args.distance_file_name),
    ]

    last = len(candidates) - 1
    for attempt, path in enumerate(candidates):
        try:
            # csv.DictReader needs an open file, not a path string.
            infile = open(path, newline='')
        except FileNotFoundError:
            if attempt == last:
                raise
            continue
        with infile:
            reader = csv.DictReader(infile, delimiter=delimiter)
            visualize(reader, args.visualization_method, args.value_column, args.segment_column)
        return
def check_for_errors(function):
    """Decorate a method so its exceptions go to ``self.handleError``.

    The wrapped callable is expected to receive its owner as the first
    positional argument.  On success the wrapped function's return value is
    passed through.  If it raises:

    * a ``PCTError`` is given to ``self.handleError`` unchanged;
    * any other ``Exception`` is wrapped in ``PCTPythonError`` first.

    When the owner has no ``handleError`` attribute (or there is no
    positional argument to inspect) the original exception propagates.
    After a handled error the wrapper returns ``None``.
    """
    @functools.wraps(function)
    def do_check(*args, **kwargs):
        # Avoid an IndexError of our own when called without positionals.
        owner = args[0] if args else None
        try:
            # Propagate the result instead of silently dropping it.
            return function(*args, **kwargs)
        except PCTError as e:
            if not hasattr(owner, 'handleError'):
                raise
            owner.handleError(e)
        except Exception as e:
            if not hasattr(owner, 'handleError'):
                raise
            owner.handleError(PCTPythonError(e))
    return do_check
def save_minimal_pairs(output_filename, to_output, write_header=True):
    """Write minimal pairs as tab-separated rows.

    Args:
        output_filename: either a path (``str`` or ``os.PathLike``), which is
            opened for writing as UTF-8 with a BOM and closed before
            returning, or an already-open text file object, which is written
            to and left open for the caller.
        to_output: iterable of 3-tuples whose last item is a mapping from a
            segment pair to an iterable of word pairs; each word in a word
            pair is indexed as ``[0]`` and ``[1]`` for the two word columns.
            The first two tuple items are ignored.
        write_header: when true, emit the column-name row first.
    """
    import os  # local import: only needed for the os.PathLike check below

    if isinstance(output_filename, (str, os.PathLike)):
        outf = open(output_filename, mode='w', encoding='utf-8-sig', newline='')
        needs_closed = True
    else:
        outf = output_filename
        needs_closed = False

    try:
        writer = csv.writer(outf, delimiter='\t')
        if write_header:
            writer.writerow(['FIRST_SEGMENT', 'SECOND_SEGMENT',
                             'FIRST_WORD', 'FIRST_WORD_TRANSCRIPTION',
                             'SECOND_WORD', 'SECOND_WORD_TRANSCRIPTION'])
        for _, _, ret_dict in to_output:
            for seg_pair, word_pair_set in ret_dict.items():
                for word_pair in word_pair_set:
                    writer.writerow([seg_pair[0], seg_pair[1],
                                     word_pair[0][0], word_pair[0][1],
                                     word_pair[1][0], word_pair[1][1]])
    finally:
        # Close a file we opened ourselves even if writing failed part-way.
        if needs_closed:
            outf.close()
def click(self, index):
    """Swap the row that *index* points at, then force the view to redraw.

    The model's ``switchRow`` is called with the index's row.  Afterwards a
    throw-away ``('xx', 'xx')`` row is appended and the last row is removed
    again; the original author added this add/remove pair as a workaround
    for the table not refreshing on MacOS.
    """
    model = index.model()
    model.switchRow(index.row())

    # Refresh workaround: append a dummy row, then drop the final row.
    model.addRow(row=('xx', 'xx'))
    last_row = len(model.rows) - 1
    model.removeRow(ind=last_row)
def __init__(self, results_dict):
    """Collect segments and weights from *results_dict*, then plot them.

    Each item of *results_dict* is indexed with the keys
    ``'First segment'``, ``'Second segment'`` and ``'Result'``.  Every
    distinct segment becomes a vertex and every item becomes a weighted
    edge.  The graph is drawn on a circular layout with each edge's width
    set to ``1 - (50 * weight)``.
    """
    # NOTE(review): ``ig`` is only mentioned in a commented-out
    # ``import igraph as ig`` in the file header, so this will raise
    # NameError unless that import is restored.
    self.graph = ig.Graph()
    self.segments = []
    self.fl_weights = []

    for entry in results_dict:
        first = entry['First segment']
        second = entry['Second segment']
        self.fl_weights.append([first, second, entry['Result']])
        # Keep the vertex list free of duplicates, in first-seen order.
        for segment in (first, second):
            if segment not in self.segments:
                self.segments.append(segment)

    self.construct_graph()

    circle = self.graph.layout_circle()
    labels = self.graph.vs["name"]
    widths = [1 - (50 * weight) for weight in self.graph.es["weight"]]
    ig.plot(self.graph, layout=circle, vertex_label=labels, edge_width=widths)


def construct_graph(self):
    """Populate ``self.graph`` from ``self.segments`` and ``self.fl_weights``."""
    graph = self.graph
    graph.add_vertices(len(self.segments))
    graph.vs["name"] = self.segments

    for first_name, second_name, weight in self.fl_weights:
        source = graph.vs.find(name=first_name)
        target = graph.vs.find(name=second_name)
        graph.add_edge(source.index, target.index, weight=weight)
# ``expectedFailure`` is used as a plain decorator: calling it with no
# argument raises TypeError while the class body is being executed.
@unittest.expectedFailure
def test_Kwargs(self):
    """Click the dialog's new-table button and check ``frequency_cutoff``.

    Marked as an expected failure by the original author.
    """
    QTest.mouseClick(self.dialog.newTableButton, Qt.LeftButton)
    self.assertEqual(self.dialog.kwargs['frequency_cutoff'], '2')
class QHLine(QFrame):
    """A ``QFrame`` configured as a sunken horizontal line."""

    def __init__(self):
        super().__init__()
        self.setFrameShape(QFrame.HLine)
        self.setFrameShadow(QFrame.Sunken)


class QVLine(QFrame):
    """A ``QFrame`` configured as a sunken vertical line."""

    def __init__(self):
        super().__init__()
        self.setFrameShape(QFrame.VLine)
        self.setFrameShadow(QFrame.Sunken)
Please double check that it is installed.")) 56 | 57 | import locale 58 | import sys 59 | if sys.platform.startswith('win'): 60 | locale_string = 'English_US' 61 | else: 62 | locale_string = 'en_US.UTF-8' 63 | locale.setlocale(locale.LC_ALL, locale_string) 64 | 65 | import time 66 | -------------------------------------------------------------------------------- /corpustools/gui/imports.py.orig: -------------------------------------------------------------------------------- 1 | AUDIO_ENABLED = True 2 | HELP_ENABLED = True 3 | try: 4 | from PyQt5.QtCore import (QRectF, Qt, QModelIndex, QItemSelection, 5 | pyqtSignal as Signal,QThread,QAbstractTableModel, 6 | QSize, QSettings,QPoint, QItemSelectionModel, 7 | QSortFilterProxyModel, QAbstractItemModel, 8 | QSharedMemory, QEvent, QIODevice, QProcess, QUrl, QTime, 9 | <<<<<<< HEAD 10 | QStringListModel) 11 | from PyQt5.QtGui import (QFont, QKeySequence, QPainter, QFontMetrics, QPen, 12 | QRegion,QStandardItemModel,QStandardItem, QIcon, QPixmap, 13 | QDesktopServices) 14 | ======= 15 | QStringListModel, QVariant, QMimeData, QByteArray) 16 | from PyQt5.QtGui import (QFont, QKeySequence, QPainter, QFontMetrics, QPen, QDrag, 17 | QRegion,QStandardItemModel,QStandardItem, QIcon, QPixmap) 18 | >>>>>>> refs/remotes/origin/InventoryTableView 19 | from PyQt5.QtWidgets import (QMainWindow, QLayout, QHBoxLayout, QLabel, QAction, 20 | QApplication, QWidget, QMessageBox,QSplitter, 21 | QDialog, QListWidget, QGroupBox,QVBoxLayout, 22 | QPushButton, QFrame, QGridLayout,QRadioButton, 23 | QFormLayout, QLineEdit, QFileDialog, QComboBox, 24 | QProgressDialog, QCheckBox, QMessageBox,QTableView, 25 | QAbstractItemView, QHeaderView, QDockWidget, QTreeView, 26 | QStyle, QMenu, QSizePolicy, QButtonGroup,QTabWidget, 27 | QTableWidget, QToolBar, QStyledItemDelegate, QDataWidgetMapper, 28 | QSlider, QItemDelegate, QScrollArea, QBoxLayout, QStackedWidget, 29 | QCompleter, QTableWidgetItem) 30 | from PyQt5.QtNetwork import QLocalSocket, 
# -*- mode: python -*-
# Build specification for the 'main' executable.
# NOTE(review): Analysis, PYZ, EXE and COLLECT are not defined or imported
# in this file; presumably the build tool that executes the spec injects
# them - confirm before running this file directly.

block_cipher = None


# Entry script is main.py.  pathex is an absolute path on one particular
# Windows machine and has to be adjusted when building elsewhere.
a = Analysis(['main.py'],
             pathex=['C:\\Users\\Scott\\Documents\\GitHub\\CorpusTools\\corpustools\\gui'],
             binaries=None,
             datas=None,
             hiddenimports=[],
             hookspath=[],
             runtime_hooks=[],
             excludes=[],
             win_no_prefer_redirects=False,
             win_private_assemblies=False,
             cipher=block_cipher)
pyz = PYZ(a.pure, a.zipped_data,
             cipher=block_cipher)
# Binaries are excluded from the executable itself (exclude_binaries=True)
# and passed to COLLECT below instead.
exe = EXE(pyz,
          a.scripts,
          exclude_binaries=True,
          name='main',
          debug=False,
          strip=False,
          upx=True,
          console=True )
coll = COLLECT(exe,
               a.binaries,
               a.zipfiles,
               a.datas,
               strip=False,
               upx=True,
               name='main')
class PSWorker(FunctionWorker):
    """Worker that runs ``phonological_search`` with ``self.kwargs``."""

    def run(self):
        """Run the search and report the outcome through the worker's signals.

        Emits ``errorEncountered`` with the exception if the search fails
        (non-``PCTError`` exceptions are wrapped in ``PCTPythonError``),
        ``finishedCancelling`` if ``self.stopped`` is set once the search
        returns, and otherwise ``dataReady`` with the results.
        """
        time.sleep(0.1)
        search_args = self.kwargs
        try:
            self.results = phonological_search(**search_args)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            self.errorEncountered.emit(PCTPythonError(e))
            return

        if self.stopped:
            self.finishedCancelling.emit()
        else:
            self.dataReady.emit(self.results)
def open_url(url):
    """Open *url* with a 30 second timeout and return the response object.

    If the response carries a ``content-length`` header that parses as an
    integer, it is stored on the response as ``size``; when the header is
    absent ``size`` is set to ``None``; when it is present but not an
    integer no ``size`` attribute is set.
    """
    f = urlopen(url, timeout=30)
    try:
        size = f.headers.get("content-length", None)
        if size is not None:
            size = int(size)
    except ValueError:
        pass
    else:
        f.size = size
    return f


def join_app_version(appname, version, platform):
    """Join an app name, version and platform into a version directory name.

    For example, ("app-name","0.1.2","win32") => app-name-0.1.2.win32
    """
    return "%s-%s.%s" % (appname, version, platform,)


def find_versions(download_url):
    """Return the version strings of the win-amd64 downloads linked at a URL.

    The page at *download_url* is fetched, decoded as UTF-8 and scanned
    (case-insensitively) for ``href`` attributes pointing at
    ``PhonologicalCorpusTools-<version>.win-amd64`` files ending in
    ``.zip``, ``.exe`` or ``.from-<version>.patch``.  The ``<version>`` part
    of every match is returned, in page order.
    """
    # NOTE(review): inside the character class ``\.-_`` is a *range* from
    # '.' to '_', not the three literal characters; left as-is because
    # changing it would alter which links match - confirm the intent.
    version_re = "[a-zA-Z0-9\\.-_]+"
    # The groups need names: the version is read back with group("version").
    version_group = "(?P<version>%s)" % (version_re,)
    name_re = "(%s|%s)" % ("PhonologicalCorpusTools", quote("PhonologicalCorpusTools"))
    appname_re = join_app_version(name_re, version_group, "win-amd64")
    filename_re = "%s\\.(zip|exe|from-(?P<from_version>%s)\\.patch)"
    filename_re = filename_re % (appname_re, version_re,)
    link_re = "href=['\"]?(?P<href>([^'\"]*/)?%s)['\"]?" % (filename_re,)

    df = open_url(download_url)
    try:
        downloads = df.read().decode("utf-8-sig")
    finally:
        df.close()

    return [match.group("version")
            for match in re.finditer(link_re, downloads, re.I)]
def load_words_neighden(path, file_sequence_type='spelling'):
    """Read *path* and return its lines with surrounding whitespace removed.

    The file is decoded as UTF-8 (a leading BOM is dropped).  Blank lines
    are kept as empty strings.  *file_sequence_type* is accepted but not
    used by this function.
    """
    with open(path, 'r', encoding='utf-8-sig') as handle:
        return [raw.strip() for raw in handle]
def print_all_neighden_results(output_filename, neighors_dict):
    """Write one tab-separated line per word with its neighbourhood density.

    Columns are spelling, transcription, number of neighbours and a
    comma-separated neighbour list.  Each key of *neighors_dict* is split on
    ``'['``: when that yields exactly two parts they are used as spelling
    and transcription, otherwise the whole key is the spelling and the
    transcription is empty.  The last two characters of the transcription
    part are cut off and ``'.'`` is removed from it and from every
    neighbour's string form.
    """
    with open(output_filename, mode='w', encoding='utf-8-sig') as outf:
        outf.write('Spelling\tTranscription\tDensity\tNeighbours' + '\n')
        for word, neighbors in neighors_dict.items():
            pieces = word.split('[')
            if len(pieces) == 2:
                spelling, transcription = pieces
            else:
                spelling, transcription = word, ''
            transcription = transcription[:-2].replace('.', '')

            if neighbors:
                density = str(len(neighbors))
                listing = ', '.join([str(n).replace('.', '') for n in neighbors])
            else:
                density, listing = '0', ''

            outf.write('\t'.join([spelling, transcription, density, listing]) + '\n')
-------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import sphinx.util.texescape as te 3 | 4 | replacements = r''' 5 | ɔ \textipa{O} 6 | ɪ \textipa{I} 7 | ʊ \textipa{U} 8 | ʃ \textipa{S} 9 | ː \textipa{:} 10 | ʒ \textipa{Z} 11 | ɚ \textipa{\textrhookschwa} 12 | ɑ \textipa{A} 13 | ŋ \textipa{N} 14 | æ \textipa{\ae} 15 | ə \textipa{@} 16 | ɹ \textipa{\*r} 17 | ɡ \textipa{g} 18 | ∅ $\varnothing$ 19 | ɛ \textipa{E} 20 | ā \textipa{\=a} 21 | ''' 22 | 23 | def setup(app): 24 | replacement_list = [ 25 | tuple(line.strip().split()) 26 | for line in replacements.strip().splitlines() 27 | ] 28 | 29 | te.tex_replacements += replacement_list 30 | te.init() 31 | -------------------------------------------------------------------------------- /corpustools/symbolsim/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /corpustools/symbolsim/io.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def print_pairs_results(output_filename, related_data_return): 4 | with open(output_filename, mode='w', encoding='utf-8-sig') as outf: 5 | for w1, w2, score in related_data_return: 6 | outf.write('{}\t{}\t{}\n'.format(w1, w2, score)) 7 | 8 | def print_one_word_results(output_filename, query, string_type, related_data, min_rel, max_rel): 9 | with open(output_filename, mode='w', encoding='utf-8-sig') as outf: 10 | for score, word in related_data: 11 | if isinstance(word, str): 12 | w = word 13 | else: 14 | w = getattr(word, string_type) 15 | 16 | if not isinstance(w, str): 17 | w = ''.join([seg.symbol for seg in w]) 18 | 19 | if isinstance(score, list): 20 | score = score[0] 21 | 22 | outf.write(w + '\t' + str(score) + '\n') 23 | 24 | def read_pairs_file(path): 25 | output = [] 26 | with open(path,'r') as f: 27 | for line in f: 28 | fields = line.strip().split('\t') 
29 | output.append(fields) 30 | return output 31 | -------------------------------------------------------------------------------- /corpustools/symbolsim/phono_edit_distance.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from corpustools.symbolsim.phono_align import Aligner 4 | 5 | def phono_edit_distance(word1, word2, sequence_type, features): 6 | """Returns an analogue to Levenshtein edit distance but uses 7 | phonological _features instead of characters 8 | 9 | Parameters 10 | ---------- 11 | word1: Word 12 | Word object containing transcription tiers which will be compared 13 | to another word containing transcription tiers 14 | 15 | word2: Word 16 | The other word containing transcription tiers to which word1 will 17 | be compared 18 | 19 | sequence_type: string 20 | Name of the sequence type (transcription or a tier) to use for comparisons 21 | 22 | _features: FeatureMatrix 23 | FeatureMatrix that contains all the segments in both transcriptions 24 | to be compared 25 | 26 | Returns 27 | ------- 28 | float 29 | the phonological edit distance between two words 30 | """ 31 | 32 | w1 = word1 33 | w2 = word2 34 | 35 | a = Aligner(features_tf=True, features=features) 36 | 37 | m = a.make_similarity_matrix(w1, w2) 38 | 39 | return m[-1][-1]['f'] 40 | 41 | -------------------------------------------------------------------------------- /corpustools/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import copy 3 | 4 | 5 | from corpustools.corpus.classes.spontaneous import WordToken, Discourse 6 | 7 | def generate_discourse(corpus): 8 | kwargs = dict(name=f'{corpus.name} discourse', 9 | wav_path=corpus.wav_path) 10 | for a in corpus.attributes: 11 | if a.att_type == 'tier': 12 | kwargs['transcription_name'] = a.name 13 | elif a.att_type == 'spelling': 14 | kwargs['spelling_name'] = a.name 15 | if 'transcription_name' in kwargs and 'spelling_name' in 
kwargs: 16 | break 17 | 18 | d = Discourse(kwargs=kwargs) 19 | lookup_list = list() 20 | for k in corpus.keys(): 21 | lookup_list.extend([k for x in range(int(corpus[k].frequency))]) 22 | random.shuffle(lookup_list) 23 | end = None 24 | for i, k in enumerate(lookup_list): 25 | word = d.lexicon.get_or_create_word(spelling = corpus[k].spelling, 26 | transcription = corpus[k].transcription) 27 | word.frequency += 1 28 | wordtoken = WordToken(word = word,begin = i) 29 | d.lexicon[k].wordtokens.append(wordtoken) 30 | d.add_word(wordtoken) 31 | 32 | 33 | return d 34 | 35 | 36 | -------------------------------------------------------------------------------- /corpustools/visualize.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | import pdb 4 | 5 | from scipy.cluster.hierarchy import linkage, dendrogram 6 | 7 | from matplotlib import pyplot as plt 8 | from matplotlib.collections import LineCollection 9 | import seaborn as sns 10 | 11 | from sklearn.decomposition import PCA 12 | 13 | 14 | def organize_data(reader, visualization_method, value_column, segment_column): 15 | raw_data = {tuple([x[1:-1] for x in re.findall("'.+?'", r[segment_column])]): float(r[value_column]) for r in reader} 16 | all_segments = list(set([segment for pair in raw_data for segment in pair])) 17 | 18 | # ## TEMP: REMOVE VOWELS 19 | # VOWELS = ['IY', 'UW', 'IH', 'EH', 'ER', 'AW', 'AY', 'EY', 'OW', 'OY', 'AA', 'AE', 'AH', 'AO', 'UH'] 20 | # all_segments = [s for s in all_segments if s not in VOWELS] 21 | # ## 22 | 23 | if visualization_method in ['pca', 'hm']: 24 | m = np.zeros(shape=(len(all_segments),len(all_segments)), dtype=float) 25 | for i, s1 in enumerate(all_segments): 26 | for j, s2 in enumerate(all_segments): 27 | if j > i: 28 | try: 29 | value = raw_data[(s1, s2)] 30 | except KeyError: 31 | value = raw_data[(s2, s1)] 32 | m[i][j] = value 33 | m[j][i] = value 34 | m /= np.amax(m) 35 | return (all_segments, m) 36 | 
37 | elif visualization_method == 'hc': 38 | a = np.array([], dtype=float) 39 | for i, s1 in enumerate(all_segments): 40 | for j, s2 in enumerate(all_segments): 41 | if j > i: 42 | try: 43 | value = raw_data[(s1, s2)] 44 | except KeyError: 45 | value = raw_data[(s2, s1)] 46 | a = np.append(a, value) 47 | a = (max(a) * 2) - a 48 | return (all_segments, a) 49 | 50 | 51 | 52 | def visualize(reader, visualization_method, value_column, segment_column): 53 | # original_data = {row['result']: row['segment(s)'] for row in reader} 54 | labels, data = organize_data(reader, visualization_method, value_column, segment_column) 55 | data_dict = {label: datum for label, datum in zip(labels, data)} 56 | 57 | if visualization_method == 'hc': 58 | link = linkage(data) 59 | dendrogram(link, leaf_label_func=lambda i: labels[i]) 60 | ax = plt.axes() 61 | ax.set_title('Segment pair functional load: hierarchical clustering') 62 | plt.gcf() 63 | plt.show() 64 | 65 | if visualization_method == 'hm': 66 | ax = sns.heatmap(data) 67 | ax.set_title('Segment pair functional load: heatmap') 68 | plt.xticks([p+0.5 for p in range(len(labels))], labels) 69 | plt.yticks([p+0.5 for p in range(len(labels))], reversed(labels)) 70 | plt.show() 71 | 72 | if visualization_method == 'pca': 73 | n = len(labels) 74 | data -= data.mean() 75 | clf = PCA(n_components=2) 76 | transformed = clf.fit_transform(data) 77 | 78 | # def get_sim(s1, s2): 79 | # i1 = labels.index(s1) 80 | # i2 = labels.index(s2) 81 | # print(similarities[i1][i2]) 82 | 83 | fig = plt.figure(1) 84 | ax = plt.axes([0., 0., 1., 1.]) 85 | ax.set_title('Segment pair functional load: first two principal components') 86 | 87 | plt.scatter(transformed[:, 0], transformed[:, 1], marker=',', c='b', s=0) 88 | 89 | for label, x, y in zip(labels, transformed[:, 0], transformed[:, 1]): 90 | plt.annotate( 91 | label, 92 | xy = (x, y), xytext = (0, 0), 93 | textcoords = 'offset points') 94 | 95 | plt.show() 96 | 97 | 
-------------------------------------------------------------------------------- /docs/images/favicon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/images/favicon.icns -------------------------------------------------------------------------------- /docs/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/images/favicon.png -------------------------------------------------------------------------------- /docs/images/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/images/icon.icns -------------------------------------------------------------------------------- /docs/images/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/images/logo.ico -------------------------------------------------------------------------------- /docs/images/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/images/logo.jpg -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | .. _about: 2 | 3 | .. image:: static/PCT_logo.png 4 | :height: 200px 5 | :align: center 6 | :target: # 7 | 8 | .. _PCT website: http://phonologicalcorpustools.github.io/CorpusTools/ 9 | 10 | .. 
_GitHub repository: https://github.com/PhonologicalCorpusTools/CorpusTools/ 11 | 12 | .. _kathleen.hall@ubc.ca: kathleen.hall@ubc.ca 13 | 14 | ***** 15 | About 16 | ***** 17 | 18 | Phonological CorpusTools allows for easy computation of phonological metrics 19 | used in the literature for any corpus of language that you provide. 20 | 21 | `PCT website`_ 22 | 23 | `GitHub repository`_ 24 | 25 | Contributors 26 | ------------ 27 | | 28 | | Kathleen Currie Hall (`kathleen.hall@ubc.ca`_) 29 | | Blake Allen 30 | | Edith Coates 31 | | Michael Fry 32 | | Serena Huang 33 | | Khia Johnson 34 | | Roger Lo 35 | | Scott Mackie 36 | | Michael McAuliffe 37 | | Stanley Nam 38 | | 39 | | 40 | | Department of Linguistics 41 | | The University of British Columbia 42 | 43 | Acknowledgments 44 | --------------- 45 | 46 | We give special thanks to Kevin McMullin for help throughout 47 | the first several years of this project; to Ivy Hu for help in updating the documentation; 48 | to Andy Wedel for help with the implementation of functional load; 49 | to Barbara Sennott, originally from Kispiox / Anspayax, for the use of her Gitksan 50 | love story as an example; to Paul Tupper, John Goldsmith, and Jason Riggle for 51 | discussion of mutual information; to Alfred Ko for help in compiling various 52 | transcription / feature files; and to Kenny Vaden, Marc Brysbaert, Bruce Hayes, 53 | and Jeff Mielke for the use of the IPHOD corpus, SUBTLEX frequencies, 54 | Hayes features, and P-base features (respectively) within PCT. Financial 55 | support for this project comes from a SSHRC Insight Development Grant to 56 | Kathleen Currie Hall. 57 | 58 | Citing PCT 59 | ---------- 60 | 61 | Please see :ref:`citing_pct` for information. 62 | -------------------------------------------------------------------------------- /docs/source/bigram_selector.rst: -------------------------------------------------------------------------------- 1 | .. 
_bigram_selection: 2 | 3 | ************************* 4 | Bigram selection 5 | ************************* 6 | 7 | The analysis functions :ref:`mutual_information` and 8 | :ref:`transitional_probability` are calculated from bigrams, or 9 | ordered pairs of segments. Both functions use a bigram selector window 10 | that lets a user specify a "left-hand side" and "right-hand side" from the 11 | inventory of a loaded corpus, or use features in order to select multiple 12 | left- or right-hand sides. The selector will return either one or multiple 13 | bigrams to the analysis function's main window, where they will be displayed 14 | in a table. 15 | 16 | .. _bigram_select_steps: 17 | 18 | Selecting bigrams using the GUI 19 | ------------------------------- 20 | 1. From the analysis function dialog, click 21 | on "Add Bigram." A new window will open, containing an inventory of all the 22 | segments in the loaded corpus, for selecting the left- and 23 | right-hand sides of a bigram: 24 | 25 | 26 | .. image:: static/bigram_selector.png 27 | :width: 90% 28 | :align: center 29 | 30 | 2. **Using the inventory:** Selecting a single segment from both sides 31 | will add a single bigram, while selecting multiple segments from either 32 | side will add a cartesian product of the possible left- and right-hand 33 | sides. 34 | 3. **Using features:** Use the box above the inventory to select segments 35 | by feature, for either the left- or right-hand side. Once the feature 36 | is written into the text box, the corresponding segments will be 37 | highlighted in the inventory. Click "Add highlighted segments" in order 38 | to accept the selected segments. If there are multiple segments on either 39 | side, the resulting bigrams will be a cartesian product of the left- and 40 | right-hand segments. See :ref:`feature_selection` for more information 41 | on features. 42 | 4. Click "Add" to return to the analysis function dialog with your selected 43 | bigrams. 
To add more than one bigram, click “Add and create another” to be automatically returned to the selection window. Once the last bigram has been selected, simply click “Add” to return to the Mutual Information dialogue box with all selected bigrams listed. 44 | 5. If an added bigram does not appear in the corpus, a message box will 45 | appear specifying which bigrams are missing. From it, click "Cancel" to return to the bigram selector window 46 | without adding the bigram; click "No" to add all of the bigrams that do 47 | appear in the corpus without adding the ones that do not; and click 48 | "Yes" to add all of the bigrams regardless of whether they appear in 49 | the corpus. Note that PCT cannot calculate Mutual Information or 50 | Transitional Probability on bigrams that do not appear in the corpus. 51 | 52 | For example, in the following window, we have selected {m, t} as the first element and {i, u, o} as the second element. Ideally, this would result in six total bigrams being added to our search list: [mi], [mu], [mo], and [ti], [tu], [to]. However, PCT warns us that [mu] and [mo] are not in the corpus. By clicking "No" (i.e., we do not wish to keep them), we are returned to the main analysis window (in this case, the transitional probability dialogur box), with only the four extant bigrams selected: 53 | 54 | .. image:: static/bigram_warning.png 55 | :width: 90% 56 | :align: center 57 | 58 | .. image:: static/bigrams_selected.png 59 | :width: 90% 60 | :align: center 61 | -------------------------------------------------------------------------------- /docs/source/feature_selection.rst: -------------------------------------------------------------------------------- 1 | .. _feature_selection: 2 | 3 | ***************** 4 | Feature Selection 5 | ***************** 6 | 7 | There are several instances in PCT where you are given the opportunity to 8 | select sounds for an analysis based on shared features. 
This section describes 9 | the general interface used in all such instances. See also :ref:`sound_selection` 10 | for general information on how to select individual segments for analysis, 11 | including using features to identify classes of segments. 12 | 13 | In many cases, you will just want to select individual sounds for an analysis 14 | (e.g., [e] vs. [o]). Occasionally, however, it is useful to be able to compare 15 | classes of sounds that differ along some dimension (e.g., comparing front vs. 16 | back non-low vowels, i.e., [i, e] on the one hand vs. [u, o] on the other). 17 | 18 | To do this in an analysis window, click on "Add sets of segments based on features" to open the 19 | "Select feature pair" dialogue box. 20 | 21 | At the top of the box, there is a place to enter the feature(s) along 22 | which the pairs will have OPPOSITE values. No "+" or "-" value should be 23 | entered here; rather, it should just be the name of the feature (e.g., 24 | "back"). Note that currently, PCT has some ability to automatically detect 25 | redundant features within a given domain. 26 | For instance, if the example corpus is open and 27 | associated with SPE features, and one wanted to calculate the predictability 28 | of distribution of [i,e] on the one hand vs. [u, o] on the other, one 29 | could enter either "back" or "round", but would also have to enter "-low" and "+voc" in the "Filter pairs" box. 30 | 31 | 32 | As soon as a feature or set of features has been entered that 33 | describes two sets of sounds that differ on exactly the feature values for 34 | the listed features, the sounds themselves will be shown in the box under 35 | "First segments" and "Second segments." 36 | 37 | One can then filter the entire set by entering in specific values of other 38 | features. E.g., if one wanted to limit the comparison to [i] vs. [e], 39 | one could enter "high" in the "feature to make pairs" box and then [-round] 40 | in the "filter pairs" box.
(Of course, in this case, it might be easier to 41 | simply select those two sounds, [i] and [e], directly as segments, but the 42 | same principle works for more complicated sets of segments.) 43 | 44 | Once the correct segments are listed, click "Add" to add the pairs to the 45 | segment list in the original analysis dialogue box. If additional pairs are 46 | needed, one can click "Add and create another" instead. 47 | 48 | Here's an example of using both features and filters in the Lemurian 49 | corpus to select [o,u] vs. [e,i], to the exclusion of [w,j]: 50 | 51 | .. image:: static/featurepairselection.png 52 | :width: 90% 53 | :align: center 54 | 55 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Phonological CorpusTools documentation master file, created by 2 | sphinx-quickstart on Mon Feb 2 16:59:56 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Phonological CorpusTools's documentation! 7 | ==================================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | about.rst 15 | introduction.rst 16 | downloading_and_installing.rst 17 | loading_corpora.rst 18 | examplecorpora.rst 19 | transcriptions_and_feature_systems.rst 20 | sound_selection.rst 21 | bigram_selector.rst 22 | feature_selection.rst 23 | environment_selection.rst 24 | pronunciation_variants.rst 25 | phonological_search.rst 26 | phonotactic_probability.rst 27 | functional_load.rst 28 | predictability_of_distribution.rst 29 | informativity.rst 30 | kullback-leibler.rst 31 | string_similarity.rst 32 | neighborhood_density.rst 33 | 34 | .. 
frequency_of_alternation.rst 35 | 36 | mutual_information.rst 37 | transitional_probability.rst 38 | acoustic_similarity.rst 39 | citing_pct.rst 40 | references.rst 41 | apireference.rst 42 | release.rst 43 | 44 | 45 | 46 | Indices and tables 47 | ================== 48 | 49 | * :ref:`genindex` 50 | * :ref:`modindex` 51 | * :ref:`search` 52 | 53 | -------------------------------------------------------------------------------- /docs/source/release.rst: -------------------------------------------------------------------------------- 1 | ************* 2 | Release Notes 3 | ************* 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | release/1.5.1-notes.rst 9 | release/1.5.0-notes.rst 10 | release/1.4.1-notes.rst 11 | release/1.4.0-notes.rst 12 | release/1.3.0-notes.rst 13 | release/1.2.0-notes.rst 14 | release/1.1.0-notes.rst 15 | release/1.0.1-notes.rst 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/source/release/1.0.1-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.0.1 Release Notes 2 | ******************************* 3 | 4 | This is primarily a bugfix release in the 1.0.x series 5 | 6 | New features 7 | ============ 8 | 9 | * Implemented the ability to check for updates to PCT from the executable 10 | versions through the help menu of the main window 11 | 12 | Functional load 13 | =============== 14 | 15 | * Fixed a bug in functional load calculations that undercounted the number 16 | of minimal pairs found if homophones were present 17 | 18 | Corpora 19 | ======= 20 | * Numeric filters for subsetting corpora should be working as intended now 21 | 22 | TextGrid support 23 | ================ 24 | 25 | * Improved importing of TextGrids by allowing users to specify what the 26 | labels for orthography and transcription tiers are 27 | * Fixed a bug in TextGrid loading where the last segment from the previous 28 | word's transcription was 
duplicated in the following word's transcription 29 | * Fixed a bug where loading TextGrids resulted in an empty segment inventory 30 | 31 | GUI 32 | === 33 | 34 | * Improved error messages 35 | * Fixed a bug that blocked subsetting a corpus 36 | * Fix for running text window not getting cleared when switching corpora 37 | * Segments should now correctly default to a grid layout when the inventory 38 | is displayed and the feature system is missing some Hayes- or SPE-like 39 | features 40 | -------------------------------------------------------------------------------- /docs/source/release/1.1.0-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.1.0 Release Notes 2 | ******************************* 3 | 4 | This is a major version release for Phonological CorpusTools. 5 | 6 | Importing corpora 7 | ================= 8 | 9 | * Importing corpora functionality in the GUI received a large overhaul 10 | * All types of corpora are imported through a single dialog 11 | * PCT should autodetect many settings based on selected files or directories 12 | * Autodetected settings can be edited and refined by the user 13 | * Basic logging support saves parsing details entered by the user (i.e., 14 | multicharacter segments) 15 | * Numbers in transcriptions can be parsed as stress, tone, or as a normal 16 | character (note that tone and stress are currently not supported in functions 17 | or phonological search) 18 | 19 | Pronunciation variants 20 | ====================== 21 | 22 | * All algorithms that analyze segments support four strategies for dealing with 23 | pronunciation variants: canonical forms, most frequent variants, separated 24 | tokens as types, and tokens weighted by their relative frequencies 25 | * Algorithms that analyze words support two strategies for pronunciation 26 | variants: canonical forms and most frequent variants 27 | * Exporting corpora can now export pronunciation variants (and their
frequencies) 28 | 29 | Functional load 30 | =============== 31 | 32 | * Added support for finding the average functional load of single segments 33 | 34 | Phonotactic probability 35 | ======================= 36 | 37 | * Fixed an issue where calculating biphone probabilities on single segment 38 | words would cause errors; now assigns a probability of 0 to those words 39 | 40 | Kullback-Leibler divergence 41 | =========================== 42 | 43 | * Added options to bring KL divergence in line with the other functions 44 | * Added command line script for calculating KL divergence 45 | 46 | GUI 47 | === 48 | 49 | * Added a dialog to the "View/change feature system" dialog to edit the 50 | categorization of segments into a coherent segment chart via features 51 | * Features can be used as input to the analysis functions, e.g., functional load 52 | of voice in the corpus (segments that are +voice compared to segments that 53 | are -voice) 54 | 55 | Segment selection 56 | ----------------- 57 | 58 | * Segment selection has been redone 59 | * Segments can be selected via the inventory 60 | * Features can be typed into the filter field, which will highlight 61 | segments that will be included with that feature selection 62 | * Once a feature specification has been entered, that segment set can 63 | be locked in 64 | 65 | Environments 66 | ------------ 67 | 68 | * Environment creation has been revamped 69 | * Users can select a set of center segments 70 | * Right hand and left hand can be added, with multiple sets of segments 71 | on each side 72 | 73 | Known issues 74 | ------------ 75 | 76 | * Help pages for the Mac binary require internet connection to view, due 77 | to issues including .html files in the .app binary 78 | -------------------------------------------------------------------------------- /docs/source/release/1.2.0-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.2.0 Release Notes 2 |
******************************* 3 | 4 | Version 1.2 was released in December 2016. 5 | 6 | Inventory Management 7 | ==================== 8 | 9 | * The tools for categorizing segments into an inventory chart have been updated to allow users to interactively update the chart based on natural classes, including the ability to add / delete / rearrange columns and rows in the chart. Uncategorized segments are more clearly shown and their features easily examined for reference. 10 | 11 | 12 | Environments 13 | ============ 14 | * Environments can be more flexibly defined, e.g., using wildcards and inserting / modifying / deleting segments or classes of segments within a linear string. 15 | 16 | 17 | Functional Load 18 | =============== 19 | * Within the functional load analysis, functional load can be calculated within individual sets of environments rather than exclusively at the word level. 20 | 21 | 22 | Other 23 | ===== 24 | 25 | * Numerous small updates have been implemented to aid usability, such as improving the ability to select segments based on features, returning results that list features if those were used in the selection of segments, updating the documentation for clarity, adding an option for normalizing functional load results, etc. 26 | -------------------------------------------------------------------------------- /docs/source/release/1.3.0-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.3.0 Release Notes 2 | ******************************* 3 | 4 | Version 1.3 was released in July 2017. 5 | 6 | New features 7 | ============ 8 | 9 | * Implemented an algorithm for calculating informativity. 10 | 11 | 12 | Functional Load 13 | =============== 14 | 15 | * The options for relativizing functional load have been revised and clarified. 16 | 17 | 18 | Other 19 | ===== 20 | 21 | Numerous small usability fixes have been included.
22 | -------------------------------------------------------------------------------- /docs/source/release/1.4.0-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.4.0 Release Notes 2 | ******************************* 3 | 4 | Version 1.4.0 was released in December 2018. 5 | 6 | New features 7 | ============ 8 | 9 | * Support for syllable structure has been added to the software, allowing searches based on stress or tone. 10 | 11 | 12 | Functional Load 13 | =============== 14 | 15 | * The functional load algorithm based on minimal pair counts has been FIXED -- previous versions returned counts based on token frequencies instead of type frequencies. 16 | * Functional load calculations with right-hand environment specifications should be working. 17 | 18 | 19 | Feature Systems 20 | =============== 21 | 22 | * The feature values in the pre-included feature systems have been updated to fix corruption issues. 23 | 24 | -------------------------------------------------------------------------------- /docs/source/release/1.4.1-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.4.1 Release Notes 2 | ******************************* 3 | 4 | Version 1.4.1 was released in September 2019. This version sizes inventory tables in a more appropriate way, includes the version number in output tables for archival purposes, and fixes bugs in the phonological search, functional load, and "About PCT" functions. -------------------------------------------------------------------------------- /docs/source/release/1.5.0-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.5.0 Release Notes 2 | ******************************* 3 | 4 | Version 1.5.0 was released in January of 2022. 
5 | 6 | New features 7 | ============ 8 | 9 | * Implemented a transitional probability algorithm for calculating the conditional probabilities between segments, which widens the scope of use of the 'bigram selector' module. 10 | * Entry boxes that should have numeric values have now all been set to accept only numeric entries, to avoid inadvertent crashes with non-numeric values. 11 | 12 | Corpora 13 | ======= 14 | 15 | * There is now a syllabified version of the example corpus. 16 | * Example corpora are now bundled with the executable, though can also be manually downloaded directly from https://github.com/PhonologicalCorpusTools/PCT_Fileshare. 17 | * If a new word is added or an existing word is edited to be the same as an existing word, PCT offers options to either create separate items or merge the two, summing frequencies. 18 | 19 | Duplicated Analyses 20 | =================== 21 | 22 | * In prior versions of PCT, duplicated phonological searches / analyses could result in *cumulative* results, e.g., reported frequencies that summed over every instance of a repeated search. This has been corrected so that users are provided a warning when a search / analysis is duplicated, and either no change is made to the output table or the same results are repeated as a new line. 23 | 24 | Phonological Search 25 | =================== 26 | 27 | * Searches can be named. 28 | * Searches can include word frequency, phoneme number, and syllable number filters. 29 | 30 | String Similarity and Neighbourhood Density 31 | =========================================== 32 | 33 | * Fixed some bugs that were causing the algorithm to crash when lists of words were added. 34 | * The option to calculate neighbourhood density based on spelling has been removed, in order to avoid issues with trying to calculate an 'inventory' of spelling symbols. 
Note that it is still possible to calculate raw string similarity based on spelling, and it is possible to force PCT to read a spelling column as transcription (when reading the corpus in to the software initially), if ND based on spelling is required. 35 | 36 | 37 | Mutual Information 38 | ================== 39 | 40 | * Parameters for MI calculations have been clarified. 41 | * Options have been added for calculating MI only within particular specified environments. 42 | 43 | Functional Load 44 | =============== 45 | 46 | * Calculation algorithms have been re-factored to make them faster. 47 | * Minimal pairs can now be defined as either only "true" minimal pairs (e.g. "mad" and "pad") or as minimal pairs through neutralization (e.g., "mama" and "papa"). (Prior versions allowed only minimal pairs through neutralization.) 48 | 49 | Pronunciation Variants 50 | ====================== 51 | 52 | * It has been clarified that all corpora must include canonical pronunciations. It is not possible to have pronunciation variants that are linked to the same lexical item through shared spelling. 53 | 54 | Feature Systems 55 | =============== 56 | 57 | * The feature systems have been updated to be accurate. (As far as we can tell, the original released feature systems were accurate, but got corrupted at some point such that the feature values were all misaligned. We believe this error has now been fixed.) 58 | * Feature / transcription systems are now bundled with the executable, though can also be downloaded from https://github.com/PhonologicalCorpusTools/PCT_Fileshare. 59 | * Master Excel files of all features / transcription symbols have also been provided at https://github.com/PhonologicalCorpusTools/PCT_Fileshare for transparency and ease of personal modification. 
60 | -------------------------------------------------------------------------------- /docs/source/release/1.5.1-notes.rst: -------------------------------------------------------------------------------- 1 | CorpusTools 1.5.1 Release Notes 2 | ******************************* 3 | 4 | Version 1.5.1 was released in May of 2022. 5 | 6 | Phonological Search 7 | =================== 8 | 9 | * A summary display option, "List target segments and environments separately in summary results" was added. By default, 10 | the summary results present each input environment as a row. The added option instead shows each target and environment 11 | separately. The individual results are identical between the two options. 12 | 13 | Bug fixes 14 | ========= 15 | 16 | * Autocompleting feature or segment selection with the tab key now works correctly. 17 | * PCT can now import TextGrid files without issues. 18 | -------------------------------------------------------------------------------- /docs/source/static/PCT_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/PCT_logo.png -------------------------------------------------------------------------------- /docs/source/static/a_matches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/a_matches.png -------------------------------------------------------------------------------- /docs/source/static/about.css: -------------------------------------------------------------------------------- 1 | 2 | div.related{ 3 | display: none; 4 | width: 0px; 5 | height: 0px; 6 | } 7 | a.headerlink{ 8 | display: none; 9 | width: 0px; 10 | height: 0px; 11 | } 12 | div.sphinxsidebar{ 13 | display: none; 14 | width: 0px; 15 | 
height: 0px; 16 | } 17 | div.footer{ 18 | display: none; 19 | width: 0px; 20 | height: 0px; 21 | } 22 | div.bodywrapper{ 23 | margin: 0px 0px 0px 0px; 24 | } 25 | -------------------------------------------------------------------------------- /docs/source/static/acousticsimdialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/acousticsimdialog.png -------------------------------------------------------------------------------- /docs/source/static/asresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/asresults.png -------------------------------------------------------------------------------- /docs/source/static/bigram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/bigram.png -------------------------------------------------------------------------------- /docs/source/static/bigram_selector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/bigram_selector.png -------------------------------------------------------------------------------- /docs/source/static/bigram_warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/bigram_warning.png -------------------------------------------------------------------------------- 
/docs/source/static/bigrams_selected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/bigrams_selected.png -------------------------------------------------------------------------------- /docs/source/static/construct_syllables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/construct_syllables.png -------------------------------------------------------------------------------- /docs/source/static/corpustranscribed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/corpustranscribed.png -------------------------------------------------------------------------------- /docs/source/static/corpustranscribed_digraphs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/corpustranscribed_digraphs.png -------------------------------------------------------------------------------- /docs/source/static/createtier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/createtier.png -------------------------------------------------------------------------------- /docs/source/static/createtierfeatures.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/createtierfeatures.png -------------------------------------------------------------------------------- /docs/source/static/cvtier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/cvtier.png -------------------------------------------------------------------------------- /docs/source/static/delta-H_vs_raw_min_pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/delta-H_vs_raw_min_pairs.png -------------------------------------------------------------------------------- /docs/source/static/digraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/digraph.png -------------------------------------------------------------------------------- /docs/source/static/downloadfeature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/downloadfeature.png -------------------------------------------------------------------------------- /docs/source/static/duplicated_search_warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/duplicated_search_warning.png 
-------------------------------------------------------------------------------- /docs/source/static/editcategories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/editcategories.png -------------------------------------------------------------------------------- /docs/source/static/editfeatures.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/editfeatures.png -------------------------------------------------------------------------------- /docs/source/static/environment1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/environment1.png -------------------------------------------------------------------------------- /docs/source/static/environment1.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/environment1.tiff -------------------------------------------------------------------------------- /docs/source/static/environment2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/environment2.png -------------------------------------------------------------------------------- /docs/source/static/environment2.tiff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/environment2.tiff -------------------------------------------------------------------------------- /docs/source/static/environment3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/environment3.png -------------------------------------------------------------------------------- /docs/source/static/environment4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/environment4.png -------------------------------------------------------------------------------- /docs/source/static/examplecvtier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/examplecvtier.png -------------------------------------------------------------------------------- /docs/source/static/examplevoweltier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/examplevoweltier.png -------------------------------------------------------------------------------- /docs/source/static/featurefile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/featurefile.png -------------------------------------------------------------------------------- 
/docs/source/static/featurepairselection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/featurepairselection.png -------------------------------------------------------------------------------- /docs/source/static/freqaltdialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/freqaltdialog.png -------------------------------------------------------------------------------- /docs/source/static/freqaltresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/freqaltresults.png -------------------------------------------------------------------------------- /docs/source/static/functionalloaddialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/functionalloaddialog.png -------------------------------------------------------------------------------- /docs/source/static/functionalloadresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/functionalloadresults.png -------------------------------------------------------------------------------- /docs/source/static/funtionalloaddialog.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/funtionalloaddialog.png -------------------------------------------------------------------------------- /docs/source/static/funtionalloadresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/funtionalloadresults.png -------------------------------------------------------------------------------- /docs/source/static/gitksandelimited.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/gitksandelimited.png -------------------------------------------------------------------------------- /docs/source/static/gitksanloaded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/gitksanloaded.png -------------------------------------------------------------------------------- /docs/source/static/gitksanoriginal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/gitksanoriginal.png -------------------------------------------------------------------------------- /docs/source/static/gitksanorthcorpus1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/gitksanorthcorpus1.png 
-------------------------------------------------------------------------------- /docs/source/static/gitksanparsing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/gitksanparsing.png -------------------------------------------------------------------------------- /docs/source/static/high_front_vowels_only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/high_front_vowels_only.png -------------------------------------------------------------------------------- /docs/source/static/ilg_loading1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/ilg_loading1.png -------------------------------------------------------------------------------- /docs/source/static/importcsv1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/importcsv1.png -------------------------------------------------------------------------------- /docs/source/static/importspontaneous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/importspontaneous.png -------------------------------------------------------------------------------- /docs/source/static/informativity1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/informativity1.png -------------------------------------------------------------------------------- /docs/source/static/informativity1GUI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/informativity1GUI.png -------------------------------------------------------------------------------- /docs/source/static/informativity2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/informativity2.png -------------------------------------------------------------------------------- /docs/source/static/informativity2GUI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/informativity2GUI.png -------------------------------------------------------------------------------- /docs/source/static/informativity_559tests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/informativity_559tests.png -------------------------------------------------------------------------------- /docs/source/static/kldialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/kldialog.png 
-------------------------------------------------------------------------------- /docs/source/static/klresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/klresults.png -------------------------------------------------------------------------------- /docs/source/static/lemurian_sorted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/lemurian_sorted.png -------------------------------------------------------------------------------- /docs/source/static/lemurian_sorted_cons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/lemurian_sorted_cons.png -------------------------------------------------------------------------------- /docs/source/static/lemurian_sorted_vowels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/lemurian_sorted_vowels.png -------------------------------------------------------------------------------- /docs/source/static/lemurian_unsorted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/lemurian_unsorted.png -------------------------------------------------------------------------------- /docs/source/static/lemurian_unsorted.png .png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/lemurian_unsorted.png .png -------------------------------------------------------------------------------- /docs/source/static/loadcorpus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/loadcorpus.png -------------------------------------------------------------------------------- /docs/source/static/loadexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/loadexample.png -------------------------------------------------------------------------------- /docs/source/static/loadfeature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/loadfeature.png -------------------------------------------------------------------------------- /docs/source/static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/logo.png -------------------------------------------------------------------------------- /docs/source/static/midialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/midialog.png -------------------------------------------------------------------------------- /docs/source/static/miresults.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/miresults.png -------------------------------------------------------------------------------- /docs/source/static/neighdencolumn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdencolumn.png -------------------------------------------------------------------------------- /docs/source/static/neighdendialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdendialog.png -------------------------------------------------------------------------------- /docs/source/static/neighdendialogoutput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdendialogoutput.png -------------------------------------------------------------------------------- /docs/source/static/neighdeninput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdeninput.png -------------------------------------------------------------------------------- /docs/source/static/neighdeninputresults.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdeninputresults.png -------------------------------------------------------------------------------- /docs/source/static/neighdenoutput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdenoutput.png -------------------------------------------------------------------------------- /docs/source/static/neighdenresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/neighdenresults.png -------------------------------------------------------------------------------- /docs/source/static/non-labial_onset_plus_A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/non-labial_onset_plus_A.png -------------------------------------------------------------------------------- /docs/source/static/overwrite_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/overwrite_file.png -------------------------------------------------------------------------------- /docs/source/static/parsingsettings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/parsingsettings.png 
-------------------------------------------------------------------------------- /docs/source/static/pctguilog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/pctguilog.png -------------------------------------------------------------------------------- /docs/source/static/phonoprobdialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonoprobdialog.png -------------------------------------------------------------------------------- /docs/source/static/phonoprobresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonoprobresults.png -------------------------------------------------------------------------------- /docs/source/static/phonoprobresults_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonoprobresults_2.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironment.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironment2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironment2.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironment3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironment3.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironment4_individ_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironment4_individ_results.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironment4_not_sep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironment4_not_sep.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironment4_sep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironment4_sep.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironmentsyllable.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironmentsyllable.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchenvironmentsyllable2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchenvironmentsyllable2.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchindividual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchindividual.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchindividualnegative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchindividualnegative.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchindividualsyllable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchindividualsyllable.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchsaved.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchsaved.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchsaved2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchsaved2.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchsummary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchsummary.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchsummarynegative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchsummarynegative.png -------------------------------------------------------------------------------- /docs/source/static/phonosearchsummarysyllable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/phonosearchsummarysyllable.png -------------------------------------------------------------------------------- /docs/source/static/prod_vs_raw_min_pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/prod_vs_raw_min_pairs.png 
-------------------------------------------------------------------------------- /docs/source/static/prod_vs_rel_min_pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/prod_vs_rel_min_pairs.png -------------------------------------------------------------------------------- /docs/source/static/proddialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/proddialog.png -------------------------------------------------------------------------------- /docs/source/static/proderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/proderror.png -------------------------------------------------------------------------------- /docs/source/static/proderror2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/proderror2.png -------------------------------------------------------------------------------- /docs/source/static/prodfreq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/prodfreq.png -------------------------------------------------------------------------------- /docs/source/static/prodresults.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/prodresults.png -------------------------------------------------------------------------------- /docs/source/static/pronunciationvariant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/pronunciationvariant.png -------------------------------------------------------------------------------- /docs/source/static/savingphonosearch1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/savingphonosearch1.png -------------------------------------------------------------------------------- /docs/source/static/savingphonosearch2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/savingphonosearch2.png -------------------------------------------------------------------------------- /docs/source/static/segmentpair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/segmentpair.png -------------------------------------------------------------------------------- /docs/source/static/segmentpair1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/segmentpair1.png -------------------------------------------------------------------------------- 
/docs/source/static/segmentpair2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/segmentpair2.png -------------------------------------------------------------------------------- /docs/source/static/spontaneouscorpus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/spontaneouscorpus.png -------------------------------------------------------------------------------- /docs/source/static/stringsimilaritydialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/stringsimilaritydialog.png -------------------------------------------------------------------------------- /docs/source/static/stringsimilarityresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/stringsimilarityresults.png -------------------------------------------------------------------------------- /docs/source/static/superset_search_individuals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/superset_search_individuals.png -------------------------------------------------------------------------------- /docs/source/static/superset_search_summary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/superset_search_summary.png -------------------------------------------------------------------------------- /docs/source/static/tierpreview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/tierpreview.png -------------------------------------------------------------------------------- /docs/source/static/two_separate_searches_individual_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/two_separate_searches_individual_results.png -------------------------------------------------------------------------------- /docs/source/static/two_separate_searches_summary_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/two_separate_searches_summary_results.png -------------------------------------------------------------------------------- /docs/source/static/two_separate_syllable_searches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/docs/source/static/two_separate_syllable_searches.png -------------------------------------------------------------------------------- /docs/source/templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. 
autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | 10 | {% if methods %} 11 | .. rubric:: Methods 12 | 13 | .. autosummary:: 14 | {% for item in methods %} 15 | ~{{ name }}.{{ item }} 16 | {%- endfor %} 17 | {% endif %} 18 | {% endblock %} 19 | -------------------------------------------------------------------------------- /docs/source/templates/function.rst: -------------------------------------------------------------------------------- 1 | {{objname}} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
11 | -------------------------------------------------------------------------------- /docs/source/templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% if pagename == 'about' %} 3 | {% set css_files = css_files + ['_static/about.css'] %} 4 | 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /examples/example_corpus.txt: -------------------------------------------------------------------------------- 1 | spelling transcription frequency 2 | ta tɑ 67 3 | ʃi ʃi 2 4 | tusa tusɑ 32 5 | nata nɑtɑ 2 6 | mata mɑtɑ 2 7 | shashi ʃɑʃi 43 8 | sasi sɑsi 139 9 | shisata ʃisɑtɑ 3 10 | shushoma ʃuʃomɑ 126 11 | tishenishu tiʃeniʃu 96 12 | mashomisi mɑʃomisi 5 13 | tatomi tɑtomi 7 14 | toni toni 33 15 | atema ɑtemɑ 11 16 | enuta enutɑ 11 -------------------------------------------------------------------------------- /examples/using_freq_of_alt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import freq_of_alt 4 | 5 | freq = freq_of_alt.Freqor('iphod', 4000) 6 | freq.calc_freq_of_alt('s', 'ʃ', 'khorsi', 'transcription', 'type', 'ess_esh_alt_with_min_pairs_1000.txt', -5, 20, 1, 1) -------------------------------------------------------------------------------- /examples/using_freq_of_alt_samples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import freq_of_alt 4 | import corpustools 5 | import time 6 | 7 | def freq_of_alt_samples(corpus_name, s1, s2, relator_type, string_type, count_what, output_filename, min_rel, max_rel, phono_align, min_pairs_okay, num_of_samples, sample_size, features = 'hayes', ready_made_corpus = None): 8 | if ready_made_corpus is not None: 9 | corpus = ready_made_corpus 10 | else: 11 | print('Building Corpus') 12 | start_time = time.time() 13 | factory = 
corpustools.CorpusFactory() 14 | corpus = factory.make_corpus(corpus_name, features='spe', size='all') 15 | end_time = time.time() 16 | print('Corpus Complete') 17 | print('Corpus creation time: ' + str(end_time-start_time)) 18 | 19 | freq_of_alt_total = 0 20 | for i in range(num_of_samples): 21 | sub_corpus = corpus.get_random_subset(sample_size) 22 | freq = freq_of_alt.Freqor('iphod', ready_made_corpus = sub_corpus) 23 | output_filename = output_filename.replace('.txt.', '') 24 | curr_freq_of_alt = freq.calc_freq_of_alt(s1, s2, relator_type, string_type, count_what, output_filename + '_' + str(i) + '.txt', min_rel, max_rel, phono_align, min_pairs_okay) 25 | freq_of_alt_total += curr_freq_of_alt 26 | 27 | print('Average frequency of alternation: ' + str(freq_of_alt_total/num_of_samples)) 28 | 29 | 30 | 31 | freq = freq_of_alt.Freqor('iphod', 1000) 32 | freq.calc_freq_of_alt('s', 'ʃ', 'phono_edit_distance', 'transcription', 'type', 'ess_esh_alt_with_min_pairs2000.txt', 0,20, 1, 1) 33 | 34 | #freq_of_alt_samples('iphod', 's', 'ʃ', 'phono_edit_distance', 'transcription', 'type', 'ess_esh_alt_with_min_pairs', 0, 30, 1, 1, 2, 2000) 35 | 36 | 37 | -------------------------------------------------------------------------------- /examples/using_morph_relatedness.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 13, 2014 3 | 4 | @author: Michael 5 | ''' 6 | import morph_relatedness 7 | 8 | #Testing mass compare of one word 9 | morph_relatedness.morph_relatedness_word('iphod', 'string_similarity', 'transcription', 'type', 'dog', 0, output_filename = 'test.txt') 10 | #morph_relatedness.morph_relatedness_word('iphod', 'edit_distance', 'transcription', 'type', 'dog', output_filename = 'test4.txt') 11 | #morph_relatedness.morph_relatedness_word('iphod', 'edit_distance', 'spelling', 'type', 'pizza', 0, output_filename = 'test5.txt') 12 | 13 | #Testing single pair comparison 14 | #a = 
morph_relatedness.morph_relatedness_single_pair('iphod', 'string_similarity', 'transcription', 'dog', 'doggie') 15 | #b = morph_relatedness.morph_relatedness_single_pair('iphod', 'string_similarity', 'spelling', 'dog', 'doggie') 16 | #print(a) 17 | #print(b) 18 | 19 | #Testing pair comparison 20 | #morph_relatedness.morph_relatedness_pairs('iphod', 'string_similarity', 'spelling', 'type', 'words_2_spell.txt', 'words_2_spell_output.txt', -15) 21 | #morph_relatedness.morph_relatedness_pairs('iphod', 'string_similarity', 'transcription', 'type', 'words_2_trans.txt', 'words_2_trans_output.txt', -15) -------------------------------------------------------------------------------- /examples/using_string_similarity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 13, 2014 3 | 4 | @author: Michael 5 | ''' 6 | import string_similarity 7 | import corpustools 8 | 9 | print('Building Corpus') 10 | factory = corpustools.CorpusFactory() 11 | corpus = factory.make_corpus('iphod', features='spe', size='all') 12 | print('Corpus Complete') 13 | 14 | string_similarity.string_similarity_word('iphod', 'edit_distance', 'transcription', 'type', 'cat', 0, 5, output_filename = 'test10.txt', ready_made_corpus = corpus) 15 | string_similarity.string_similarity_word('iphod', 'phono_edit_distance', 'transcription', 'type', 'cat', 0, 5, output_filename = 'test11.txt', ready_made_corpus = corpus) 16 | 17 | """ 18 | #Testing mass compare of one word 19 | string_similarity.string_similarity_word('iphod', 'khorsi', 'spelling', 'type', 'cat', -10, 50, output_filename = 'test1.txt', ready_made_corpus = corpus) 20 | string_similarity.string_similarity_word('iphod', 'khorsi', 'transcription', 'type', 'cat', -10, 50, output_filename = 'test2.txt', ready_made_corpus = corpus) 21 | string_similarity.string_similarity_word('iphod', 'edit_distance', 'spelling', 'type', 'cat', 0, 5, output_filename = 'test3.txt', ready_made_corpus = corpus) 22 | 
string_similarity.string_similarity_word('iphod', 'edit_distance', 'transcription', 'type', 'cat', 0, 5, output_filename = 'test4.txt', ready_made_corpus = corpus) 23 | 24 | #Testing single pair comparison 25 | a = string_similarity.string_similarity_single_pair('iphod', 'khorsi', 'spelling', 'dog', 'doggie', ready_made_corpus = corpus) 26 | b = string_similarity.string_similarity_single_pair('iphod', 'khorsi', 'transcription', 'dog', 'doggie', ready_made_corpus = corpus) 27 | c = string_similarity.string_similarity_single_pair('iphod', 'edit_distance', 'spelling', 'dog', 'doggie', ready_made_corpus = corpus) 28 | d = string_similarity.string_similarity_single_pair('iphod', 'edit_distance', 'transcription', 'dog', 'doggie', ready_made_corpus = corpus) 29 | 30 | print(a) 31 | print(b) 32 | print(c) 33 | print(d) 34 | 35 | #Testing pair comparison 36 | string_similarity.string_similarity_pairs('iphod', 'khorsi', 'spelling', 'type', 'words_2_spell.txt', 'test5.txt', ready_made_corpus = corpus) 37 | string_similarity.string_similarity_pairs('iphod', 'khorsi', 'transcription', 'type', 'words_2_spell.txt', 'test6.txt', ready_made_corpus = corpus) 38 | 39 | string_similarity.string_similarity_pairs('iphod', 'edit_distance', 'spelling', 'type', 'words_2_spell.txt', 'test7.txt', ready_made_corpus = corpus) 40 | string_similarity.string_similarity_pairs('iphod', 'edit_distance', 'transcription', 'type', 'words_2_spell.txt', 'test8.txt', ready_made_corpus = corpus) 41 | 42 | string_similarity.string_similarity_pairs('iphod', 'khorsi', 'spelling', 'type', 'words_2_spell.txt', 'test9.txt', 0, 50, ready_made_corpus = corpus) 43 | string_similarity.string_similarity_pairs('iphod', 'khorsi', 'transcription', 'type', 'words_2_spell.txt', 'test10.txt', 0, 50, ready_made_corpus = corpus) 44 | 45 | string_similarity.string_similarity_pairs('iphod', 'edit_distance', 'spelling', 'type', 'words_2_spell.txt', 'test11.txt', 0, 50, ready_made_corpus = corpus) 46 | 
string_similarity.string_similarity_pairs('iphod', 'edit_distance', 'transcription', 'type', 'words_2_spell.txt', 'test12.txt', 0, 50, ready_made_corpus = corpus) 47 | """ 48 | -------------------------------------------------------------------------------- /pa_setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a setup.py script generated by py2applet 3 | 4 | Usage: 5 | python setup.py py2app 6 | """ 7 | 8 | from setuptools import setup 9 | 10 | APP = ['bin/pct.py'] 11 | DATA_FILES = [] 12 | OPTIONS = {'argv_emulation': True, 13 | "includes":['scipy','numpy']} 14 | 15 | setup( 16 | app=APP, 17 | data_files=DATA_FILES, 18 | options={'py2app': OPTIONS}, 19 | packages=['corpustools', 20 | 'corpustools.acousticsim', 21 | 'corpustools.corpus', 22 | 'corpustools.freqalt', 23 | 'corpustools.funcload', 24 | 'corpustools.prod', 25 | 'corpustools.gui', 26 | 'corpustools.symbolsim'], 27 | setup_requires=['py2app'], 28 | ) 29 | -------------------------------------------------------------------------------- /pct.iss: -------------------------------------------------------------------------------- 1 | #define MyAppName "Phonological CorpusTools" 2 | #define MyDistName "PhonologicalCorpusTools" 3 | #define MyAppVersion "1.5.1" 4 | #define MyPlatform "win-amd64" 5 | #define MyAppPublisher "PCT" 6 | #define MyAppURL "http://PhonologicalCorpusTools.github.io/CorpusTools/" 7 | #define MyAppExeName "pct.exe" 8 | 9 | [Setup] 10 | AppId={{9f3fd2c0-db11-4d9b-8124-2e91e6cfd19d} 11 | AppName={#MyAppName} 12 | AppVersion={#MyAppVersion} 13 | AppPublisher={#MyAppPublisher} 14 | AppPublisherURL={#MyAppURL} 15 | AppSupportURL={#MyAppURL} 16 | AppUpdatesURL={#MyAppURL} 17 | AppCopyright=Copyright (C) 2015 PCT 18 | DefaultDirName={pf}\{#MyAppPublisher} 19 | DefaultGroupName={#MyAppPublisher} 20 | AllowNoIcons=yes 21 | OutputBaseFilename={#MyDistName}_win64_{#MyAppVersion} 22 | SolidCompression=yes 23 | 
ArchitecturesInstallIn64BitMode=x64 24 | ShowLanguageDialog=no 25 | LanguageDetectionMethod=none 26 | UninstallDisplayName={#MyAppName} 27 | UninstallDisplayIcon={app}\{#MyAppExeName} 28 | ChangesAssociations=True 29 | MinVersion=0,6.0 30 | 31 | [Languages] 32 | Name: "english"; MessagesFile: "compiler:Default.isl" 33 | 34 | [Tasks] 35 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 36 | 37 | [Files] 38 | ; x64 files 39 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\*"; DestDir: "{app}\appdata\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\"; Flags: ignoreversion; Check: Is64BitInstallMode 40 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\esky-files\*"; DestDir: "{app}\appdata\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\esky-files\"; Flags: ignoreversion; Check: Is64BitInstallMode 41 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\html\*"; DestDir: "{app}\appdata\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\html\"; Flags: ignoreversion recursesubdirs; Check: Is64BitInstallMode 42 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\imageformats\*"; DestDir: "{app}\appdata\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\imageformats\"; Flags: ignoreversion; Check: Is64BitInstallMode 43 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\mediaservice\*"; DestDir: "{app}\appdata\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\mediaservice\"; Flags: ignoreversion; Check: Is64BitInstallMode 44 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\platforms\*"; DestDir: "{app}\appdata\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\platforms\"; Flags: ignoreversion; Check: Is64BitInstallMode 45 | 
Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion; Check: Is64BitInstallMode 46 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\platforms\*.dll"; DestDir: "{app}\platforms"; Flags: ignoreversion; Check: Is64BitInstallMode 47 | Source: "dist\{#MyDistName}-{#MyAppVersion}.{#MyPlatform}\*.dll"; DestDir: "{app}"; Flags: ignoreversion; Check: Is64BitInstallMode 48 | 49 | [Dirs] 50 | name: "{app}\appdata\updates\ready" 51 | 52 | [Icons] 53 | Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" 54 | Name: "{group}\Uninstall Phonological CorpusTools"; Filename: "{uninstallexe}" 55 | Name: "{group}\Readme"; Filename: "{#MyAppURL}#phonological-corpustools" 56 | Name: "{commondesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon 57 | 58 | [Run] 59 | Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall 60 | 61 | [Messages] 62 | WelcomeLabel1=Welcome to the PCT Setup Wizard 63 | FinishedHeadingLabel=Completing the PCT Setup Wizard 64 | 65 | -------------------------------------------------------------------------------- /pct.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | import os 3 | import sys 4 | sys.setrecursionlimit(5000) 5 | 6 | block_cipher = None 7 | pct_path = os.getcwd() 8 | icon_path = os.path.join(pct_path, 'resources', 'favicon32x32.ico') 9 | if sys.platform == 'win32': 10 | icon_path = os.path.join(pct_path, 'resources', '48x48_win.ico') 11 | if sys.platform == 'darwin': 12 | icon_path = os.path.join(pct_path, 'resources', 'favicon.icns') 13 | 14 | a = Analysis([os.path.join(pct_path, 'bin', 'pct_qt_debug.py')], 15 | pathex=[pct_path], 16 | binaries=None, 17 | datas=[('resources', 'resources')], 18 | hiddenimports=['PyQt5', 'urllib', 19 | 
'scipy.spatial.transform._rotation_groups', 20 | 'scipy.special.cython_special'], 21 | hookspath=[], 22 | runtime_hooks=[], 23 | excludes=['matplotlib'], 24 | win_no_prefer_redirects=False, 25 | win_private_assemblies=False, 26 | cipher=block_cipher) 27 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) 28 | exe = EXE(pyz, 29 | a.scripts, 30 | a.binaries, 31 | a.zipfiles, 32 | a.datas, 33 | name='Phonological CorpusTools', 34 | debug=False, 35 | strip=False, 36 | upx=True, 37 | console=False, 38 | icon=icon_path) 39 | 40 | if sys.platform == 'darwin': 41 | app = BUNDLE(exe, name='Phonological CorpusTools.app', icon=icon_path) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.26 2 | scipy~=1.11.4 3 | pyqt5~=5.15.0 4 | regex~=2023.10.3 5 | scikit-learn~=1.3.0 6 | pyinstaller>=6.13.0 7 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pytest~=8.0.0 2 | sphinx~=5.0.0 3 | sphinx_rtd_theme~=0.5.2 4 | numpydoc 5 | numpy>=1.21 6 | scipy~=1.11.4 7 | pyqt5~=5.15.0 8 | regex~=2023.10.3 9 | scikit-learn~=1.3.0 10 | -------------------------------------------------------------------------------- /resources/48x48_win.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/48x48_win.ico -------------------------------------------------------------------------------- /resources/CORPUS/example.corpus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/CORPUS/example.corpus 
-------------------------------------------------------------------------------- /resources/CORPUS/example_syllabified.corpus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/CORPUS/example_syllabified.corpus -------------------------------------------------------------------------------- /resources/CORPUS/iphod_with_homographs.corpus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/CORPUS/iphod_with_homographs.corpus -------------------------------------------------------------------------------- /resources/CORPUS/iphod_without_homographs.corpus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/CORPUS/iphod_without_homographs.corpus -------------------------------------------------------------------------------- /resources/CORPUS/lemurian.corpus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/CORPUS/lemurian.corpus -------------------------------------------------------------------------------- /resources/FEATURE/arpabet2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/arpabet2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/arpabet2spe.feature: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/arpabet2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/buckeye2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/buckeye2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/buckeye2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/buckeye2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/celex2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/celex2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/celex2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/celex2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/cpa2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/cpa2hayes.feature 
-------------------------------------------------------------------------------- /resources/FEATURE/cpa2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/cpa2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/disc2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/disc2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/disc2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/disc2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/ipa2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/ipa2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/ipa2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/ipa2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/klatt2hayes.feature: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/klatt2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/klatt2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/klatt2spe.feature -------------------------------------------------------------------------------- /resources/FEATURE/sampa2hayes.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/sampa2hayes.feature -------------------------------------------------------------------------------- /resources/FEATURE/sampa2spe.feature: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/FEATURE/sampa2spe.feature -------------------------------------------------------------------------------- /resources/favicon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/favicon.icns -------------------------------------------------------------------------------- /resources/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/favicon.png -------------------------------------------------------------------------------- /resources/favicon32x32.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/favicon32x32.ico -------------------------------------------------------------------------------- /resources/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/logo.ico -------------------------------------------------------------------------------- /resources/mac_iconBase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/mac_iconBase.png -------------------------------------------------------------------------------- /resources/object-flip-horizontal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/object-flip-horizontal.png -------------------------------------------------------------------------------- /resources/qt.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/resources/qt.conf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup 3 | from setuptools.command.test import test as TestCommand 4 | 5 | import corpustools 6 | 7 | def readme(): 8 | with open('README.md') as f: 9 | return f.read() 10 | 11 | class PyTest(TestCommand): 12 | def finalize_options(self): 13 | 
TestCommand.finalize_options(self) 14 | self.test_args = ['--strict', '--verbose', '--tb=long', 'tests'] 15 | self.test_suite = True 16 | 17 | def run_tests(self): 18 | import pytest 19 | errcode = pytest.main(self.test_args) 20 | sys.exit(errcode) 21 | 22 | setup(name='corpustools', 23 | version=corpustools.__version__, 24 | description='', 25 | long_description='', 26 | classifiers=[ 27 | 'Development Status :: 3 - Alpha', 28 | 'Programming Language :: Python', 29 | 'Programming Language :: Python :: 3', 30 | 'Operating System :: OS Independent', 31 | 'Topic :: Scientific/Engineering', 32 | 'Topic :: Text Processing :: Linguistic', 33 | ], 34 | keywords='phonology corpus phonetics', 35 | url='https://github.com/kchall/CorpusTools', 36 | author='Phonological Corpus Tools', 37 | author_email='kathleen.hall@ubc.ca', 38 | packages=['corpustools', 39 | 'corpustools.acousticsim', 40 | 'corpustools.corpus', 41 | 'corpustools.corpus.classes', 42 | 'corpustools.corpus.io', 43 | 'corpustools.freqalt', 44 | 'corpustools.funcload', 45 | 'corpustools.kl', 46 | 'corpustools.prod', 47 | 'corpustools.phonosearch', 48 | 'corpustools.gui', 49 | 'corpustools.informativity', 50 | 'corpustools.symbolsim', 51 | 'corpustools.neighdens', 52 | 'corpustools.mutualinfo', 53 | 'corpustools.phonoprob', 54 | 'corpustools.command_line'], 55 | dependency_links = ['https://github.com/kylebgorman/textgrid/tarball/master#egg=textgrid-1.0'], 56 | install_requires=[ 57 | 'numpy', 58 | 'scipy', 59 | 'textgrid', 60 | 'pyqt5', 61 | 'sklearn', 62 | 'regex' 63 | #'python-acoustic-similarity' 64 | ], 65 | entry_points = { 66 | 'console_scripts': ['pct=corpustools.command_line.pct:main', 67 | 'pct_corpus=corpustools.command_line.pct_corpus:main', 68 | 'pct_funcload=corpustools.command_line.pct_funcload:main', 69 | 'pct_neighdens=corpustools.command_line.pct_neighdens:main', 70 | 'pct_mutualinfo=corpustools.command_line.pct_mutualinfo:main', 71 | 'pct_kl=corpustools.command_line.pct_kl:main', 72 | 
'pct_search=corpustools.command_line.pct_search:main', 73 | 'pct_visualize=corpustools.command_line.pct_visualize:main'], 74 | }, 75 | cmdclass={'test': PyTest}, 76 | extras_require={ 77 | 'testing': ['pytest'], 78 | } 79 | ) 80 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # 😐?? what is this for?? 2 | 3 | -------------------------------------------------------------------------------- /tests/data/Buckeye_sample/s0101a.log: -------------------------------------------------------------------------------- 1 | signal s0101.sd 2 | type 0 3 | comment created using xlabel Thu Oct 21 15:50:43 2004 4 | comment created using xlabel Thu Mar 21 15:22:35 2002 5 | color 122 6 | font -misc-*-bold-*-*-*-15-*-*-*-*-*-*-* 7 | separator ; 8 | nfields 1 9 | # 10 | 61.142603 122 11 | 61.397647 122 12 | 176.705681 121 13 | 177.442715 121 14 | 208.458474 121 15 | 208.998197 121 16 | 218.326046 121 17 | 218.619639 121 18 | 281.412600 121 19 | 282.015381 121 20 | 283.014140 121 21 | 283.342991 121 22 | 286.369100 121 23 | 286.587431 121 24 | 358.243781 121 25 | 358.766553 121 26 | 570.891209 121 27 | 570.988848 121 28 | 595.595736 121 29 | 596.178854 121 30 | -------------------------------------------------------------------------------- /tests/data/Buckeye_sample/s0101a.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhonologicalCorpusTools/CorpusTools/23833d8799468f1c41889af04c1b5d6086f2c6a9/tests/data/Buckeye_sample/s0101a.wav -------------------------------------------------------------------------------- /tests/data/buckeye/test.phones: -------------------------------------------------------------------------------- 1 | signal s0201.sd 2 | type 0 3 | comment created using xlabel Thu Jan 19 17:52:50 2006 4 | comment created using xlabel Wed Jan 10 15:11:26 2001 5 | color 
122 6 | font -misc-*-bold-*-*-*-15-*-*-*-*-*-*-* 7 | separator ; 8 | nfields 1 9 | # 10 | 2.609000 122 {B_TRANS} 11 | 2.714347 122 IVER 12 | 2.753000 122 eh 13 | 2.892000 122 s 14 | 3.206890 122 IVER 15 | 3.244160 122 dh 16 | 3.327000 122 ae 17 | 3.377192 122 s 18 | 3.438544 122 s 19 | 3.526272 122 ae 20 | 3.614398 122 tq 21 | 3.673454 122 VOCNOISE 22 | 3.718614 122 ah 23 | 3.771112 122 w 24 | 3.851000 122 ah 25 | 3.881000 122 dx 26 | 3.941000 122 eh 27 | 4.001000 122 v 28 | 4.036022 122 er 29 | 4.111000 122 ey 30 | 4.246000 122 k 31 | 4.326000 122 ao 32 | 4.369000 122 l 33 | 4.443707 122 ah 34 | 4.501000 122 t 35 | -------------------------------------------------------------------------------- /tests/data/buckeye/test.words: -------------------------------------------------------------------------------- 1 | signal s0201.sd 2 | type 0 3 | comment created using xlabel Thu Jan 19 17:52:50 2006 4 | comment F O 5 | color 122 6 | font -misc-*-bold-*-*-*-15-*-*-*-*-*-*-* 7 | separator ; 8 | nfields 3 9 | # 10 | 2.609000 122 {B_TRANS}; B; B; null 11 | 2.714347 122 ; S; S; null 12 | 2.892096 122 that's; dh ae t s; eh s; DT_VBZ 13 | 3.206317 122 ; S; S; null 14 | 3.377192 122 that's; dh ae t s; dh ae s; DT_VBZ 15 | 3.614398 122 that; dh ae t; s ae tq; IN 16 | 3.673454 122 ; U; U; null 17 | 4.036022 122 whatever; w ah t eh v er; ah w ah dx eh v er; WDT 18 | 4.111000 122 they; dh ey; ey; PRP 19 | 4.369000 122 call; k aa l; k ao l; VBP 20 | 4.501000 122 it; ih t; ah t; PRP 21 | -------------------------------------------------------------------------------- /tests/data/csv/stressed.txt: -------------------------------------------------------------------------------- 1 | spelling,transcription,frequency 2 | tusi,t.uw1.s.iy0,13.0 3 | tuni,t.uw0.n.iy1,33.0 4 | shashi,sh.ah0.sh.iy1,43.0 5 | -------------------------------------------------------------------------------- /tests/data/csv/tiered.txt: -------------------------------------------------------------------------------- 1 
| spelling,transcription,frequency,vowel_tier 2 | tusi,t.u.s.i,13.0,u.i 3 | tuni,t.u.n.i,33.0,o.i 4 | shashi,ʃ.ɑ.ʃ.i,43.0,ɑ.i 5 | -------------------------------------------------------------------------------- /tests/data/csv/tonal.txt: -------------------------------------------------------------------------------- 1 | spelling,transcription,frequency 2 | fam,fam11,13.0 3 | fam,fam12,33.0 4 | fan,fan12,43.0 5 | -------------------------------------------------------------------------------- /tests/data/features/basic.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,+,0 4 | d,0,0 5 | -------------------------------------------------------------------------------- /tests/data/features/missing_value.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,+,0 4 | d,0 5 | -------------------------------------------------------------------------------- /tests/data/features/no_symbol_column.txt: -------------------------------------------------------------------------------- 1 | notsymbol,feat1,feat2 2 | p,+,- 3 | b,+,0 4 | d,0,0 5 | -------------------------------------------------------------------------------- /tests/data/features/no_unspecified.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,-,+ 4 | d,+,+ 5 | -------------------------------------------------------------------------------- /tests/data/features/overlapping_specified.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,+,- 4 | d,0,0 5 | -------------------------------------------------------------------------------- /tests/data/features/overlapping_symbols.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,+,0 4 | d,0,0 5 | b,+,0 6 | 
-------------------------------------------------------------------------------- /tests/data/features/overlapping_symbols_diff_feat.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,+,0 4 | d,0,0 5 | b,+,+ 6 | -------------------------------------------------------------------------------- /tests/data/features/overlapping_unspecified.txt: -------------------------------------------------------------------------------- 1 | symbol,feat1,feat2 2 | p,+,- 3 | b,0,0 4 | d,0,0 5 | -------------------------------------------------------------------------------- /tests/data/features/test_feature_matrix.txt: -------------------------------------------------------------------------------- 1 | symbol,feature1,feature2 2 | a,+,+ 3 | b,+,- 4 | c,-,+ 5 | d,-,- 6 | -------------------------------------------------------------------------------- /tests/data/features/test_feature_matrix_extra_feature.txt: -------------------------------------------------------------------------------- 1 | symbol,feature1,feature2,feature3 2 | a,+,+ 3 | b,+,- 4 | c,-,+ 5 | d,-,- 6 | -------------------------------------------------------------------------------- /tests/data/features/test_feature_matrix_missing_segment.txt: -------------------------------------------------------------------------------- 1 | symbol,feature1,feature2 2 | a,+,+ 3 | -------------------------------------------------------------------------------- /tests/data/features/test_feature_matrix_missing_value.txt: -------------------------------------------------------------------------------- 1 | symbol,feature1,feature2 2 | a,+,+ 3 | b,+,- 4 | c,-,+ 5 | d,- 6 | -------------------------------------------------------------------------------- /tests/data/homophones/ND_test_README.rtf: -------------------------------------------------------------------------------- 1 | {\rtf1\ansi\ansicpg1252\cocoartf1561\cocoasubrtf100 2 | {\fonttbl\f0\fswiss\fcharset0 
Helvetica;} 3 | {\colortbl;\red255\green255\blue255;} 4 | {\*\expandedcolortbl;;} 5 | \margl1440\margr1440\vieww10800\viewh8400\viewkind0 6 | \pard\tx566\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\pardirnatural\partightenfactor0 7 | 8 | \f0\fs24 \cf0 Tests for Neighbourhood Density with a File Containing Words\ 9 | \ 10 | (A) files contain only words that do also happen to occur in the corpus\ 11 | (B) files contain only words that do not occur in the corpus\ 12 | (C) files contain both words that do occur and words that don\'92t occur in the corpus\ 13 | \ 14 | (1) files contain spelling\ 15 | (3) files contain transcription\ 16 | \ 17 | Note that in PCT, if the file contains spelling, the tier for calculation MUST be spelling as well, and if the file contains transcription, the tier for calculation MUST be transcription as well. PCT will automatically change the tier to match what the file is.\ 18 | \ 19 | Expected results from the Example corpus, not collapsing homophones in any cases:\ 20 | \ 21 | A1: 4, 4, 4, 1\ 22 | A3: 5, 5, 5, 1\ 23 | B1: 1, 2, 1\ 24 | B3: 5, 5, 2\ 25 | C1: 4, 4, 4, 1, 1, 2, 1\ 26 | C3: 5, 5, 5, 1, 5, 5, 2} -------------------------------------------------------------------------------- /tests/data/ilg/test_basic.txt: -------------------------------------------------------------------------------- 1 | a a.b a b a.b c.d -------------------------------------------------------------------------------- /tests/data/ilg/test_contains_translations.txt: -------------------------------------------------------------------------------- 1 | a 2 | a.b 3 | 'a' 4 | a b 5 | a.b c.d 6 | 'a b' 7 | -------------------------------------------------------------------------------- /tests/data/ilg/test_mismatched.txt: -------------------------------------------------------------------------------- 1 | a a.b a b a.b -------------------------------------------------------------------------------- 
/tests/data/text/test_text_spelling.txt: -------------------------------------------------------------------------------- 1 | ab cab'd ad ab ab. 2 | -------------------------------------------------------------------------------- /tests/data/text/test_text_transcription.txt: -------------------------------------------------------------------------------- 1 | a.b c.a.b a.d a.b a.b 2 | -------------------------------------------------------------------------------- /tests/data/text/test_text_transcription_morpheme_boundaries.txt: -------------------------------------------------------------------------------- 1 | a.b c.a.-.b c.a.=.b a.d a.b a.b 2 | -------------------------------------------------------------------------------- /tests/data/textgrids/2speakers.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 7 | size = 4 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "Speaker 1 - phone" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 6 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.1 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.1 21 | xmax = 0.2 22 | text = "a" 23 | intervals [3]: 24 | xmin = 0.2 25 | xmax = 0.3 26 | text = "b" 27 | intervals [4]: 28 | xmin = 0.3 29 | xmax = 0.4 30 | text = "c" 31 | intervals [5]: 32 | xmin = 0.4 33 | xmax = 0.5 34 | text = "d" 35 | intervals [6]: 36 | xmin = 0.5 37 | xmax = 1 38 | text = "" 39 | item [2]: 40 | class = "IntervalTier" 41 | name = "Speaker 1 - word" 42 | xmin = 0 43 | xmax = 1 44 | intervals: size = 4 45 | intervals [1]: 46 | xmin = 0 47 | xmax = 0.1 48 | text = "" 49 | intervals [2]: 50 | xmin = 0.1 51 | xmax = 0.3 52 | text = "a" 53 | intervals [3]: 54 | xmin = 0.3 55 | xmax = 0.5 56 | text = "b" 57 | intervals [4]: 58 | xmin = 0.5 59 | xmax = 1 60 | text = "" 61 | item [3]: 62 | class = "IntervalTier" 63 | name = "Speaker 2 - phone" 64 | xmin = 0 65 
| xmax = 1 66 | intervals: size = 6 67 | intervals [1]: 68 | xmin = 0 69 | xmax = 0.5 70 | text = "" 71 | intervals [2]: 72 | xmin = 0.5 73 | xmax = 0.6 74 | text = "a" 75 | intervals [3]: 76 | xmin = 0.6 77 | xmax = 0.7 78 | text = "b" 79 | intervals [4]: 80 | xmin = 0.7 81 | xmax = 0.8 82 | text = "d" 83 | intervals [5]: 84 | xmin = 0.8 85 | xmax = 0.9 86 | text = "e" 87 | intervals [6]: 88 | xmin = 0.9 89 | xmax = 1 90 | text = "" 91 | item [4]: 92 | class = "IntervalTier" 93 | name = "Speaker 2 - word" 94 | xmin = 0 95 | xmax = 1 96 | intervals: size = 4 97 | intervals [1]: 98 | xmin = 0 99 | xmax = 0.5 100 | text = "" 101 | intervals [2]: 102 | xmin = 0.5 103 | xmax = 0.7 104 | text = "a" 105 | intervals [3]: 106 | xmin = 0.7 107 | xmax = 0.9 108 | text = "c" 109 | intervals [4]: 110 | xmin = 0.9 111 | xmax = 1 112 | text = "" 113 | -------------------------------------------------------------------------------- /tests/data/textgrids/hashtag_BC.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 0.6195238095238095 6 | tiers? 
7 | size = 3 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "Mary" 12 | xmin = 0 13 | xmax = 0.6195238095238095 14 | intervals: size = 3 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.09798169116185962 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.09798169116185962 21 | xmax = 0.2359742285302148 22 | text = "syllable1" 23 | intervals [3]: 24 | xmin = 0.2359742285302148 25 | xmax = 0.6195238095238095 26 | text = "" 27 | item [2]: 28 | class = "IntervalTier" 29 | name = "John" 30 | xmin = 0 31 | xmax = 0.6195238095238095 32 | intervals: size = 3 33 | intervals [1]: 34 | xmin = 0 35 | xmax = 0.09798169116185962 36 | text = "" 37 | intervals [2]: 38 | xmin = 0.09798169116185962 39 | xmax = 0.5988434934618154 40 | text = "word1" 41 | intervals [3]: 42 | xmin = 0.5988434934618154 43 | xmax = 0.6195238095238095 44 | text = "" 45 | item [3]: 46 | class = "TextTier" 47 | name = "bell" 48 | xmin = 0 49 | xmax = 0.6195238095238095 50 | points: size = 1 51 | points [1]: 52 | number = 0.09798169116185962 53 | mark = "start" 54 | -------------------------------------------------------------------------------- /tests/data/textgrids/phone_word.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "phone" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 4 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.25 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.25 21 | xmax = 0.5 22 | text = "a" 23 | intervals [3]: 24 | xmin = 0.5 25 | xmax = 0.75 26 | text = "b" 27 | intervals [4]: 28 | xmin = 0.75 29 | xmax = 1 30 | text = "" 31 | item [2]: 32 | class = "IntervalTier" 33 | name = "word" 34 | xmin = 0 35 | xmax = 1 36 | intervals: size = 3 37 | intervals [1]: 38 | xmin = 0 39 | xmax = 0.25 40 | text = "" 41 | intervals [2]: 42 | xmin = 0.25 43 | xmax = 0.75 44 | text = "a" 45 | intervals [3]: 46 | xmin = 0.75 47 | xmax = 1 48 | text = "" 49 | -------------------------------------------------------------------------------- /tests/data/textgrids/phone_word_notes.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 7 | size = 3 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "phone" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 1 18 | text = "" 19 | item [2]: 20 | class = "IntervalTier" 21 | name = "word" 22 | xmin = 0 23 | xmax = 1 24 | intervals: size = 1 25 | intervals [1]: 26 | xmin = 0 27 | xmax = 1 28 | text = "" 29 | item [3]: 30 | class = "IntervalTier" 31 | name = "notes" 32 | xmin = 0 33 | xmax = 1 34 | intervals: size = 1 35 | intervals [1]: 36 | xmin = 0 37 | xmax = 1 38 | text = "" 39 | -------------------------------------------------------------------------------- /tests/data/textgrids/phone_word_silence.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "phone" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 4 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.25 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.25 21 | xmax = 0.5 22 | text = "a" 23 | intervals [3]: 24 | xmin = 0.5 25 | xmax = 0.75 26 | text = "b" 27 | intervals [4]: 28 | xmin = 0.75 29 | xmax = 1 30 | text = "" 31 | item [2]: 32 | class = "IntervalTier" 33 | name = "word" 34 | xmin = 0 35 | xmax = 1 36 | intervals: size = 3 37 | intervals [1]: 38 | xmin = 0 39 | xmax = 0.25 40 | text = "silence" 41 | intervals [2]: 42 | xmin = 0.25 43 | xmax = 0.75 44 | text = "a" 45 | intervals [3]: 46 | xmin = 0.75 47 | xmax = 1 48 | text = "silence" 49 | -------------------------------------------------------------------------------- /tests/data/textgrids/phone_word_sp_phone.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "phone" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 4 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.25 18 | text = "sp" 19 | intervals [2]: 20 | xmin = 0.25 21 | xmax = 0.5 22 | text = "a" 23 | intervals [3]: 24 | xmin = 0.5 25 | xmax = 0.75 26 | text = "b" 27 | intervals [4]: 28 | xmin = 0.75 29 | xmax = 1 30 | text = "sp" 31 | item [2]: 32 | class = "IntervalTier" 33 | name = "word" 34 | xmin = 0 35 | xmax = 1 36 | intervals: size = 3 37 | intervals [1]: 38 | xmin = 0 39 | xmax = 0.25 40 | text = "" 41 | intervals [2]: 42 | xmin = 0.25 43 | xmax = 0.75 44 | text = "a" 45 | intervals [3]: 46 | xmin = 0.75 47 | xmax = 1 48 | text = "" 49 | -------------------------------------------------------------------------------- /tests/data/textgrids/phone_word_sp_word.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "phone" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 4 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.25 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.25 21 | xmax = 0.5 22 | text = "a" 23 | intervals [3]: 24 | xmin = 0.5 25 | xmax = 0.75 26 | text = "b" 27 | intervals [4]: 28 | xmin = 0.75 29 | xmax = 1 30 | text = "" 31 | item [2]: 32 | class = "IntervalTier" 33 | name = "word" 34 | xmin = 0 35 | xmax = 1 36 | intervals: size = 3 37 | intervals [1]: 38 | xmin = 0 39 | xmax = 0.25 40 | text = "sp" 41 | intervals [2]: 42 | xmin = 0.25 43 | xmax = 0.75 44 | text = "a" 45 | intervals [3]: 46 | xmin = 0.75 47 | xmax = 1 48 | text = "sp" 49 | -------------------------------------------------------------------------------- /tests/data/textgrids/word_phone.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "word" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 1 18 | text = "" 19 | item [2]: 20 | class = "IntervalTier" 21 | name = "phone" 22 | xmin = 0 23 | xmax = 1 24 | intervals: size = 1 25 | intervals [1]: 26 | xmin = 0 27 | xmax = 1 28 | text = "" 29 | -------------------------------------------------------------------------------- /tests/data/textgrids/word_segment.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "word" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 1 18 | text = "" 19 | item [2]: 20 | class = "IntervalTier" 21 | name = "segment" 22 | xmin = 0 23 | xmax = 1 24 | intervals: size = 1 25 | intervals [1]: 26 | xmin = 0 27 | xmax = 1 28 | text = "" 29 | -------------------------------------------------------------------------------- /tests/data/textgrids/words_phones.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 1 18 | text = "" 19 | item [2]: 20 | class = "IntervalTier" 21 | name = "phones" 22 | xmin = 0 23 | xmax = 1 24 | intervals: size = 1 25 | intervals [1]: 26 | xmin = 0 27 | xmax = 1 28 | text = "" 29 | -------------------------------------------------------------------------------- /tests/data/textgrids/words_segments.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 1 18 | text = "" 19 | item [2]: 20 | class = "IntervalTier" 21 | name = "segments" 22 | xmin = 0 23 | xmax = 1 24 | intervals: size = 1 25 | intervals [1]: 26 | xmin = 0 27 | xmax = 1 28 | text = "" 29 | -------------------------------------------------------------------------------- /tests/test_acousticsim_io.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from numpy import array 4 | import unittest 5 | import os 6 | import sys 7 | 8 | test_dir = os.path.dirname(os.path.abspath(__file__)) 9 | corpustools_path = os.path.split(os.path.split(os.path.split(test_dir)[0])[0])[0] 10 | sys.path.insert(0,corpustools_path) 11 | from corpustools.acousticsim.io import load_path_mapping 12 | 13 | TEST_DIR = r'C:\Users\michael\Dropbox\Measuring_Phonological_Relations\Computational\CorpusTools_test_files\Acoustic_similarity' 14 | 15 | class IOTest(unittest.TestCase): 16 | def setUp(self): 17 | self.valid_path = os.path.join(TEST_DIR,'mapping_test.txt') 18 | self.invalid_path = os.path.join(TEST_DIR,'invalid_mapping_test.txt') 19 | 20 | def test_valid(self): 21 | return 22 | mapping = load_path_mapping(self.valid_path) 23 | for line in mapping: 24 | self.assertEqual(len(line),2) 25 | 26 | def test_invalid(self): 27 | return 28 | self.assertRaises(OSError,load_path_mapping,self.invalid_path) 29 | 30 | if __name__ == '__main__': 31 | unittest.main() 32 | -------------------------------------------------------------------------------- /tests/test_cl.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import os 4 | 5 | 6 | 7 | 8 | def test_non_minimal_pair_corpus(unspecified_test_corpus): 9 | pass 10 | 11 | 12 | -------------------------------------------------------------------------------- 
/tests/test_freq_of_alt.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import os 4 | 5 | from corpustools.freqalt.freq_of_alt import calc_freq_of_alt 6 | from corpustools.contextmanagers import (CanonicalVariantContext, 7 | MostFrequentVariantContext) 8 | 9 | def test_freqalt(specified_test_corpus): 10 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 11 | 12 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -15, phono_align=True) 13 | assert(result==(8,3,0.375)) 14 | 15 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -6, phono_align=True) 16 | assert(result==(8,0,0)) 17 | 18 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -6, phono_align=False) 19 | assert(result==(8,2,0.25)) 20 | 21 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -15, phono_align=False) 22 | assert(result==(8,7,0.875)) 23 | 24 | result = calc_freq_of_alt(c,'s','ʃ','edit_distance', max_rel = 2, phono_align=True) 25 | assert(result==(8,2,0.25)) 26 | 27 | result = calc_freq_of_alt(c,'s','ʃ','edit_distance', max_rel = 2, phono_align=False) 28 | assert(result==(8,2,0.25)) 29 | 30 | result = calc_freq_of_alt(c,'s','ʃ','phono_edit_distance', max_rel = 6, phono_align=True) 31 | assert(result==(8,2,0.25)) 32 | 33 | result = calc_freq_of_alt(c,'s','ʃ','phono_edit_distance', max_rel = 6, phono_align=False) 34 | assert(result==(8,2,0.25)) 35 | 36 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'token') as c: 37 | 38 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -15, phono_align=True) 39 | assert(result==(8,3,0.375)) 40 | 41 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -6, phono_align=True) 42 | assert(result==(8,2,0.25)) 43 | 44 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -15, phono_align=False) 45 | assert(result==(8,7,0.875)) 46 | 47 | result = calc_freq_of_alt(c,'s','ʃ','khorsi', min_rel = -6, phono_align=False) 
48 | assert(result==(8,3,0.375)) 49 | 50 | result = calc_freq_of_alt(c,'s','ʃ','edit_distance', max_rel = 4, phono_align=True) 51 | assert(result==(8,3,0.375)) 52 | 53 | result = calc_freq_of_alt(c,'s','ʃ','edit_distance', max_rel = 4, phono_align=False) 54 | assert(result==(8,6,0.75)) 55 | 56 | result = calc_freq_of_alt(c,'s','ʃ','phono_edit_distance', max_rel = 20, phono_align=True) 57 | assert(result==(8,3,0.375)) 58 | 59 | result = calc_freq_of_alt(c,'s','ʃ','phono_edit_distance', max_rel = 20, phono_align=False) 60 | assert(result==(8,6,0.75)) 61 | -------------------------------------------------------------------------------- /tests/test_gui_asgui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.asgui import ASDialog 3 | 4 | def test_asgui(qtbot, settings): 5 | dialog = ASDialog(None, settings, True) 6 | qtbot.addWidget(dialog) 7 | 8 | dialog.fileRadio.clicked.emit() 9 | assert(dialog.compType == 'file') 10 | 11 | dialog.oneDirectoryRadio.clicked.emit() 12 | assert(dialog.compType == 'one') 13 | assert(dialog.representationWidget.value() == 'mfcc') 14 | assert(dialog.distAlgWidget.value() == 'dtw') 15 | dialog.oneDirectoryWidget.pathEdit.setText('tests/data') 16 | kwargs = dialog.generateKwargs() 17 | assert(kwargs['type'] == 'one') 18 | assert(kwargs['rep'] == 'mfcc') 19 | assert(kwargs['match_func'] == 'dtw') 20 | assert(kwargs['num_filters'] == 26) 21 | assert(kwargs['num_coeffs'] == 12) 22 | assert(kwargs['freq_lims'] == (80, 7800)) 23 | assert(kwargs['query'] == 'tests/data') 24 | 25 | dialog.twoDirectoryRadio.clicked.emit() 26 | assert(dialog.compType == 'two') 27 | dialog.directoryOneWidget.pathEdit.setText('tests/data') 28 | dialog.directoryTwoWidget.pathEdit.setText('tests/data') 29 | dialog.representationWidget.click(1) 30 | assert(dialog.representationWidget.value() == 'envelopes') 31 | dialog.distAlgWidget.click(1) 32 | assert(dialog.distAlgWidget.value() == 'xcorr') 33 | kwargs = 
dialog.generateKwargs() 34 | assert(kwargs['type'] == 'two') 35 | assert(kwargs['rep'] == 'envelopes') 36 | assert(kwargs['match_func'] == 'xcorr') 37 | assert(kwargs['num_filters'] == 8) 38 | assert(kwargs['freq_lims'] == (80, 7800)) 39 | assert(kwargs['query'] == ['tests/data','tests/data']) 40 | 41 | -------------------------------------------------------------------------------- /tests/test_gui_config.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.config import * 3 | 4 | def test_preferences(qtbot, settings): 5 | dialog = PreferencesDialog(None, settings) 6 | qtbot.addWidget(dialog) 7 | 8 | dialog.accept() 9 | -------------------------------------------------------------------------------- /tests/test_gui_corpusgui.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from corpustools.gui.corpusgui import * 4 | #from corpustools.gui.widgets import AddTierDialog 5 | 6 | 7 | def test_add_abstract_tier(qtbot, specified_test_corpus): 8 | dialog = AddAbstractTierDialog(None,specified_test_corpus) 9 | qtbot.addWidget(dialog) 10 | 11 | def test_add_column(qtbot, specified_test_corpus): 12 | dialog = AddColumnDialog(None, specified_test_corpus) 13 | qtbot.addWidget(dialog) 14 | 15 | def test_add_count_column(qtbot, specified_test_corpus): 16 | dialog = AddCountColumnDialog(None, specified_test_corpus) 17 | qtbot.addWidget(dialog) 18 | 19 | def test_add_tier(qtbot, specified_test_corpus): 20 | dialog = AddTierDialog(None, specified_test_corpus) 21 | qtbot.addWidget(dialog) 22 | 23 | def test_add_word(qtbot, specified_test_corpus): 24 | dialog = AddWordDialog(None, specified_test_corpus) 25 | qtbot.addWidget(dialog) 26 | 27 | def test_attribute_summary(qtbot, specified_test_corpus): 28 | widget = AttributeSummary(specified_test_corpus) 29 | qtbot.addWidget(widget) 30 | 31 | def test_corpus_summary(qtbot, specified_test_corpus): 32 | dialog = CorpusSummary(None, 
specified_test_corpus) 33 | qtbot.addWidget(dialog) 34 | 35 | def test_inventory_summary(qtbot, specified_test_corpus): 36 | widget = InventorySummary(specified_test_corpus) 37 | qtbot.addWidget(widget) 38 | 39 | def test_remove_attribute(qtbot, specified_test_corpus): 40 | dialog = RemoveAttributeDialog(None, specified_test_corpus) 41 | qtbot.addWidget(dialog) 42 | -------------------------------------------------------------------------------- /tests/test_gui_fagui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.fagui import * 3 | 4 | def test_fagui(qtbot, specified_test_corpus, settings): 5 | dialog = FADialog(None, settings,specified_test_corpus, True) 6 | qtbot.addWidget(dialog) 7 | -------------------------------------------------------------------------------- /tests/test_gui_featuregui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.featuregui import * 3 | 4 | 5 | def test_feature_system_select(qtbot, settings): 6 | widget = FeatureSystemSelect(settings, None) 7 | qtbot.addWidget(widget) 8 | 9 | 10 | def test_add_feature(qtbot, spe_specifier): 11 | dialog = AddFeatureDialog(None, spe_specifier) 12 | qtbot.addWidget(dialog) 13 | 14 | def test_download_feature_matrix(qtbot, settings): 15 | dialog = DownloadFeatureMatrixDialog(None, settings) 16 | qtbot.addWidget(dialog) 17 | 18 | def test_edit_feature_matrix(qtbot, specified_test_corpus, settings): 19 | dialog = EditFeatureMatrixDialog(None, specified_test_corpus, settings) 20 | qtbot.addWidget(dialog) 21 | 22 | def test_edit_segment(qtbot, spe_specifier): 23 | dialog = EditSegmentDialog(None, spe_specifier) 24 | qtbot.addWidget(dialog) 25 | 26 | def test_export_feature_matrix(qtbot, specified_test_corpus): 27 | dialog = ExportFeatureSystemDialog(None, specified_test_corpus) 28 | qtbot.addWidget(dialog) 29 | 30 | def test_feature_matrix_manager(qtbot, settings): 31 | dialog = 
FeatureMatrixManager(None, settings) 32 | qtbot.addWidget(dialog) 33 | 34 | def test_system_from_csv(qtbot, settings): 35 | dialog = SystemFromCsvDialog(None, settings) 36 | qtbot.addWidget(dialog) 37 | -------------------------------------------------------------------------------- /tests/test_gui_flgui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.flgui import * 3 | 4 | def test_flgui(qtbot, specified_test_corpus, settings): 5 | dialog = FLDialog(None, settings,specified_test_corpus, True) 6 | qtbot.addWidget(dialog) 7 | -------------------------------------------------------------------------------- /tests/test_gui_iogui.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from corpustools.gui.iogui import * 4 | 5 | def test_import_corpus_dialog(qtbot, settings): 6 | dialog = LoadCorpusDialog(None,settings) 7 | qtbot.addWidget(dialog) 8 | 9 | def test_corpus_load_dialog(qtbot, settings): 10 | dialog = CorpusLoadDialog(None, settings) 11 | qtbot.addWidget(dialog) 12 | 13 | def test_corpus_download(qtbot, settings): 14 | dialog = DownloadCorpusDialog(None, settings) 15 | qtbot.addWidget(dialog) 16 | 17 | def test_corpus_export(qtbot, specified_test_corpus): 18 | dialog = ExportCorpusDialog(None, specified_test_corpus) 19 | qtbot.addWidget(dialog) 20 | 21 | def test_subset_corpus(qtbot, specified_test_corpus): 22 | dialog = SubsetCorpusDialog(None, specified_test_corpus) 23 | qtbot.addWidget(dialog) 24 | -------------------------------------------------------------------------------- /tests/test_gui_klgui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.klgui import * 3 | 4 | def test_klgui(qtbot, specified_test_corpus, settings): 5 | dialog = KLDialog(None, settings,specified_test_corpus, True) 6 | qtbot.addWidget(dialog) 7 | 
-------------------------------------------------------------------------------- /tests/test_gui_main.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.main import * 3 | 4 | def test_main_window(qtbot): 5 | window = MainWindow(qtbot._app) 6 | qtbot.addWidget(window) 7 | -------------------------------------------------------------------------------- /tests/test_gui_migui.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from corpustools.gui.migui import * 4 | 5 | def test_migui(qtbot, specified_test_corpus, settings): 6 | dialog = MIDialog(None, settings,specified_test_corpus, True) 7 | qtbot.addWidget(dialog) 8 | -------------------------------------------------------------------------------- /tests/test_gui_ndgui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.ndgui import * 3 | 4 | from corpustools.gui.models import CorpusModel 5 | 6 | def test_ndgui(qtbot, specified_test_corpus, settings): 7 | dialog = NDDialog(None, settings,CorpusModel(specified_test_corpus, settings), True) 8 | qtbot.addWidget(dialog) 9 | 10 | dialog.oneNonwordRadio.clicked.emit() 11 | assert(dialog.compType == 'nonword') 12 | dialog.fileRadio.clicked.emit() 13 | assert(dialog.compType == 'file') 14 | 15 | dialog.oneWordRadio.clicked.emit() 16 | assert(dialog.compType == 'one') 17 | dialog.algorithmWidget.click(0) 18 | assert(dialog.algorithmWidget.value() == 'edit_distance') 19 | assert(not dialog.typeTokenWidget.widgets[0].isEnabled()) 20 | dialog.oneWordEdit.setText('atema') 21 | kwargs = dialog.generateKwargs() 22 | assert(kwargs['algorithm'] == 'edit_distance') 23 | assert(kwargs['sequence_type'] == 'spelling') 24 | assert(kwargs['query'][0].spelling == 'atema') 25 | assert(kwargs['max_distance'] == 1) 26 | 27 | dialog.allwordsRadio.clicked.emit() 28 | assert(dialog.compType == 'all') 29 | 
dialog.algorithmWidget.click(0) 30 | assert(dialog.algorithmWidget.value() == 'edit_distance') 31 | assert(not dialog.typeTokenWidget.widgets[0].isEnabled()) 32 | dialog.columnEdit.setText('test') 33 | kwargs = dialog.generateKwargs() 34 | assert(kwargs['algorithm'] == 'edit_distance') 35 | assert(kwargs['sequence_type'] == 'spelling') 36 | assert(kwargs['max_distance'] == 1) 37 | assert(kwargs['attribute'].display_name == 'test') 38 | assert(kwargs['attribute'].att_type == 'numeric') 39 | 40 | -------------------------------------------------------------------------------- /tests/test_gui_ppgui.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.ppgui import * 3 | 4 | from corpustools.gui.models import CorpusModel 5 | 6 | def test_ppgui(qtbot, specified_test_corpus, settings): 7 | dialog = PPDialog(None, settings,CorpusModel(specified_test_corpus, settings), True) 8 | qtbot.addWidget(dialog) 9 | 10 | dialog.oneNonwordRadio.clicked.emit() 11 | assert(dialog.compType == 'nonword') 12 | dialog.fileRadio.clicked.emit() 13 | assert(dialog.compType == 'file') 14 | 15 | dialog.oneWordRadio.clicked.emit() 16 | assert(dialog.compType == 'one') 17 | dialog.algorithmWidget.widgets[0].setChecked(True) 18 | assert(dialog.algorithmWidget.value() == 'vitevitch') 19 | assert(dialog.typeTokenWidget.widgets[0].isEnabled()) 20 | dialog.oneWordEdit.setText('atema') 21 | kwargs = dialog.generateKwargs() 22 | assert(kwargs['algorithm'] == 'vitevitch') 23 | assert(kwargs['sequence_type'] == 'transcription') 24 | assert(kwargs['query'][0].spelling == 'atema') 25 | assert(kwargs['probability_type'] == 'bigram') 26 | 27 | dialog.allwordsRadio.clicked.emit() 28 | assert(dialog.compType == 'all') 29 | dialog.algorithmWidget.widgets[0].setChecked(True) 30 | assert(dialog.algorithmWidget.value() == 'vitevitch') 31 | dialog.columnEdit.setText('test') 32 | dialog.probabilityTypeWidget.click(1) 33 | kwargs = dialog.generateKwargs() 34 
| assert(kwargs['algorithm'] == 'vitevitch') 35 | assert(kwargs['sequence_type'] == 'transcription') 36 | assert(kwargs['probability_type'] == 'unigram') 37 | assert(kwargs['attribute'].display_name == 'test') 38 | assert(kwargs['attribute'].att_type == 'numeric') 39 | -------------------------------------------------------------------------------- /tests/test_gui_psgui.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from corpustools.gui.psgui import * 4 | 5 | def test_psgui(qtbot, specified_test_corpus, settings): 6 | dialog = PhonoSearchDialog(None, settings, specified_test_corpus, True) 7 | qtbot.addWidget(dialog) 8 | -------------------------------------------------------------------------------- /tests/test_gui_ssgui.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from corpustools.gui.ssgui import * 4 | 5 | from corpustools.gui.models import CorpusModel 6 | 7 | def test_ssgui(qtbot, specified_test_corpus, settings): 8 | dialog = SSDialog(None, settings, CorpusModel(specified_test_corpus, settings), True) 9 | qtbot.addWidget(dialog) 10 | 11 | dialog.twoWordRadio.clicked.emit() 12 | assert(dialog.compType == 'two') 13 | dialog.fileRadio.clicked.emit() 14 | assert(dialog.compType == 'file') 15 | 16 | dialog.clearCreated() 17 | 18 | dialog.oneWordRadio.clicked.emit() 19 | assert(dialog.compType == 'one') 20 | dialog.algorithmWidget.click(0) 21 | assert(dialog.algorithmWidget.value() == 'edit_distance') 22 | assert(not dialog.typeTokenWidget.widgets[0].isEnabled()) 23 | dialog.oneWordEdit.setText('atema') 24 | kwargs = dialog.generateKwargs() 25 | assert(kwargs['algorithm'] == 'edit_distance') 26 | assert(kwargs['sequence_type'] == 'spelling') 27 | assert(kwargs['query'].spelling == 'atema') 28 | assert(kwargs['min_rel'] is None) 29 | assert(kwargs['max_rel'] is None) 30 | 31 | dialog.algorithmWidget.click(2) 32 | assert(dialog.algorithmWidget.value() == 'khorsi') 33 | 
assert(dialog.typeTokenWidget.widgets[0].isEnabled()) 34 | dialog.wordOneEdit.setText('atema') 35 | dialog.wordTwoEdit.setText('mata') 36 | dialog.minEdit.setText('20') 37 | dialog.maxEdit.setText('21') 38 | kwargs = dialog.generateKwargs() 39 | assert(kwargs['algorithm'] == 'khorsi') 40 | assert(kwargs['sequence_type'] == 'spelling') 41 | assert(kwargs['query'][0].spelling == 'atema', kwargs['query'][0].spelling == 'mata') 42 | assert(kwargs['min_rel'] == 20) 43 | assert(kwargs['max_rel'] == 21) 44 | assert(kwargs['type_token'] == 'type') 45 | 46 | dialog.algorithmWidget.click(1) 47 | assert(dialog.algorithmWidget.value() == 'phono_edit_distance') 48 | assert(not dialog.typeTokenWidget.widgets[0].isEnabled()) 49 | dialog.wordOneEdit.setText('mata') 50 | dialog.wordTwoEdit.setText('atema') 51 | dialog.minEdit.setText('blahhhh') 52 | dialog.maxEdit.setText('blahhhh2') 53 | kwargs = dialog.generateKwargs() 54 | assert(kwargs['algorithm'] == 'phono_edit_distance') 55 | assert(kwargs['sequence_type'] == 'transcription') 56 | assert(kwargs['query'][0].spelling == 'mata', kwargs['query'][0].spelling == 'atema') 57 | assert(kwargs['min_rel'] is None) 58 | assert(kwargs['max_rel'] is None) 59 | 60 | -------------------------------------------------------------------------------- /tests/test_gui_views.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.views import * 3 | from corpustools.gui.models import CorpusModel 4 | 5 | def test_discourse_view(qtbot): 6 | widget = DiscourseView() 7 | qtbot.addWidget(widget) 8 | 9 | def test_lexicon_view(qtbot, unspecified_test_corpus, settings): 10 | widget = LexiconView() 11 | model = CorpusModel(unspecified_test_corpus, settings) 12 | qtbot.addWidget(widget) 13 | qtbot.addWidget(model) 14 | 15 | widget.setModel(model) 16 | widget.search() 17 | assert(len(widget.table.selectionModel().selectedRows()) == 0) 18 | widget.searchField.setText('ma') 19 | widget.search() 20 | 
assert(len(widget.table.selectionModel().selectedRows()) == 1) 21 | assert(widget.table.selectionModel().selectedRows()[0].row() == 0) 22 | widget.search() 23 | assert(len(widget.table.selectionModel().selectedRows()) == 1) 24 | assert(widget.table.selectionModel().selectedRows()[0].row() == 2) 25 | widget.searchField.setText('matemma') 26 | widget.search() 27 | assert(len(widget.table.selectionModel().selectedRows()) == 0) 28 | 29 | w = model.wordObject(0) 30 | widget.highlightType(w) 31 | assert(len(widget.table.selectionModel().selectedRows()) == 1) 32 | assert(widget.table.selectionModel().selectedRows()[0].row() == 0) 33 | 34 | 35 | 36 | 37 | #def test_phono_search_results(): 38 | # widget = PhonoSearchResults() 39 | #qtbot.addWidget(widget) 40 | 41 | def test_tree_widget(qtbot): 42 | widget = TreeWidget() 43 | qtbot.addWidget(widget) 44 | 45 | def test_table_widget(qtbot): 46 | widget = TableWidget() 47 | qtbot.addWidget(widget) 48 | 49 | def test_text_view(qtbot): 50 | widget = TextView() 51 | qtbot.addWidget(widget) 52 | 53 | def test_variant_view(qtbot, unspecified_test_corpus): 54 | w = unspecified_test_corpus['atema'] 55 | widget = VariantView(None, w) 56 | qtbot.addWidget(widget) 57 | -------------------------------------------------------------------------------- /tests/test_gui_windows.py: -------------------------------------------------------------------------------- 1 | 2 | from corpustools.gui.windows import * 3 | 4 | #def test_pctdialog(qtbot): 5 | # dialog = FunctionDialog() 6 | #qtbot.addWidget(dialog) 7 | 8 | def test_progress_dialog(qtbot): 9 | dialog = ProgressDialog(None) 10 | qtbot.addWidget(dialog) 11 | 12 | 13 | dialog.updateText('testing!') 14 | assert(dialog.labelText() == 'testing!\nTime left: Unknown') 15 | 16 | 17 | dialog.updateProgress(0) 18 | assert(dialog.startTime is not None) 19 | dialog.updateProgress(0.1) 20 | assert(len(dialog.rates) == 1) 21 | 22 | dialog.cancel() 23 | assert(dialog.labelText() == 'Canceling...') 24 | 
assert(not dialog.cancelButton.isEnabled()) 25 | 26 | dialog.reject() 27 | -------------------------------------------------------------------------------- /tests/test_io_binary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | from corpustools.corpus.io.binary import download_binary, save_binary, load_binary 5 | 6 | def test_save(export_test_dir, unspecified_test_corpus): 7 | save_path = os.path.join(export_test_dir, 'testsave.corpus') 8 | save_binary(unspecified_test_corpus,save_path) 9 | 10 | c = load_binary(save_path) 11 | 12 | assert(unspecified_test_corpus == c) 13 | 14 | 15 | #class BinaryCorpusLoadTest(unittest.TestCase): 16 | #def setUp(self): 17 | #self.example_path = os.path.join(TEST_DIR,'example.corpus') 18 | 19 | #def test_load(self): 20 | #return 21 | #if not os.path.exists(TEST_DIR): 22 | #return 23 | #c = load_binary(self.example_path) 24 | 25 | #example_c = create_unspecified_test_corpus() 26 | 27 | #self.assertEqual(c,example_c) 28 | 29 | #class BinaryFeatureMatrixSaveTest(unittest.TestCase): 30 | #def setUp(self): 31 | #self.basic_path = os.path.join(TEST_DIR,'test_feature_matrix.txt') 32 | #self.basic_save_path = os.path.join(TEST_DIR,'basic.feature') 33 | #self.missing_segment_path = os.path.join(TEST_DIR,'test_feature_matrix_missing_segment.txt') 34 | #self.missing_save_path = os.path.join(TEST_DIR,'missing_segments.feature') 35 | 36 | #def test_save(self): 37 | #if not os.path.exists(TEST_DIR): 38 | #return 39 | #fm = load_feature_matrix_csv('test',self.basic_path,',') 40 | #save_binary(fm,self.basic_save_path) 41 | #saved_fm = load_binary(self.basic_save_path) 42 | #self.assertEqual(fm,saved_fm) 43 | 44 | #fm = load_feature_matrix_csv('test',self.missing_segment_path,',') 45 | #save_binary(fm,self.missing_save_path) 46 | #saved_fm = load_binary(self.missing_save_path) 47 | #self.assertEqual(fm,saved_fm) 48 | 
-------------------------------------------------------------------------------- /tests/test_io_csv.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import os 4 | 5 | from corpustools.corpus.io.csv import (load_corpus_csv, export_corpus_csv, inspect_csv, 6 | load_feature_matrix_csv, export_feature_matrix_csv) 7 | 8 | from corpustools.exceptions import DelimiterError 9 | 10 | from corpustools.corpus.classes import (Word, Corpus, FeatureMatrix) 11 | 12 | def test_inspect_example(csv_test_dir): 13 | example_path = os.path.join(csv_test_dir, 'example.txt') 14 | atts, coldelim = inspect_csv(example_path) 15 | assert(coldelim == ',') 16 | for a in atts: 17 | if a.name == 'frequency': 18 | assert(a.attribute.att_type == 'numeric') 19 | elif a.name == 'transcription': 20 | assert(a.attribute.att_type == 'tier') 21 | assert(a.delimiter == '.') 22 | elif a.name == 'spelling': 23 | assert(a.attribute.att_type == 'spelling') 24 | 25 | 26 | def test_corpus_csv(csv_test_dir, unspecified_test_corpus): 27 | example_path = os.path.join(csv_test_dir, 'example.txt') 28 | with pytest.raises(DelimiterError): 29 | load_corpus_csv('example',example_path,delimiter='\t') 30 | #with pytest.raises(DelimiterError): 31 | # load_corpus_csv('example',example_path,delimiter=',') 32 | 33 | 34 | c = load_corpus_csv('example',example_path,delimiter=',') 35 | 36 | assert(isinstance(c, Corpus)) 37 | assert(c == unspecified_test_corpus) 38 | 39 | 40 | #def test_load_with_fm(self): 41 | #c = load_transcription_corpus('test',self.transcription_path,' ', 42 | #['-','=','.'],trans_delimiter='.', 43 | #feature_system_path = self.full_feature_matrix_path) 44 | 45 | #self.assertEqual(c.lexicon.specifier,load_binary(self.full_feature_matrix_path)) 46 | 47 | #self.assertEqual(c.lexicon['cab'].frequency, 1) 48 | 49 | #self.assertEqual(c.lexicon.check_coverage(),[]) 50 | 51 | #c = load_transcription_corpus('test',self.transcription_path,' ', 52 | 
#['-','=','.'],trans_delimiter='.', 53 | #feature_system_path = self.missing_feature_matrix_path) 54 | 55 | #self.assertEqual(c.lexicon.specifier,load_binary(self.missing_feature_matrix_path)) 56 | 57 | #self.assertEqual(sorted(c.lexicon.check_coverage()),sorted(['b','c','d'])) 58 | 59 | 60 | def test_basic_feature_matrix(features_test_dir): 61 | basic_path = os.path.join(features_test_dir, 'test_feature_matrix.txt') 62 | 63 | with pytest.raises(DelimiterError): 64 | load_feature_matrix_csv('test',basic_path,' ') 65 | 66 | fm = load_feature_matrix_csv('test',basic_path,',') 67 | 68 | assert(fm.name == 'test') 69 | assert(fm['a','feature1'] == '+') 70 | 71 | def test_missing_value(features_test_dir): 72 | missing_value_path = os.path.join(features_test_dir, 'test_feature_matrix_missing_value.txt') 73 | fm = load_feature_matrix_csv('test',missing_value_path,',') 74 | 75 | assert(fm['d','feature2'] == 'n') 76 | 77 | def test_extra_feature(features_test_dir): 78 | extra_feature_path = os.path.join(features_test_dir, 'test_feature_matrix_extra_feature.txt') 79 | fm = load_feature_matrix_csv('test',extra_feature_path,',') 80 | 81 | with pytest.raises(KeyError): 82 | fm.__getitem__(('a','feature3')) 83 | 84 | 85 | def test_stressed(csv_test_dir): 86 | stressed_path = os.path.join(csv_test_dir, 'stressed.txt') 87 | ats,_ = inspect_csv(stressed_path, coldelim = ',') 88 | print(ats) 89 | ats[1].number_behavior = 'stress' 90 | c = load_corpus_csv('stressed',stressed_path,',', ats) 91 | assert(c.inventory['uw'].symbol == 'uw') 92 | assert(c.inventory.stresses == {'1': set(['uw','iy']), 93 | '0': set(['uw','iy','ah'])}) 94 | -------------------------------------------------------------------------------- /tests/test_io_text.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import os 4 | 5 | from corpustools.corpus.io.text_spelling import (load_discourse_spelling, 6 | load_directory_spelling, 7 | 
inspect_discourse_spelling, 8 | export_discourse_spelling) 9 | from corpustools.corpus.io.text_transcription import (load_discourse_transcription, 10 | load_directory_transcription, 11 | inspect_discourse_transcription, 12 | export_discourse_transcription) 13 | 14 | from corpustools.exceptions import DelimiterError 15 | 16 | from corpustools.corpus.classes import (Word, Corpus, FeatureMatrix, Discourse) 17 | 18 | from corpustools.utils import generate_discourse 19 | 20 | def test_export_spelling(export_test_dir, unspecified_test_corpus): 21 | d = generate_discourse(unspecified_test_corpus) 22 | export_path = os.path.join(export_test_dir, 'test_export_spelling.txt') 23 | export_discourse_spelling(d, export_path, single_line = False) 24 | 25 | d2 = load_discourse_spelling('test', export_path) 26 | for k in unspecified_test_corpus.keys(): 27 | assert(d2.lexicon[k].spelling == unspecified_test_corpus[k].spelling) 28 | assert(d2.lexicon[k].frequency == unspecified_test_corpus[k].frequency) 29 | 30 | def test_export_transcription(export_test_dir, unspecified_test_corpus): 31 | d = generate_discourse(unspecified_test_corpus) 32 | export_path = os.path.join(export_test_dir, 'test_export_transcription.txt') 33 | export_discourse_transcription(d, export_path, single_line = False) 34 | 35 | d2 = load_discourse_transcription('test', export_path) 36 | words = sorted([x for x in unspecified_test_corpus], key = lambda x: x.transcription) 37 | words2 = sorted([x for x in d2.lexicon], key = lambda x: x.transcription) 38 | for i,w in enumerate(words): 39 | w2 = words2[i] 40 | assert(w.transcription == w2.transcription) 41 | assert(w.frequency == w2.frequency) 42 | 43 | def test_load_spelling_no_ignore(text_test_dir): 44 | spelling_path = os.path.join(text_test_dir, 'test_text_spelling.txt') 45 | 46 | c = load_discourse_spelling('test',spelling_path) 47 | 48 | assert(c.lexicon['ab'].frequency == 2) 49 | 50 | 51 | def test_load_spelling_ignore(text_test_dir): 52 | spelling_path = 
os.path.join(text_test_dir, 'test_text_spelling.txt') 53 | a = inspect_discourse_spelling(spelling_path) 54 | a[0].ignored_characters = set(["'",'.']) 55 | c = load_discourse_spelling('test',spelling_path, a) 56 | 57 | assert(c.lexicon['ab'].frequency == 3) 58 | assert(c.lexicon['cabd'].frequency == 1) 59 | 60 | def text_test_dir(text_test_dir): 61 | transcription_path = os.path.join(text_test_dir, 'test_text_transcription.txt') 62 | with pytest.raises(DelimiterError): 63 | load_discourse_transcription('test', 64 | transcription_path," ",[], 65 | trans_delimiter = ',') 66 | 67 | c = load_discourse_transcription('test',transcription_path) 68 | 69 | assert(sorted(c.lexicon.inventory) == sorted(['#','a','b','c','d'])) 70 | 71 | def test_load_transcription_morpheme(text_test_dir): 72 | transcription_morphemes_path = os.path.join(text_test_dir, 'test_text_transcription_morpheme_boundaries.txt') 73 | ats = inspect_discourse_transcription(transcription_morphemes_path) 74 | ats[0].morph_delimiters = set('-=') 75 | c = load_discourse_transcription('test',transcription_morphemes_path, ats) 76 | 77 | assert(c.lexicon['cab'].frequency == 2) 78 | assert(str(c.lexicon['cab'].transcription) == 'c.a-b') 79 | 80 | -------------------------------------------------------------------------------- /tests/test_io_textgrids.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import os 4 | 5 | from corpustools.corpus.classes import Speaker 6 | 7 | from corpustools.corpus.io.helper import AnnotationType, Annotation, BaseAnnotation 8 | 9 | from corpustools.corpus.io.pct_textgrid import (textgrid_to_data,load_textgrid, 10 | guess_tiers) 11 | 12 | #def test_guess_tiers(textgrid_test_dir): 13 | # tg = load_textgrid(os.path.join(textgrid_test_dir,'phone_word.TextGrid')) 14 | # result = guess_tiers(tg) 15 | # assert(result[0] == ['word']) 16 | # assert(result[1] == ['phone']) 17 | # assert(result[2] == []) 18 | 19 | def 
test_basic(textgrid_test_dir): 20 | speaker = Speaker(None) 21 | path = os.path.join(textgrid_test_dir,'phone_word.TextGrid') 22 | data = textgrid_to_data(path, [AnnotationType('word','phone',None, anchor=True), 23 | AnnotationType('phone',None,None, base=True)]) 24 | expected_words = [] 25 | 26 | a = Annotation('') 27 | a.references.append('phone') 28 | a.begins.append(0) 29 | a.ends.append(1) 30 | expected_words.append(a) 31 | 32 | a = Annotation('a') 33 | a.references.append('phone') 34 | a.begins.append(1) 35 | a.ends.append(3) 36 | expected_words.append(a) 37 | 38 | a = Annotation('') 39 | a.references.append('phone') 40 | a.begins.append(3) 41 | a.ends.append(4) 42 | expected_words.append(a) 43 | assert(data['word']._list == expected_words) 44 | 45 | assert(data['phone']._list == [BaseAnnotation('#', 0, 0.25), 46 | BaseAnnotation('a', 0.25, 0.5), 47 | BaseAnnotation('b', 0.5, 0.75), 48 | BaseAnnotation('#', 0.75, 1)]) 49 | 50 | @pytest.mark.xfail 51 | def test_two_speakers(textgrid_test_dir): 52 | path = os.path.join(textgrid_test_dir,'2speakers.TextGrid') 53 | data = textgrid_to_data(path, [AnnotationType('Speaker 1 - word','Speaker 1 - phone',None, anchor=True, speaker = 'Speaker 1'), 54 | AnnotationType('Speaker 1 - phone',None,None, base=True, speaker = 'Speaker 1'), 55 | AnnotationType('Speaker 2 - word','Speaker 2 - phone',None, anchor=True, speaker = 'Speaker 2'), 56 | AnnotationType('Speaker 2 - phone',None,None, base=True, speaker = 'Speaker 2')]) 57 | data.collapse_speakers() 58 | print(data['word']._list) 59 | assert(data['word']._list == [{'label': '','token':{}, 'phone':(0,1)}, 60 | {'label': 'a','token':{}, 'phone':(1,3)}, 61 | {'label': 'b','token':{}, 'phone':(3,5)}, 62 | {'label': 'a','token':{}, 'phone':(5,7)}, 63 | {'label': 'c','token':{}, 'phone':(7,9)}, 64 | {'label': '','token':{}, 'phone':(9,10)}]) 65 | assert(data['phone']._list == [{'label':'', 'begin': 0, 'end': 0.1}, 66 | {'label':'a', 'begin': 0.1, 'end': 0.2}, 67 | {'label':'b', 
'begin': 0.2, 'end': 0.3}, 68 | {'label':'c', 'begin': 0.3, 'end': 0.4}, 69 | {'label':'d', 'begin': 0.4, 'end': 0.5}, 70 | {'label':'a', 'begin': 0.5, 'end': 0.6}, 71 | {'label':'b', 'begin': 0.6, 'end': 0.7}, 72 | {'label':'d', 'begin': 0.7, 'end': 0.8}, 73 | {'label':'e', 'begin': 0.8, 'end': 0.9}, 74 | {'label':'', 'begin': 0.9, 'end': 1}]) 75 | -------------------------------------------------------------------------------- /tests/test_kl.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sys 3 | import os 4 | 5 | from corpustools.kl.kl import KullbackLeibler as KL 6 | from corpustools.contextmanagers import (CanonicalVariantContext, 7 | MostFrequentVariantContext, 8 | WeightedVariantContext) 9 | 10 | 11 | def test_identical(specified_test_corpus): 12 | #Test 1, things that are identical 13 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 14 | seg1_entropy, seg2_entropy, distance, ur, is_spurious = KL(c, 's', 's','b') 15 | assert(distance == 0.0) 16 | assert(seg1_entropy == seg2_entropy) 17 | 18 | def test_allophones(specified_test_corpus): 19 | #Test 2, things are supposed to be allophones 20 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 21 | seg1_entropy, seg2_entropy, distance, ur, is_spurious = KL(c, 's', 'ʃ','b') 22 | assert(abs(distance - 0.15113518339295337) < 0.001) 23 | assert(abs(seg1_entropy - 0.035000140096702444) < 0.001) 24 | assert(abs(seg2_entropy - 0.06074393445793598) < 0.001) 25 | 26 | @pytest.mark.xfail 27 | def test_pseudo_allophones(specified_test_corpus): 28 | #Test 3, things that are allophones by coincidence 29 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 30 | seg1_entropy, seg2_entropy, distance, ur, is_spurious = KL(c, 's', 'ɑ','b') 31 | assert(abs(distance - 0.23231302100802534) < 0.001) 32 | assert(abs(seg1_entropy - 0.03500014009670246) < 0.001) 33 | 
assert(abs(seg2_entropy - 0.07314589775440267) < 0.001) 34 | #assertEqual(ur,sr)#both should be None, to be fixed with _features 35 | 36 | def test_default(specified_test_corpus): 37 | #Test 4, things that have no assumed relationship 38 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 39 | seg1_entropy, seg2_entropy, distance, ur, is_spurious = KL(c, 's', 'm','b') 40 | assert(abs(distance - 0.14186314747884132) < 0.001) 41 | assert(abs(seg1_entropy - 0.035000140096702444) < 0.001) 42 | assert(abs(seg2_entropy - 0.06074393445793598) < 0.001) 43 | 44 | def test_error(specified_test_corpus): 45 | #Test 5, things not in the corpus 46 | with pytest.raises(ValueError): 47 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 48 | KL(c, 's', '!','') 49 | 50 | 51 | -------------------------------------------------------------------------------- /tests/test_mutual_information.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import os 4 | 5 | from corpustools.mutualinfo.mutual_information import pointwise_mi, all_mis 6 | from corpustools.contextmanagers import (CanonicalVariantContext, 7 | MostFrequentVariantContext, 8 | WeightedVariantContext) 9 | 10 | def test_pointwise_mi(unspecified_test_corpus): 11 | with CanonicalVariantContext(unspecified_test_corpus, 'transcription', 'type') as c: 12 | calls = [ 13 | ({'corpus_context': c, 14 | 'query':('e', 'm')}, 2.7319821866519507), 15 | ({'corpus_context': c, 16 | 'query':('t', 'n'), 17 | 'in_word':True}, 0.5849625007211564), 18 | ({'corpus_context': c, 19 | 'query':('e', 'm'), 20 | 'halve_edges':True}, 2.7319821866519507) 21 | 22 | ] 23 | 24 | for c,v in calls: 25 | result = pointwise_mi(**c) 26 | assert(abs(result-v) < 0.0001) 27 | 28 | #with CanonicalVariantContext(unspecified_test_corpus, 'spelling', 'type') as c: 29 | # result = pointwise_mi(c, query = ('t', 'a')) 30 | # assert(result == 0) 31 | 
-------------------------------------------------------------------------------- /tests/test_neighborhood_density.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import os 4 | 5 | from corpustools.corpus.classes import Word 6 | 7 | from corpustools.neighdens.neighborhood_density import (neighborhood_density, 8 | find_mutation_minpairs) 9 | 10 | from corpustools.contextmanagers import (CanonicalVariantContext, 11 | MostFrequentVariantContext, 12 | WeightedVariantContext) 13 | 14 | def test_basic_corpus_nd(specified_test_corpus): 15 | calls = [({'query':specified_test_corpus.find('mata'), 16 | 'max_distance':1},1.0), 17 | ({'query':specified_test_corpus.find('nata'), 18 | 'max_distance':2},3.0), 19 | ({'query':specified_test_corpus.find('mata'), 20 | 'algorithm':'phono_edit_distance', 21 | 'max_distance':3},1.0)] 22 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 23 | for kwargs,v in calls: 24 | result = neighborhood_density(c, **kwargs) 25 | assert(abs(result[0]-v) < 0.0001) 26 | 27 | 28 | def test_basic_corpus_mutation_minpairs(specified_test_corpus): 29 | calls = [({'query':Word(**{'transcription': ['s', 'ɑ', 't', 'ɑ']}), 30 | },2)] 31 | 32 | with CanonicalVariantContext(specified_test_corpus, 'transcription', 'type') as c: 33 | for kwargs,v in calls: 34 | result = find_mutation_minpairs(c, **kwargs) 35 | assert(result[0] == v) 36 | assert(sorted(result[1]) == sorted(['n.ɑ.t.ɑ', 'm.ɑ.t.ɑ'])) 37 | 38 | 39 | # def test_neighborhood_density_graph(specified_test_corpus): 40 | # calls = [({'corpus': specified_test_corpus, 41 | # 'query':specified_test_corpus.find('mata'), 42 | # 'max_distance':1},1.0)] 43 | 44 | # for c,v in calls: 45 | # result = neighborhood_density_graph(**c) 46 | # assert(result[0] == v) 47 | # assert(result[1] == ['n.ɑ.t.ɑ']) 48 | -------------------------------------------------------------------------------- /tests/test_phono_aligner.py: 
# --------------------------------------------------------------------------------
# (empty file)
# --------------------------------------------------------------------------------
# /tests/test_phono_edit_distance.py:
# --------------------------------------------------------------------------------
# (empty file)
# --------------------------------------------------------------------------------
# /tests/test_phonosearch.py:
# --------------------------------------------------------------------------------

import sys
import os

from corpustools.phonosearch import phonological_search
from corpustools.corpus.classes import EnvironmentFilter, Environment


def test_non_minimal_pair_corpus_minpair(unspecified_test_corpus):
    """Search for 'n' before '#' and compare the first hit with an expected Environment."""
    envs = [EnvironmentFilter(['n'], ['#'])]
    results = phonological_search(unspecified_test_corpus, envs)
    print(results)
    # First environment stored in the second field of the first result.
    e = results[0][1][0]
    print(e.middle, e.position, e.lhs, e.rhs)
    expected_e = Environment('n', 1, ('#',))
    print(expected_e.middle, expected_e.position, expected_e.lhs, expected_e.rhs)
    assert e == expected_e

# --------------------------------------------------------------------------------
# /tests/test_seg_select_dialog.py:
# --------------------------------------------------------------------------------
import unittest
import sys
import os
import pickle
from PyQt5.QtTest import QTest
from PyQt5.QtGui import *
base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0,base)
from corpustools.gui.widgets import *
from corpustools.gui.models import InventoryModel
app = QApplication(sys.argv)


class SingleSegmentSelectWidgetTest(unittest.TestCase):
    """GUI test case built around a SegmentPairSelectWidget."""

    def setUp(self):
        """Load the pickled corpus from the working directory and build the widget."""
        # NOTE: pickle.load can execute arbitrary code from the file; this must
        # only ever read the trusted 'lemurian.corpus' fixture.
        with open(os.path.join(os.getcwd(), 'lemurian.corpus'), 'rb') as f:
            self.test_corpus = pickle.load(f)
        self.settings = {'tooltips': False} # mock settings object
        self.test_corpus.inventoryModel = InventoryModel(self.test_corpus.inventory, copy_mode=True)
        self.dialog = SegmentPairSelectWidget(self.test_corpus.inventoryModel, None, True, True)
        #(inventory, parent = None, features = True, single_segment = False)

    def test_add_one(self):
        """Simulate a left click on the add-single button."""
        # NOTE(review): setUp never assigns self.addSingleButton, so this lookup
        # fails on the TestCase; presumably the button belongs to self.dialog --
        # confirm against SegmentPairSelectWidget before changing.
        QTest.mouseClick(self.addSingleButton, Qt.LeftButton)

# --------------------------------------------------------------------------------
# /tests/test_spontaneous_classes.py:
# --------------------------------------------------------------------------------

import pytest
import os
import sys

from corpustools.corpus.classes import (Word, Corpus, FeatureMatrix,
                                        Environment, EnvironmentFilter, Transcription,
                                        WordToken, Discourse, SpontaneousSpeechCorpus)


# The three tests below were originally all named ``test_init``. Each later
# ``def`` rebound the name, so only the last one was ever collected. The two
# shadowed tests now have distinct names; the one that already ran keeps
# ``test_init``.
# NOTE(review): the renamed tests have not been running; confirm they pass
# against the current WordToken/Discourse API.

def test_wordtoken_init():
    """WordToken takes spelling/transcription from its Word unless given explicitly."""
    word_type_only = {'begin':0,'end':1,'word':Word(**{'spelling':'a','transcription':['a','b']})}

    word_type_and = {'begin':0,'end':1,'spelling':'a2','transcription':['a','b2'],
                     'word':Word(**{'spelling':'a','transcription':['a','b']})}
    wt = WordToken(**word_type_only)
    assert wt.spelling == 'a'
    assert str(wt.transcription) == 'a.b'

    # Explicit token-level values are expected to win over the Word's values.
    wt = WordToken(**word_type_and)
    assert wt.spelling == 'a2'
    assert str(wt.transcription) == 'a.b2'


def test_duration():
    """Every token spanning one time unit reports a duration of 1."""
    word_tokens = [{'begin':0,'end':1,'spelling':'a','transcription':['a','b']},
                   {'begin':1,'end':2,'spelling':'c','transcription':['c','a','b']},
                   {'begin':2,'end':3,'spelling':'a','transcription':['a','b']},
                   {'begin':3,'end':4,'spelling':'d','transcription':['a','d']}]
    for wt in word_tokens:
        w = WordToken(**wt)
        assert w.duration == 1


def test_discourse_init():
    """Adding WordTokens built from Word objects to a Discourse must not raise."""
    word_tokens = [{'begin':0,'end':1,'word':Word(**{'spelling':'a','transcription':['a','b']})},
                   {'begin':1,'end':2,'word':Word(**{'spelling':'c','transcription':['c','a','b']})},
                   {'begin':2,'end':3,'word':Word(**{'spelling':'a','transcription':['a','b']})},
                   {'begin':3,'end':4,'word':Word(**{'spelling':'d','transcription':['a','d']})}]
    d = Discourse()
    for wt in word_tokens:
        d.add_word(WordToken(**wt))

    #assert(d[0].previous_token, None)
    #assert(d[1].previous_token, d[0])


def test_init():
    """Word-type frequencies are visible through a discourse stored in a corpus."""
    word_tokens = [{'begin':0,'end':1,'word':{'spelling':'a','transcription':['a','b']},'following_token_time':1},
                   {'begin':1,'end':2,'word':{'spelling':'c','transcription':['c','a','b']}, 'previous_token_time':0,'following_token_time':2},
                   {'begin':2,'end':3,'word':{'spelling':'a','transcription':['a','b']}, 'previous_token_time':1,'following_token_time':3},
                   {'begin':3,'end':4,'word':{'spelling':'d','transcription':['a','d']}, 'previous_token_time':2,}]
    d = Discourse()
    for wt in word_tokens:
        # Replace the plain dict with the lexicon's word object and count the token.
        w = d.lexicon.get_or_create_word(**wt['word'])
        w.frequency += 1
        wt['word'] = w
        d.add_word(WordToken(**wt))
    corpus = SpontaneousSpeechCorpus('','')

    corpus.add_discourse(d)

    # Re-read the discourse through the corpus, keyed by the empty name.
    d = corpus.discourses['']

    assert d[0].wordtype.frequency == 2
    assert d[1].wordtype.frequency == 1

# --------------------------------------------------------------------------------
# /tests/test_string_similarity.py:
# --------------------------------------------------------------------------------
# (empty file)
# --------------------------------------------------------------------------------
# /tests/test_utils.py:
# --------------------------------------------------------------------------------

from corpustools.utils import generate_discourse

from corpustools.corpus.classes import Discourse


def test_discourse_generate(unspecified_test_corpus):
    """generate_discourse returns a Discourse instance for the test corpus."""
    d = generate_discourse(unspecified_test_corpus)
    assert isinstance(d, Discourse)

# --------------------------------------------------------------------------------