├── .gitignore ├── 0_README.txt ├── LICENSE ├── README.md ├── config_templates ├── GlottExtractor.cfg ├── MFCCExtractor.cfg ├── SPTKExtractor.cfg └── glott_config_template.txt ├── doc ├── Makefile └── source │ ├── STORAGE │ ├── extending.rst │ ├── modules │ │ ├── corpus_utterance.rst │ │ └── index.rst │ ├── python_install.rst │ └── queries.rst │ ├── acoustic_modelling_scripts.rst │ ├── basic.rst │ ├── complete_recipes.rst │ ├── conf.py │ ├── conf.py.initial │ ├── gold_standard_recipes.rst │ ├── index.rst │ ├── initial_voice.rst │ ├── overview.rst │ ├── python_install.rst │ ├── refinements.rst │ ├── s4a.png │ ├── setting_up.rst │ └── todo_list.rst ├── make_release.sh ├── recipes ├── lex_01_nn.cfg ├── lex_02_nn.cfg ├── naive_01_hts.cfg ├── naive_01_nn.cfg ├── naive_SIMP2.cfg.py └── world_extraction.cfg ├── rules └── en │ └── textnorm │ ├── rules │ ├── abbrevlist │ ├── abbrevmap │ ├── hyphenated │ ├── num_excp │ └── tldlist │ └── scripts │ ├── filter_text1.pl │ ├── filter_text_gigaword.pl │ ├── final_cleanup.pl │ ├── normalize_puncts.pl │ ├── numproc │ ├── remove_dups.pl │ ├── ted2ascii_puncts.pl │ ├── tokenize_words.pl │ └── utf2ascii_puncts.pl ├── scripts ├── acoustic_model_training │ ├── steps │ │ ├── build_MDL_trees.sh │ │ ├── clone_monophone_to_fullcontext.sh │ │ ├── increase_mixture_components.sh │ │ ├── initial_alignment.sh │ │ ├── make_alignment_lexicon.sh │ │ ├── make_alignment_monophone.sh │ │ ├── make_engine_model.sh │ │ ├── make_engine_model.sh.OLD │ │ ├── make_monophone.sh │ │ ├── realign.sh │ │ ├── realign_to_labels.sh │ │ ├── reestimate.sh │ │ ├── reestimate_alignment_model.sh │ │ ├── set_up_data.py │ │ ├── subset_data.py │ │ └── untie_models.sh │ ├── subrecipes │ │ ├── config_template │ │ │ ├── quick_voicebuild_01.cfg │ │ │ ├── quick_voicebuild_01.cfg.OLD │ │ │ ├── standard_alignment.cfg │ │ │ ├── standard_voicebuild.cfg │ │ │ ├── standard_voicebuild.cfg.OLD │ │ │ └── standard_voicebuild_STRAIGHT.cfg │ │ └── script │ │ │ ├── 
extend_standard_alignment.sh │ │ │ ├── extend_standard_alignment_external_lexicon.sh │ │ │ ├── quick_voicebuild_01.sh │ │ │ ├── quick_voicebuild_01.sh.OLD │ │ │ ├── quick_voicebuild_02.sh │ │ │ ├── standard_alignment.sh │ │ │ ├── standard_voicebuild.sh │ │ │ └── standard_voicebuild.sh.OLD │ └── util │ │ ├── filter_questions.py │ │ ├── make_config.sh │ │ ├── make_proto_hsmm.pl │ │ ├── make_proto_hsmm.py │ │ ├── make_proto_skip_hsmm.py │ │ ├── separate_trees.py │ │ ├── setup_directory.sh │ │ ├── update_train_list.py │ │ └── util.py ├── batch_align_and_utt.py ├── batch_speak.py ├── default │ ├── __init__.py │ ├── const.py │ └── fnames.py ├── download_tundra_subset.sh ├── main │ ├── Corpus.py │ ├── Resources.py │ ├── Utterance.py │ ├── Voice.py │ └── __init__.py ├── merlin_interface │ ├── feed_forward_dnn_ossian_acoustic_model.conf │ └── feed_forward_dnn_ossian_duration_model.conf ├── naive │ ├── __init__.py │ ├── naive_util.py │ ├── nudge_boundaries.py │ ├── train_static_vsm.py │ ├── train_static_vsm_direct-to-disk.py │ └── train_static_vsm_gensim.py ├── processors │ ├── AcousticModel.py │ ├── Aligner.py │ ├── BasicTokenisers.py │ ├── EnglishGoldProcessors.py │ ├── FeatureDumper.py │ ├── FeatureExtractor.py │ ├── GenericProcessor.py │ ├── IndianScriptLatiniser.py │ ├── Lexicon.py │ ├── Lexicon.py.20170605 │ ├── MiscProcessor.py │ ├── NN.py │ ├── NN.py.MSCOLD │ ├── NodeEnricher.py │ ├── NodeRemover.py │ ├── NodeSplitter.py │ ├── PhoneClassifier.py │ ├── Phonetisers.py │ ├── PhraseMaker.py │ ├── ProminenceLabeller.py │ ├── SKLProcessors.py │ ├── SimpleChildAdder.py │ ├── Syllabifier.py │ ├── Tokenisers.py │ ├── UtteranceProcessor.py │ ├── VSMTagger.py │ ├── WaveSynthesiser.py │ └── __init__.py ├── setup_tools.sh ├── shell │ ├── combine_lsf_and_gain.pl │ ├── compose_glott_features.pl │ ├── do_align_multisyn_lexicon │ ├── make_hts_training_lists.sh │ ├── setup_alignment.sh │ ├── split_cmp.py │ ├── train_backend.sh │ ├── train_cart.R │ └── window.pl ├── speak.py ├── 
test.py ├── tools │ ├── __init__.py │ └── morfessor.py ├── train.py └── util │ ├── Environment.py │ ├── LookupTable.py │ ├── NodeProcessors.py │ ├── TTS.py │ ├── Wavelets.py │ ├── __init__.py │ ├── acoustic_feats.py │ ├── acoustic_stats.py │ ├── append_acoustic_model.py │ ├── cwt_utils.py │ ├── discretise_vsm.py │ ├── draw_hts_tree_simple.py │ ├── gpu_lock.py │ ├── indian2latin.py │ ├── make_corpus_with_clickable_audio.py │ ├── make_hts_training_lists.sh │ ├── penn_treebank_tokenizer.sed │ ├── print_proms.py │ ├── speech_manip.py │ ├── store_merlin_model.py │ ├── submit.sh │ ├── trim_silences.py │ ├── uttsdata_to_text.py │ └── xpath_extensions_for_ossian.py ├── test └── txt │ ├── english.txt │ ├── hindi.txt │ ├── romanian.txt │ └── romanian2.txt ├── test_release.sh └── tools └── patch ├── ossian_engine.patch ├── ossian_hts.patch └── sequitur_compilation.patch /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /0_README.txt: -------------------------------------------------------------------------------- 1 | 2 | ==================================== 3 | Ossian Speech Synthesis Toolkit 4 | Simple4All Consortium 5 | Copyright (c) 2013-2014 6 | All Rights Reserved. 
7 | ==================================== 8 | 9 | THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK 10 | DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 11 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT 12 | SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE 13 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 15 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 16 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 17 | THIS SOFTWARE. 18 | 19 | Authors: Simple4All consortium members 20 | Date: November 2013-2014 21 | Contact: owatts@staffmail.ed.ac.uk 22 | 23 | 24 | Please point an HTML browser at ./doc/build/html/index.html for some tips and pointers. -------------------------------------------------------------------------------- /config_templates/GlottExtractor.cfg: -------------------------------------------------------------------------------- 1 | LPC_ORDER = 30 2 | LPC_ORDER_SOURCE = 10 3 | HNR_CHANNELS = 5 4 | 5 | F0_MIN = 50.0 6 | F0_MAX = 360.0 7 | 8 | 9 | 10 | POSTFILTER_COEFFICIENT = 0.5 11 | 12 | 13 | HPFILTER_FILENAME = hp_16khz 14 | GLOTTAL_PULSE_NAME = pulse 15 | 16 | -------------------------------------------------------------------------------- /config_templates/MFCCExtractor.cfg: -------------------------------------------------------------------------------- 1 | TARGETRATE = 20000.0 2 | TARGETKIND = MFCC_E 3 | SOURCEFORMAT = NIST 4 | ENORMALISE = F 5 | SAVECOMPRESSED = T 6 | SOURCEKIND = WAVEFORM 7 | SAVEWITHCRC = T 8 | USEHAMMING = T 9 | WINDOWSIZE = 100000.0 10 | CEPLIFTER = 22 11 | NUMCHANS = 26 12 | NUMCEPS = 12 13 | PREEMCOEF = 0.97 -------------------------------------------------------------------------------- /config_templates/SPTKExtractor.cfg: -------------------------------------------------------------------------------- 1 | order = 12 2 | 3 | static_window 
= 1.0 4 | delta_window = -0.5 0.0 0.5 5 | delta_delta_window = 1.0 -2.0 1.0 6 | 7 | framelength = 400 ## in samples (i.e. 400 = 25 ms at 16000 hz sampling) 8 | frameshift = 80 ## in samples ( " 80 = 5 ms " ) 9 | fft_length = 512 10 | 11 | f0_method = swipe ## 0 = swipe, 1 = rapt 12 | lo_f0=30 13 | hi_f0=500 14 | 15 | frameshift_ms = 5 16 | target_sample_rate = 16000 -------------------------------------------------------------------------------- /doc/source/STORAGE/extending.rst: -------------------------------------------------------------------------------- 1 | 2 | Extending Ossian with your own recipes and processors 3 | ===================================================== 4 | 5 | Note on the recipes included here 6 | --------------------------------- 7 | 8 | 9 | None of the recipes mentioned is intended to be in any way definitive -- for a given 10 | recipe, there are probably lots of different imaginable alternative pipelines of 11 | processors that will produce the same result. 12 | 13 | One major possible area of variation is processor granularity -- rather than, e.g. the 14 | separate segment_adder, silence_adder and endsilence_adder processors in demo05, it 15 | would be straightforward to write a single processor that performs the functions of all 16 | three. There has been a lot of indecision about a reasonable level of granularity during 17 | development. Trade-off between making elements so specific that they can’t be 18 | reconfigured, and making a recipe so long with so many elements that it can’t be 19 | understood. Personal taste; also depends on your role as to what level of granularity 20 | is a good one. If you just want to run existing recipes on new data, .... 
21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /doc/source/STORAGE/modules/corpus_utterance.rst: -------------------------------------------------------------------------------- 1 | Corpus 2 | ------ 3 | 4 | Data processed by Ossian is held in a ``corpus`` object. 5 | 6 | 7 | 8 | 9 | .. autoclass:: main.Corpus.Corpus 10 | :members: 11 | :undoc-members: 12 | 13 | 14 | 15 | 16 | Utterance 17 | --------- 18 | 19 | Utterance struct held herecd 20 | 21 | 22 | Various inherited methods are specialised to act on the ``data`` attribute. These 'rerouted' 23 | methods are not documented here. 24 | 25 | .. todo:: Is there a more proper way to do this? 26 | 27 | 28 | .. autoclass:: main.Utterance.Utterance 29 | :members: 30 | :undoc-members: 31 | :exclude-members: iterdescendants, pretty_print, remove, get, set, xpath, insert, has_attribute 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /doc/source/STORAGE/modules/index.rst: -------------------------------------------------------------------------------- 1 | Ossian modules 2 | ============== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | corpus_utterance 8 | -------------------------------------------------------------------------------- /doc/source/STORAGE/python_install.rst: -------------------------------------------------------------------------------- 1 | 2 | .. note:: A couple of notes on making an intallation of Python that satisfies TASSAL's requirements. Made for Chee Yong, could be more generally useful documentation with some editing. 3 | 4 | ======================================= 5 | MAKING YOUR OWN PYTHON INSTALLATION 6 | ======================================= 7 | 8 | 9 | On Linux 10 | -------- 11 | 12 | Make a directory for your Python installation: 13 | 14 | MYPYTHON=~/temp/python ## your chosen directory might be e.g. 
/afs/inf.ed.ac.uk/group/cstr/projects/simple4all/malay/chee_yong_tools/python 15 | 16 | mkdir $MYPYTHON 17 | 18 | With a browser go to: 19 | 20 | http://www.activestate.com/activepython/downloads 21 | 22 | ... click on the link for Python version 2.7.2.5 for Linux (x86_64) to download the package. 23 | When you've done this, move the package to the current directory (or wherever you want to install it): 24 | 25 | mv ~/Downloads/ActivePython-2.7.2.5-linux-x86_64.tar.gz $MYPYTHON 26 | 27 | Unpack it: 28 | 29 | cd $MYPYTHON 30 | tar xvf ActivePython-2.7.2.5-linux-x86_64.tar.gz 31 | 32 | Use the installer provided to install: 33 | 34 | cd ActivePython-2.7.2.5-linux-x86_64 35 | ./install.sh 36 | 37 | When prompted to specify "Install directory: ", enter ".." to install to $MYPYTHON and type "y" when it asks for confirmation. 38 | 39 | This should have installed Python at $MYPYTHON/bin/python -- type it in the command 40 | line and check you get a Python prompt like this: 41 | 42 | 43 | [channings]owatts: $MYPYTHON/bin/python 44 | ActivePython 2.7.2.5 (ActiveState Software Inc.) based on 45 | Python 2.7.2 (default, Jun 24 2011, 11:24:26) 46 | [GCC 4.0.2 20051125 (Red Hat 4.0.2-8)] on linux2 47 | Type "help", "copyright", "credits" or "license" for more information. 48 | >>> 49 | 50 | If that works, type quit() to exit the interactive session. 
51 | 52 | 53 | Use these 3 commands to install numpy, configobj, and lxml (which are all required by TASSAL): 54 | 55 | $MYPYTHON/bin/pip install numpy 56 | $MYPYTHON/bin/pip install scipy 57 | $MYPYTHON/bin/pip install configobj 58 | $MYPYTHON/bin/pip install lxml 59 | 60 | 61 | installation of these on mac with macports 62 | ------------------------------------------- 63 | 64 | sudo port install py27-lxml 65 | sudo port install py27-configobj 66 | 67 | OR: 68 | 69 | sudo port install py27-pip 70 | 71 | ### 72 | 73 | pip-2.7 install scikit-learn 74 | 75 | ## off-topic: 76 | 77 | sudo port install R 78 | -------------------------------------------------------------------------------- /doc/source/STORAGE/queries.rst: -------------------------------------------------------------------------------- 1 | Queries 2 | ======= 3 | 4 | 5 | 6 | corpus object 7 | ------------- 8 | 9 | corpus -- contain utt objects, not filenames to avoid this in e.g. vsm tagger / dt proc.s 10 | 11 | for utt_name in speech_corpus: 12 | utterance = Utterance(utt_name) 13 | 14 | 15 | text and speech in copurs 16 | ------------------------- 17 | 18 | in vsmtagger.py: 19 | 20 | .. todo:: Add the text from the unspoken parts 21 | 22 | 23 | 24 | others 25 | ------- 26 | 27 | - separate train and voice 28 | 29 | - letter vsm uses cross-word contexts... 30 | 31 | - lowercasing should be separated from safetexting 32 | 33 | 34 | Document whole module like this: 35 | 36 | .. automodule:: naive.naive_util 37 | :members: 38 | 39 | Document functions like this: 40 | 41 | 42 | .. autofunction:: naive.naive_util.unicode_character_to_safetext 43 | 44 | 45 | 46 | utt mod 47 | -------- 48 | .. automodule:: main.Utterance 49 | :members: 50 | :undoc-members: 51 | 52 | 53 | TODO list 54 | --------- 55 | 56 | .. 
todolist:: -------------------------------------------------------------------------------- /doc/source/acoustic_modelling_scripts.rst: -------------------------------------------------------------------------------- 1 | ============================================ 2 | Scripts for acoustic model training 3 | ============================================ 4 | 5 | Ossian includes a collection of scripts for training acoustic models which in the normal course of things are used to train HMMs after speech coding and text analysis have been done. Infact, some of these scripts will have been called if the commands given in this documentation to build demonstration voices have been run. This collection of scripts can be found under: 6 | 7 | .. code-block:: bash 8 | 9 | ossian-v.?.?/scripts/acoustic_model_training/ 10 | 11 | A number of different subrecipes are available, and can be added to. These subrecipes specify different ways of training acoustic models, and are selected by the relevant processors of top-level recipes. E.g. the recipe ``ossian-v.1.2/recipes/naive.cfg`` configures an acoustic model which is to be trained using the subrecipe called ``quick_voicebuild_01``: 12 | 13 | .. code-block:: ini 14 | 15 | [acoustic_model] 16 | class = AcousticModel.AcousticModel 17 | acoustic_subrecipe = quick_voicebuild_01 18 | [[training_settings]] 19 | BINMOD = " -B " 20 | 21 | The scripts for these subrecipes are contained in ``ossian-v.?.?/scripts/acoustic_model_training/subrecipes/script``, and default configurations for them are in ``ossian-v.?.?/scripts/acoustic_model_training/subrecipes/config_templates``. The default configuration settings are overridden by top-level recipes by ``training_settings`` subsections: in the above excerpt from ``naive.cfg``, for example, the value of ``BINMOD`` is set to be ``" -B "`` instead of the default ``" "`` -- this means that acoustic models will be written out in HTK binary model format instead of the default ASCII. 
22 | 23 | 24 | Using the scripts without Ossian's text processing 25 | -------------------------------------------------- 26 | 27 | As well as using these scripts as part of an Ossian recipe, it is also possible to use them with already parameterised and annotated data (i.e. without using Ossian to do any speech coding or text analysis). This might be done to try out e.g. alternative TTS front-ends using acoustic models trained in a comparable way. 28 | 29 | With the environment variable $OSSIAN pointing to the top directory of an Ossian installation (called something like ``./ossian-v.1.2``), the following command line can be used to train an acoustic model from some acoustic feature files in ``$FEAT_DIRECTORY``, label files in ``$LABEL_DIRECTORY``, and the question file at ``$QUESTIONS``, and output a trained model under ``$OUTPUT``: 30 | 31 | .. code-block:: bash 32 | 33 | $OSSIAN/scripts/acoustic_model_training/subrecipes/script/standard_voicebuild.sh \ 34 | $FEAT_DIRECTORY $LABEL_DIRECTORY $QUESTIONS $OSSIAN/tools/bin/ $OUTPUT \ 35 | $OSSIAN/scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild.cfg 36 | 37 | 38 | .. comment:: $OSSIAN/script/standard_voicebuild.sh ~/temp/ossian-v.1.2/train/rm/speakers/rss_toy_demo/naive/cmp/ ~/temp/ossian-v.1.2/train/rm/speakers/rss_toy_demo/naive/lab/ ~/temp/ossian-v.1.2/train/rm/speakers/rss_toy_demo/naive/questions.hed ~/temp/ossian-v.1.2/tools/bin/ ~/temp/voicetest1/ ./config_template/standard_voicebuild.cfg 39 | 40 | Please modify ``standard_voicebuild.cfg`` or a copy of it to change default settings (e.g. 4 streams, 25 mel cepstral coefficients in the spectrum stream, etc.). ``standard_voicebuild_STRAIGHT.cfg`` is given for use with acoustic features like those used in the `Voice Cloning Toolkit `_. 
41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /doc/source/complete_recipes.rst: -------------------------------------------------------------------------------- 1 | New improved recipes 2 | ====================================== 3 | 4 | There are several recipes which build on and improve the ``naive`` one already described. Some examples are given here. 5 | 6 | 7 | ``naive_glott``: naive recipe with GlottHMM vocoder 8 | --------------------------------------------------- 9 | 10 | .. code-block:: bash 11 | 12 | ## Assuming that you want to start from scratch: 13 | rm -r ./train/rm/speakers/rss_toy_demo/naive_glott/ ./voices/rm/rss_toy_demo/naive_glott/ 14 | 15 | ## Train: 16 | python ./scripts/train.py -s rss_toy_demo -l rm -text wikipedia_10K_words naive_glott 17 | 18 | ## Synthesise: 19 | ./scripts/speak.py -l rm -s rss_toy_demo -o ./test/wav/romanian_toy_naive_glott.wav \ 20 | -play naive_glott ./test/txt/romanian.txt 21 | 22 | 23 | 24 | This is the same as the ``naive`` recipe but uses the high-quality vocoder `GlottHMM `_ for 25 | speech analysis and synthesis. 26 | 27 | ``naive_glott_prom``: wavelet-based prominence labelling 28 | -------------------------------------------------------- 29 | 30 | .. code-block:: bash 31 | 32 | ## Assuming that you want to start from scratch: 33 | rm -r ./train/rm/speakers/rss_toy_demo/naive_glott_prom/ ./voices/rm/rss_toy_demo/naive_glott_prom/ 34 | 35 | ## Train: 36 | python ./scripts/train.py -s rss_toy_demo -l rm -text wikipedia_10K_words naive_glott_prom 37 | 38 | ## Synthesise: 39 | ./scripts/speak.py -l rm -s rss_toy_demo -o ./test/wav/romanian_toy_naive_glott_prom.wav \ 40 | -play naive_glott_prom ./test/txt/romanian.txt 41 | 42 | 43 | 44 | This is the same as the ``naive_glott`` recipe but also makes use of an unsupervised 45 | representation of prominence similar to the one described `here `_. 
Extraction of the representation is based on wavelet transform-derived acoustic features and prediction makes use of vector space models of words and a decision 46 | tree classifier. 47 | 48 | Voices from non-alphabetic script data 49 | -------------------------------------------------------- 50 | 51 | A Hindi toy corpus (extracted from the IIIT Indic database available `here `_) is included to demonstrate parts of the recipe developed for the Simple4All 52 | Blizzard Challenge entry described in `this paper `_. 53 | The recipes ``blizzard_2014_naive_latinised`` and ``blizzard_2014_naive_latinised_syl`` incrementally introduce the naive alphabetisation and syllabification described in the paper. Due to the toy corpus's small size, the syllabification severely affects the quality of the speech. The recipe ``blizzard_2014_naive_latinised_glott`` adds 54 | the latinisation and GlottHMM vocoder: 55 | 56 | .. code-block:: bash 57 | 58 | ## Assuming that you want to start from scratch: 59 | rm -r ./train/hi/speakers/toy/blizzard_2014_naive_latinised_glott/ ./voices/hi/toy/blizzard_2014_naive_latinised_glott/ 60 | 61 | ## Train: 62 | python ./scripts/train.py -s toy -l hi -text wikipedia_10K_words blizzard_2014_naive_latinised_glott 63 | 64 | ## Synthesise: 65 | ./scripts/speak.py -l hi -s toy -o ./test/wav/hindi_naive_latinised_glott.wav \ 66 | -play blizzard_2014_naive_latinised_glott ./test/txt/hindi.txt 67 | 68 | A simpler recipe like the ``naive`` one can be used here for comparison: 69 | 70 | .. 
code-block:: bash 71 | 72 | ## Assuming that you want to start from scratch: 73 | rm -r ./train/hi/speakers/toy/naive/ ./voices/hi/toy/naive/ 74 | 75 | python ./scripts/train.py -s toy -l hi -text wikipedia_10K_words naive 76 | 77 | ./scripts/speak.py -l hi -s toy -o ./test/wav/hindi_naive.wav \ 78 | -play naive ./test/txt/hindi.txt 79 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. ossian documentation master file, created by 2 | sphinx-quickstart on Mon Nov 11 14:45:09 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Ossian documentation 7 | ================================== 8 | 9 | 10 | 11 | .. image:: s4a.png 12 | :height: 100px 13 | :width: 200 px 14 | :scale: 50 % 15 | :alt: alternate text 16 | :align: right 17 | 18 | Ossian is a collection of Python code for building text-to-speech (TTS) systems, with an 19 | emphasis on easing research into building TTS systems with minimal expert 20 | supervision. Work on it started with funding from the `EU FP7 Project Simple4All `_. 21 | 22 | 23 | 24 | A core idea of Ossian is that a lot of the work for making a Python TTS system 25 | can be done by using existing modules. For example, instead of writing a module for 26 | manipulating and querying utterance structures from scratch, we can use existing XML 27 | and  XPATH implementations. Instead implementing a decision tree learning from 28 | scratch, we can use simply 29 | design the tools to work with existing open source machine learning packages, with the 30 | obvious benefit that many different methods besides decision trees are implemented with 31 | a unified interface. By depending on relevant Python core or 3rd party packages, we aim 32 | to make the original code of Ossian as minimal as possible.    
33 | 34 | 35 | If you are interested only in running existing voices, please take a look at *Setting up* and *Basic operations*. 36 | If you plan to build voices using already-defined recipes, these will also be helpful. 37 | If you plan to extend existing recipes or write new ones, the *Tutorial* sections might be of use. 38 | 39 | 40 | The online version of this documentation `here `_ is often more up-to-date than the one included with releases of the code. 41 | 42 | Contents: 43 | 44 | .. toctree:: 45 | :maxdepth: 4 46 | 47 | setting_up 48 | 49 | basic 50 | complete_recipes 51 | 52 | gold_standard_recipes 53 | acoustic_modelling_scripts 54 | 55 | initial_voice 56 | refinements 57 | 58 | todo_list 59 | 60 | 61 | 62 | Indices and tables 63 | ================== 64 | 65 | * :ref:`genindex` 66 | * :ref:`modindex` 67 | * :ref:`search` 68 | 69 | -------------------------------------------------------------------------------- /doc/source/overview.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | =============== 4 | Overview 5 | =============== 6 | 7 | 8 | 9 | .. ---- These are just comments ----: 10 | .. But it's not straightforward! The whole Festival system seems to be designed to be complicated and keep non-geeks out! 11 | 12 | .. --digitaltoast 13 | 14 | .. [http://ubuntuforums.org/showthread.php?t=751169&page=12] -------------------------------------------------------------------------------- /doc/source/python_install.rst: -------------------------------------------------------------------------------- 1 | ============================================= 2 | MAKING YOUR OWN PYTHON INSTALLATION ON LINUX 3 | ============================================= 4 | 5 | Here are some steps to install a Python interpreter from scratch for running Ossian, 6 | using the 'Community Edition' distribution at ``http://www.activestate.com/activepython``. 7 | This is just one possible way to install Python. 
8 | 9 | Make a directory for your Python installation and `cd` to it, and set an environment variable 10 | to point to it for these instructions: 11 | 12 | export MYPYTHON=$PWD 13 | 14 | With a browser go to: 15 | 16 | ``http://www.activestate.com/activepython/downloads`` 17 | 18 | and click on the link for Python version 2.7.5.6 for Linux (x86_64) to download the package. 19 | This 'Community Edition' is for non-commercial or non-production use -- please refer to 20 | the Activestate license for details. 21 | When you've done this, move the package to the new Python directory: 22 | 23 | mv ~/Downloads/ActivePython-2.7.5.6-linux-x86_64.tar.gz $MYPYTHON 24 | 25 | Unpack it: 26 | 27 | .. code-block:: bash 28 | 29 | cd $MYPYTHON 30 | tar xvf ActivePython-2.7.5.6-linux-x86_64.tar.gz 31 | 32 | Use the installer provided to install: 33 | 34 | .. code-block:: bash 35 | 36 | cd ActivePython-2.7.5.6-linux-x86_64 37 | ./install.sh 38 | 39 | When prompted to specify "Install directory: ", enter ".." to install to $MYPYTHON and type "y" when it asks for confirmation. 40 | 41 | This should have installed Python at $MYPYTHON/bin/python -- type it in the command 42 | line and check you get a Python prompt like this: 43 | 44 | .. code-block:: bash 45 | 46 | ActivePython 2.7.5.6 (ActiveState Software Inc.) based on 47 | Python 2.7.5 (default, Sep 16 2013, 23:05:39) 48 | [GCC 4.0.2 20051125 (Red Hat 4.0.2-8)] on linux2 49 | Type "help", "copyright", "credits" or "license" for more information. 50 | >>> 51 | 52 | If that works, type ``quit()`` to exit the interactive session. 53 | 54 | Add the new Python ``bin`` directory to the start of system path so that the new Python 55 | will be used for the rest of the session (you can make this last beyond the session by 56 | editing e.g. your ``~/.bashrc``): 57 | 58 | .. code-block:: bash 59 | 60 | export PATH=$MYPYTHON/python/bin/:$PATH 61 | 62 | Use the ``pip`` package installer to get some necessary packages: 63 | 64 | .. 
code-block:: bash 65 | 66 | pip install numpy==1.8.0 67 | pip install scipy==0.12.0 68 | pip install configobj==4.7.2 69 | pip install scikit-learn==0.13.1 70 | pip install regex 71 | pip install lxml 72 | 73 | The version of scikit-learn is probably important; we have not yet determined how much 74 | flexibility there is with the versions of the other packages, but the above combination works. 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /doc/source/s4a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/doc/source/s4a.png -------------------------------------------------------------------------------- /doc/source/todo_list.rst: -------------------------------------------------------------------------------- 1 | -------------------------------------- 2 | List of some obvious things to do next 3 | -------------------------------------- 4 | 5 | There are lots of these -- a few that come to mind: 6 | 7 | - Merge in recent extensions to trunk from Antti, Peter, Jari... 8 | - Morfessor 9 | - ... 10 | 11 | - Languages: 12 | - Add toy demo corpora from all Tundra/Indic languages 13 | - Train and distribute voices on decent amounts of data for all these languages 14 | - Make tars of the Tundra etc. data that can be unpacked at $OSSIAN so the data lands 15 | in the right place 16 | 17 | - Online demo: 18 | - Get it working at a decent speed -- move from STRAIGHT resynthesis to MLSA? 19 | - Client/server mode to avoid loading voices per utterance? 20 | 21 | 22 | 23 | 24 | 25 | .. - Vocoding: 26 | .. - Currently using SPTK's mcep and hts_engine's MLSA on 16kH speech 27 | .. - Higher sampling rates 28 | .. - Move to STRAIGHT (at least for extraction -- can .cmp files be distributed? ) 29 | .. - Incorporate Cassia's modifications to hts_engine 30 | .. 
- GlottHMM -------------------------------------------------------------------------------- /make_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VERSION="1.3" ## set this by hand 4 | 5 | 6 | TOPDIR="./ossian-v.${VERSION}" 7 | 8 | 9 | if [ -e $TOPDIR ] ; then 10 | echo $TOPDIR exists -- please delete it an try again 11 | exit 1 12 | fi 13 | if [ -e ./ossian-v.${VERSION}.tgz ] ; then 14 | echo ./ossian-v.${VERSION}.tgz exists -- please delete it an try again 15 | exit 1 16 | fi 17 | 18 | 19 | ### -------- build and add html doc ----------- 20 | HERE=`pwd` 21 | cd ./doc 22 | gsed "s/__VERSION__/${VERSION}/" ./source/conf.py.initial > ./source/conf.py 23 | make html 24 | [ $# -ne 0 ] && echo "make doc failed" && exit 1 ; 25 | cd $HERE 26 | 27 | mkdir $TOPDIR 28 | 29 | ### --------- pack and unpack the stuff with tar to preserve all dir structure: ------ 30 | 31 | 32 | tar cf $TOPDIR/ossian_package.tar \ 33 | ./config_templates/ \ 34 | ./corpus/rm/speakers/rss_toy_demo/ \ 35 | ./corpus/rm/text_corpora/wikipedia_10K_words/ \ 36 | ./corpus/en/speakers/tundra_toy_demo/ \ 37 | ./corpus/en/labelled_corpora/cmudict/cmudict_phones.table \ 38 | ./corpus/en/labelled_corpora/cmudict/letter.names \ 39 | ./corpus/hi/ \ 40 | ./rules/ \ 41 | ./doc/build/ \ 42 | ./example_voices/rm-rss_toy-naive_example.tar \ 43 | ./example_voices/rm-rss_rnd1-naive_example.tar \ 44 | ./scripts \ 45 | ./test/txt/*.txt \ 46 | ./test/ref_wav/*.wav \ 47 | ./0_README.txt \ 48 | ./recipes/demo*.cfg \ 49 | ./recipes/baseline*.cfg \ 50 | ./recipes/naive*.cfg \ 51 | ./recipes/blizzard_2014_naive*.cfg \ 52 | ./recipes/english_gold_basic.cfg \ 53 | ./tools/patch/*.patch \ 54 | ./tools/downloads \ 55 | ./tools/GlottHMM/ 56 | 57 | cd $TOPDIR 58 | tar xf ossian_package.tar 59 | rm ossian_package.tar 60 | cd .. 
61 | 62 | 63 | ## ----- add some more directries ----- 64 | 65 | mkdir -p $TOPDIR/tools/bin 66 | mkdir -p $TOPDIR/tools/downloads 67 | 68 | 69 | mkdir $TOPDIR/train/ 70 | mkdir $TOPDIR/voices/ 71 | 72 | mkdir $TOPDIR/test/wav 73 | 74 | # --- remove any copied junk from release (.pyc and .svn stuff): ---- 75 | for FNAME in `find $TOPDIR/* -name *.pyc` ; do 76 | rm $FNAME 77 | done 78 | 79 | for FNAME in `find $TOPDIR/* | grep .svn` ; do 80 | rm -rf $FNAME 81 | done 82 | 83 | ## strip some other mac-crap: 84 | for FNAME in `find $TOPDIR/* -name '.DS_Store'` ; do 85 | rm $FNAME 86 | done 87 | 88 | # --- remove compiled GlottHMM files, etc.: ---- 89 | for FNAME in `find $TOPDIR/tool/GlottHMM/* -name *.o` ; do 90 | rm $FNAME 91 | done 92 | rm $TOPDIR/tool/GlottHMM/{Analysis,Synthesis} 93 | 94 | ### ----- pack up into a tgz file: ---- 95 | 96 | tar cvzf ./ossian-v.${VERSION}.tgz $TOPDIR 97 | rm -r $TOPDIR 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /recipes/world_extraction.cfg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Not a real recipe for building a whole voice, just a convenience for extracting World features from a database 4 | 5 | 6 | import sys 7 | import os 8 | import inspect 9 | current_dir = os.path.realpath(os.path.abspath(os.path.dirname(inspect.getfile(inspect.currentframe())))) 10 | 11 | ## for when config is still in recipes directory: 12 | sys.path.append(current_dir + '/../scripts/') 13 | sys.path.append(current_dir + '/../scripts/processors/') 14 | 15 | ## for after config is copied to voice.cfg: 16 | sys.path.append(current_dir + '/../../../../scripts/') 17 | sys.path.append(current_dir + '/../../../../scripts/processors/') 18 | 19 | from FeatureExtractor import WorldExtractor 20 | 21 | 22 | import default.const as c 23 | 24 | 25 | 26 | 27 | 28 | 29 | speech_coding_config = {'order': 59, 'static_window': '1', 
'delta_window': '-0.5 0.0 0.5', 'delta_delta_window': '1.0 -2.0 1.0'} 30 | 31 | 32 | ## ---------------------------------------------------------------- 33 | ## Now, a number of utterance processors are defined:-- 34 | 35 | 36 | 37 | speech_feature_extractor = WorldExtractor('acoustic_feature_extractor', input_filetype='wav', output_filetype='cmp', \ 38 | coding_config=speech_coding_config, sample_rate=48000, alpha=0.77, mcep_order=59) 39 | 40 | 41 | ## ----------------------------------------------------------------- 42 | ## The processors are grouped for convenience into several 'stages': 43 | 44 | single_stage = [speech_feature_extractor] 45 | 46 | 47 | 48 | ## ---------------------------------------------------------------- 49 | ## The final part of the config specifies which stages are run in each of the modes 50 | ## "train" and "runtime" (and optionally extra, specialised, modes): 51 | 52 | train_stages = [single_stage] 53 | 54 | runtime_stages = [single_stage] 55 | 56 | -------------------------------------------------------------------------------- /rules/en/textnorm/rules/abbrevlist: -------------------------------------------------------------------------------- 1 | Adj. 2 | Adm. 3 | Adv. 4 | Ala. 5 | Alex. 6 | Apr. 7 | Ariz. 8 | Ark. 9 | Assn. 10 | Asst. 11 | Aug. 12 | Ave. 13 | B.A.IT. 14 | B.A.T. 15 | B.Sc. 16 | Bart. 17 | Bhd. 18 | Bldg. 19 | Blvd. 20 | Brig. 21 | Bros. 22 | Ca. 23 | Cal. 24 | Calif. 25 | Capt. 26 | Cdr. 27 | Ch. 28 | Cie. 29 | Cmdr. 30 | Co. 31 | Col. 32 | Colo. 33 | Comdr. 34 | Con. 35 | Conn. 36 | Corp. 37 | Cos. 38 | Cpl. 39 | Cpt. 40 | Ct. 41 | D.Phil. 42 | D.Sc. 43 | DR. 44 | Dec. 45 | Del. 46 | Dept. 47 | dept. 48 | Dr. 49 | Drs. 50 | Ens. 51 | Etc. 52 | Feb. 53 | Fla. 54 | Fr. 55 | Fri. 56 | Ft. 57 | Ga. 58 | Gen. 59 | Gov. 60 | Hon. 61 | Hosp. 62 | Hr. 63 | Hwy. 64 | Ill. 65 | Inc. 66 | Ind. 67 | Insp. 68 | Jan. 69 | Jl. 70 | Jr. 71 | Jul. 72 | Jun. 73 | Kan. 74 | Ky. 75 | La. 76 | Lt. 77 | Ltd. 78 | Ltda. 79 | M.Phil. 
80 | M.Sc. 81 | MCorp. 82 | MM. 83 | MR. 84 | MRS. 85 | MS. 86 | Maj. 87 | Mar. 88 | Mass. 89 | Md. 90 | Me. 91 | Me.T.A. 92 | Messrs. 93 | Mfg. 94 | Mich. 95 | Minn. 96 | Miss. 97 | Mlle. 98 | Mme. 99 | Mo. 100 | Mohd. 101 | Mont. 102 | Mr. 103 | Mrs. 104 | Ms. 105 | Msgr. 106 | Mt. 107 | Muhd. 108 | Neb. 109 | Nev. 110 | Nov. 111 | Oct. 112 | Okla. 113 | Ont. 114 | Op. 115 | Ord. 116 | Ore. 117 | Oreg. 118 | Pa. 119 | Penn. 120 | Pfc. 121 | Ph. 122 | Ph.D. 123 | PhD. 124 | Pkwy. 125 | Prof. 126 | Prop. 127 | Pte. 128 | Pty. 129 | Pvt. 130 | Qtr. 131 | Rd. 132 | Rep. 133 | Reps. 134 | Res. 135 | Rev. 136 | Rt. 137 | S.p.A. 138 | Sen. 139 | Sens. 140 | Sep. 141 | Sept. 142 | Sfc. 143 | Sgt. 144 | Spc. 145 | Sr. 146 | St. 147 | Ste. 148 | Supt. 149 | Surg. 150 | Tel. 151 | Tenn. 152 | Tex. 153 | Tk. 154 | U.K. 155 | U.S. 156 | U.S.A. 157 | U.Conn. 158 | U-Conn. 159 | U.Mass. 160 | U-Mass. 161 | U.Md. 162 | U-Md. 163 | U.Penn. 164 | U-Penn. 165 | U.Va. 166 | U-Va. 167 | Va. 168 | Vt. 169 | W.Va. 170 | Wash. 171 | Wis. 172 | Wisc. 173 | Wyo. 174 | Yr. 175 | 176 | D-Ala. 177 | D-Ariz. 178 | D-Ark. 179 | D-Cal. 180 | D-Calif. 181 | D-Colo. 182 | D-Conn. 183 | D-Ct. 184 | D-D.C. 185 | D-Del. 186 | D-Fla. 187 | D-Ga. 188 | D-Ia. 189 | D-Ida. 190 | D-Ill. 191 | D-Ind. 192 | D-Kan. 193 | D-Ky. 194 | D-La. 195 | D-Mass. 196 | D-Md. 197 | D-Mich. 198 | D-Minn. 199 | D-Miss. 200 | D-Mo. 201 | D-Mont. 202 | D-N.C. 203 | D-N.D. 204 | D-N.H. 205 | D-N.J. 206 | D-N.M. 207 | D-N.Y. 208 | D-Neb. 209 | D-Nev. 210 | D-Okla. 211 | D-Ore. 212 | D-Oreg. 213 | D-Pa. 214 | D-Penn. 215 | D-R.I. 216 | D-S.C. 217 | D-S.D. 218 | D-Tenn. 219 | D-Tex. 220 | D-Va. 221 | D-Vt. 222 | D-W.Va. 223 | D-Wash. 224 | D-Wis. 225 | D-Wisc. 226 | D-Wyo. 227 | R-Ala. 228 | R-Ariz. 229 | R-Ark. 230 | R-Cal. 231 | R-Calif. 232 | R-Colo. 233 | R-Conn. 234 | R-Ct. 235 | R-D.C. 236 | R-Del. 237 | R-Fla. 238 | R-Ga. 239 | R-Ia. 240 | R-Ida. 241 | R-Ill. 242 | R-Ind. 243 | R-Kan. 244 | R-Ky. 245 | R-La. 
246 | R-Mass. 247 | R-Md. 248 | R-Mich. 249 | R-Minn. 250 | R-Miss. 251 | R-Mo. 252 | R-Mont. 253 | R-N.C. 254 | R-N.D. 255 | R-N.H. 256 | R-N.J. 257 | R-N.M. 258 | R-N.Y. 259 | R-Neb. 260 | R-Nev. 261 | R-Okla. 262 | R-Ore. 263 | R-Oreg. 264 | R-Pa. 265 | R-Penn. 266 | R-R.I. 267 | R-S.C. 268 | R-S.D. 269 | R-Tenn. 270 | R-Tex. 271 | R-Va. 272 | R-Vt. 273 | R-W.Va. 274 | R-Wash. 275 | R-Wis. 276 | R-Wisc. 277 | R-Wyo. 278 | 279 | a.d. 280 | a.m. 281 | b.c. 282 | co. 283 | cu. 284 | dlrs. 285 | e.g. 286 | etc. 287 | ft. 288 | i.e. 289 | inc. 290 | km. 291 | lb. 292 | lbs. 293 | mfg. 294 | mg. 295 | mi. 296 | mm. 297 | p.m. 298 | r.p.m. 299 | sq. 300 | rev. 301 | v. 302 | vs. 303 | 304 | o.tel.o 305 | O.tel.o 306 | 307 | A. 308 | B. 309 | C. 310 | D. 311 | E. 312 | F. 313 | G. 314 | H. 315 | I. 316 | J. 317 | K. 318 | L. 319 | M. 320 | N. 321 | O. 322 | P. 323 | Q. 324 | R. 325 | S. 326 | T. 327 | U. 328 | V. 329 | W. 330 | X. 331 | Y. 332 | Z. 333 | 334 | D-W. 335 | D-N. 336 | D-S. 337 | R-W. 338 | R-N. 339 | R-S. 340 | 341 | Art. #number 342 | No. #number 343 | no. #number 344 | Nos. #number 345 | Nr. #number 346 | p. #number 347 | pp. #number 348 | -------------------------------------------------------------------------------- /rules/en/textnorm/rules/hyphenated: -------------------------------------------------------------------------------- 1 | U-Haul 2 | U-Hauls 3 | U-Hauls' 4 | U-Haul's 5 | U-Conn. 6 | U-Mass. 7 | U-Md. 8 | U-Penn. 9 | U-Va. 
10 | U-Conn 11 | U-MASS 12 | U-Mass 13 | U-Md 14 | U-Penn 15 | U-Tapao 16 | U-Texas 17 | U-VA 18 | U-Va 19 | U-Conn.'s 20 | U-Mass.'s 21 | U-Md.'s 22 | U-Penn.'s 23 | U-Va.'s 24 | U-Conn's 25 | U-MASS's 26 | U-Mass's 27 | U-Md's 28 | U-Penn's 29 | U-Tapao's 30 | U-Texas's 31 | U-VA's 32 | U-Va's 33 | vis-a-vis 34 | vis-à-vis 35 | -------------------------------------------------------------------------------- /rules/en/textnorm/rules/tldlist: -------------------------------------------------------------------------------- 1 | # Top-level domain names from http://data.iana.org/TLD/tlds-alpha-by-domain.txt 2 | # and lowercased, with the internationalized TLDs like xn--45brj9c removed. 3 | # Version 2012070501, Last Updated Fri Jul 6 07:07:01 2012 UTC 4 | ac 5 | ad 6 | ae 7 | aero 8 | af 9 | ag 10 | ai 11 | al 12 | am 13 | an 14 | ao 15 | aq 16 | ar 17 | arpa 18 | as 19 | asia 20 | at 21 | au 22 | aw 23 | ax 24 | az 25 | ba 26 | bb 27 | bd 28 | be 29 | bf 30 | bg 31 | bh 32 | bi 33 | biz 34 | bj 35 | bm 36 | bn 37 | bo 38 | br 39 | bs 40 | bt 41 | bv 42 | bw 43 | by 44 | bz 45 | ca 46 | cat 47 | cc 48 | cd 49 | cf 50 | cg 51 | ch 52 | ci 53 | ck 54 | cl 55 | cm 56 | cn 57 | co 58 | com 59 | coop 60 | cr 61 | cu 62 | cv 63 | cw 64 | cx 65 | cy 66 | cz 67 | de 68 | dj 69 | dk 70 | dm 71 | do 72 | dz 73 | ec 74 | edu 75 | ee 76 | eg 77 | er 78 | es 79 | et 80 | eu 81 | fi 82 | fj 83 | fk 84 | fm 85 | fo 86 | fr 87 | ga 88 | gb 89 | gd 90 | ge 91 | gf 92 | gg 93 | gh 94 | gi 95 | gl 96 | gm 97 | gn 98 | gov 99 | gp 100 | gq 101 | gr 102 | gs 103 | gt 104 | gu 105 | gw 106 | gy 107 | hk 108 | hm 109 | hn 110 | hr 111 | ht 112 | hu 113 | id 114 | ie 115 | il 116 | im 117 | in 118 | info 119 | int 120 | io 121 | iq 122 | ir 123 | is 124 | it 125 | je 126 | jm 127 | jo 128 | jobs 129 | jp 130 | ke 131 | kg 132 | kh 133 | ki 134 | km 135 | kn 136 | kp 137 | kr 138 | kw 139 | ky 140 | kz 141 | la 142 | lb 143 | lc 144 | li 145 | lk 146 | lr 147 | ls 148 | lt 149 | lu 150 | lv 
151 | ly 152 | ma 153 | mc 154 | md 155 | me 156 | mg 157 | mh 158 | mil 159 | mk 160 | ml 161 | mm 162 | mn 163 | mo 164 | mobi 165 | mp 166 | mq 167 | mr 168 | ms 169 | mt 170 | mu 171 | museum 172 | mv 173 | mw 174 | mx 175 | my 176 | mz 177 | na 178 | name 179 | nc 180 | ne 181 | net 182 | nf 183 | ng 184 | ni 185 | nl 186 | no 187 | np 188 | nr 189 | nu 190 | nz 191 | om 192 | org 193 | pa 194 | pe 195 | pf 196 | pg 197 | ph 198 | pk 199 | pl 200 | pm 201 | pn 202 | pr 203 | pro 204 | ps 205 | pt 206 | pw 207 | py 208 | qa 209 | re 210 | ro 211 | rs 212 | ru 213 | rw 214 | sa 215 | sb 216 | sc 217 | sd 218 | se 219 | sg 220 | sh 221 | si 222 | sj 223 | sk 224 | sl 225 | sm 226 | sn 227 | so 228 | sr 229 | st 230 | su 231 | sv 232 | sx 233 | sy 234 | sz 235 | tc 236 | td 237 | tel 238 | tf 239 | tg 240 | th 241 | tj 242 | tk 243 | tl 244 | tm 245 | tn 246 | to 247 | tp 248 | tr 249 | travel 250 | tt 251 | tv 252 | tw 253 | tz 254 | ua 255 | ug 256 | uk 257 | us 258 | uy 259 | uz 260 | va 261 | vc 262 | ve 263 | vg 264 | vi 265 | vn 266 | vu 267 | wf 268 | ws 269 | xxx 270 | ye 271 | yt 272 | za 273 | zm 274 | zw 275 | -------------------------------------------------------------------------------- /rules/en/textnorm/scripts/final_cleanup.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Modified by Fergus McInnes (FRM), 2013 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | use strict; 21 | use Unicode::Normalize; 22 | use open ':encoding(utf8)'; 23 | # use feature 'unicode_strings'; 24 | 25 | # Make sure we are reading and writing in UTF-8. 26 | binmode(STDIN, ":encoding(utf8)"); 27 | binmode(STDOUT, ":encoding(utf8)"); 28 | binmode(STDERR, ":encoding(utf8)"); 29 | 30 | my $help_message="USAGE: final_cleanup.pl < in > out\n"; 31 | 32 | while () { 33 | chomp; 34 | $_ = NFD($_); # UTF8 decompose 35 | 36 | s/^/ /; 37 | s/$/ /; 38 | 39 | # Heuristics by FRM to distinguish article "A" from initial "A.": 40 | s/ A\&/ A. and /g; 41 | s/ A (ROD|Rod|BAT) / A. $1 /g; 42 | s/ A A / A. A. /g; 43 | s/ A A / A. A. /g; 44 | s/A\. A\. A /A. A. A. /g; 45 | s/ ([B-Z]) A / $1 A. /g; 46 | s/ ([B-Z]) A / $1 A. /g; 47 | s/ A ([A-Z][A-Z])/ a $1/g; # prevent conversion to "A." in capitalised text 48 | # Exceptions and default for title case (correct for "Dial A Book" etc, but 49 | # not for names like "David A Cardona"): 50 | s/ (Single|Double|Triple|Type|Class|Serie|Series|Group|Model|Avenue|The|An) A / $1 A. /g; 51 | s/ ([A-Z][a-z\']+) A ([A-Z][a-z])/ $1 a $2/g; 52 | # Default for "A" in lower-case context: 53 | s/([a-z\&]) A /$1 A. /g; 54 | # After a comma, "A Title" keeps "A", but "A P Herbert" or "A to Z" gets "A.": 55 | s/, A ([^A-Z])/, A. $1/g; 56 | s/, A ([A-Z] )/, A. $1/g; 57 | # After any other punctuation, retain "A" with no dot 58 | 59 | # Convert email and Twitter notation: 60 | s/\@/ at /g; 61 | #s/ \#(\p{L}+) )/ hashtag $1 /g; # judged not worth doing for ASR LM 62 | # "#" on its own is sometimes "number", but not always 63 | 64 | # Remove punctuation: 65 | s/( [\"\'\-\.\?\!\,\:\;]+)+ / /g; 66 | s/([^\.])\&/$1 and /g; # this leaves "A.T.&T." 
etc unchanged 67 | s/[\|\$\#\%\*\+\,\^\:\;\?\~\\\/\!]/ /g; # will handle a**holes incorrectly 68 | s/\.{2,}/ /g; 69 | 70 | s/\p{M}//g; # Remove diacritics 71 | s/[\x{007F}-\x{00BF}]/ /g; # Remove some other unicode junk 72 | s/[\x{2190}-\x{21FF}]/ /g; # Remove arrows 73 | 74 | $_ = lc($_); 75 | 76 | s/(^| )(b|c|d|e|f|g|h|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)( |$)/ $2. /g; 77 | s/(^| )(b|c|d|e|f|g|h|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)( |$)/ $2. /g; 78 | 79 | s/^\s*//; s/\s*$//; s/\s+/ /g; # Normalize spaces 80 | next if /^$/; # Skip empty lines 81 | print NFC($_), "\n"; # UTF8 recompose & reorder canonically 82 | } 83 | -------------------------------------------------------------------------------- /rules/en/textnorm/scripts/remove_dups.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | # use open ':encoding(utf8)'; 5 | # use feature 'unicode_strings'; 6 | 7 | # Make sure we are reading and writing in UTF-8. 8 | binmode(STDIN, ":encoding(utf8)"); 9 | binmode(STDOUT, ":encoding(utf8)"); 10 | binmode(STDERR, ":encoding(utf8)"); 11 | 12 | my %seen_lines = (); 13 | 14 | while () { 15 | if (!defined($seen_lines{$_})) { 16 | $seen_lines{$_} = 1; 17 | print; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/build_MDL_trees.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | #---------------------------------------------------------------------- 7 | 8 | INDIR=$1 9 | OUTDIR=$2 10 | MDLWEIGHT=$3 11 | QUESTIONS=$4 12 | BIN=$5 13 | 14 | [ $# -ne 5 ] && echo "build_MDL_trees.sh: Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z 
$VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 26 | 27 | 28 | 29 | 30 | 31 | 32 | endstate=$[$NSTATE + 1] 33 | 34 | 35 | 36 | 37 | ## Make edit file for building ALL cmp trees: 38 | 39 | echo "RO 0 $INDIR/stat.cmp" > $OUTDIR/cluster_cmp.hed 40 | echo "TR 1" >> $OUTDIR/cluster_cmp.hed 41 | cat $QUESTIONS >> $OUTDIR/cluster_cmp.hed 42 | echo "TR 1" >> $OUTDIR/cluster_cmp.hed 43 | for STREAM in $STREAMS ; do 44 | for STATE in `seq 2 $endstate` ; do 45 | NAME="stream-${STREAM}-state-${STATE}" 46 | echo "TB 0 ${NAME}_ {*.state[${STATE}].stream[${STREAM}]}" >> $OUTDIR/cluster_cmp.hed 47 | done 48 | #echo "ST $OUTDIR/tree_cmp_str_${STREAM}.txt" >> $OUTDIR/cluster_cmp.hed 49 | done 50 | echo "TR 1" >> $OUTDIR/cluster_cmp.hed 51 | echo "ST $OUTDIR/tree_cmp.txt" >> $OUTDIR/cluster_cmp.hed 52 | 53 | 54 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -T 1 \ 55 | -i -m -a $MDLWEIGHT -H $INDIR/cmp.mmf \ 56 | -p -r 1 -s -w $OUTDIR/cmp.mmf $OUTDIR/cluster_cmp.hed $OUTDIR/data/modellist.full 57 | 58 | 59 | 60 | 61 | ## Duration: 62 | 63 | echo "RO 0 $INDIR/stat.dur" > $OUTDIR/cluster_dur.hed 64 | echo "TR 1" >> $OUTDIR/cluster_dur.hed 65 | cat $QUESTIONS >> $OUTDIR/cluster_dur.hed 66 | echo "TR 1" >> $OUTDIR/cluster_dur.hed 67 | echo "TB 0 duration_ {*.state[2].stream[1-${NSTATE}]}" >> $OUTDIR/cluster_dur.hed 68 | echo "TR 1" >> $OUTDIR/cluster_dur.hed 69 | echo "ST $OUTDIR/tree_dur.txt" >> $OUTDIR/cluster_dur.hed 70 | 71 | 72 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -T 1 \ 73 | -i -m -a $MDLWEIGHT -H $INDIR/dur.mmf \ 74 | -p -r 1 -s -w $OUTDIR/dur.mmf $OUTDIR/cluster_dur.hed $OUTDIR/data/modellist.full 75 | 76 | 77 | 78 | 79 | 80 | 81 | ## ------------------------ check success ---------------------------- 82 | if [ -z 
`grep -l QS $OUTDIR/tree_cmp.txt` ] ; then 83 | echo "Building emission trees failed: no QS lines in $OUTDIR/tree_cmp.txt" 84 | exit 1 85 | fi 86 | if [ -z `grep -l QS $OUTDIR/tree_dur.txt` ] ; then 87 | echo "Building duration trees failed: no QS lines in $OUTDIR/tree_dur.txt" 88 | exit 1 89 | fi 90 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 91 | echo "Building duration trees failed: no $OUTDIR/cmp.mmf" 92 | exit 1 93 | fi 94 | if [ ! -e $OUTDIR/dur.mmf ] ; then 95 | echo "Building duration trees failed: no $OUTDIR/dur.mmf" 96 | exit 1 97 | fi 98 | ## ------------------------------------------------------------------- 99 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/clone_monophone_to_fullcontext.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | if [ -z $VOICE_BUILD_CONFIG ] ; then 18 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' 
; exit 1 19 | fi 20 | source $VOICE_BUILD_CONFIG 21 | 22 | #---------------------------------------------------------------------- 23 | 24 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 25 | 26 | 27 | 28 | 29 | ## hardcoded: TODO 30 | NSTATE=5 31 | BINMOD="" 32 | 33 | 34 | 35 | endstate=$[$NSTATE + 1] 36 | 37 | 38 | ## cmp 39 | echo "TI \"SWeight\" { *.state[2-${endstate}].weights }" > $OUTDIR/clone_cmp.hed 40 | echo "MM \"trP\" { *.transP }" >> $OUTDIR/clone_cmp.hed 41 | echo "CL \"$OUTDIR/data/modellist.full\"" >> $OUTDIR/clone_cmp.hed 42 | 43 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/cmp.mmf -s -p -i -w $OUTDIR/cmp.mmf $OUTDIR/clone_cmp.hed $INDIR/data/modellist.mono 44 | 45 | 46 | ## dur 47 | echo "MM \"trP\" { *.transP }" > $OUTDIR/clone_dur.hed 48 | echo "CL \"$OUTDIR/data/modellist.full\"" >> $OUTDIR/clone_dur.hed 49 | 50 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/dur.mmf -s -p -i -w $OUTDIR/dur.mmf $OUTDIR/clone_dur.hed $INDIR/data/modellist.mono -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/increase_mixture_components.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | NCOMPONENTS=$3 12 | BIN=$4 13 | 14 | 15 | [ $# -ne 4 ] && echo "Wrong number of arguments supplied" && exit 1 ; 16 | 17 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 18 | 19 | if [ -z $VOICE_BUILD_CONFIG ] ; then 20 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' 
; exit 1 21 | fi 22 | source $VOICE_BUILD_CONFIG 23 | 24 | #---------------------------------------------------------------------- 25 | 26 | ## no label prune!!!! 27 | 28 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 29 | 30 | LAST_STATE=$[${NSTATE}+2] 31 | 32 | echo "MU $NCOMPONENTS {*.state[2-${LAST_STATE}].stream[${MIXUP_STREAMS}].mix}" > $OUTDIR/mixup.hed 33 | 34 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -T 1 -H $INDIR/cmp.mmf -M $OUTDIR/ $OUTDIR/mixup.hed $OUTDIR/data/modellist.mono 35 | 36 | ## ------------------------ check success ---------------------------- 37 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 38 | echo "Reestimation failed: cmp.mmf not made" 39 | exit 1 40 | fi 41 | ## ------------------------------------------------------------------- 42 | 43 | 44 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/initial_alignment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | #---------------------------------------------------------------------- 18 | 19 | 20 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 21 | 22 | 23 | $UTIL/make_proto_hsmm.pl $OUTDIR/proto.txt 24 | 25 | 26 | 27 | ## -------- floor variance ---------: 28 | ## cmp: 29 | $BIN/HCompV -A -C $OUTDIR/config/general.conf -D -V -S $OUTDIR/data/uttlist.cmp -T 1 -M $OUTDIR $OUTDIR/proto.txt 30 | if [ $? 
-gt 0 ] ; then echo "Floor variance failed" ;exit 1 ; fi 31 | head -n 1 $OUTDIR/proto.txt | cat - $OUTDIR/vFloors > $OUTDIR/floor_cmp.mmf 32 | 33 | 34 | 35 | 36 | ## dur -- floor variance to 1.0: 37 | rm $OUTDIR/floor_dur.mmf 38 | for i in 1 2 3 4 5 ; do 39 | echo "~v varFloor${i}" >> $OUTDIR/floor_dur.mmf 40 | echo " 1" >> $OUTDIR/floor_dur.mmf 41 | echo "1.0" >> $OUTDIR/floor_dur.mmf 42 | done 43 | 44 | 45 | 46 | ## ------- segmental K-means & EM-based estimation of monophones: ------ 47 | mkdir $OUTDIR/hinit 48 | mkdir $OUTDIR/hrest_cmp 49 | mkdir $OUTDIR/hrest_dur 50 | 51 | for phone in `cat $OUTDIR/data/modellist.mono`; do 52 | echo $phone 53 | $BIN/HInit -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \ 54 | -I $OUTDIR/data/mlf.mono -M $OUTDIR/hinit -o $phone -S $OUTDIR/data/uttlist.cmp \ 55 | -T 1 -l $phone -m 1 -u tmvw -w 3 $OUTDIR/proto.txt 56 | $BIN/HRest -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \ 57 | -I $OUTDIR/data/mlf.mono -M $OUTDIR/hrest_cmp -o $phone -S $OUTDIR/data/uttlist.cmp \ 58 | -T 1 -l $phone -g $OUTDIR/hrest_dur/$phone -m 1 -u tmvw -w 3 $OUTDIR/hinit/$phone 59 | done 60 | 61 | 62 | ## ------ join individual monophone files together -------- 63 | mkdir $OUTDIR/joined_0 64 | 65 | ## cmp: 66 | echo "FV $OUTDIR/floor_cmp.mmf" > $OUTDIR/join_cmp.hed ## make hed file 67 | 68 | arg="" 69 | for phone in `cat $OUTDIR/data/modellist.mono`; do 70 | arg="$arg -H $OUTDIR/hrest_cmp/$phone" 71 | done 72 | 73 | $BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/cmp.mmf $OUTDIR/join_cmp.hed $OUTDIR/data/modellist.mono 74 | 75 | 76 | ## dur: 77 | echo "FV $OUTDIR/floor_dur.mmf" > $OUTDIR/join_dur.hed ## make hed file 78 | 79 | arg="" 80 | for phone in `cat $OUTDIR/data/modellist.mono`; do 81 | arg="$arg -H $OUTDIR/hrest_dur/$phone" 82 | done 83 | 84 | $BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/dur.mmf $OUTDIR/join_dur.hed 
$OUTDIR/data/modellist.mono 85 | 86 | 87 | 88 | NREEST=5 89 | ## ------ embedded reestimation -------- 90 | for new in `seq ${NREEST}` ; do 91 | old=$[$new - 1] 92 | mkdir $OUTDIR/joined_${new} 93 | $BIN/HERest -A -B -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/joined_${old}/cmp.mmf \ 94 | -N $OUTDIR/joined_${old}/dur.mmf -e 2 -I $OUTDIR/data/mlf.mono -M $OUTDIR/joined_${new} \ 95 | -R $OUTDIR/joined_${new} -S $OUTDIR/data/uttlist.cmp -T 1 -m 1 -t 5000 5000 10000 \ 96 | -u mvwtdmv -w 3 $OUTDIR/data/modellist.mono $OUTDIR/data/modellist.mono 97 | done 98 | cp $OUTDIR/joined_${NREEST}/cmp.mmf $OUTDIR/cmp.mmf 99 | cp $OUTDIR/joined_${NREEST}/dur.mmf $OUTDIR/dur.mmf 100 | 101 | 102 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_alignment_lexicon.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | BIN=$2 11 | 12 | [ $# -ne 2 ] && echo "Wrong number of arguments supplied" && exit 1 ; 13 | 14 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 15 | 16 | if [ -z $VOICE_BUILD_CONFIG ] ; then 17 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 18 | fi 19 | source $VOICE_BUILD_CONFIG 20 | 21 | #---------------------------------------------------------------------- 22 | 23 | if [ ! -e $INDIR/data/modellist.mono ] ; then 24 | echo "$INDIR/data/modellist.mono doesn't exist" ; exit 1 ; 25 | fi 26 | 27 | echo "" > $INDIR/data/lexicon.txt ## clear existing lexicon data 28 | echo "" > $INDIR/data/lexicon.tmp 29 | 30 | if [ ! 
-z $EXTRA_SUBSTITUTIONS ] ; then 31 | cat $EXTRA_SUBSTITUTIONS > $INDIR/data/lexicon.txt 32 | fi 33 | 34 | 35 | for MODELNAME in `cat $INDIR/data/modellist.mono | sort ` ; do 36 | case $MODELNAME in 37 | _SPACE_ ) 38 | ## Note the order of skip and sil -- the first provides initial 39 | ## expansion; after models are initialised like this, both options 40 | ## are allowed. 41 | echo '_SPACE_ skip' >> $INDIR/data/lexicon.tmp 42 | echo '_SPACE_ sil' >> $INDIR/data/lexicon.tmp 43 | ;; 44 | _PUNC_ ) 45 | echo '_PUNC_ sil' >> $INDIR/data/lexicon.tmp 46 | echo '_PUNC_ skip' >> $INDIR/data/lexicon.tmp 47 | ;; 48 | * ) 49 | echo "$MODELNAME $MODELNAME" >> $INDIR/data/lexicon.tmp ;; 50 | esac 51 | done 52 | 53 | for ENTRY in `awk '{print $1}' $INDIR/data/lexicon.txt` ; do 54 | grep -v "^$ENTRY " $INDIR/data/lexicon.tmp > $INDIR/data/lexicon.tmp2 55 | mv $INDIR/data/lexicon.tmp2 $INDIR/data/lexicon.tmp 56 | done 57 | 58 | cat $INDIR/data/lexicon.tmp >> $INDIR/data/lexicon.txt 59 | rm $INDIR/data/lexicon.tmp 60 | 61 | 62 | #--------- initial expansion of labels using this lexicon ------------- 63 | cp $INDIR/data/mlf.full $INDIR/data/mlf.words 64 | 65 | echo "EX" > $INDIR/expand_labels.hed 66 | 67 | $BIN/HLEd -I $INDIR/data/mlf.words -i $INDIR/data/mlf.mono -l '*' -d $INDIR/data/lexicon.txt $INDIR/expand_labels.hed $INDIR/data/mlf.words 68 | 69 | #--------- re-make monophone list (to add e.g. 
skip) ------------------ 70 | awk '{print $2}' $INDIR/data/lexicon.txt | sort -u > $INDIR/data/modellist.mono 71 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_alignment_monophone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | 14 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | 26 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 27 | 28 | 29 | 30 | $UTIL/make_proto_hsmm.py $OUTDIR/proto.txt $VOICE_BUILD_CONFIG 31 | $UTIL/make_proto_skip_hsmm.py $OUTDIR/proto_skip.txt $VOICE_BUILD_CONFIG 32 | 33 | 34 | 35 | 36 | echo "Floor variance..." 37 | $BIN/HCompV -A -C $OUTDIR/config/general.conf -D -V -f 0.01 -m -S $OUTDIR/data/uttlist.cmp -T 1 -M $OUTDIR $OUTDIR/proto.txt 38 | if [ $? -gt 0 ] ; then echo "Floor variance failed" ; exit 1 ; fi 39 | 40 | 41 | 42 | echo "Generate models..." 43 | mkdir -p $OUTDIR/hcompv/ 44 | for m in `cat $OUTDIR/data/modellist.mono` ; do 45 | echo "phone $m" 46 | if [ "$m" != "skip" ] ; then 47 | grep -v "~h" $OUTDIR/proto > $OUTDIR/hcompv/$m 48 | else 49 | cp $OUTDIR/proto_skip.txt $OUTDIR/hcompv/$m # null topol for skip 50 | fi 51 | done 52 | echo "models made OK" 53 | 54 | echo "Combine models into single file..." 
55 | echo " " > $OUTDIR/null.hed 56 | $BIN/HHEd -d $OUTDIR/hcompv/ -w $OUTDIR/cmp.mmf $OUTDIR/null.hed $OUTDIR/data/modellist.mono 57 | if [ $? -gt 0 ] ; then echo "Model combination failed" ; exit 1 ; fi 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_engine_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | if [ -z $VOICE_BUILD_CONFIG ] ; then 18 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 19 | fi 20 | source $VOICE_BUILD_CONFIG 21 | 22 | #---------------------------------------------------------------------- 23 | 24 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 25 | 26 | 27 | mkdir -p $OUTDIR/engine/ 28 | 29 | 30 | 31 | 32 | cp $INDIR/tree* $OUTDIR/ 33 | python $UTIL/separate_trees.py -treefile $OUTDIR/tree_cmp.txt 34 | 35 | #$UTIL/make_config.sh $OUTDIR/config/ 36 | 37 | for STREAM in $SHORT_STREAM_NAMES ; do 38 | 39 | echo "LT $OUTDIR/tree_cmp.txt_${STREAM}" > $OUTDIR/engine.hed 40 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 41 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 42 | 43 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/cmp.mmf \ 44 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 45 | 46 | if [ $? 
-gt 0 ] ; then echo "Convert to engine format failed for stream $STREAM" ; exit 1 ; fi 47 | 48 | done 49 | 50 | 51 | ## Rename final models -- this is hard-coded for SPTK / STRAIGHT stream-names, 52 | ## need to generalise for GlottHMM etc.:-- 53 | i=1 54 | for TYPE in $STREAM_NAMES; do 55 | mv $OUTDIR/engine/pdf.${i} $OUTDIR/engine/${TYPE}.pdf 56 | mv $OUTDIR/engine/trees.${i} $OUTDIR/engine/tree-${TYPE}.inf 57 | i=$((i+1)) 58 | done 59 | 60 | ## hardcoded for WORLD 61 | mv $OUTDIR/engine/trees.5 $OUTDIR/engine/tree-bap.inf 62 | mv $OUTDIR/engine/pdf.5 $OUTDIR/engine/bap.pdf 63 | 64 | 65 | 66 | ## dur 67 | echo "LT $OUTDIR/tree_dur.txt" > $OUTDIR/engine.hed 68 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 69 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 70 | 71 | 72 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/dur.mmf \ 73 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 74 | 75 | if [ $? -gt 0 ] ; then echo "Convert to engine format failed for duration " ; exit 1 ; fi 76 | 77 | 78 | mv $OUTDIR/engine/pdf.1 $OUTDIR/engine/duration.pdf ## $OUTDIR/engine/duration-2.3.pdf 79 | mv $OUTDIR/engine/trees.1 $OUTDIR/engine/tree-duration.inf 80 | 81 | 82 | 83 | ## ------------------------ check success ---------------------------- 84 | #for fname in tree-duration.inf duration.pdf tree-mcep.inf mcep.pdf \ 85 | # logF0.pdf tree-logF0.inf ; do 86 | # if [ ! 
-e $OUTDIR/engine/$fname ] ; then 87 | # echo "Making engine files failed: no $OUTDIR/engine/$fname" 88 | # exit 1 89 | # fi 90 | #done 91 | 92 | ## ------------------------------------------------------------------- 93 | 94 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_engine_model.sh.OLD: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | if [ -z $VOICE_BUILD_CONFIG ] ; then 18 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 19 | fi 20 | source $VOICE_BUILD_CONFIG 21 | 22 | #---------------------------------------------------------------------- 23 | 24 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 25 | 26 | 27 | mkdir -p $OUTDIR/engine/ 28 | 29 | 30 | 31 | 32 | cp $INDIR/tree* $OUTDIR/ 33 | python $UTIL/separate_trees.py -treefile $OUTDIR/tree_cmp.txt 34 | 35 | #$UTIL/make_config.sh $OUTDIR/config/ 36 | 37 | for STREAM in $SHORT_STREAM_NAMES ; do 38 | 39 | echo "LT $OUTDIR/tree_cmp.txt_${STREAM}" > $OUTDIR/engine.hed 40 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 41 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 42 | 43 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/cmp.mmf \ 44 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 45 | 46 | if [ $? 
-gt 0 ] ; then echo "Convert to engine format failed for stream $STREAM" ; exit 1 ; fi 47 | 48 | done 49 | 50 | 51 | ## Rename final models -- this is hard-coded for SPTK / STRAIGHT stream-names, 52 | ## need to generalise for GlottHMM etc.:-- 53 | 54 | mv $OUTDIR/engine/pdf.1 $OUTDIR/engine/mcep.pdf 55 | mv $OUTDIR/engine/pdf.2 $OUTDIR/engine/logF0.pdf 56 | ## For STRAIGHT case: 57 | if [ -e $OUTDIR/engine/pdf.5 ] ; then 58 | mv $OUTDIR/engine/pdf.5 $OUTDIR/engine/bndap.pdf 59 | fi 60 | 61 | 62 | mv $OUTDIR/engine/trees.1 $OUTDIR/engine/tree-mcep.inf 63 | mv $OUTDIR/engine/trees.2 $OUTDIR/engine/tree-logF0.inf 64 | ## For STRAIGHT case: 65 | if [ -e $OUTDIR/engine/trees.5 ] ; then 66 | mv $OUTDIR/engine/trees.5 $OUTDIR/engine/tree-bndap.inf 67 | fi 68 | 69 | 70 | 71 | ## dur 72 | echo "LT $OUTDIR/tree_dur.txt" > $OUTDIR/engine.hed 73 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 74 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 75 | 76 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/dur.mmf \ 77 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 78 | 79 | if [ $? -gt 0 ] ; then echo "Convert to engine format failed for duration " ; exit 1 ; fi 80 | 81 | 82 | mv $OUTDIR/engine/pdf.1 $OUTDIR/engine/duration.pdf ## $OUTDIR/engine/duration-2.3.pdf 83 | mv $OUTDIR/engine/trees.1 $OUTDIR/engine/tree-duration.inf 84 | 85 | 86 | 87 | ## ------------------------ check success ---------------------------- 88 | for fname in tree-duration.inf duration.pdf tree-mcep.inf mcep.pdf \ 89 | logF0.pdf tree-logF0.inf ; do 90 | if [ ! 
-e $OUTDIR/engine/$fname ] ; then 91 | echo "Making engine files failed: no $OUTDIR/engine/$fname" 92 | exit 1 93 | fi 94 | done 95 | ## ------------------------------------------------------------------- 96 | 97 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_monophone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | 8 | #---------------------------------------------------------------------- 9 | 10 | INDIR=$1 11 | OUTDIR=$2 12 | BIN=$3 13 | 14 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | set -e 26 | 27 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 28 | 29 | 30 | 31 | 32 | $UTIL/make_proto_hsmm.py $OUTDIR/proto.txt $VOICE_BUILD_CONFIG 33 | 34 | 35 | 36 | ## -------- floor variance ---------: 37 | ## cmp: 38 | $BIN/HCompV -A -C $OUTDIR/config/general.conf -D -V -S $OUTDIR/data/uttlist.cmp -T 1 -M $OUTDIR $OUTDIR/proto.txt 39 | if [ $? 
-gt 0 ] ; then echo "Floor variance failed" ;exit 1 ; fi
head -n 1 $OUTDIR/proto.txt | cat - $OUTDIR/vFloors > $OUTDIR/floor_cmp.mmf




## dur -- floor variance to 1.0:
rm -f $OUTDIR/floor_dur.mmf
for i in `seq $NSTATE` ; do
    echo "~v varFloor${i}" >> $OUTDIR/floor_dur.mmf
    ## NOTE(review): the next line looks like an HTK tag (e.g. <VARIANCE>) was
    ## stripped by text extraction -- confirm against the original repository:
    echo " 1" >> $OUTDIR/floor_dur.mmf
    echo "1.0" >> $OUTDIR/floor_dur.mmf
done


## ------- segmental K-means & EM-based estimation of monophones: ------
mkdir $OUTDIR/hinit
mkdir $OUTDIR/hrest_cmp
mkdir $OUTDIR/hrest_dur
i=1

for phone in `cat $OUTDIR/data/modellist.mono`; do
    echo $phone
    $BIN/HInit -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \
                 -I $OUTDIR/data/mlf.mono -M $OUTDIR/hinit -o $phone -S $OUTDIR/data/uttlist.cmp \
                 -T 1 -l $phone -m 1 -u tmvw -w 3 $OUTDIR/proto.txt
    ## FIX: this status check previously sat after the 'i=$[$i + 1]' increment
    ## at the bottom of the loop, so $? was the (always-zero) status of the
    ## arithmetic assignment and the check could never fire. Check each HTK
    ## tool immediately after it runs instead ('set -e' above would abort
    ## anyway, but silently -- these give a useful message):
    if [ $? -gt 0 ] ; then echo "HInit failed for $phone" ;exit 1 ; fi
    $BIN/HRest -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \
                 -I $OUTDIR/data/mlf.mono -M $OUTDIR/hrest_cmp -o $phone -S $OUTDIR/data/uttlist.cmp \
                 -T 1 -l $phone -g $OUTDIR/hrest_dur/$phone -m 1 -u tmvw -w 3 $OUTDIR/hinit/$phone
    if [ $? -gt 0 ] ; then echo "HRest failed for $phone" ;exit 1 ; fi
    ## Rename per-phone output to a numeric index so the join step below can
    ## rebuild the ordered -H argument list:
    mv $OUTDIR/hrest_cmp/$phone $OUTDIR/hrest_cmp/$i-mmf
    mv $OUTDIR/hrest_dur/$phone $OUTDIR/hrest_dur/$i-mmf
    i=$[$i + 1]
done


## ------ join individual monophone files together --------
mkdir $OUTDIR/joined_0

## cmp:
echo "FV $OUTDIR/floor_cmp.mmf" > $OUTDIR/join_cmp.hed ## make hed file

arg=""
i=1
for phone in `cat $OUTDIR/data/modellist.mono`; do
    arg="$arg -H $OUTDIR/hrest_cmp/$i-mmf"
    i=$[$i + 1]
done

$BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/cmp.mmf $OUTDIR/join_cmp.hed $OUTDIR/data/modellist.mono


## dur:
echo "FV $OUTDIR/floor_dur.mmf" > $OUTDIR/join_dur.hed ## make hed file

arg=""
i=1
for phone in `cat $OUTDIR/data/modellist.mono`; do
    arg="$arg -H $OUTDIR/hrest_dur/$i-mmf"
    i=$[$i + 1]
done

$BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/dur.mmf $OUTDIR/join_dur.hed $OUTDIR/data/modellist.mono


NREEST=5
## ------ embedded reestimation --------
for new in `seq ${NREEST}` ; do
    old=$[$new - 1]
    mkdir $OUTDIR/joined_${new}
    $BIN/HERest -A -B -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/joined_${old}/cmp.mmf \
          -N $OUTDIR/joined_${old}/dur.mmf -e 2 -I $OUTDIR/data/mlf.mono -M $OUTDIR/joined_${new} \
          -R $OUTDIR/joined_${new} -S $OUTDIR/data/uttlist.cmp -T 1 -m 1 -t 5000 5000 10000 \
          -u mvwtdmv -w 3 $OUTDIR/data/modellist.mono $OUTDIR/data/modellist.mono
done

cp $OUTDIR/joined_${NREEST}/cmp.mmf $OUTDIR/cmp.mmf
cp $OUTDIR/joined_${NREEST}/dur.mmf $OUTDIR/dur.mmf


-------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/realign.sh: --------------------------------------------------------------------------------
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver
Watts - owatts@staffmail.ed.ac.uk


#----------------------------------------------------------------------

INDIR=$1
OUTDIR=$2
BIN=$3

[ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ;

UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script

## FIX (consistency with the sibling step scripts, e.g. reestimate.sh):
## sourcing an unset $VOICE_BUILD_CONFIG silently sources nothing and the
## script then runs with empty settings -- fail loudly instead:
if [ -z $VOICE_BUILD_CONFIG ] ; then
    echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1
fi
source $VOICE_BUILD_CONFIG
#----------------------------------------------------------------------



source $UTIL/setup_directory.sh $INDIR $OUTDIR

## copy models:
cp $INDIR/cmp.mmf $OUTDIR/
cp $INDIR/dur.mmf $OUTDIR/

## Copy decision trees if present:
if [ -e $INDIR/tree_dur.txt ] ; then
    cp $INDIR/tree* $OUTDIR
fi


endstate=$[$NSTATE + 1]


## Realign the training data with the current models, writing per-utterance
## labels, then merge those labels back into a single MLF:
mkdir -p $OUTDIR/data/newlab

$BIN/HSMMAlign -A -C $OUTDIR/config/general.conf -D -V $RELAXED_LABEL_PRUNE -H $INDIR/cmp.mmf -N $INDIR/dur.mmf \
      -I $OUTDIR/data/mlf.full -S $OUTDIR/data/uttlist.cmp -T 1 -t 4000 -w 1.0 \
      -m $OUTDIR/data/newlab $OUTDIR/data/modellist.full $OUTDIR/data/modellist.full
## FIX (consistency with sibling scripts): check the aligner actually succeeded:
if [ $? -gt 0 ] ; then echo "Alignment failed" ; exit 1 ; fi
rm -f $OUTDIR/data/newlab_list
find $OUTDIR/data/newlab/ -name '*.lab' -print > $OUTDIR/data/newlab_list
echo " " > $OUTDIR/null.hed
## Keep the previous MLF around, then rebuild mlf.full from the new labels:
mv $OUTDIR/data/mlf.full $OUTDIR/data/mlf.full.OLD
$BIN/HLEd -A -D -T 1 -V -l '*' -i $OUTDIR/data/mlf.full -S $OUTDIR/data/newlab_list $OUTDIR/null.hed
-------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/realign_to_labels.sh: --------------------------------------------------------------------------------
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk


#----------------------------------------------------------------------

INDIR=$1
OUTDIR=$2
BIN=$3


[ $# -ne 3 ] && echo "Wrong number of
arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | 26 | #------------- 27 | # Hard coded: 28 | HVITE_BEAM=" 1000 100000 1000000 " 29 | #------------- 30 | 31 | 32 | ## no label prune!!!! 33 | 34 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 35 | 36 | 37 | $BIN/HVite -l \* -A -C $OUTDIR/config/general.conf -D -V -T 1 -a -m -I $OUTDIR/data/mlf.words \ 38 | -H $INDIR/cmp.mmf -i $OUTDIR/data/mlf.mono.NEW -o SW \ 39 | -t $HVITE_BEAM -S $OUTDIR/data/uttlist.cmp -y lab $OUTDIR/data/lexicon.txt \ 40 | $INDIR/data/modellist.mono 41 | if [ $? -gt 0 ] ; then echo "Alignment failed" ; exit 1 ; fi 42 | 43 | ## ------------------------ check success ---------------------------- 44 | if [ ! 
-e $OUTDIR/data/mlf.mono.NEW ] ; then 45 | echo "Alignment failed: cmp.mmf not made" 46 | exit 1 47 | fi 48 | ## ------------------------------------------------------------------- 49 | 50 | 51 | ## rename new alignment so it will be used in future: 52 | mv $OUTDIR/data/mlf.mono.NEW $OUTDIR/data/mlf.mono 53 | 54 | ## remove names of utts for which no label has been found from training list: 55 | $UTIL/update_train_list.py -mlf $OUTDIR/data/mlf.mono -trainlist $OUTDIR/data/uttlist.cmp 56 | 57 | ## copy models (which weren't updated): 58 | cp $INDIR/cmp.mmf $OUTDIR/cmp.mmf 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/reestimate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | FLOOR=$4 13 | 14 | [ $# -ne 4 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | 26 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 27 | 28 | 29 | 30 | 31 | ## FLOOR=0 is used to perturb untied models before clustering, 1 used for clustered training. 
32 | if [ $FLOOR == 0 ] ; then 33 | OPT=" -C $OUTDIR/config/general-unfloor.conf -w 0.0 " # -w: set mix weight floor to f*MINMIX 34 | else 35 | OPT=" -C $OUTDIR/config/general.conf -w 3.0 " 36 | fi 37 | 38 | $BIN/HERest -A $BINMOD -D -T 1 -S $OUTDIR/data/uttlist.cmp \ 39 | -I $OUTDIR/data/mlf.full -m 1 -u tmvwdmv -t $BEAM \ 40 | -H $INDIR/cmp.mmf -N $INDIR/dur.mmf \ 41 | -M $OUTDIR/ -R $OUTDIR/ \ 42 | $OPT -s $OUTDIR/stat.cmp $STRICT_LABEL_PRUNE \ 43 | $OUTDIR/data/modellist.full $OUTDIR/data/modellist.full 44 | 45 | ## Make duration stats file: 46 | awk '{print $1 " " $2 " " $3 " " $3 }' $OUTDIR/stat.cmp > $OUTDIR/stat.dur 47 | 48 | ## Copy decision trees if present: 49 | if [ -e $INDIR/tree_dur.txt ] ; then 50 | cp $INDIR/tree* $OUTDIR 51 | fi 52 | 53 | 54 | ## ------------------------ check success ---------------------------- 55 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 56 | echo "Reestimation failed: cmp.mmf not made" 57 | exit 1 58 | fi 59 | if [ ! -e $OUTDIR/dur.mmf ] ; then 60 | echo "Reestimation failed: dur.mmf not made" 61 | exit 1 62 | fi 63 | ## ------------------------------------------------------------------- 64 | 65 | 66 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/reestimate_alignment_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | 14 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' 
; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | #----------- 26 | # Hard coded: 27 | ALIGN_BEAM=" -t 250.0 500.0 1000.0 " 28 | #----------- 29 | 30 | 31 | ## no label prune!!!! 32 | 33 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 34 | 35 | ## strip skip models from input MLF as an easy way to avoid 'ERROR [+7332] CreateInsts: Cannot have successive Tee models' 36 | grep -v skip $OUTDIR/data/mlf.mono > $OUTDIR/data/mlf.mono.noskip 37 | 38 | $BIN/HERest -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -S $OUTDIR/data/uttlist.cmp \ 39 | -I $OUTDIR/data/mlf.mono.noskip -m 1 -u tmvw $LABEL_PRUNE $ALIGN_BEAM \ 40 | -H $INDIR/cmp.mmf -M $OUTDIR/ $OUTDIR/data/modellist.mono 41 | 42 | 43 | ## ------------------------ check success ---------------------------- 44 | sleep 1 45 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 46 | echo "Reestimation failed: cmp.mmf not made" 47 | exit 1 48 | fi 49 | ## ------------------------------------------------------------------- 50 | 51 | 52 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/subset_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import re 8 | import os 9 | 10 | from argparse import ArgumentParser 11 | 12 | ## find location of util relative to current script: 13 | loc = os.path.abspath(os.path.join( __file__, os.pardir, os.pardir, 'util') ) 14 | sys.path.append(loc) 15 | 16 | ''' 17 | Choose subset of data randomly -- either x minutes or at least x or each monophone. 
18 | ''' 19 | from util import * 20 | 21 | def main_work(): 22 | 23 | ################################################# 24 | 25 | # ======== Get stuff from command line ========== 26 | 27 | a = ArgumentParser() 28 | a.add_argument('-indir', required=True, help= "...") 29 | a.add_argument('-outdir', required=True, help= "...") 30 | a.add_argument('-bindir', required=True, help= "...") 31 | a.add_argument('-choose', required=True, help= "e.g. 30minutes / 5examples (=> 5 of each monophone)") 32 | 33 | opts = a.parse_args() 34 | BIN=opts.bindir 35 | 36 | ## parse 'choose': 37 | if not re.search('(\d+)(minutes|examples)', opts.choose): 38 | sys.exit('bad value for "choose" option') 39 | 40 | s = re.search('(\d+)(min|examples)', opts.choose) 41 | #(quantity, kind) = s.groups 42 | print s.groups() # (quantity, kind) 43 | sys.exit('www') 44 | # =============================================== 45 | 46 | if not os.path.isdir(opts.outdir + '/data/'): 47 | os.makedirs(opts.outdir + '/data/') 48 | 49 | ## find 50 | 51 | cmplist = os.path.join(opts.outdir, 'data', 'uttlist.cmp') 52 | lablist = os.path.join(opts.outdir, 'data', 'uttlist.lab') 53 | monolist = os.path.join(opts.outdir, 'data', 'modellist.mono') 54 | fulllist = os.path.join(opts.outdir, 'data', 'modellist.full') 55 | monomlf = os.path.join(opts.outdir, 'data', 'mlf.mono') 56 | fullmlf = os.path.join(opts.outdir, 'data', 'mlf.full') 57 | 58 | 59 | ## 1) Make lists of .cmp and .lab (acoustic and linguistic feature) files: 60 | lab_ext = os.listdir(opts.labdir)[0].split('.')[-1] 61 | cmp = [re.sub('\.cmp\Z', '', fname) for fname in os.listdir(opts.cmpdir) \ 62 | if fname.endswith('.cmp')] 63 | lab = [re.sub('\.'+lab_ext+'\Z', '', fname) for fname in os.listdir(opts.labdir) ] 64 | intersect = [name for name in lab if name in cmp] ## only where both are present 65 | if intersect == []: 66 | sys.exit('set_up_data.py: No matching data files found in %s and %s'%( \ 67 | opts.labdir, opts.cmpdir)) 68 | cmp = 
[os.path.join(opts.cmpdir, name + '.cmp') for name in intersect] 69 | lab = [os.path.join(opts.labdir, name + '.' + lab_ext) for name in intersect] 70 | writelist(cmp, cmplist) 71 | writelist(lab, lablist) 72 | 73 | 74 | ## 2) Make mlfs and model lists for monophones and fullcontext phones: 75 | comm=BIN+"""/HLEd -A -D -T 1 -V -l '*' -n %s -i %s -S %s %s 76 | """%(fulllist, fullmlf, lablist, nullhed(opts.outdir)) 77 | print comm 78 | os.system(comm) 79 | 80 | comm=BIN+"""/HLEd -A -D -T 1 -V -l '*' -n %s -i %s -S %s -m %s %s 81 | """%(monolist, monomlf, lablist, nullhed(opts.outdir), fullmlf) 82 | print comm 83 | os.system(comm) 84 | 85 | 86 | if __name__=="__main__": 87 | 88 | main_work() 89 | 90 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/untie_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | #---------------------------------------------------------------------- 7 | 8 | INDIR=$1 9 | OUTDIR=$2 10 | BIN=$3 11 | 12 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 13 | 14 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 15 | 16 | if [ -z $VOICE_BUILD_CONFIG ] ; then 17 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 18 | fi 19 | source $VOICE_BUILD_CONFIG 20 | #---------------------------------------------------------------------- 21 | 22 | 23 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 24 | 25 | ## Use grep to get number of last stream from $STREAMS (which looks like e.g. 
"1 2-4"): 26 | LAST_STREAM=`grep -E -o "[0-9]+" <<<$STREAMS | tail -1` 27 | 28 | 29 | 30 | 31 | endstate=$[$NSTATE + 1] 32 | 33 | 34 | 35 | ## cmp 36 | 37 | echo "LT $INDIR/tree_cmp.txt" > $OUTDIR/untie_cmp.hed 38 | echo "AU $OUTDIR/data/modellist.full" >> $OUTDIR/untie_cmp.hed 39 | 40 | echo "UT {*.state[2-$endstate].stream[1-$LAST_STREAM]}" >> $OUTDIR/untie_cmp.hed 41 | echo "UT {*.state[2-$endstate] }" >> $OUTDIR/untie_cmp.hed 42 | echo "TI \"SWeight\" { *.state[2-$endstate].weights }" >> $OUTDIR/untie_cmp.hed 43 | echo "UT {*.transP}" >> $OUTDIR/untie_cmp.hed 44 | echo "TI TrP {*.transP}" >> $OUTDIR/untie_cmp.hed 45 | 46 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/cmp.mmf -s -p -i -w $OUTDIR/cmp.mmf $OUTDIR/untie_cmp.hed $INDIR/data/modellist.full 47 | # 48 | [ $? -gt 0 ] && echo "HHEd untie cmp failed" && exit 1 ; 49 | 50 | 51 | 52 | 53 | 54 | ## dur 55 | 56 | echo "LT $INDIR/tree_dur.txt" > $OUTDIR/untie_dur.hed 57 | echo "AU $OUTDIR/data/modellist.full" >> $OUTDIR/untie_dur.hed 58 | 59 | echo "UT {*.state[2].stream[1-${NSTATE}] }" >> $OUTDIR/untie_dur.hed 60 | echo "UT {*.state[2] }" >> $OUTDIR/untie_dur.hed 61 | echo "UT {*.transP}" >> $OUTDIR/untie_dur.hed 62 | echo "TI TrP {*.transP}" >> $OUTDIR/untie_dur.hed 63 | 64 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/dur.mmf -s -p -i -w $OUTDIR/dur.mmf $OUTDIR/untie_dur.hed $INDIR/data/modellist.full 65 | # 66 | [ $? 
-gt 0 ] && echo "HHEd untie dur failed" && exit 1 ;




-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/quick_voicebuild_01.cfg: --------------------------------------------------------------------------------


MONOPHONE_SUBSET="1examples"
SUBSETS="5minutes 1minutes 2minutes " # all"
SHORT_QUESTION_PATT="segment_is"


STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight
SHORT_STREAM_NAMES="1 2" # first digit only! No 5 for SPTK 2_3_4 5 ; do
## FIX: the original line read 'STREAM_NAMES = "mcep logF0"' -- whitespace
## around '=' is not a valid shell assignment, so sourcing this cfg tried to
## run a command called STREAM_NAMES and left the variable unset
## (make_engine_model.sh relies on $STREAM_NAMES to rename pdfs/trees):
STREAM_NAMES="mcep logF0"


NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.


BEAM=" 5000 5000 10000 "
STRICT_LABEL_PRUNE=" -e 2 "
RELAXED_LABEL_PRUNE=" -e 10 "


NRECLUSTER=5
NREEST=5


-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/quick_voicebuild_01.cfg.OLD: --------------------------------------------------------------------------------


MONOPHONE_SUBSET="1examples"
SUBSETS="5minutes 1minutes 2minutes " # all"
SHORT_QUESTION_PATT="segment_is"


STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight
SHORT_STREAM_NAMES="1 2" # first digit only!
No 5 for SPTK 2_3_4 5 ; do



NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.


BEAM=" 5000 5000 10000 "
STRICT_LABEL_PRUNE=" -e 2 "
RELAXED_LABEL_PRUNE=" -e 10 "


NRECLUSTER=5
NREEST=5


-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_alignment.cfg: --------------------------------------------------------------------------------



STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.

MIXTURE_SCHEDULE="0 0 0 2 4 "

NREEST=3

MIXUP_STREAMS=1 ## nasty thing happen if we try to increase mixtures of MSD streams

LABEL_PRUNE=" -e 2 " # empty string to turn this off " -e 2 "
-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild.cfg: --------------------------------------------------------------------------------



STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight
SHORT_STREAM_NAMES="1 2" # first digit only! No 5 for SPTK 2_3_4 5 ; do
## FIX: the original line read 'STREAM_NAMES ="mcep logF0"' -- the space
## before '=' is not a valid shell assignment, so sourcing this cfg errored
## and left the variable unset (make_engine_model.sh relies on $STREAM_NAMES
## to rename the converted pdfs/trees):
STREAM_NAMES="mcep logF0"


NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.
18 | 19 | 20 | BEAM=" 5000 5000 10000 " 21 | STRICT_LABEL_PRUNE=" -e 2 " 22 | RELAXED_LABEL_PRUNE=" -e 20 " 23 | 24 | 25 | NRECLUSTER=2 26 | NREEST=5 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild.cfg.OLD: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | STATIC_STREAM_SIZES="25 1 1 1" 5 | MSD_STREAM_INFO="0 1 1 1" 6 | STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment 7 | VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01" 8 | 9 | 10 | STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight 11 | SHORT_STREAM_NAMES="1 2" # first digit only! No 5 for SPTK 2_3_4 5 ; do 12 | 13 | 14 | 15 | NSTATE=5 16 | 17 | BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no. 18 | 19 | 20 | BEAM=" 5000 5000 10000 " 21 | STRICT_LABEL_PRUNE=" -e 2 " 22 | RELAXED_LABEL_PRUNE=" -e 20 " 23 | 24 | 25 | NRECLUSTER=2 26 | NREEST=5 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild_STRAIGHT.cfg: -------------------------------------------------------------------------------- 1 | ### Alternative configuration file for standard_voicebuild subrecipe. The only differences 2 | ### are to handle the extra aperiodicity stream, and to change 25 -> 60 for spectral stream 3 | ### size. 4 | ### 5 | ### This config file is intended for use from commandline external to Ossian. 
6 | 7 | 8 | 9 | STATIC_STREAM_SIZES="60 1 1 1 25" 10 | MSD_STREAM_INFO="0 1 1 1 0" 11 | STREAM_WEIGHTS="1.0 0.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment 12 | VFLOORSCALESTR="Vector 5 0.01 0.01 0.01 0.01 0.01" 13 | 14 | 15 | STREAMS="1 2-4 5" 16 | SHORT_STREAM_NAMES="1 2 5" 17 | 18 | 19 | 20 | NSTATE=5 21 | 22 | BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no. 23 | 24 | 25 | BEAM=" 5000 5000 10000 " 26 | STRICT_LABEL_PRUNE=" -e 2 " 27 | RELAXED_LABEL_PRUNE=" -e 20 " 28 | 29 | 30 | NRECLUSTER=2 31 | NREEST=5 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/script/extend_standard_alignment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | #---------------------------------------------------------------------- 7 | 8 | CMPDIR=$1 9 | LABDIR=$2 10 | BIN=$3 11 | OUT=$4 12 | CONFIG=$5 13 | EXISTING_MODEL=$6 14 | 15 | [ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ; 16 | 17 | ## location of directory 2 above that the script is in: 18 | TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )" 19 | 20 | #---------------------------------------------------------------------- 21 | 22 | export VOICE_BUILD_CONFIG=$CONFIG 23 | 24 | source $VOICE_BUILD_CONFIG 25 | 26 | 27 | STEPS=$TOPDIR/steps/ 28 | 29 | function check_step { 30 | ## use global $? and $STEPNUM 31 | if [ $? -gt 0 ] ; then 32 | echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" 
; echo ; exit 1 ; 33 | fi 34 | } 35 | 36 | 37 | STEPNUM=1 38 | 39 | start_time=$(date +"%s") 40 | 41 | ## ------ preparation ------ 42 | 43 | 44 | 45 | python $STEPS/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN -subset $DATA_SUBSET 46 | check_step ; 47 | 48 | $STEPS/make_alignment_lexicon.sh $OUT/$STEPNUM $BIN 49 | check_step 50 | 51 | #$STEPS/make_alignment_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN 52 | #check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 53 | 54 | cp $EXISTING_MODEL/final_model/cmp.mmf $OUT/$STEPNUM 55 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 56 | 57 | 58 | ## TODO -- check all nec, models exist 59 | 60 | ## ------ training ----- 61 | 62 | for NMIX in $MIXTURE_SCHEDULE ; do 63 | echo "$NMIX ==== " 64 | if [ ! $NMIX == 0 ] ; then 65 | $STEPS/increase_mixture_components.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $NMIX $BIN 66 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 67 | fi 68 | ## --- reestimation --- 69 | for i in `seq $NREEST` ; do 70 | $STEPS/reestimate_alignment_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 71 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 72 | done 73 | ## --- realignment --- 74 | $STEPS/realign_to_labels.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 75 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 76 | done 77 | 78 | rm -rf $OUT/final_model/ 79 | cp -r $OUT/$PREVIOUS/ $OUT/final_model/ 80 | 81 | end_time=$(date +"%s") 82 | time_diff=$(($end_time-$start_time)) 83 | 84 | echo "Aligner training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run." 
## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/extend_standard_alignment_external_lexicon.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Extend an existing alignment model using an externally supplied lexicon:
## set up data, reuse the acoustic models from EXISTING_MODEL, expand word
## labels to monophones with the external lexicon via HLEd, then run the
## mixture-up / reestimation / realignment schedule given by the config.

#----------------------------------------------------------------------

CMPDIR=$1           ## acoustic feature (cmp) directory
LABDIR=$2           ## label directory
BIN=$3              ## directory containing HTK binaries (HLEd etc.)
OUT=$4              ## output directory; numbered step subdirectories made here
CONFIG=$5           ## voice build config, sourced below (MIXTURE_SCHEDULE, NREEST, ...)
EXISTING_MODEL=$6   ## previously trained model dir (contains final_model/)
LEXICON=$7          ## external lexicon file

[ $# -ne 7 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")

## ------ preparation ------

python $STEPS/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN ## -subset $DATA_SUBSET
check_step ;

#$STEPS/make_alignment_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
#check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

## Reuse previously trained models instead of flat-starting monophones:
mkdir -p $OUT/$STEPNUM/data/
cp $EXISTING_MODEL/final_model/cmp.mmf $OUT/$STEPNUM
cp $EXISTING_MODEL/final_model/data/modellist.mono $OUT/$STEPNUM/data/modellist.mono


#$STEPS/make_alignment_lexicon.sh $OUT/$STEPNUM $BIN
#check_step

## copy existing lexicon -- must contain _PUNC_ etc.
cp $LEXICON $OUT/${STEPNUM}/data/lexicon.txt

## juncture/punctuation symbols may be realised as silence or skipped entirely:
echo '_SPACE_ skip' >> $OUT/${STEPNUM}/data/lexicon.txt
echo '_SPACE_ sil' >> $OUT/${STEPNUM}/data/lexicon.txt
echo '_PUNC_ sil' >> $OUT/${STEPNUM}/data/lexicon.txt
echo '_PUNC_ skip' >> $OUT/${STEPNUM}/data/lexicon.txt
echo 'sil sil' >> $OUT/${STEPNUM}/data/lexicon.txt

# initial expansion of labels using this lexicon
cp $OUT/${STEPNUM}/data/mlf.full $OUT/${STEPNUM}/data/mlf.words

echo "EX" > $OUT/${STEPNUM}/expand_labels.hed

## HLEd EX command: expand each word in the word-level MLF to phones via
## the lexicon, writing the monophone MLF to mlf.mono:
$BIN/HLEd -I $OUT/${STEPNUM}/data/mlf.words -i $OUT/${STEPNUM}/data/mlf.mono -l '*' -d $OUT/${STEPNUM}/data/lexicon.txt $OUT/${STEPNUM}/expand_labels.hed $OUT/${STEPNUM}/data/mlf.words

check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

## TODO -- check all nec. models exist

## ------ training -----

for NMIX in $MIXTURE_SCHEDULE ; do
    echo "$NMIX ==== "
    ## A schedule entry of 0 means: no mixing-up this round.  (Quoted
    ## comparison: the previous unquoted '[ ! $NMIX == 0 ]' form breaks
    ## when NMIX is empty.)
    if [ "$NMIX" != 0 ] ; then
        $STEPS/increase_mixture_components.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $NMIX $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi
    ## --- reestimation ---
    for i in `seq $NREEST` ; do
        $STEPS/reestimate_alignment_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done
    ## --- realignment ---
    $STEPS/realign_to_labels.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
done

rm -rf $OUT/final_model/
cp -r $OUT/$PREVIOUS/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Aligner training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."
## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/quick_voicebuild_01.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

# speed up voice building by using subset of questions until final clustering.

#----------------------------------------------------------------------

CMPDIR=$1      ## acoustic feature (cmp) directory
LABDIR=$2      ## label directory
QUESTIONS=$3   ## full question set for tree building
BIN=$4         ## HTK binaries
OUT=$5         ## output directory
CONFIG=$6      ## voice build config (NRECLUSTER, NREEST, ...)

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

## defaults for configured values:
SHORT_QUESTION_PATT="segment_is"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/
UTIL=$TOPDIR/util/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


start_time=$(date +"%s")

## prepare SHORT_QUESTIONS: a cheap question subset used for all but the
## final clustering round:
mkdir -p $OUT/
SHORT_QUESTIONS=$OUT/short_questions.hed
grep $SHORT_QUESTION_PATT $QUESTIONS > $SHORT_QUESTIONS


STEPNUM=1

python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

FIRST_ITER=1
for j in `seq $NRECLUSTER` ; do

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ; ### this gave fail even when ran ok...
        PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    ## use the cheap subset except on the final clustering round:
    QUESTIONS_TO_USE=$SHORT_QUESTIONS
    if [ $j -eq $NRECLUSTER ] ; then
        QUESTIONS_TO_USE=$QUESTIONS
    fi

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done

    ## realign only after the first clustering round:
    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi
    FIRST_ITER=0
done

$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."

## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/quick_voicebuild_01.sh.OLD
## Historical copy kept verbatim (identical to quick_voicebuild_01.sh as it
## stood before this revision; retains the deprecated $[ ] arithmetic).
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

# speed up voice building by using subset of questions until final clustering.

#----------------------------------------------------------------------

CMPDIR=$1
LABDIR=$2
QUESTIONS=$3
BIN=$4
OUT=$5
CONFIG=$6

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

## defaults for configured values:
SHORT_QUESTION_PATT="segment_is"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/
UTIL=$TOPDIR/util/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


start_time=$(date +"%s")

## prepare SHORT_QUESTIONS:
mkdir -p $OUT/
SHORT_QUESTIONS=$OUT/short_questions.hed
grep $SHORT_QUESTION_PATT $QUESTIONS > $SHORT_QUESTIONS


STEPNUM=1

python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

FIRST_ITER=1
for j in `seq $NRECLUSTER` ; do

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ; ### this gave fail even when ran ok...
        PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    QUESTIONS_TO_USE=$SHORT_QUESTIONS
    if [ $j -eq $NRECLUSTER ] ; then
        QUESTIONS_TO_USE=$QUESTIONS
    fi

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    done

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi
    FIRST_ITER=0
done

$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."

## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/quick_voicebuild_02.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Like quick_voicebuild_01.sh, but additionally grows the training set
## through a schedule of data subsets, starting monophones on a tiny subset.

#----------------------------------------------------------------------

CMPDIR=$1
LABDIR=$2
QUESTIONS=$3
BIN=$4
OUT=$5
CONFIG=$6

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

## defaults for configured values:
MONOPHONE_SUBSET="1examples"
SUBSETS="5minutes 10minutes 20minutes all"
SHORT_QUESTION_PATT="segment_is"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/
UTIL=$TOPDIR/util/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


start_time=$(date +"%s")

## prepare SHORT_QUESTIONS:
mkdir -p $OUT/
SHORT_QUESTIONS=$OUT/short_questions.hed
grep $SHORT_QUESTION_PATT $QUESTIONS > $SHORT_QUESTIONS


STEPNUM=1

## Make small data set for training monophone models.
## ('python' prefix added for consistency with the sibling scripts, which
## do not rely on the .py file's executable bit; check_step added --
## previously a failure here went undetected.)
python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/data_${MONOPHONE_SUBSET} \
    -bindir $BIN -subset $MONOPHONE_SUBSET
check_step ;

$STEPS/make_monophone.sh $OUT/data_${MONOPHONE_SUBSET} $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))


FIRST_ITER=1
for SUBSET in $SUBSETS ; do

    ## Move up to more data -- make it in $OUT/$STEPNUM where future models are trained:
    python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/$STEPNUM \
        -bindir $BIN -subset $SUBSET
    check_step ;

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ; ### this gave fail even when ran ok...
        PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    ## use the full question set only once all data is in play:
    QUESTIONS_TO_USE=$SHORT_QUESTIONS
    if [ "$SUBSET" == "all" ] ; then
        QUESTIONS_TO_USE=$QUESTIONS
    fi

    echo "$STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN"
    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done

#    if [ $FIRST_ITER -eq 1 ] ; then
#        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
#        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
#    fi
    FIRST_ITER=0
done

$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."
## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/standard_alignment.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Train an alignment model from scratch: set up data, build an alignment
## lexicon and flat-start monophones, then run the mixture-up /
## reestimation / realignment schedule given by the config.

#----------------------------------------------------------------------

CMPDIR=$1   ## acoustic feature (cmp) directory
LABDIR=$2   ## label directory
BIN=$3      ## HTK binaries
OUT=$4      ## output directory
CONFIG=$5   ## voice build config (MIXTURE_SCHEDULE, NREEST, ...)

[ $# -ne 5 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")

## ------ preparation ------

python $STEPS/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ;
$STEPS/make_alignment_lexicon.sh $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

$STEPS/make_alignment_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))


## ------ training -----

for NMIX in $MIXTURE_SCHEDULE ; do
    echo "$NMIX ==== "
    ## A schedule entry of 0 means: no mixing-up this round.  (Quoted
    ## comparison: the previous unquoted '[ ! $NMIX == 0 ]' form breaks
    ## when NMIX is empty.)
    if [ "$NMIX" != 0 ] ; then
        $STEPS/increase_mixture_components.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $NMIX $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi
    ## --- reestimation ---
    for i in `seq $NREEST` ; do
        $STEPS/reestimate_alignment_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done
    ## --- realignment ---
    $STEPS/realign_to_labels.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
done

rm -rf $OUT/final_model/
cp -r $OUT/$PREVIOUS/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Aligner training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."


## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/standard_voicebuild.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Standard voice build: flat-start monophones, then NRECLUSTER rounds of
## full-context cloning/untying, reestimation, MDL tree clustering, and a
## final engine model conversion.

#----------------------------------------------------------------------

CMPDIR=$1      ## acoustic feature (cmp) directory
LABDIR=$2      ## label directory
QUESTIONS=$3   ## question set for tree building
BIN=$4         ## HTK binaries
OUT=$5         ## output directory
CONFIG=$6      ## voice build config (NRECLUSTER, NREEST, ...)

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------


##!!!!! temp: !!!!!

#SPTK=/Users/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin
#OLDHTS=/Users/owatts/simple4all/hts_on_speed/code/hts2_2/bin ## ~/repos/simple4all/CSTRVoiceClone/trunk/bin/


export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/


function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")

## ('python' prefix added for consistency with standard_alignment.sh, which
## does not rely on the .py file's executable bit.)
python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

for j in `seq $NRECLUSTER` ; do

    if [ $j -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ;
        PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done

    ## realign only after the first clustering round:
    if [ $j -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

done


$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."

## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/standard_voicebuild.sh.OLD
## Historical copy kept verbatim (identical to standard_voicebuild.sh as it
## stood before this revision; retains the deprecated $[ ] arithmetic).
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

#----------------------------------------------------------------------

CMPDIR=$1
LABDIR=$2
QUESTIONS=$3
BIN=$4
OUT=$5
CONFIG=$6

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------


##!!!!! temp: !!!!!

#SPTK=/Users/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin
#OLDHTS=/Users/owatts/simple4all/hts_on_speed/code/hts2_2/bin ## ~/repos/simple4all/CSTRVoiceClone/trunk/bin/


export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/


function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")


$TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

for j in `seq $NRECLUSTER` ; do

    if [ $j -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ;
        PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    done

    if [ $j -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi

done


$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."
## ==========================================================================
## File: scripts/acoustic_model_training/util/filter_questions.py
## ==========================================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

"""Filter an HTK question file, keeping only questions that split the model
set non-trivially: a question is kept iff it matches between -percent % and
(100 - percent) % of the models listed in the -models file."""

import sys
import re
import os
import random

from argparse import ArgumentParser

## find location of util relative to current script:
loc = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir, 'util'))
sys.path.append(loc)


from util import *


def main_work():

    #################################################

    # ======== Get stuff from command line ==========

    a = ArgumentParser()
    a.add_argument('-infile', required=True, help="question file to filter")
    a.add_argument('-outfile', required=True, help="where to write the kept questions")
    a.add_argument('-models', required=True, help="file listing model names, one per line")
    a.add_argument('-percent', required=True, type=int,
                   help="keep questions matching between this %% and (100 - this) %% of models")

    opts = a.parse_args()

    # ===============================================

    questions = readlist(opts.infile)
    models = readlist(opts.models)

    if not models:
        sys.exit('No models found in %s' % (opts.models))

    nmod = float(len(models))

    filtered = []

    for line in questions:
        line = line.strip()
        if line != '':

            (QS, name, patt) = re.split(r'\s+', line)
            regex_patt = htk_wildcard_pattern_to_regex(patt)
            ## Count how many models this question matches.  NB: the counter
            ## must be initialised OUTSIDE the model loop -- previously it was
            ## reset on every iteration, so only the final model's result
            ## survived.
            count = 0
            for mod in models:
                if re.match(regex_patt, mod):
                    count += 1
            ## Scale to a 0-100 percentage so it is comparable with the
            ## integer -percent option.  (Previously a 0-1 fraction was
            ## compared against a percentage, discarding almost everything.)
            percent_matched = 100.0 * count / nmod
            if percent_matched < opts.percent or percent_matched > (100.0 - opts.percent):
                pass  ## too lopsided a split -- discard the question
            else:
                filtered.append(line)

    writelist(filtered, opts.outfile)


if __name__ == "__main__":

    main_work()
-------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/make_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | DIR=$1 10 | 11 | [ $# -ne 1 ] && echo "Wrong number of arguments supplied" && exit 1 ; 12 | 13 | source $VOICE_BUILD_CONFIG 14 | 15 | #---------------------------------------------------------------------- 16 | 17 | 18 | 19 | 20 | 21 | mkdir -p $DIR 22 | 23 | cat > $DIR/general.conf< $DIR/engine_convert.conf< $DIR/general-unfloor.conf< $DIR/clust.conf< $DIR/clust-dur.conf< Junichi's script -> Reima made stream independent -> Oliver 12 | ## put in separate script and moved from perl to python 13 | 14 | # sub routine for generating proto-type model (Copy from HTS-2.1) 15 | 16 | # Made stream-independent 23/4/2012 rk 17 | 18 | 19 | proto_out = sys.argv[1] 20 | config_in = sys.argv[2] 21 | 22 | 23 | config = ConfigObj(config_in) 24 | 25 | static_stream_sizes = config.get('STATIC_STREAM_SIZES', default='25 1 1 1') ### defaults for SPTK 26 | MSD_stream_info = config.get('MSD_STREAM_INFO', default='0 1 1 1') 27 | stream_weights = config.get('STREAM_WEIGHTS', default='1.0 1.0 1.0 0.9') 28 | NSTATE = int(config.get('NSTATE', default=5)) 29 | 30 | 31 | ## string -> numeric list conversion: 32 | def int_list(string): 33 | seq = re.split('\s+', string.strip()) 34 | return [int(item) for item in seq] 35 | 36 | static_stream_sizes = int_list(static_stream_sizes) 37 | MSD_stream_info = int_list(MSD_stream_info) 38 | 39 | 40 | 41 | n_weights = len(re.split('\s+', stream_weights.strip())) 42 | num_stream = len(static_stream_sizes) 43 | if (len(MSD_stream_info) != num_stream) or (n_weights!= num_stream): 44 | sys.exit('stream info not 
same: %s %s %s'%(static_stream_sizes, MSD_stream_info, stream_weights)) 45 | 46 | 47 | stream_indexes = range(1, num_stream+1) 48 | 49 | 50 | 51 | total_stream_sizes = [] 52 | for (MSD,size) in zip(MSD_stream_info, static_stream_sizes): 53 | if MSD: 54 | total_stream_sizes.append(size) 55 | else: 56 | total_stream_sizes.append(size * 3) 57 | 58 | vsize = sum(total_stream_sizes) 59 | 60 | 61 | 62 | 63 | d = '' 64 | 65 | ## ----- HEADER ----- 66 | d += '~o %s '%(vsize) 67 | 68 | d += ' %s '%(num_stream) 69 | d += ' '.join([str(val) for val in MSD_stream_info]) 70 | d += '\n' 71 | 72 | d += ' %s '%(num_stream) 73 | d += ' '.join([str(val) for val in total_stream_sizes]) 74 | d += '\n' 75 | 76 | ## ----- output HMMs ------ 77 | d += "\n" 78 | d += " %d\n"%(NSTATE+2) 79 | 80 | # output HMM states 81 | for i in range(2, NSTATE+2): 82 | 83 | # output state information 84 | d += " %s\n"%(i) 85 | 86 | # output stream weight 87 | d += ' %d '%(num_stream) 88 | 89 | d += stream_weights 90 | d += '\n' 91 | 92 | 93 | for (i, MSD, size) in zip(stream_indexes, MSD_stream_info, total_stream_sizes): 94 | d += " %d\n"%(i) 95 | 96 | if not MSD: 97 | d += " %d\n "%(size) 98 | for j in range(size): 99 | d += "0.0 " 100 | d += '\n' 101 | 102 | d += " %d\n "%(size) 103 | for j in range(size): 104 | d += "1.0 " 105 | d += '\n' 106 | 107 | else: 108 | 109 | d += " 2\n" 110 | 111 | # output 1st space (non 0-dimensional space) 112 | d += " 1 0.5000\n" 113 | d += " 1 0.0 \n" 114 | d += " 1 1.0 \n" 115 | 116 | # output 2nd space (0-dimensional space) 117 | d += " 2 0.5000\n" 118 | d += " 0 \n" 119 | d += " 0 \n" 120 | 121 | # output state transition matrix 122 | d += ' %d\n'%(NSTATE+2) 123 | d += " " 124 | d += "0.000e+0 1.000e+0 " + " ".join(["0.000e+0"] * (NSTATE)) 125 | 126 | d += "\n " 127 | 128 | for i in range(2, NSTATE+2): 129 | for j in range(1, NSTATE+3): 130 | if i==j: 131 | d += "6.000e-1 " 132 | elif i == j-1: 133 | d += "4.000e-1 " 134 | else: 135 | d += "0.000e+0 " 136 | d += "\n 
" 137 | 138 | for j in range(NSTATE+2): 139 | d += "0.000e+0 " 140 | 141 | d += "\n\n" 142 | 143 | f = open(proto_out, 'w') 144 | for line in d: 145 | f.write(line) 146 | f.close() 147 | 148 | 149 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/make_proto_skip_hsmm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | import sys 8 | import re 9 | from configobj import ConfigObj 10 | 11 | ## History: public HTS -> Junichi's script -> Reima made stream independent -> Oliver 12 | ## put in separate script and moved from perl to python 13 | 14 | # sub routine for generating proto-type model (Copy from HTS-2.1) 15 | 16 | # Made stream-independent 23/4/2012 rk 17 | 18 | 19 | proto_out = sys.argv[1] 20 | config_in = sys.argv[2] 21 | 22 | 23 | config = ConfigObj(config_in) 24 | 25 | static_stream_sizes = config.get('STATIC_STREAM_SIZES', default='25 1 1 1') ### defaults for SPTK 26 | MSD_stream_info = config.get('MSD_STREAM_INFO', default='0 1 1 1') 27 | stream_weights = config.get('STREAM_WEIGHTS', default='1.0 1.0 1.0 0.9') 28 | 29 | #static_stream_sizes = config.get('static_stream_sizes', default='25 1 1 1') ### defaults for SPTK 30 | #MSD_stream_info = config.get('MSD_stream_info', default='0 1 1 1') 31 | #stream_weights = config.get('stream_weights', default='1.0 1.0 1.0 0.9') 32 | 33 | 34 | NSTATE = 1 ## fixed for skip model 35 | 36 | 37 | ## string -> numeric list conversion: 38 | def int_list(string): 39 | seq = re.split('\s+', string.strip()) 40 | return [int(item) for item in seq] 41 | 42 | static_stream_sizes = int_list(static_stream_sizes) 43 | MSD_stream_info = int_list(MSD_stream_info) 44 | 45 | 46 | 47 | n_weights = len(re.split('\s+', stream_weights.strip())) 48 | num_stream = 
len(static_stream_sizes) 49 | if (len(MSD_stream_info) != num_stream) or (n_weights!= num_stream): 50 | sys.exit('stream info not same: %s %s %s'%(static_stream_sizes, MSD_stream_info, stream_weights)) 51 | 52 | 53 | stream_indexes = range(1, num_stream+1) 54 | 55 | 56 | 57 | total_stream_sizes = [] 58 | for (MSD,size) in zip(MSD_stream_info, static_stream_sizes): 59 | if MSD: 60 | total_stream_sizes.append(size) 61 | else: 62 | total_stream_sizes.append(size * 3) 63 | 64 | vsize = sum(total_stream_sizes) 65 | 66 | 67 | 68 | 69 | d = '' 70 | 71 | ## ----- HEADER ----- 72 | d += '~o %s '%(vsize) 73 | 74 | d += ' %s '%(num_stream) 75 | d += ' '.join([str(val) for val in MSD_stream_info]) 76 | d += '\n' 77 | 78 | d += ' %s '%(num_stream) 79 | d += ' '.join([str(val) for val in total_stream_sizes]) 80 | d += '\n' 81 | 82 | ## ----- output HMMs ------ 83 | d += "\n" 84 | d += " %d\n"%(NSTATE+2) 85 | 86 | # output HMM states 87 | for i in range(2, NSTATE+2): 88 | 89 | # output state information 90 | d += " %s\n"%(i) 91 | 92 | # output stream weight 93 | d += ' %d '%(num_stream) 94 | 95 | d += stream_weights 96 | d += '\n' 97 | 98 | 99 | for (i, MSD, size) in zip(stream_indexes, MSD_stream_info, total_stream_sizes): 100 | d += " %d\n"%(i) 101 | 102 | if not MSD: 103 | d += " %d\n "%(size) 104 | for j in range(size): 105 | d += "0.0 " 106 | d += '\n' 107 | 108 | d += " %d\n "%(size) 109 | for j in range(size): 110 | d += "1.0 " 111 | d += '\n' 112 | 113 | else: 114 | 115 | d += " 2\n" 116 | 117 | # output 1st space (non 0-dimensional space) 118 | d += " 1 0.5000\n" 119 | d += " 1 0.0 \n" 120 | d += " 1 1.0 \n" 121 | 122 | # output 2nd space (0-dimensional space) 123 | d += " 2 0.5000\n" 124 | d += " 0 \n" 125 | d += " 0 \n" 126 | 127 | 128 | # output state transition matrix 129 | d += ' %d\n'%(NSTATE+2) 130 | d += " 0.0 0.0 1.0 \n" 131 | d += " 0.0 0.5 0.5 \n" 132 | d += " 0.0 0.0 0.0 \n" 133 | d += "\n\n" 134 | 135 | f = open(proto_out, 'w') 136 | for line in d: 137 | 
f.write(line) 138 | f.close() 139 | 140 | 141 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/separate_trees.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import re 8 | import os 9 | 10 | from argparse import ArgumentParser 11 | 12 | from util import * 13 | 14 | def main_work(): 15 | 16 | ################################################# 17 | 18 | # ======== Get stuff from command line ========== 19 | 20 | a = ArgumentParser() 21 | a.add_argument('-treefile', required=True, help= "...") 22 | 23 | opts = a.parse_args() 24 | # =============================================== 25 | 26 | f = open(opts.treefile, 'r') 27 | data = f.read() 28 | f.close() 29 | trees = re.split('\n\s*\n', data) 30 | trees = [t for t in trees if t != ''] 31 | 32 | ## first block is questions: 33 | questions = trees[0] + '\n\n' 34 | assert questions[:2] == 'QS' 35 | 36 | tree_dict = {} 37 | for tree in trees[1:]: 38 | stream = re.search('(?<=stream\[)[^\]]+(?=\])', tree).group() 39 | if ',' in stream: 40 | stream = stream.split(',')[0] 41 | if stream not in tree_dict: 42 | tree_dict[stream] = [] 43 | tree_dict[stream].append(tree + '\n\n') 44 | 45 | for (stream, trees) in tree_dict.items(): 46 | writelist([questions] + trees, opts.treefile+'_'+stream) 47 | 48 | if __name__=="__main__": 49 | 50 | main_work() 51 | 52 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/setup_directory.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | 
#---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | 12 | [ $# -ne 2 ] && echo "Wrong number of arguments supplied" && exit 1 ; 13 | 14 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 15 | 16 | #---------------------------------------------------------------------- 17 | 18 | 19 | ### Don't overwrite existing data! Allows bigger data etc to be copied in before this is called. 20 | mkdir -p $OUTDIR/data 21 | for datafile in uttlist.cmp uttlist.lab modellist.mono modellist.full mlf.mono mlf.full ; do 22 | if [ ! -e $OUTDIR/data/$datafile ] ; then 23 | cp $INDIR/data/$datafile $OUTDIR/data/$datafile 24 | fi 25 | done 26 | 27 | for optional_file in mlf.words lexicon.txt ; do 28 | if [ -e $INDIR/data/$optional_file ] ; then 29 | cp $INDIR/data/$optional_file $OUTDIR/data/$optional_file ; 30 | fi 31 | done 32 | 33 | $UTIL/make_config.sh $OUTDIR/config/ 34 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/update_train_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import re 8 | import os 9 | 10 | from argparse import ArgumentParser 11 | 12 | from util import * 13 | 14 | def main_work(): 15 | 16 | ################################################# 17 | 18 | # ======== Get stuff from command line ========== 19 | 20 | a = ArgumentParser() 21 | a.add_argument('-mlf', required=True, help= "...") 22 | a.add_argument('-trainlist', required=True, help= "...") 23 | 24 | opts = a.parse_args() 25 | # =============================================== 26 | 27 | mlf = readlist(opts.mlf) 28 | trainlist = readlist(opts.trainlist) 29 | 30 | mlf_files = [line for line in mlf if 
def htk_to_sec(htk_time):
    """
    Convert a duration in HTK units (1 unit = 100 ns) to seconds.

    Accepts a number or a numeric string (HTK label files store times as
    text) and always returns a float.
    """
    ## float() uniformly handles int, float and string input; the previous
    ## check (type(htk_time) == type("string")) missed unicode strings, which
    ## then raised a TypeError on division.
    return float(htk_time) / 10000000.0
'|'.join(chunks) + ')' 45 | reg = re.compile(new_chunks) 46 | return reg -------------------------------------------------------------------------------- /scripts/default/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/default/__init__.py -------------------------------------------------------------------------------- /scripts/default/const.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 5 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 6 | 7 | PI = 3 8 | 9 | 10 | #directory constants 11 | CONFIG = "config" 12 | PROCESSOR = "processors" 13 | MODEL = "models" 14 | TRAIN = "train" 15 | VOICE = "voice" 16 | CORPUS = "corpus" 17 | RULES = "rules" 18 | CONTEXT_FEATS = "context_feats" 19 | SCRIPT = "scripts" 20 | SPEAKER ="speakers" 21 | COMPONENT="components" 22 | LANG = "lang" 23 | BIN = "bin" 24 | HTS = "htk" 25 | EST = "speech_tools" 26 | SPTK = "bin" 27 | 28 | ACOUSTIC_MODELLING_SCRIPT = "acoustic_modelling_script" 29 | ACOUSTIC_MODELLING_CONFIG = "acoustic_modelling_config" 30 | 31 | 32 | #resource types 33 | FILE = "file" 34 | DIRECTORY = "dir" 35 | STRING = "string" 36 | FLAG = "flag" 37 | 38 | 39 | # file manipulation 40 | 41 | CREATE = "create" 42 | REPLACE = "replace" 43 | APPEND = "append" 44 | DELETE = "delete" 45 | BACKUP = "backup" 46 | 47 | #possible units in utterance hierarchy from bottom 48 | 49 | STATE = "state" 50 | PHONE = "segment" 51 | LETTER = "letter" 52 | SYLLABLE = "syl" 53 | MORPH = "morph" 54 | SUBWORD = "subword" 55 | WORD = "word" 56 | TOKEN = "token" 57 | XP = "xp" 58 | PHRASE = "phrase" 59 | UTTERANCE = "utt" 60 | PARAGRAPH = "paragraph" 61 | CHAPTER = "chapter" 62 | TEXT = "text" 
#!/bin/bash
## Download a 1-hour Tundra corpus subset for one language and unpack it
## under corpus/<lang>/speakers/tundra_v1_1hour/ with the txt/ and wav/
## subdirectory layout Ossian expects.

LANG_CODE=$1

USAGE="Please supply a single language code, from the set: bg, de, en, fi, hu, it, pl "

if [ $# -ne 1 ] ; then
    echo "$USAGE" ;
    exit 1 ;
fi

URLSTEM="http://tundra.simple4all.org/data"
DIR="$( cd "$( dirname "$0" )" && pwd )"    ## location of this script
CORPUS_OUT="$DIR/../corpus/$LANG_CODE/speakers/tundra_v1_1hour/"

echo "$CORPUS_OUT"
echo "$LANG_CODE"

## Map the language code to its archive name:
case $LANG_CODE in
    bg )
        DATA_ARCHIVE=BG_zhetvariat_1hr.zip
        ;;
    de )
        DATA_ARCHIVE=DE_doriangray.zip
        ;;
    en )
        DATA_ARCHIVE=EN_livingalone_1hr.zip
        ;;
    fi )
        DATA_ARCHIVE=FI_rautatie_1hr.zip
        ;;
    hu )
        DATA_ARCHIVE=HU_egri_1hr.zip
        ;;
    it )
        DATA_ARCHIVE=IT_galatea_1hr.zip
        ;;
    pl )
        DATA_ARCHIVE=PL_siedem_1hr.zip
        ;;
    *)
        echo "$USAGE" ; exit 1 ;;
esac

mkdir -p "$CORPUS_OUT"

echo "Download Tundra 1 hour subset for language $LANG_CODE..."
HERE=`pwd`
cd "$CORPUS_OUT" || exit 1

## Fail early instead of mangling the corpus directory on a bad download:
wget "$URLSTEM/$DATA_ARCHIVE" || { echo "Download of $DATA_ARCHIVE failed" ; exit 1 ; }
unzip "$DATA_ARCHIVE" || { echo "Extraction of $DATA_ARCHIVE failed" ; exit 1 ; }

## Flatten the archive layout to txt/ + wav/:
mv */train/text/ ./txt/
mv */train/wav/ ./wav/

## Return to the original working directory ($HERE was previously captured
## but never used, leaving the caller's cwd changed):
cd "$HERE"
Change the following line to the top level of your copy of the Ossian code: 7 | OSSIAN: __INSERT_PATH_TO_OSSIAN_HERE__ 8 | LANGUAGE: __INSERT_LANGUAGE_HERE__ 9 | SPEAKER: __INSERT_SPEAKER_HERE__ 10 | RECIPE: __INSERT_RECIPE_HERE__ 11 | 12 | 13 | ## This line should point to the language/data/recipe combination you are working on: 14 | TOP: %(OSSIAN)s/train/%(LANGUAGE)s/speakers/%(SPEAKER)s/%(RECIPE)s/ 15 | 16 | ## spot for putting things in training -- not the final stored model: 17 | WORKDIR: %(TOP)s/dnn_training_ACOUST/ 18 | DATADIR: %(TOP)s/cmp/ 19 | 20 | [Paths] 21 | 22 | work: %(WORKDIR)s/ 23 | data: %(DATADIR)s/ 24 | 25 | plot: %(WORKDIR)s/plots 26 | 27 | file_id_list: __INSERT_FILELIST_HERE__ 28 | 29 | log_config_file: %(OSSIAN)s/tools/merlin/egs/slt_arctic/s1/conf/logging_config.conf 30 | log_file: %(WORKDIR)s/log/log.txt 31 | log_path: %(WORKDIR)s/log/ 32 | 33 | ## You won't need these -- just leave the placeholder paths here: 34 | sptk : /this/path/does/not/exist 35 | straight : /this/path/does/not/exist 36 | 37 | in_mgc_dir: %(DATADIR)s/ 38 | in_lf0_dir: %(DATADIR)s/ 39 | in_bap_dir: %(DATADIR)s/ 40 | 41 | [Labels] 42 | 43 | question_file_name : %(TOP)s/questions_dnn.hed.cont 44 | silence_pattern: ['*-sil+*'] 45 | label_type: state_align 46 | label_align: %(TOP)s/lab_dnn 47 | add_frame_features: True 48 | subphone_feats: full 49 | 50 | 51 | [Extensions] 52 | 53 | lab_ext: .lab_dnn 54 | mgc_ext: .mgc 55 | bap_ext: .bap 56 | lf0_ext: .lf0 57 | 58 | [Outputs] 59 | ## mgc, bap and lf0 need to be the same sizes as the static streams used when calling 60 | ## split_cmp.py previously; the corresponding variables starting d* are just the static 61 | ## value multiplied by 3: 62 | mgc : __INSERT_MGC_DIM_HERE__ 63 | dmgc : __INSERT_DELTA_MGC_DIM_HERE__ 64 | bap : __INSERT_BAP_DIM_HERE__ 65 | dbap : __INSERT_DELTA_BAP_DIM_HERE__ 66 | lf0 : __INSERT_LF0_DIM_HERE__ 67 | dlf0 : __INSERT_DELTA_LF0_DIM_HERE__ 68 | 69 | 70 | [Waveform] 71 | 72 | ## This won't be used -- 
but keep it here as a placeholder: 73 | vocoder_type : WORLD 74 | framelength : 2048 75 | 76 | [Architecture] 77 | 78 | ## Adjust the number and size of hidden layers here: 79 | 80 | hidden_layer_size : [1024, 1024, 1024, 1024, 1024, 1024] 81 | hidden_layer_type : ['TANH', 'TANH', 'TANH', 'TANH', 'TANH', 'TANH'] 82 | 83 | ## if RNN or sequential training is used, please set sequential_training to True. For 84 | ## use with Ossian, we will only train DNNs, so don't alter this. 85 | sequential_training : False 86 | 87 | ## You might want to experiment with different learning rates, batch sizes, and maximum 88 | ## number of training epochs: 89 | learning_rate : 0.002 90 | batch_size : 256 91 | training_epochs : 12 92 | ## set warmup_epoch to a number larger than training_epochs to effectively disable it 93 | warmup_epoch : 1000 94 | 95 | L1_regularization: 0.0 96 | L2_regularization: 0.0 97 | hidden_activation: tanh 98 | output_activation: linear 99 | warmup_momentum : 0.0 100 | private_l2_reg : 0.0 101 | 102 | [Streams] 103 | # which feature to be used in the output 104 | output_features : ['mgc', 'lf0', 'vuv', 'bap'] 105 | 106 | 107 | [Data] 108 | ## We need to divide the files available up into train/validation/test data. We don't need 109 | ## to do any testing, but set test_file_number to 1 to keep the tools happy. Split the remaining 110 | ## files between train and validation. Using about 5% or 10% of the data for validation is 111 | ## pretty standard. This is how you might divide up 28 files: 112 | train_file_number: __INSERT_NUMBER_OF_TRAINING_FILES_HERE__ 113 | valid_file_number: __INSERT_NUMBER_OF_VALIDATION_FILES_HERE__ 114 | test_file_number : __INSERT_NUMBER_OF_TEST_FILES_HERE__ 115 | #buffer size of each block of data to 116 | buffer_size: 100000 117 | 118 | [Utility] 119 | 120 | plot : True 121 | 122 | [Processes] 123 | ## For use with Ossian, just keep the first 4 set to True -- we will generate speech later 124 | ## within Ossian itself. 
You can run each of the 4 steps individually if you like: 125 | NORMLAB : True 126 | MAKECMP : True 127 | NORMCMP : True 128 | TRAINDNN : True 129 | DNNGEN : False 130 | GENWAV : False 131 | CALMCD : False 132 | 133 | 134 | -------------------------------------------------------------------------------- /scripts/merlin_interface/feed_forward_dnn_ossian_duration_model.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | 3 | ## The DEFAULT section just gives a few global variables -- this is designed to reduce the 4 | ## number of paths you have to change when modifying this config file. 5 | 6 | ##!!! Change the following line to the top level of your copy of the Ossian code: 7 | OSSIAN: __INSERT_PATH_TO_OSSIAN_HERE__ 8 | LANGUAGE: __INSERT_LANGUAGE_HERE__ 9 | SPEAKER: __INSERT_SPEAKER_HERE__ 10 | RECIPE: __INSERT_RECIPE_HERE__ 11 | 12 | 13 | ## This line should point to the language/data/recipe combination you are working on: 14 | TOP: %(OSSIAN)s/train/%(LANGUAGE)s/speakers/%(SPEAKER)s/%(RECIPE)s/ 15 | 16 | 17 | ## spot for putting things in training -- not the final stored model: 18 | WORKDIR: %(TOP)s/dnn_training_DUR/ 19 | DATADIR: %(TOP)s/ 20 | 21 | 22 | 23 | 24 | [Paths] 25 | 26 | work: %(WORKDIR)s/ 27 | data: %(DATADIR)s/ 28 | 29 | plot: %(WORKDIR)s/plots 30 | 31 | file_id_list: __INSERT_FILELIST_HERE__ 32 | 33 | log_config_file: %(OSSIAN)s/tools/merlin/egs/slt_arctic/s1/conf/logging_config.conf 34 | log_file: %(WORKDIR)s/log/log.txt 35 | log_path: %(WORKDIR)s/log/ 36 | 37 | ## You won't need these -- just leave the placeholder paths here: 38 | sptk : /this/path/does/not/exist 39 | straight : /this/path/does/not/exist 40 | 41 | 42 | in_dur_dir: %(DATADIR)s/dur 43 | 44 | 45 | 46 | 47 | 48 | 49 | [Labels] 50 | 51 | 52 | question_file_name : %(TOP)s/questions_dur.hed.cont 53 | silence_pattern: ['*/THIS-STRING-DOESNT-APPEAR-IN-LABELS/*'] 54 | label_type: phone_align 55 | label_align: %(TOP)s/lab_dur 56 | 
add_frame_features: False 57 | subphone_feats: none 58 | 59 | [Extensions] 60 | 61 | lab_ext: .lab_dur 62 | dur_ext: .dur 63 | 64 | [Outputs] 65 | ## This says that we are predicting 5 state durations per example (letter/phone) 66 | dur : 5 67 | 68 | 69 | [Waveform] 70 | 71 | ## This won't be used -- but keep it here as a placeholder: 72 | vocoder_type : WORLD 73 | framelength : 2048 74 | 75 | [Architecture] 76 | 77 | ## Adjust the number and size of hidden layers here: 78 | hidden_layer_size : [512, 512, 512] 79 | hidden_layer_type : ['TANH', 'TANH', 'TANH'] 80 | 81 | 82 | ## if RNN or sequential training is used, please set sequential_training to True. For 83 | ## use with Ossian, we will only train DNNs, so don't alter this. 84 | sequential_training : False 85 | 86 | ## You might want to experiment with different learning rates, batch sizes, and maximum 87 | ## number of training epochs: 88 | learning_rate : 0.002 89 | batch_size : 256 90 | training_epochs : 6 91 | ## set warmup_epoch to a number larger than training_epochs to effectively disable it 92 | warmup_epoch : 1000 93 | 94 | L1_regularization: 0.0 95 | L2_regularization: 0.0 96 | hidden_activation: tanh 97 | output_activation: linear 98 | warmup_momentum : 0.0 99 | private_l2_reg : 0.0 100 | 101 | [Streams] 102 | # which feature to be used in the output 103 | output_features : ['dur'] 104 | 105 | 106 | [Data] 107 | ## We need to divide the files available up into train/validation/test data. We don't need 108 | ## to do any testing, but set test_file_number to 1 to keep the tools happy. Split the remaining 109 | ## files between train and validation. Using about 5% or 10% of the data for validation is 110 | ## pretty standard. 
class GenericProcessor(UtteranceProcessor):

    '''
    Utterance processor whose behaviour is chosen at load time: it looks up
    a function by name in util.NodeProcessors and applies that function to
    every node matched by the configured "target_nodes" xpath.
    '''

    def load(self):

        ## Resolve the configured function name against util.NodeProcessors,
        ## failing early if no such node-processing function exists:
        function_name = self.config["function_name"]
        assert function_name in dir(util.NodeProcessors)
        self.function = getattr(util.NodeProcessors, function_name)

        ## NOTE: passing extra arguments to the function (e.g. from a
        ## "function_args" config section) is deliberately unsupported for
        ## now -- keep it simple.

    def process_utterance(self, utt):

        ## Apply the resolved function to each matching node in turn:
        target_xpath = self.config["target_nodes"]
        for node in utt.xpath(target_xpath):
            self.function(node)

    def do_training(self, speech_corpus, text_corpus):
        print('GenericProcessor requires no training')
        return
class TextPrinter(UtteranceProcessor):

    '''
    Reassemble the plain text of an utterance from its token nodes and write
    it (UTF-8 encoded) to the utterance's 'txt_punc' file. Spaces and
    punctuation contribute text only where silence was found, so the output
    reflects the phrasing actually realised in the audio.
    '''

    def load(self):

        self.target_nodes = '//token'


    def process_utterance(self, utt):

        pieces = []

        for token in utt.xpath(self.target_nodes):
            token_class = token.get('token_class')
            if token_class == '_END_':
                continue  ## terminal markers contribute no text
            silent = (token.get('has_silence') == 'yes')
            if token_class == 'space':
                ## A space realised with silence becomes a comma pause:
                pieces.append(', ' if silent else ' ')
            elif token_class == 'punctuation':
                ## Punctuation is kept only when realised as a pause:
                pieces.append(token.get('text') if silent else ' ')
            else:
                pieces.append(token.get('text'))

        outf = utt.get_filename('txt_punc')
        f = codecs.open(outf, 'w', encoding='utf-8')
        f.write(''.join(pieces))
        f.close()

    def do_training(self, speech_corpus, text_corpus):
        print('TextPrinter requires no training')
        return
class NodeEnricher(UtteranceProcessor):
    """
    Specialised UtteranceProcessor that decorates nodes in place: for every
    node matched by target_nodes it reads input_attribute, passes the value
    through enriching_function, and stores the result on the same node under
    output_attribute.

    Subclasses must supply the actual enriching_function.
    """

    def load(self):

        ## Pull settings from the processor config, with defaults:
        self.target_nodes = self.config.get('target_nodes', '//')
        self.input_attribute = self.config.get('input_attribute', 'text')
        self.output_attribute = self.config.get('output_attribute', 'some_attribute')


    def process_utterance(self, utt):
        for node in utt.xpath(self.target_nodes):
            ## Every matched node must carry the attribute we enrich from:
            assert node.has_attribute(self.input_attribute)
            raw_value = node.get(self.input_attribute)
            node.set(self.output_attribute, self.enriching_function(raw_value))

    def enriching_function(self, input):
        raise NotImplementedError('Please provide an enriching_function when subclassing NodeEnricher')


    def do_training(self, speech_corpus, text_corpus):
        ## Nothing to train in the base class.
        return
54 | ''' 55 | def load(self): 56 | NodeEnricher.load(self) 57 | self.output_value = self.config.get('output_value', 'some_value') 58 | 59 | def enriching_function(self, input): 60 | return self.output_value 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /scripts/processors/NodeRemover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/processors/NodeRemover.py -------------------------------------------------------------------------------- /scripts/processors/NodeSplitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 6 | 7 | from UtteranceProcessor import * 8 | 9 | class NodeSplitter(UtteranceProcessor): 10 | """ 11 | Split contents of node's parent_attribute on delimiter, make children 12 | of node with tag child_tag and add split contents of parent_attribute 13 | as child_attribute, one chunk per child. 14 | 15 | Using the defaults, this provides very crude tokenisation on whitespace. 
16 | """ 17 | 18 | 19 | 20 | def load(self): 21 | 22 | ## get attributes from config, converting type and supplying defaults: 23 | self.target_nodes = self.config.get('target_nodes', '//') 24 | self.split_attribute = self.config.get('split_attribute', 'some_attribute') 25 | self.child_node_type = self.config.get('child_node_type', 'some_child_type') 26 | 27 | 28 | # #print dir(self.shared_models[self.my_model]) 29 | # #func_name = self.config['function_to_apply'] 30 | # 31 | # #self.my_function = getattr(self.shared_models[self.my_model], self.config['function_to_apply']) 32 | # #print self.my_function 33 | # #sys.exit(1) 34 | 35 | def process_utterance(self, utt): 36 | # print "-----" 37 | # print "-----" 38 | # utt.pretty_print() 39 | # print "-----" 40 | # print self.target_nodes 41 | # print utt.xpath(self.target_nodes) 42 | for node in utt.xpath(self.target_nodes): 43 | assert node.has_attribute(self.split_attribute) 44 | to_split = node.get(self.split_attribute) 45 | 46 | child_chunks = self.splitting_function(to_split) 47 | 48 | for chunk in child_chunks: 49 | 50 | child = Element(self.child_node_type) 51 | child.set(self.split_attribute, chunk) 52 | node.add_child(child) 53 | 54 | # utt.pretty_print() 55 | 56 | def splitting_function(self, instring): 57 | ## Default -- burst into list. Replace this in subclasses. 
class NaivePhonetiser(SUtteranceProcessor):
    '''
    Add 'phonetic' segments consisting of standard orthography characters,
    converted into an ASCII-safe 'safetext' form.

    Word tokens get one child segment per (lower-cased, safetexted) letter;
    punctuation/terminal tokens get a probable-pause child and space tokens
    a possible-pause child.
    '''
    def __init__(self, processor_name='naive_phonetiser', target_nodes="//token", \
                target_attribute='text', child_node_type='segment', output_attribute='pronunciation', \
                class_attribute='token_class', word_classes=['word'], probable_pause_classes=['punctuation', c.TERMINAL], \
                possible_pause_classes=['space']):

        ## NOTE(review): the list-valued defaults are shared mutable objects
        ## (classic Python pitfall); they are only read here, but callers and
        ## subclasses must not mutate them in place.
        self.processor_name = processor_name
        self.target_nodes = target_nodes
        self.target_attribute = target_attribute
        self.child_node_type = child_node_type
        self.output_attribute = output_attribute
        self.class_attribute = class_attribute
        self.word_classes = word_classes
        self.probable_pause_classes = probable_pause_classes
        self.possible_pause_classes = possible_pause_classes

        super(NaivePhonetiser, self).__init__()

    def process_utterance(self, utt):
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.class_attribute)
            assert node.has_attribute(self.target_attribute)

            current_class = node.attrib[self.class_attribute]

            if current_class in self.word_classes:
                word = node.attrib[self.target_attribute]
                children = self.get_phonetic_segments(word)
            elif current_class in self.probable_pause_classes:
                children = [c.PROB_PAUSE]
            elif current_class in self.possible_pause_classes:
                children = [c.POSS_PAUSE]
            else:
                ## BUGFIX: the original called sys.exit with an unfilled %s
                ## placeholder, and 'sys' is not imported in this module (the
                ## import is commented out), so this path raised NameError.
                ## SystemExit is the builtin exception sys.exit() raises.
                raise SystemExit('Class "%s" not in any of word_classes, probable_pause_classes, possible_pause_classes' % current_class)
            for chunk in children:
                child = Element(self.child_node_type)
                child.set(self.output_attribute, chunk)
                node.add_child(child)

    def get_phonetic_segments(self, word):
        ## Lower-case the word and convert every character to its ASCII-safe
        ## 'safetext' representation -- one segment per character.
        safetext_letters = []
        for letter in list(word.lower()):
            safetext_letters.append(naive_util.safetext(letter))
        return safetext_letters

    def do_training(self, speech_corpus, text_corpus):
        print("NaivePhonetiser requires no training")
are delimited by silence; to find silence, look for 16 | ## nodes with attribute segment_name having value sil under each token: 17 | 18 | ''' 19 | 20 | def __init__(self, processor_name='phrase_maker', node_type_to_regroup='token', parent_node_type='phrase', \ 21 | attribute_with_silence='segment_name', silence_symbol='sil'): 22 | 23 | self.processor_name = processor_name 24 | self.node_type_to_regroup = node_type_to_regroup 25 | self.parent_node_type = parent_node_type 26 | self.attribute_with_silence = attribute_with_silence 27 | self.silence_symbol = silence_symbol 28 | 29 | ## derived attribute: 30 | self.target_xpath='//' + self.node_type_to_regroup 31 | 32 | super(PhraseMaker, self).__init__() 33 | 34 | def process_utterance(self, utt): 35 | 36 | ### Perform 2 'atomic' operations on the utterance: 37 | 38 | ## add phrase start / end attributes on tokens (True/False values): 39 | add_phrase_tags(utt, target_xpath=self.target_xpath, silence_symbol=self.silence_symbol, \ 40 | attribute_with_silence=self.attribute_with_silence) 41 | 42 | ## Use those attributes to restructure the utterance using a generic 43 | ## restructuring function: 44 | restructure(utt, regroup_nodes_of_type=self.node_type_to_regroup, 45 | start_criterion="phrase_start", end_criterion="phrase_end", 46 | new_parent_type="phrase") 47 | 48 | 49 | -------------------------------------------------------------------------------- /scripts/processors/SimpleChildAdder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 6 | 7 | from UtteranceProcessor import * 8 | 9 | class SimpleChildAdder(UtteranceProcessor): 10 | """ 11 | Simplest kind of manipulation, no model. 
For each node in target nodes, add a child 12 | with tag child_tag, and child_attribute as child_attribute_value. The xpath given for target nodes 13 | can be tailored to match the desired set of nodes. 14 | 15 | TODO: doc 16 | """ 17 | 18 | def load(self): 19 | 20 | ## get attributes from config, converting type and supplying defaults: 21 | self.target_nodes = self.config.get('target_nodes', '//') 22 | self.child_tag = self.config.get('child_tag', 'some_tag') 23 | self.child_attribute = self.config.get('child_attribute', 'some_attribute') 24 | self.child_attribute_value = self.config.get('child_attribute_value', 'some_value') 25 | 26 | 27 | def process_utterance(self, utt): 28 | 29 | for node in utt.xpath(self.target_nodes): 30 | child = Element(self.child_tag) 31 | child.set(self.child_attribute, self.child_attribute_value) 32 | node.add_child(child) 33 | 34 | 35 | 36 | def do_training(self, speech_corpus, text_corpus): 37 | return 38 | 39 | 40 | -------------------------------------------------------------------------------- /scripts/processors/Syllabifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from configobj import ConfigObj 5 | from UtteranceProcessor import * 6 | #import util.NodeProcessors 7 | import logging 8 | import os 9 | import re 10 | import default.const as c 11 | class Syllabifier(UtteranceProcessor): 12 | 13 | 14 | 15 | 16 | def load(self): 17 | self.parent_node_type = self.config.get('parent_node_type', '//token') 18 | self.target_nodes = self.config.get('target_nodes', "//token[@token_class='word']/descendant::segment") 19 | ## read phonetic classes, either unsupervised or human produced 20 | self.phoneclass_filename = os.path.join(self.get_location()+"/../phonetic_classifier", self.config['phone_classes']) 21 | #filename = os.path.join(self.voice_resources.get_path(c.LANG), self.config['phone_classes']) 22 | if 
os.path.isfile(self.phoneclass_filename): 23 | self.phones = ConfigObj(self.phoneclass_filename, encoding='utf8') 24 | # culcurate legexprs on init 25 | 26 | self.regexps = self._compile_syllable_regexps() 27 | self.trained = True 28 | else: 29 | self.trained = False 30 | 31 | 32 | 33 | 34 | 35 | def do_training(self, speech_corpus, text_corpus): 36 | 37 | self.load() ## because phoneclass_filename prob. didn't exist when processor was first loaded. 38 | 39 | if self.trained == True: 40 | return 41 | if self.phones: 42 | self.regexps = self._compile_syllable_regexps() 43 | self.trained = True 44 | 45 | 46 | 47 | 48 | 49 | 50 | def _compile_syllable_regexps(self): 51 | # should only be letters, but better quote_meta anyway 52 | quoted_cons = [re.escape(c) for c in self.phones['consonant']] 53 | quoted_vow = [re.escape(c) for c in self.phones['vowel']] 54 | quoted_legal = [re.escape(c) for c in self.phones['legal']] 55 | cons = u'|'.join(quoted_cons) 56 | vow = u'|'.join(quoted_vow) 57 | #cons = u'|'.join(self.phones['consonant']) 58 | #vow = u'|'.join(self.phones['vowel']) 59 | MAX_ONSET = 20 60 | legal_cons=[""]*MAX_ONSET 61 | 62 | #make regexp from legal onsets 63 | #for l in self.phones['legal']: 64 | for l in quoted_legal: 65 | if legal_cons[len(l)] == "": 66 | legal_cons[len(l)]= l 67 | else: 68 | legal_cons[len(l)]= legal_cons[len(l)]+'|' + l 69 | 70 | regexps = [] 71 | # legality principle with max onset 72 | for i in range(len(legal_cons)-1, 0, -1): 73 | if len(legal_cons[i]) > 0: 74 | #regexps.append(re.compile('((?:%s) (?:%s|\s)*)((?:%s) (?:%s))'% (vow,cons,legal_cons[i],vow), re.UNICODE)) # max onset for frequent legal 75 | regexps.append(re.compile('((?:%s) (?:%s|\s)*) ((?:%s) (?:%s))'% (vow,cons,legal_cons[i],vow), re.UNICODE)) # max onset for frequent legal 76 | #defaults 77 | # V.CV 78 | regexps.append(re.compile('(%s) ((?:%s) (?:%s))'% (vow, cons, vow), re.UNICODE)) 79 | # VC+.CV 80 | regexps.append(re.compile('((?:%s) (?:%s|\s)+) ((?:%s) (?:%s))'% 
(vow,cons,cons,vow), re.UNICODE)) # at least one consonant before 81 | #for r in regexps: 82 | # print r.pattern 83 | 84 | # finally hiatus 85 | for h in self.phones['non_diphthongs']: 86 | (h1, h2) = h.split() 87 | regexps.append(re.compile('(%s+) (%s+)'% (h1,h2), re.UNICODE)) 88 | 89 | return regexps 90 | 91 | 92 | # TODO: remove hard-coding, morph level? 93 | def process_utterance(self, utt): 94 | 95 | for node in utt.xpath('//token[@token_class=\"word\"]'): 96 | 97 | segments = [s.get('text') for s in node.xpath('./segment')] 98 | if len(segments) == 0: 99 | continue 100 | text = u" ".join(segments).lower() 101 | syllables = self._syllabify(text) 102 | 103 | # add syllable level between token and letter 104 | # TODO: maybe apply Oliver's generic transform 105 | segments = node.xpath('./segment') 106 | for s in syllables: 107 | syl_node = Element('syllable', text=u"".join(s.split(' '))) 108 | node.add_child(syl_node) 109 | for p in s.split(' '): 110 | phone_node = segments.pop(0) 111 | phone_node.getparent().remove(phone_node) 112 | syl_node.add_child(phone_node) 113 | 114 | 115 | 116 | 117 | 118 | def _syllabify(self, word): 119 | 120 | for regex in (self.regexps): 121 | 122 | while re.search(regex, word): 123 | word = re.sub(regex, '\\1 ||| \\2', word) 124 | 125 | # some regexp produces additional space ... 
126 | word = word.replace(' ',' ') 127 | 128 | return word.split(' ||| ') 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /scripts/processors/WaveSynthesiser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 6 | 7 | from UtteranceProcessor import * 8 | from util.NodeProcessors import * 9 | 10 | from distutils.spawn import find_executable 11 | 12 | class WaveSynthesiser(UtteranceProcessor): 13 | 14 | ''' 15 | As with AcousticModel, this class needs to be generalised to glottHMM etc. 16 | ''' 17 | 18 | def load(self): 19 | pass 20 | # ## Check necessary binaries are on system path: 21 | # for tool in ["synthesis_fft", "x2x", "mgc2sp"]: 22 | # if not find_executable(tool): 23 | # sys.exit("Binary %s must be on system path"%(tool)) 24 | 25 | def process_utterance(self, utt): 26 | 27 | if utt.has_attribute("waveform"): 28 | print "Utt has a natural waveform -- don't synthesise" 29 | return 30 | 31 | ## Check we've got everything to synthesise with: 32 | for filetype in ["gen_f0", "gen_mcep", "gen_bndap"]: 33 | if not utt.has_external_data(filetype): 34 | print 'Utterance does not have filetype %s associated with it -- cannot synthesise a wave'%(filetype) 35 | return 36 | 37 | fzero = utt.get_filename("gen_f0") 38 | mcep = utt.get_filename("gen_mcep") 39 | bndap = utt.get_filename("gen_bndap") 40 | 41 | ## TODO: !!! fix hardcoded values here !!! 42 | shift = 5 43 | rate = 48000 44 | alpha = "0.77" ## Assume 48kH and Bark cepstrum (Julius) <<-- this should be shared from vocoder config!! 
45 | gamma = "0" ## for mcep 46 | order = "59" 47 | fft_len = "2048" 48 | 49 | ## convert params: 50 | comm = "x2x +fd %s > %s.double"%( bndap, bndap) 51 | #print comm 52 | os.system(comm) 53 | comm = "x2x +fa %s > %s.txt"%(fzero, fzero) 54 | #print comm 55 | os.system(comm) 56 | comm = "mgc2sp -a %s -g %s -m %s -l %s -o 2 %s | x2x +fd > %s.spec.double"%(alpha, gamma, order, fft_len, mcep, mcep) 57 | 58 | #print comm 59 | os.system(comm) 60 | 61 | gen_wav = utt.get_filename("gen_wav") 62 | 63 | comm = "%s "%("synthesis_fft") # self.RESYNTH_BIN) 64 | comm += " -f %s "%(rate) 65 | comm += " -fftl %s "%(fft_len) 66 | comm += " -spec " 67 | comm += " -order %s "%(order) 68 | comm += " -shift %s "%(shift) 69 | comm += " -sigp %s "%(1.2) 70 | comm += " -sd %s "%(0.5) 71 | comm += " -cornf %s "%(4000) 72 | comm += " -bw %s "%(70.0) 73 | comm += " -delfrac %s "%(0.2) 74 | comm += " -bap " 75 | comm += " -apfile %s.double "%(bndap) 76 | comm += " %s.txt "%(fzero) 77 | comm += " %s.spec.double "%(mcep) 78 | comm += " %s > %s"%(gen_wav, gen_wav.replace(".wav", ".log")) 79 | 80 | #print comm 81 | os.system(comm) 82 | 83 | assert os.path.isfile(gen_wav) 84 | 85 | 86 | 87 | ## def train -- not necessary for vocoder (yet). 
88 | 89 | 90 | -------------------------------------------------------------------------------- /scripts/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/processors/__init__.py -------------------------------------------------------------------------------- /scripts/shell/combine_lsf_and_gain.pl: -------------------------------------------------------------------------------- 1 | #args 2 | # 1: lsf 2: lsf coeff count 3: gain 3 | 4 | 5 | $DIM = $ARGV[1]; 6 | open GAIN, $ARGV[2]; 7 | @gain = ; 8 | close GAIN; 9 | $i = 1; 10 | open LSF, "$ARGV[0]"; 11 | while (){ 12 | 13 | print; 14 | if ($i % $DIM == 0) { 15 | print shift @gain; 16 | } 17 | $i++; 18 | 19 | } 20 | close LSF; 21 | -------------------------------------------------------------------------------- /scripts/shell/make_hts_training_lists.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cmp_dir=$1 4 | lab_dir=$2 5 | outfile_stem=$3 6 | 7 | 8 | lab_out=$outfile_stem.lab 9 | cmp_out=$outfile_stem.cmp 10 | 11 | rm $lab_out $cmp_out 12 | 13 | for file in $lab_dir/* ; do 14 | base=`basename $file .lab` ; 15 | cmpfile=$cmp_dir/$base.cmp ; 16 | if [ -e $cmpfile ] ; then 17 | echo $cmpfile >> $cmp_out ; 18 | echo $file >> $lab_out ; 19 | fi 20 | done 21 | -------------------------------------------------------------------------------- /scripts/shell/split_cmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Natural Speech Technology - February 2015 - www.natural-speech-technology.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import os 8 | import struct 9 | import glob 10 | import numpy 11 | from numpy import array 12 | from argparse import ArgumentParser 13 | 
def main_work():
    """Command-line entry point: split each HTK .cmp file found under -cmp
    into per-stream static-parameter files under -out/<stream>/."""

    # ======== Get stuff from command line ==========
    a = ArgumentParser()
    a.add_argument('-cmp', dest='cmpdir', required=True)
    a.add_argument('-out', dest='outdir', required=True, \
        help= "Put output here: make it if it doesn't exist")
    a.add_argument('-streams', default='LSF,LSFsource,HNR,Gain,F0')
    a.add_argument('-widths', default='30,10,5,1,1')
    a.add_argument('-deltas', default=3, type=int, \
        help='e.g. 3 for static + delta + deltadelta')
    opts = a.parse_args()

    # ===============================================
    streams = opts.streams.split(',')
    widths = [int(val) for val in opts.widths.split(',')]

    assert len(streams) == len(widths)

    ## Make output directories, one per stream:
    streams_out = [os.path.join(opts.outdir, stream) for stream in streams]
    for direc in [opts.outdir] + streams_out:
        if not os.path.isdir(direc):
            os.makedirs(direc)

    ## each stream is stored as static followed by its delta copies:
    total_dim = sum(widths) * opts.deltas

    for cmp in glob.glob(os.path.join(opts.cmpdir, '*.cmp')):
        junkpath, base = os.path.split(cmp)
        base = base.replace('.cmp', '')
        data = get_speech(cmp, total_dim, remove_htk_header=True)
        start = 0
        print(base)
        for (stream, width) in zip(streams, widths):
            outfile = os.path.join(opts.outdir, stream, base + '.' + stream)
            end = start + width
            stream_data = data[:, start:end]   ## statics only
            put_speech(stream_data, outfile)
            start = start + (width * opts.deltas)   ## skip this stream's deltas


def get_speech(infile, dim, remove_htk_header=False):
    """Read 32-bit floats from infile and reshape them to a (frames, dim) array.

    If remove_htk_header is set, the first 3 floats (12-byte HTK header) are dropped.
    """
    data = read_floats(infile)
    if remove_htk_header:
        data = data[3:]  ## 3 floats correspond to 12 byte htk header
    assert len(data) % float(dim) == 0, "Bad dimension!"
    m = len(data) // dim   ## floor division: frame count must be an int (py3 fix)
    data = array(data).reshape((m, dim))
    return data


def put_speech(data, outfile):
    """Write a 2-D array of floats to outfile as raw native float32."""
    ## .flatten().tolist() yields plain python floats; the original built a
    ## list of 1-element arrays, which struct.pack only tolerated by accident
    flat_data = data.flatten().tolist()
    write_floats(flat_data, outfile)


def write_floats(data, outfile):
    """Pack a list of floats as native float32 and write them in binary mode."""
    packed = struct.pack(str(len(data)) + "f", *data)
    ## "wb": text mode corrupts data on Windows and rejects bytes on python 3
    with open(outfile, "wb") as f:
        f.write(packed)


def read_floats(infile):
    """Read the whole of infile as a list of native float32 values."""
    with open(infile, "rb") as f:   ## binary mode: see write_floats
        data = f.read()
    m = len(data) // 4   ## 4 bytes per float32
    unpacked = struct.unpack(str(m) + "f", data)
    return list(unpacked)


if __name__ == "__main__":

    main_work()
23 | VOICE_DIR=$NAIVE_DIR/train/$LNG/speakers/$SPEAKER/$RECIPE/ ## location of the voice being trained 24 | WAV_DIR=$NAIVE_DIR/corpus/$LNG/speakers/$SPEAKER/wav/ ## 25 | 26 | 27 | 28 | ## Make paths absolute: 29 | VOICE_DIR=`greadlink -fn $VOICE_DIR` 30 | WAV_DIR=`greadlink -fn $WAV_DIR` 31 | NAIVE_DIR=`greadlink -fn $NAIVE_DIR` 32 | VCDIR=`greadlink -fn $VCDIR` 33 | 34 | echo "Voice dir: ${VOICE_DIR}" 35 | echo "Wav dir: ${WAV_DIR}" 36 | echo "Naive dir: ${NAIVE_DIR}" 37 | echo "VC dir dir: ${VCDIR}" 38 | 39 | 40 | 41 | ## make a place to put synth training features: 42 | FEATURE_DIR=$VOICE_DIR/synth_feats 43 | mkdir $FEATURE_DIR 44 | 45 | 46 | ## make a place to put synth model: 47 | SYNTH_DIR=$VOICE_DIR/processors/acoustic_model/ 48 | if [ ! -e $SYNTH_DIR ] ; then 49 | echo "$SYNTH_DIR does not exist!" ; 50 | exit 1 ; 51 | fi 52 | 53 | 54 | ## ============================= 55 | ## 1) extract STRAIGHT features: 56 | 57 | ## Get template config file: 58 | STRAIGHT_CONF=$FEATURE_DIR/straight_config.txt 59 | echo $NAIVE_DIR/ 60 | cp $NAIVE_DIR/recipes/straight_config_template.txt $STRAIGHT_CONF 61 | 62 | ## Make some substitutions in the config file (this should really be done with 63 | ## proper string interpolation in config): 64 | 65 | echo $STRAIGHT_CONF 66 | 67 | sed "s@VCDIR@${VCDIR}@" $STRAIGHT_CONF > ${STRAIGHT_CONF}_1 68 | sed "s@FEATDIR@${FEATURE_DIR}@" ${STRAIGHT_CONF}_1 > ${STRAIGHT_CONF}_2 69 | sed "s@ESTDIR@${ESTDIR}@" ${STRAIGHT_CONF}_2 > ${STRAIGHT_CONF}_3 70 | sed "s@WAVDIR@${WAV_DIR}@" ${STRAIGHT_CONF}_3 > ${STRAIGHT_CONF}_4 71 | 72 | mv ${STRAIGHT_CONF}_4 ${STRAIGHT_CONF} 73 | 74 | ## Use the config to do feature extraction: 75 | HERE=`pwd` 76 | cd $VCDIR/trunk/Research-Demo/fa-tts/STRAIGHT-TTS/ 77 | ./fa-tts.sh $STRAIGHT_CONF 78 | cd $HERE 79 | 80 | 81 | 82 | ## ============================= 83 | ## 2) make training lists: 84 | 85 | 86 | ## Make training lists, exluding utts for which there are not both cmp and lab files: 87 | echo 
"$NAIVE_DIR/scripts/util//make_hts_training_lists.sh $FEATURE_DIR/cmp $VOICE_DIR/lab/ $SYNTH_DIR/training_list" 88 | $NAIVE_DIR/scripts/util//make_hts_training_lists.sh $FEATURE_DIR/cmp $VOICE_DIR/lab/ $SYNTH_DIR/training_list 89 | 90 | 91 | 92 | ## ============================= 93 | ## 3) train voice on a single machine: 94 | 95 | HERE=`pwd` 96 | cd $VCDIR/trunk/HMM-Training/ 97 | 98 | ## In the script HTS2011-Training.pl, fix this variable to match the first sentence-level 99 | ## (typically 3rd from last) feature in the labels produced: 100 | 101 | ## SENTENCE_LEVEL_DELIMITER=/51: 102 | 103 | ## I added ./run-hts2011_general.sh to the repository -- as the name says, it's 104 | ## a more general version of the VCTK script -- more things are specified on 105 | ## command line so it is less geared to the specifics of VCTK directory structure. 106 | ## 107 | 108 | echo "Train HTS model, output log to $SYNTH_DIR/train_log.txt..." 109 | 110 | ./run-hts2011_general.sh \ 111 | -feature_list $SYNTH_DIR/training_list.cmp \ 112 | -label_list $SYNTH_DIR/training_list.lab \ 113 | -question_file $VOICE_DIR/questions.hed \ 114 | -out $SYNTH_DIR/ \ 115 | | tee $SYNTH_DIR/train_log.txt 116 | 117 | cd $HERE 118 | 119 | ### copy GV and window parameters for use in synthesis later: 120 | cp $FEATURE_DIR/gv/* $SYNTH_DIR/hmm/hts_engine/ 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /scripts/shell/train_cart.R: -------------------------------------------------------------------------------- 1 | ### R script 2 | 3 | 4 | ## Command line arguments 5 | # trailingOnly=TRUE means that only arguments after --args are returned 6 | args = commandArgs(trailingOnly = TRUE) 7 | print("Script train_cart.R called with arguments:") 8 | print(args) 9 | 10 | 11 | data_fn = args[1] 12 | outfile = args[2] 13 | 14 | 15 | library(rpart) 16 | 17 | 18 | my_data <- read.csv(file=data_fn,head=TRUE,sep=",") 19 | ## my_data$break_type <- 
as.factor(my_data$break_type) ## <-- make sure predictee is category 20 | summary(my_data) 21 | 22 | 23 | my_control=rpart.control(minsplit=1, minbucket=1, xval=10, cp=0.0) 24 | my_rpart <- rpart(response~., data=my_data, control=my_control ) 25 | 26 | ### tree before pruning: 27 | print(my_rpart) 28 | printcp(my_rpart) 29 | 30 | ## find smallest model with cp with 1SE of cross validation error: 31 | min_error = min(my_rpart$cptable[,"xerror"]) 32 | min_error_std = min(my_rpart$cptable[,"xstd"]) 33 | thresh = min_error + min_error_std 34 | for (i in seq(nrow(my_rpart$cptable))) { 35 | print(my_rpart$cptable[i,"xerror"]) 36 | if (my_rpart$cptable[i,"xerror"] < thresh) { 37 | best_cp=my_rpart$cptable[i,"CP"] 38 | break 39 | } 40 | } 41 | print(best_cp) 42 | my_rpart <- prune(my_rpart, cp=best_cp) 43 | print(my_rpart) 44 | 45 | save(my_rpart, file=outfile) 46 | -------------------------------------------------------------------------------- /scripts/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/tools/__init__.py -------------------------------------------------------------------------------- /scripts/util/Environment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 5 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 6 | 7 | import default.fnames as fname 8 | import default.const as c 9 | import os 10 | import sys 11 | 12 | 13 | 14 | 15 | #def make(dirs, lang, speaker, version): 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /scripts/util/__init__.py: 
import math
from scipy import stats
import numpy as np

def get_subsections(trajectory):
    '''
    Slice a trajectory into named subsections, as in Murray et al. (2006):
    the whole thing, both halves, and all four quarters.
    '''
    new_features = {}
    t = len(trajectory)
    ## // (floor division): these are sequence indices, so they must stay
    ## integers under python 3 as well as python 2
    new_features['whole'] = trajectory
    new_features['half1'] = trajectory[:(t // 2)]
    new_features['half2'] = trajectory[(t // 2):]
    new_features['quarter1'] = trajectory[:(t // 4)]
    new_features['quarter2'] = trajectory[(t // 4):(t // 2)]
    new_features['quarter3'] = trajectory[(t // 2):(t - (t // 4))]
    new_features['quarter4'] = trajectory[(t - (t // 4)):]
    return new_features


### feature functions: each maps a 1-D sequence to a scalar statistic
def feature_mean(seq):
    return np.mean(seq)

def feature_std(seq):
    return np.std(seq)

def feature_min(seq):
    return np.min(seq)

def feature_max(seq):
    return np.max(seq)

def feature_range(seq):
    return feature_max(seq) - feature_min(seq)

def feature_slope(seq):
    ## gradient of a least-squares line fitted to the sequence
    gradient, intercept, r_value, p_value, std_err, fit_line = fit_lm(seq)
    return gradient

def fit_lm(y):
    '''Fit a straight line to y against its indices; return the regression
    statistics and the fitted line itself.'''
    x = np.array(range(len(y)))
    gradient, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    fit_line = [(x_val * gradient) + intercept for x_val in x]
    return gradient, intercept, r_value, p_value, std_err, fit_line

def get_stats_over_subsections(data):
    '''
    Compute several statistics over several subsections of the given data,
    return in a dictionary whose keys indicate the statistic and subsection
    '''
    subsections = get_subsections(data)
    results = {}   ## renamed from "stats", which shadowed the scipy.stats import
    for (subsection, subdata) in subsections.items():
        for feat_func in [feature_mean, feature_std, feature_range, feature_min, feature_max, feature_slope]:
            results["%s_%s" % (subsection, feat_func.__name__)] = feat_func(subdata)
    return results
28 | 29 | try: 30 | 31 | voice_config = sys.argv[1] 32 | voice_components = sys.argv[2] 33 | ENGINE_BIN = sys.argv[3] 34 | RESYNTH_BIN = sys.argv[4] 35 | trained_model_dir = sys.argv[5] 36 | 37 | 38 | except: 39 | 40 | usage() 41 | 42 | 43 | 44 | ################################################# 45 | sys.path.append("/afs/inf.ed.ac.uk/user/o/owatts/naive/script/") 46 | ################################################# 47 | 48 | 49 | ## Lots of these paths should be interpolated from system-wide options (e.g. bin dir etc). 50 | ## Absolute paths for now. 51 | context_file_location = "/afs/inf.ed.ac.uk/user/o/owatts/naive/context_files/" 52 | ESTDIR = "/group/project/nlp-speech/bin/" 53 | HTSDIR = "/afs/inf.ed.ac.uk/user/o/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin/" 54 | SCRIPT = "/afs/inf.ed.ac.uk/user/o/owatts/naive/script" 55 | GENSIM_LOCATION = "%s/gensim-0.5.0/src/"%(SCRIPT) 56 | ################################################# 57 | 58 | sys.path.append( GENSIM_LOCATION ) ## add gensim to path 59 | from VSMTagger import VSMTagger 60 | 61 | 62 | 63 | print " -- Open the existing voice" 64 | 65 | voice = Voice(config_file=voice_config) 66 | 67 | 68 | print " -- Make an utterance processor from a (trained) acoustic model " 69 | 70 | ### This will only perform work where an utt does not have a wavefile attached: 71 | parameter_generator = AcousticModel(config_file=voice_components + "/parameter_generator.cfg", 72 | processor_name = "parameter_generator", 73 | ENGINE_BIN=ENGINE_BIN, 74 | model_location = trained_model_dir, 75 | HTSDIR=HTSDIR ) 76 | parameter_generator.save() 77 | 78 | 79 | 80 | ### WAVESYNTH 81 | waveform_synthesiser = WaveSynthesiser(config_file=voice_components + "/waveform_synthesiser.cfg", 82 | processor_name = "waveform_synthesiser", 83 | RESYNTH_BIN=RESYNTH_BIN, 84 | HTSDIR=HTSDIR ) 85 | waveform_synthesiser.save() 86 | 87 | 88 | ### WAVE PLAYER (call e.g. 
sox etc) 89 | wave_player = WavePlayer(config_file=voice_components + "/wave_player.cfg", 90 | processor_name = "wave_player" 91 | ) 92 | wave_player.save() 93 | 94 | voice.add_processor(voice_components + "/parameter_generator.cfg") 95 | voice.add_processor(voice_components + "/waveform_synthesiser.cfg") 96 | voice.add_processor(voice_components + "/wave_player.cfg") 97 | 98 | print " -- Save voice" 99 | voice.save() 100 | 101 | print " -- Synthesize a test utterance (from some Spanish text...)" 102 | ## Use the voice to synth a test utterance: 103 | voice.synth_utterance("Esto es: una prueba.") 104 | 105 | 106 | 107 | 108 | if __name__=="__main__": 109 | 110 | main_work() 111 | -------------------------------------------------------------------------------- /scripts/util/cwt_utils.py: -------------------------------------------------------------------------------- 1 | 2 | #import matplotlib 3 | #matplotlib.use('macosx') 4 | import numpy as np 5 | 6 | #from matplotlib import pyplot as pylab 7 | #import pylab 8 | 9 | ## osw: unused import of old package? 
def plot_labels(labels, shift=0, fig="", text=True):
    '''Draw vertical boundary lines (and optionally the token text) for each
    (start, end, token) label on a matplotlib axes / pylab.'''
    import pylab
    if fig == "":
        fig = pylab
    for (start, end, token) in labels:
        if token:
            fig.axvline(x=start, color='black')
            fig.axvline(x=end, color='black')
            if text:
                fig.text(start + 1 - shift, 0, token)
    fig.legend()


def plot_prom_labels(labels, prominences, shift=0, fig=""):
    '''Annotate labelled regions with their (rounded) prominence values.'''
    import pylab
    if fig == "":
        fig = pylab
    for i in range(len(labels)):
        (start, end, token) = labels[i]
        ## fixed off-by-one: the original tested i <= len(prominences), which
        ## allowed an out-of-range prominences[i]
        if token and i < len(prominences):
            fig.text(start + 3, shift, (round(prominences[i], 1)))


def get_peaks(params):
    '''Return a 2-row array [peak_values, peak_indices] of the local maxima of
    a 1-D numpy array.'''
    ## a local maximum is where the first-difference sign flips from + to -
    indices = (np.diff(np.sign(np.diff(params))) < 0).nonzero()[0] + 1
    peaks = params[indices]
    return np.array([peaks, indices])


def get_valleys(params):
    '''Local minima, computed as the peaks of the negated signal.'''
    return get_peaks(-params)


def get_best_scale(wavelet_matrix, num_units):
    '''Return the row index of the wavelet scale whose peak count is closest
    to num_units (first such row wins on ties).'''
    best_i = 0
    best = 999
    for i in range(0, wavelet_matrix.shape[0]):
        num_peaks = len(get_peaks(wavelet_matrix[i])[0])
        dist = abs(num_peaks - num_units)
        if dist < best:
            best = dist
            best_i = i
    return best_i


def normalize(params, std=0):
    '''Zero-mean normalise and divide by std; std=0 means "use the sample
    standard deviation of params".'''
    if std == 0:
        std = np.std(params)
    mean = np.mean(params)
    return (params - mean) / std


def unnormalize(params, mean, std):
    '''Invert normalize(): rescale params to the given mean and std.'''
    return mean + (params - np.mean(params)) * (std / (np.std(params)))


def scale_for_reconstruction(wavelet_matrix, scale_dist=1.0, s0=3):
    '''Weight each wavelet scale (row) for signal reconstruction.'''
    scaled = np.array(wavelet_matrix)
    for i in range(0, wavelet_matrix.shape[0]):
        scaled[i] *= 2 ** (-(i + s0 - 1) * scale_dist / 2)
    return scaled


def calc_prominence(params, labels, func=np.max, use_peaks=True):
    '''For each non-empty (start, end, word) label span, return a prominence
    value: the highest local peak inside the span (use_peaks=True), or func
    applied over the span (use_peaks=False).'''
    labelled = []
    for (start, end, word) in labels:
        if end - start == 0:
            continue   ## skip zero-length spans
        if use_peaks:
            (peaks, indices) = get_peaks(params[start:end])
            if len(peaks) > 0:
                labelled.append(np.max(peaks))
            else:
                labelled.append(0.0)   ## no local maximum in the span
        else:
            labelled.append(func(params[start:end]))
    return labelled
# Conversion notes (continuing the file header): rule-based
# alpha-syllabic to alphabetic conversion.  A virama deletes the default
# (inherent) vowel, a vowel sign replaces the default vowel, and
# anusvara/candrabindu append a nasal 'm' feature to the current letter.


def latinise_indian_script_string(l):
    """Latinise one string of Indic script.

    l: a unicode string (typically one line of text).

    Returns a flat list of single latin "letters": syllable characters
    are split into consonant + inherent-vowel parts, latin characters
    pass through unchanged, and code points without a unicode name are
    skipped.
    """
    prev_letter = ""
    letters = []
    for i in range(0, len(l)):
        try:
            u_name = unicodedata.name(l[i])
        # bugfix: was a bare `except:`; unicodedata.name raises
        # ValueError for code points that have no name
        except ValueError:
            continue

        # latin letters: emit any pending letter, then pass through
        if re.match('.*LATIN.*', u_name):
            if prev_letter:
                letters.append(prev_letter)
                # bugfix: prev_letter was not reset here, so the pending
                # letter could be emitted a second time later on
                prev_letter = ""
            letters.append(l[i])
            continue

        # e.g. assamese 'WITH LOWER DIAGONAL' etc.
        u_name = re.sub(' WITH .+', '', u_name)

        # syllable and independent vowel characters
        # (CANDRA, VOCALIC and such are skipped for simplicity)
        m = re.match('.*LETTER( .+)? (.+)$', u_name)
        if m:
            letter = m.group(2)
            if prev_letter:
                letters.append(prev_letter)
            # syllables and independent vowels are not distinguished in
            # unicode names; treat any name ending in A (except AA) as a
            # syllable and split off its inherent vowel with a space
            if letter != "AA" and re.match(".+A$", letter):
                prev_letter = letter[:-1] + " " + letter[-1]
            else:
                prev_letter = letter
            continue

        # modifiers:

        # vowel sign: replace the default (inherent) vowel
        m = re.match('.*VOWEL SIGN( .+)? (.+)$', u_name)
        if m:
            prev_letter = prev_letter[:-1] + m.group(2)
            continue

        # virama: delete the default vowel (and its separator)
        if re.match('.*VIRAMA', u_name):
            prev_letter = prev_letter[:-2]
            continue

        # anusvara: nasalise the current letter
        if re.match('.*ANUSVARA', u_name):
            prev_letter += "m"
            continue

        # candrabindu: 'usually means that the previous vowel is nasalized'
        if re.match('.*CANDRABINDU', u_name):
            prev_letter += "m"
            continue

        # anything else: flush the pending letter, keep the char as-is
        if prev_letter:
            letters.append(prev_letter)
            prev_letter = ""
        letters.append(l[i])

    # bugfix: only flush a non-empty pending letter -- the previous
    # unconditional append produced a spurious '' token at the end of
    # every result (confirm no caller relied on that trailing token)
    if prev_letter:
        letters.append(prev_letter)

    # flatten: split multi-part entries ("K A") into single letters
    final_letters = []
    for letter in letters:
        if len(letter) == 1:
            final_letters.append(letter)
        else:
            final_letters.extend(letter.split(' '))
    return final_letters


def main_work():
    """Latinise the UTF-8 text file named by sys.argv[1] and print the
    result, one space-separated line of letters per input line."""
    f = codecs.open(sys.argv[1], "r", encoding='utf-8')
    lines = f.readlines()
    f.close()

    for l in lines:
        letters = latinise_indian_script_string(l)
        # parenthesised so the file at least parses under python 3
        # while behaving identically under python 2
        print(u" ".join(letters).encode('utf-8'))


if __name__ == "__main__":

    main_work()
./train/sw/speakers/pm_balanced/naive_01_nn/utt/ ./train/sw/speakers/pm_balanced/naive_01_nn/clickable_audio 12 | 13 | ''' 14 | import os 15 | import sys 16 | import glob 17 | from lxml import etree 18 | import multiprocessing 19 | 20 | uttdir = sys.argv[1] 21 | outdir = sys.argv[2] 22 | 23 | 24 | outdir = os.path.abspath(outdir) 25 | 26 | 27 | assert not os.path.isdir(outdir), '%s already exists'%(outdir) 28 | 29 | audiodir = os.path.join(outdir, 'audio') 30 | os.makedirs(outdir) 31 | os.makedirs(audiodir) 32 | 33 | 34 | 35 | max_cores = '30' 36 | 37 | 38 | html_lines = [] 39 | 40 | 41 | 42 | 43 | 44 | 45 | # Using all available CPU cores unless defined otherwise 46 | if max_cores is not None and max_cores.isdigit(): 47 | n_cores = int(max_cores) 48 | else: 49 | n_cores = multiprocessing.cpu_count() 50 | 51 | 52 | 53 | def proc_utt(uttfile): 54 | 55 | path, base = os.path.split(uttfile) 56 | base = base.replace('.utt', '') 57 | print base 58 | utt = etree.parse(uttfile) 59 | wavfile = utt.getroot().attrib['waveform'] 60 | i = 1 61 | html_line = '' 62 | os.makedirs(audiodir + '/clickable_%s/'%(base)) 63 | for token in utt.xpath('//token'): 64 | text = token.attrib['text'] 65 | if 'start' in token.attrib: 66 | 67 | 68 | 69 | start_sec = float(token.attrib['start']) / 1000.0 70 | end_sec = float(token.attrib['end']) / 1000.0 71 | dur_sec = end_sec - start_sec 72 | 73 | 74 | 75 | ## chop wav extract: 76 | outwave = audiodir + '/clickable_%s/clickable_%s_%s.ogg'%(base, base, i) 77 | comm = 'sox %s %s trim %s %s'%(wavfile, outwave, start_sec, dur_sec) 78 | os.system(comm) 79 | #print comm 80 | 81 | #html_line += ''%(text, outwave) 82 | #html_line += '%s'%(outwave, text) 83 | html_line += '%s'%(outwave, text) 84 | 85 | 86 | 87 | #html_line += '