├── .gitignore ├── 0_README.txt ├── LICENSE ├── README.md ├── config_templates ├── GlottExtractor.cfg ├── MFCCExtractor.cfg ├── SPTKExtractor.cfg └── glott_config_template.txt ├── doc ├── Makefile └── source │ ├── STORAGE │ ├── extending.rst │ ├── modules │ │ ├── corpus_utterance.rst │ │ └── index.rst │ ├── python_install.rst │ └── queries.rst │ ├── acoustic_modelling_scripts.rst │ ├── basic.rst │ ├── complete_recipes.rst │ ├── conf.py │ ├── conf.py.initial │ ├── gold_standard_recipes.rst │ ├── index.rst │ ├── initial_voice.rst │ ├── overview.rst │ ├── python_install.rst │ ├── refinements.rst │ ├── s4a.png │ ├── setting_up.rst │ └── todo_list.rst ├── make_release.sh ├── recipes ├── lex_01_nn.cfg ├── lex_02_nn.cfg ├── naive_01_hts.cfg ├── naive_01_nn.cfg ├── naive_SIMP2.cfg.py └── world_extraction.cfg ├── rules └── en │ └── textnorm │ ├── rules │ ├── abbrevlist │ ├── abbrevmap │ ├── hyphenated │ ├── num_excp │ └── tldlist │ └── scripts │ ├── filter_text1.pl │ ├── filter_text_gigaword.pl │ ├── final_cleanup.pl │ ├── normalize_puncts.pl │ ├── numproc │ ├── remove_dups.pl │ ├── ted2ascii_puncts.pl │ ├── tokenize_words.pl │ └── utf2ascii_puncts.pl ├── scripts ├── acoustic_model_training │ ├── steps │ │ ├── build_MDL_trees.sh │ │ ├── clone_monophone_to_fullcontext.sh │ │ ├── increase_mixture_components.sh │ │ ├── initial_alignment.sh │ │ ├── make_alignment_lexicon.sh │ │ ├── make_alignment_monophone.sh │ │ ├── make_engine_model.sh │ │ ├── make_engine_model.sh.OLD │ │ ├── make_monophone.sh │ │ ├── realign.sh │ │ ├── realign_to_labels.sh │ │ ├── reestimate.sh │ │ ├── reestimate_alignment_model.sh │ │ ├── set_up_data.py │ │ ├── subset_data.py │ │ └── untie_models.sh │ ├── subrecipes │ │ ├── config_template │ │ │ ├── quick_voicebuild_01.cfg │ │ │ ├── quick_voicebuild_01.cfg.OLD │ │ │ ├── standard_alignment.cfg │ │ │ ├── standard_voicebuild.cfg │ │ │ ├── standard_voicebuild.cfg.OLD │ │ │ └── standard_voicebuild_STRAIGHT.cfg │ │ └── script │ │ │ ├── 
extend_standard_alignment.sh │ │ │ ├── extend_standard_alignment_external_lexicon.sh │ │ │ ├── quick_voicebuild_01.sh │ │ │ ├── quick_voicebuild_01.sh.OLD │ │ │ ├── quick_voicebuild_02.sh │ │ │ ├── standard_alignment.sh │ │ │ ├── standard_voicebuild.sh │ │ │ └── standard_voicebuild.sh.OLD │ └── util │ │ ├── filter_questions.py │ │ ├── make_config.sh │ │ ├── make_proto_hsmm.pl │ │ ├── make_proto_hsmm.py │ │ ├── make_proto_skip_hsmm.py │ │ ├── separate_trees.py │ │ ├── setup_directory.sh │ │ ├── update_train_list.py │ │ └── util.py ├── batch_align_and_utt.py ├── batch_speak.py ├── default │ ├── __init__.py │ ├── const.py │ └── fnames.py ├── download_tundra_subset.sh ├── main │ ├── Corpus.py │ ├── Resources.py │ ├── Utterance.py │ ├── Voice.py │ └── __init__.py ├── merlin_interface │ ├── feed_forward_dnn_ossian_acoustic_model.conf │ └── feed_forward_dnn_ossian_duration_model.conf ├── naive │ ├── __init__.py │ ├── naive_util.py │ ├── nudge_boundaries.py │ ├── train_static_vsm.py │ ├── train_static_vsm_direct-to-disk.py │ └── train_static_vsm_gensim.py ├── processors │ ├── AcousticModel.py │ ├── Aligner.py │ ├── BasicTokenisers.py │ ├── EnglishGoldProcessors.py │ ├── FeatureDumper.py │ ├── FeatureExtractor.py │ ├── GenericProcessor.py │ ├── IndianScriptLatiniser.py │ ├── Lexicon.py │ ├── Lexicon.py.20170605 │ ├── MiscProcessor.py │ ├── NN.py │ ├── NN.py.MSCOLD │ ├── NodeEnricher.py │ ├── NodeRemover.py │ ├── NodeSplitter.py │ ├── PhoneClassifier.py │ ├── Phonetisers.py │ ├── PhraseMaker.py │ ├── ProminenceLabeller.py │ ├── SKLProcessors.py │ ├── SimpleChildAdder.py │ ├── Syllabifier.py │ ├── Tokenisers.py │ ├── UtteranceProcessor.py │ ├── VSMTagger.py │ ├── WaveSynthesiser.py │ └── __init__.py ├── setup_tools.sh ├── shell │ ├── combine_lsf_and_gain.pl │ ├── compose_glott_features.pl │ ├── do_align_multisyn_lexicon │ ├── make_hts_training_lists.sh │ ├── setup_alignment.sh │ ├── split_cmp.py │ ├── train_backend.sh │ ├── train_cart.R │ └── window.pl ├── speak.py ├── 
test.py ├── tools │ ├── __init__.py │ └── morfessor.py ├── train.py └── util │ ├── Environment.py │ ├── LookupTable.py │ ├── NodeProcessors.py │ ├── TTS.py │ ├── Wavelets.py │ ├── __init__.py │ ├── acoustic_feats.py │ ├── acoustic_stats.py │ ├── append_acoustic_model.py │ ├── cwt_utils.py │ ├── discretise_vsm.py │ ├── draw_hts_tree_simple.py │ ├── gpu_lock.py │ ├── indian2latin.py │ ├── make_corpus_with_clickable_audio.py │ ├── make_hts_training_lists.sh │ ├── penn_treebank_tokenizer.sed │ ├── print_proms.py │ ├── speech_manip.py │ ├── store_merlin_model.py │ ├── submit.sh │ ├── trim_silences.py │ ├── uttsdata_to_text.py │ └── xpath_extensions_for_ossian.py ├── test └── txt │ ├── english.txt │ ├── hindi.txt │ ├── romanian.txt │ └── romanian2.txt ├── test_release.sh └── tools └── patch ├── ossian_engine.patch ├── ossian_hts.patch └── sequitur_compilation.patch /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /0_README.txt: -------------------------------------------------------------------------------- 1 | 2 | ==================================== 3 | Ossian Speech Synthesis Toolkit 4 | Simple4All Consortium 5 | Copyright (c) 2013-2014 6 | All Rights Reserved. 
7 | ==================================== 8 | 9 | THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK 10 | DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 11 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT 12 | SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE 13 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 15 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 16 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 17 | THIS SOFTWARE. 18 | 19 | Authors: Simple4All consortium members 20 | Date: November 2013-2014 21 | Contact: owatts@staffmail.ed.ac.uk 22 | 23 | 24 | Please point an HTML browser at ./doc/build/html/index.html for some tips and pointers. -------------------------------------------------------------------------------- /config_templates/GlottExtractor.cfg: -------------------------------------------------------------------------------- 1 | LPC_ORDER = 30 2 | LPC_ORDER_SOURCE = 10 3 | HNR_CHANNELS = 5 4 | 5 | F0_MIN = 50.0 6 | F0_MAX = 360.0 7 | 8 | 9 | 10 | POSTFILTER_COEFFICIENT = 0.5 11 | 12 | 13 | HPFILTER_FILENAME = hp_16khz 14 | GLOTTAL_PULSE_NAME = pulse 15 | 16 | -------------------------------------------------------------------------------- /config_templates/MFCCExtractor.cfg: -------------------------------------------------------------------------------- 1 | TARGETRATE = 20000.0 2 | TARGETKIND = MFCC_E 3 | SOURCEFORMAT = NIST 4 | ENORMALISE = F 5 | SAVECOMPRESSED = T 6 | SOURCEKIND = WAVEFORM 7 | SAVEWITHCRC = T 8 | USEHAMMING = T 9 | WINDOWSIZE = 100000.0 10 | CEPLIFTER = 22 11 | NUMCHANS = 26 12 | NUMCEPS = 12 13 | PREEMCOEF = 0.97 -------------------------------------------------------------------------------- /config_templates/SPTKExtractor.cfg: -------------------------------------------------------------------------------- 1 | order = 12 2 | 3 | static_window 
= 1.0 4 | delta_window = -0.5 0.0 0.5 5 | delta_delta_window = 1.0 -2.0 1.0 6 | 7 | framelength = 400 ## in samples (i.e. 400 = 25 ms at 16000 hz sampling) 8 | frameshift = 80 ## in samples ( " 80 = 5 ms " ) 9 | fft_length = 512 10 | 11 | f0_method = swipe ## 0 = swipe, 1 = rapt 12 | lo_f0=30 13 | hi_f0=500 14 | 15 | frameshift_ms = 5 16 | target_sample_rate = 16000 -------------------------------------------------------------------------------- /doc/source/STORAGE/extending.rst: -------------------------------------------------------------------------------- 1 | 2 | Extending Ossian with your own recipes and processors 3 | ===================================================== 4 | 5 | Note on the recipes included here 6 | --------------------------------- 7 | 8 | 9 | None of the recipes mentioned is intended to be in any way definitive -- for a given 10 | recipe, there are probably lots of different imaginable alternative pipelines of 11 | processors that will produce the same result. 12 | 13 | One major possible area of variation is processor granularity -- rather than, e.g. the 14 | separate segment_adder, silence_adder and endsilence_adder processors in demo05, it 15 | would be straightforward to write a single processor that performs the functions of all 16 | three. There has been a lot of indecision about a reasonable level of granularity during 17 | development. Trade-off between making elements so specific that they can’t be 18 | reconfigured, and making a recipe so long with so many elements that it can’t be 19 | understood. Personal taste; also depends on your role as to what level of granularity 20 | is a good one. If you just want to run existing recipes on new data, .... 
21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /doc/source/STORAGE/modules/corpus_utterance.rst: -------------------------------------------------------------------------------- 1 | Corpus 2 | ------ 3 | 4 | Data processed by Ossian is held in a ``corpus`` object. 5 | 6 | 7 | 8 | 9 | .. autoclass:: main.Corpus.Corpus 10 | :members: 11 | :undoc-members: 12 | 13 | 14 | 15 | 16 | Utterance 17 | --------- 18 | 19 | Utterance struct held herecd 20 | 21 | 22 | Various inherited methods are specialised to act on the ``data`` attribute. These 'rerouted' 23 | methods are not documented here. 24 | 25 | .. todo:: Is there a more proper way to do this? 26 | 27 | 28 | .. autoclass:: main.Utterance.Utterance 29 | :members: 30 | :undoc-members: 31 | :exclude-members: iterdescendants, pretty_print, remove, get, set, xpath, insert, has_attribute 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /doc/source/STORAGE/modules/index.rst: -------------------------------------------------------------------------------- 1 | Ossian modules 2 | ============== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | corpus_utterance 8 | -------------------------------------------------------------------------------- /doc/source/STORAGE/python_install.rst: -------------------------------------------------------------------------------- 1 | 2 | .. note:: A couple of notes on making an intallation of Python that satisfies TASSAL's requirements. Made for Chee Yong, could be more generally useful documentation with some editing. 3 | 4 | ======================================= 5 | MAKING YOUR OWN PYTHON INSTALLATION 6 | ======================================= 7 | 8 | 9 | On Linux 10 | -------- 11 | 12 | Make a directory for your Python installation: 13 | 14 | MYPYTHON=~/temp/python ## your chosen directory might be e.g. 
/afs/inf.ed.ac.uk/group/cstr/projects/simple4all/malay/chee_yong_tools/python 15 | 16 | mkdir $MYPYTHON 17 | 18 | With a browser go to: 19 | 20 | http://www.activestate.com/activepython/downloads 21 | 22 | ... click on the link for Python version 2.7.2.5 for Linux (x86_64) to download the package. 23 | When you've done this, move the package to the current directory (or wherever you want to install it): 24 | 25 | mv ~/Downloads/ActivePython-2.7.2.5-linux-x86_64.tar.gz $MYPYTHON 26 | 27 | Unpack it: 28 | 29 | cd $MYPYTHON 30 | tar xvf ActivePython-2.7.2.5-linux-x86_64.tar.gz 31 | 32 | Use the installer provided to install: 33 | 34 | cd ActivePython-2.7.2.5-linux-x86_64 35 | ./install.sh 36 | 37 | When prompted to specify "Install directory: ", enter ".." to install to $MYPYTHON and type "y" when it asks for confirmation. 38 | 39 | This should have installed Python at $MYPYTHON/bin/python -- type it in the command 40 | line and check you get a Python prompt like this: 41 | 42 | 43 | [channings]owatts: $MYPYTHON/bin/python 44 | ActivePython 2.7.2.5 (ActiveState Software Inc.) based on 45 | Python 2.7.2 (default, Jun 24 2011, 11:24:26) 46 | [GCC 4.0.2 20051125 (Red Hat 4.0.2-8)] on linux2 47 | Type "help", "copyright", "credits" or "license" for more information. 48 | >>> 49 | 50 | If that works, type quit() to exit the interactive session. 
51 | 52 | 53 | Use these 3 commands to install numpy, configobj, and lxml (which are all required by TASSAL): 54 | 55 | $MYPYTHON/bin/pip install numpy 56 | $MYPYTHON/bin/pip install scipy 57 | $MYPYTHON/bin/pip install configobj 58 | $MYPYTHON/bin/pip install lxml 59 | 60 | 61 | installation of these on mac with macports 62 | ------------------------------------------- 63 | 64 | sudo port install py27-lxml 65 | sudo port install py27-configobj 66 | 67 | OR: 68 | 69 | sudo port install py27-pip 70 | 71 | ### 72 | 73 | pip-2.7 install scikit-learn 74 | 75 | ## off-topic: 76 | 77 | sudo port install R 78 | -------------------------------------------------------------------------------- /doc/source/STORAGE/queries.rst: -------------------------------------------------------------------------------- 1 | Queries 2 | ======= 3 | 4 | 5 | 6 | corpus object 7 | ------------- 8 | 9 | corpus -- contain utt objects, not filenames to avoid this in e.g. vsm tagger / dt proc.s 10 | 11 | for utt_name in speech_corpus: 12 | utterance = Utterance(utt_name) 13 | 14 | 15 | text and speech in copurs 16 | ------------------------- 17 | 18 | in vsmtagger.py: 19 | 20 | .. todo:: Add the text from the unspoken parts 21 | 22 | 23 | 24 | others 25 | ------- 26 | 27 | - separate train and voice 28 | 29 | - letter vsm uses cross-word contexts... 30 | 31 | - lowercasing should be separated from safetexting 32 | 33 | 34 | Document whole module like this: 35 | 36 | .. automodule:: naive.naive_util 37 | :members: 38 | 39 | Document functions like this: 40 | 41 | 42 | .. autofunction:: naive.naive_util.unicode_character_to_safetext 43 | 44 | 45 | 46 | utt mod 47 | -------- 48 | .. automodule:: main.Utterance 49 | :members: 50 | :undoc-members: 51 | 52 | 53 | TODO list 54 | --------- 55 | 56 | .. 
todolist:: -------------------------------------------------------------------------------- /doc/source/acoustic_modelling_scripts.rst: -------------------------------------------------------------------------------- 1 | ============================================ 2 | Scripts for acoustic model training 3 | ============================================ 4 | 5 | Ossian includes a collection of scripts for training acoustic models which in the normal course of things are used to train HMMs after speech coding and text analysis have been done. Infact, some of these scripts will have been called if the commands given in this documentation to build demonstration voices have been run. This collection of scripts can be found under: 6 | 7 | .. code-block:: bash 8 | 9 | ossian-v.?.?/scripts/acoustic_model_training/ 10 | 11 | A number of different subrecipes are available, and can be added to. These subrecipes specify different ways of training acoustic models, and are selected by the relevant processors of top-level recipes. E.g. the recipe ``ossian-v.1.2/recipes/naive.cfg`` configures an acoustic model which is to be trained using the subrecipe called ``quick_voicebuild_01``: 12 | 13 | .. code-block:: ini 14 | 15 | [acoustic_model] 16 | class = AcousticModel.AcousticModel 17 | acoustic_subrecipe = quick_voicebuild_01 18 | [[training_settings]] 19 | BINMOD = " -B " 20 | 21 | The scripts for these subrecipes are contained in ``ossian-v.?.?/scripts/acoustic_model_training/subrecipes/script``, and default configurations for them are in ``ossian-v.?.?/scripts/acoustic_model_training/subrecipes/config_templates``. The default configuration settings are overridden by top-level recipes by ``training_settings`` subsections: in the above excerpt from ``naive.cfg``, for example, the value of ``BINMOD`` is set to be ``" -B "`` instead of the default ``" "`` -- this means that acoustic models will be written out in HTK binary model format instead of the default ASCII. 
22 | 23 | 24 | Using the scripts without Ossian's text processing 25 | -------------------------------------------------- 26 | 27 | As well as using these scripts as part of an Ossian recipe, it is also possible to use them with already parameterised and annotated data (i.e. without using Ossian to do any speech coding or text analysis). This might be done to try out e.g. alternative TTS front-ends using acoustic models trained in a comparable way. 28 | 29 | With the environment variable $OSSIAN pointing to the top directory of an Ossian installation (called something like ``./ossian-v.1.2``), the following command line can be used to train an acoustic model from some acoustic feature files in ``$FEAT_DIRECTORY``, label files in ``$LABEL_DIRECTORY``, and the question file at ``$QUESTIONS``, and output a trained model under ``$OUTPUT``: 30 | 31 | .. code-block:: bash 32 | 33 | $OSSIAN/scripts/acoustic_model_training/subrecipes/script/standard_voicebuild.sh \ 34 | $FEAT_DIRECTORY $LABEL_DIRECTORY $QUESTIONS $OSSIAN/tools/bin/ $OUTPUT \ 35 | $OSSIAN/scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild.cfg 36 | 37 | 38 | .. comment:: $OSSIAN/script/standard_voicebuild.sh ~/temp/ossian-v.1.2/train/rm/speakers/rss_toy_demo/naive/cmp/ ~/temp/ossian-v.1.2/train/rm/speakers/rss_toy_demo/naive/lab/ ~/temp/ossian-v.1.2/train/rm/speakers/rss_toy_demo/naive/questions.hed ~/temp/ossian-v.1.2/tools/bin/ ~/temp/voicetest1/ ./config_template/standard_voicebuild.cfg 39 | 40 | Please modify ``standard_voicebuild.cfg`` or a copy of it to change default settings (e.g. 4 streams, 25 mel cepstral coefficients in the spectrum stream, etc.). ``standard_voicebuild_STRAIGHT.cfg`` is given for use with acoustic features like those used in the `Voice Cloning Toolkit `_. 
41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /doc/source/complete_recipes.rst: -------------------------------------------------------------------------------- 1 | New improved recipes 2 | ====================================== 3 | 4 | There are several recipes which build on and improve the ``naive`` one already described. Some examples are given here. 5 | 6 | 7 | ``naive_glott``: naive recipe with GlottHMM vocoder 8 | --------------------------------------------------- 9 | 10 | .. code-block:: bash 11 | 12 | ## Assuming that you want to start from scratch: 13 | rm -r ./train/rm/speakers/rss_toy_demo/naive_glott/ ./voices/rm/rss_toy_demo/naive_glott/ 14 | 15 | ## Train: 16 | python ./scripts/train.py -s rss_toy_demo -l rm -text wikipedia_10K_words naive_glott 17 | 18 | ## Synthesise: 19 | ./scripts/speak.py -l rm -s rss_toy_demo -o ./test/wav/romanian_toy_naive_glott.wav \ 20 | -play naive_glott ./test/txt/romanian.txt 21 | 22 | 23 | 24 | This is the same as the ``naive`` recipe but uses the high-quality vocoder `GlottHMM `_ for 25 | speech analysis and synthesis. 26 | 27 | ``naive_glott_prom``: wavelet-based prominence labelling 28 | -------------------------------------------------------- 29 | 30 | .. code-block:: bash 31 | 32 | ## Assuming that you want to start from scratch: 33 | rm -r ./train/rm/speakers/rss_toy_demo/naive_glott_prom/ ./voices/rm/rss_toy_demo/naive_glott_prom/ 34 | 35 | ## Train: 36 | python ./scripts/train.py -s rss_toy_demo -l rm -text wikipedia_10K_words naive_glott_prom 37 | 38 | ## Synthesise: 39 | ./scripts/speak.py -l rm -s rss_toy_demo -o ./test/wav/romanian_toy_naive_glott_prom.wav \ 40 | -play naive_glott_prom ./test/txt/romanian.txt 41 | 42 | 43 | 44 | This is the same as the ``naive_glott`` recipe but also makes use of an unsupervised 45 | representation of prominence similar to the one described `here `_. 
Extraction of the representation is based on wavelet transform-derived acoustic features and prediction makes use of vector space models of words and a decision 46 | tree classifier. 47 | 48 | Voices from non-alphabetic script data 49 | -------------------------------------------------------- 50 | 51 | A Hindi toy corpus (extracted from the IIIT Indic database available `here `_) is included to demonstrate parts of the recipe developed for the Simple4All 52 | Blizzard Challenge entry described in `this paper `_. 53 | The recipes ``blizzard_2014_naive_latinised`` and ``blizzard_2014_naive_latinised_syl`` incrementally introduce the naive alphabetisation and syllabification described in the paper. Due to the toy corpus's small size, the syllabification severely affects the quality of the speech. The recipe ``blizzard_2014_naive_latinised_glott`` adds 54 | the latinisation and GlottHMM vocoder: 55 | 56 | .. code-block:: bash 57 | 58 | ## Assuming that you want to start from scratch: 59 | rm -r ./train/hi/speakers/toy/blizzard_2014_naive_latinised_glott/ ./voices/hi/toy/blizzard_2014_naive_latinised_glott/ 60 | 61 | ## Train: 62 | python ./scripts/train.py -s toy -l hi -text wikipedia_10K_words blizzard_2014_naive_latinised_glott 63 | 64 | ## Synthesise: 65 | ./scripts/speak.py -l hi -s toy -o ./test/wav/hindi_naive_latinised_glott.wav \ 66 | -play blizzard_2014_naive_latinised_glott ./test/txt/hindi.txt 67 | 68 | A simpler recipe like the ``naive`` one can be used here for comparison: 69 | 70 | .. 
code-block:: bash 71 | 72 | ## Assuming that you want to start from scratch: 73 | rm -r ./train/hi/speakers/toy/naive/ ./voices/hi/toy/naive/ 74 | 75 | python ./scripts/train.py -s toy -l hi -text wikipedia_10K_words naive 76 | 77 | ./scripts/speak.py -l hi -s toy -o ./test/wav/hindi_naive.wav \ 78 | -play naive ./test/txt/hindi.txt 79 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. ossian documentation master file, created by 2 | sphinx-quickstart on Mon Nov 11 14:45:09 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Ossian documentation 7 | ================================== 8 | 9 | 10 | 11 | .. image:: s4a.png 12 | :height: 100px 13 | :width: 200 px 14 | :scale: 50 % 15 | :alt: alternate text 16 | :align: right 17 | 18 | Ossian is a collection of Python code for building text-to-speech (TTS) systems, with an 19 | emphasis on easing research into building TTS systems with minimal expert 20 | supervision. Work on it started with funding from the `EU FP7 Project Simple4All `_. 21 | 22 | 23 | 24 | A core idea of Ossian is that a lot of the work for making a Python TTS system 25 | can be done by using existing modules. For example, instead of writing a module for 26 | manipulating and querying utterance structures from scratch, we can use existing XML 27 | and  XPATH implementations. Instead implementing a decision tree learning from 28 | scratch, we can use simply 29 | design the tools to work with existing open source machine learning packages, with the 30 | obvious benefit that many different methods besides decision trees are implemented with 31 | a unified interface. By depending on relevant Python core or 3rd party packages, we aim 32 | to make the original code of Ossian as minimal as possible.    
33 | 34 | 35 | If you are interested only in running existing voices, please take a look at *Setting up* and *Basic operations*. 36 | If you plan to build voices using already-defined recipes, these will also be helpful. 37 | If you plan to extend existing recipes or write new ones, the *Tutorial* sections might be of use. 38 | 39 | 40 | The online version of this documentation `here `_ is often more up-to-date than the one included with releases of the code. 41 | 42 | Contents: 43 | 44 | .. toctree:: 45 | :maxdepth: 4 46 | 47 | setting_up 48 | 49 | basic 50 | complete_recipes 51 | 52 | gold_standard_recipes 53 | acoustic_modelling_scripts 54 | 55 | initial_voice 56 | refinements 57 | 58 | todo_list 59 | 60 | 61 | 62 | Indices and tables 63 | ================== 64 | 65 | * :ref:`genindex` 66 | * :ref:`modindex` 67 | * :ref:`search` 68 | 69 | -------------------------------------------------------------------------------- /doc/source/overview.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | =============== 4 | Overview 5 | =============== 6 | 7 | 8 | 9 | .. ---- These are just comments ----: 10 | .. But it's not straightforward! The whole Festival system seems to be designed to be complicated and keep non-geeks out! 11 | 12 | .. --digitaltoast 13 | 14 | .. [http://ubuntuforums.org/showthread.php?t=751169&page=12] -------------------------------------------------------------------------------- /doc/source/python_install.rst: -------------------------------------------------------------------------------- 1 | ============================================= 2 | MAKING YOUR OWN PYTHON INSTALLATION ON LINUX 3 | ============================================= 4 | 5 | Here are some steps to install a Python interpreter from scratch for running Ossian, 6 | using the 'Community Edition' distribution at ``http://www.activestate.com/activepython``. 7 | This is just one possible way to install Python. 
8 | 9 | Make a directory for your Python installation and `cd` to it, and set an environment variable 10 | to point to it for these instructions: 11 | 12 | export MYPYTHON=$PWD 13 | 14 | With a browser go to: 15 | 16 | ``http://www.activestate.com/activepython/downloads`` 17 | 18 | and click on the link for Python version 2.7.5.6 for Linux (x86_64) to download the package. 19 | This 'Community Edition' is for non-commercial or non-production use -- please refer to 20 | the Activestate license for details. 21 | When you've done this, move the package to the new Python directory: 22 | 23 | mv ~/Downloads/ActivePython-2.7.5.6-linux-x86_64.tar.gz $MYPYTHON 24 | 25 | Unpack it: 26 | 27 | .. code-block:: bash 28 | 29 | cd $MYPYTHON 30 | tar xvf ActivePython-2.7.5.6-linux-x86_64.tar.gz 31 | 32 | Use the installer provided to install: 33 | 34 | .. code-block:: bash 35 | 36 | cd ActivePython-2.7.5.6-linux-x86_64 37 | ./install.sh 38 | 39 | When prompted to specify "Install directory: ", enter ".." to install to $MYPYTHON and type "y" when it asks for confirmation. 40 | 41 | This should have installed Python at $MYPYTHON/bin/python -- type it in the command 42 | line and check you get a Python prompt like this: 43 | 44 | .. code-block:: bash 45 | 46 | ActivePython 2.7.5.6 (ActiveState Software Inc.) based on 47 | Python 2.7.5 (default, Sep 16 2013, 23:05:39) 48 | [GCC 4.0.2 20051125 (Red Hat 4.0.2-8)] on linux2 49 | Type "help", "copyright", "credits" or "license" for more information. 50 | >>> 51 | 52 | If that works, type ``quit()`` to exit the interactive session. 53 | 54 | Add the new Python ``bin`` directory to the start of system path so that the new Python 55 | will be used for the rest of the session (you can make this last beyond the session by 56 | editing e.g. your ``~/.bashrc``): 57 | 58 | .. code-block:: bash 59 | 60 | export PATH=$MYPYTHON/python/bin/:$PATH 61 | 62 | Use the ``pip`` package installer to get some necessary packages: 63 | 64 | .. 
code-block:: bash 65 | 66 | pip install numpy==1.8.0 67 | pip install scipy==0.12.0 68 | pip install configobj==4.7.2 69 | pip install scikit-learn==0.13.1 70 | pip install regex 71 | pip install lxml 72 | 73 | The version of scikit-learn is probably important; we have not yet determined how much 74 | flexibility there is with the versions of the other packages, but the above combination works. 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /doc/source/s4a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/doc/source/s4a.png -------------------------------------------------------------------------------- /doc/source/todo_list.rst: -------------------------------------------------------------------------------- 1 | -------------------------------------- 2 | List of some obvious things to do next 3 | -------------------------------------- 4 | 5 | There are lots of these -- a few that come to mind: 6 | 7 | - Merge in recent extensions to trunk from Antti, Peter, Jari... 8 | - Morfessor 9 | - ... 10 | 11 | - Languages: 12 | - Add toy demo corpora from all Tundra/Indic languages 13 | - Train and distribute voices on decent amounts of data for all these languages 14 | - Make tars of the Tundra etc. data that can be unpacked at $OSSIAN so the data lands 15 | in the right place 16 | 17 | - Online demo: 18 | - Get it working at a decent speed -- move from STRAIGHT resynthesis to MLSA? 19 | - Client/server mode to avoid loading voices per utterance? 20 | 21 | 22 | 23 | 24 | 25 | .. - Vocoding: 26 | .. - Currently using SPTK's mcep and hts_engine's MLSA on 16kH speech 27 | .. - Higher sampling rates 28 | .. - Move to STRAIGHT (at least for extraction -- can .cmp files be distributed? ) 29 | .. - Incorporate Cassia's modifications to hts_engine 30 | .. 
- GlottHMM -------------------------------------------------------------------------------- /make_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VERSION="1.3" ## set this by hand 4 | 5 | 6 | TOPDIR="./ossian-v.${VERSION}" 7 | 8 | 9 | if [ -e $TOPDIR ] ; then 10 | echo $TOPDIR exists -- please delete it an try again 11 | exit 1 12 | fi 13 | if [ -e ./ossian-v.${VERSION}.tgz ] ; then 14 | echo ./ossian-v.${VERSION}.tgz exists -- please delete it an try again 15 | exit 1 16 | fi 17 | 18 | 19 | ### -------- build and add html doc ----------- 20 | HERE=`pwd` 21 | cd ./doc 22 | gsed "s/__VERSION__/${VERSION}/" ./source/conf.py.initial > ./source/conf.py 23 | make html 24 | [ $# -ne 0 ] && echo "make doc failed" && exit 1 ; 25 | cd $HERE 26 | 27 | mkdir $TOPDIR 28 | 29 | ### --------- pack and unpack the stuff with tar to preserve all dir structure: ------ 30 | 31 | 32 | tar cf $TOPDIR/ossian_package.tar \ 33 | ./config_templates/ \ 34 | ./corpus/rm/speakers/rss_toy_demo/ \ 35 | ./corpus/rm/text_corpora/wikipedia_10K_words/ \ 36 | ./corpus/en/speakers/tundra_toy_demo/ \ 37 | ./corpus/en/labelled_corpora/cmudict/cmudict_phones.table \ 38 | ./corpus/en/labelled_corpora/cmudict/letter.names \ 39 | ./corpus/hi/ \ 40 | ./rules/ \ 41 | ./doc/build/ \ 42 | ./example_voices/rm-rss_toy-naive_example.tar \ 43 | ./example_voices/rm-rss_rnd1-naive_example.tar \ 44 | ./scripts \ 45 | ./test/txt/*.txt \ 46 | ./test/ref_wav/*.wav \ 47 | ./0_README.txt \ 48 | ./recipes/demo*.cfg \ 49 | ./recipes/baseline*.cfg \ 50 | ./recipes/naive*.cfg \ 51 | ./recipes/blizzard_2014_naive*.cfg \ 52 | ./recipes/english_gold_basic.cfg \ 53 | ./tools/patch/*.patch \ 54 | ./tools/downloads \ 55 | ./tools/GlottHMM/ 56 | 57 | cd $TOPDIR 58 | tar xf ossian_package.tar 59 | rm ossian_package.tar 60 | cd .. 
61 | 62 | 63 | ## ----- add some more directries ----- 64 | 65 | mkdir -p $TOPDIR/tools/bin 66 | mkdir -p $TOPDIR/tools/downloads 67 | 68 | 69 | mkdir $TOPDIR/train/ 70 | mkdir $TOPDIR/voices/ 71 | 72 | mkdir $TOPDIR/test/wav 73 | 74 | # --- remove any copied junk from release (.pyc and .svn stuff): ---- 75 | for FNAME in `find $TOPDIR/* -name *.pyc` ; do 76 | rm $FNAME 77 | done 78 | 79 | for FNAME in `find $TOPDIR/* | grep .svn` ; do 80 | rm -rf $FNAME 81 | done 82 | 83 | ## strip some other mac-crap: 84 | for FNAME in `find $TOPDIR/* -name '.DS_Store'` ; do 85 | rm $FNAME 86 | done 87 | 88 | # --- remove compiled GlottHMM files, etc.: ---- 89 | for FNAME in `find $TOPDIR/tool/GlottHMM/* -name *.o` ; do 90 | rm $FNAME 91 | done 92 | rm $TOPDIR/tool/GlottHMM/{Analysis,Synthesis} 93 | 94 | ### ----- pack up into a tgz file: ---- 95 | 96 | tar cvzf ./ossian-v.${VERSION}.tgz $TOPDIR 97 | rm -r $TOPDIR 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /recipes/world_extraction.cfg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Not a real recipe for building a whole voice, just a convenience for extracting World features from a database 4 | 5 | 6 | import sys 7 | import os 8 | import inspect 9 | current_dir = os.path.realpath(os.path.abspath(os.path.dirname(inspect.getfile(inspect.currentframe())))) 10 | 11 | ## for when config is still in recipes directory: 12 | sys.path.append(current_dir + '/../scripts/') 13 | sys.path.append(current_dir + '/../scripts/processors/') 14 | 15 | ## for after config is copied to voice.cfg: 16 | sys.path.append(current_dir + '/../../../../scripts/') 17 | sys.path.append(current_dir + '/../../../../scripts/processors/') 18 | 19 | from FeatureExtractor import WorldExtractor 20 | 21 | 22 | import default.const as c 23 | 24 | 25 | 26 | 27 | 28 | 29 | speech_coding_config = {'order': 59, 'static_window': '1', 
'delta_window': '-0.5 0.0 0.5', 'delta_delta_window': '1.0 -2.0 1.0'} 30 | 31 | 32 | ## ---------------------------------------------------------------- 33 | ## Now, a number of utterance processors are defined:-- 34 | 35 | 36 | 37 | speech_feature_extractor = WorldExtractor('acoustic_feature_extractor', input_filetype='wav', output_filetype='cmp', \ 38 | coding_config=speech_coding_config, sample_rate=48000, alpha=0.77, mcep_order=59) 39 | 40 | 41 | ## ----------------------------------------------------------------- 42 | ## The processors are grouped for convenience into several 'stages': 43 | 44 | single_stage = [speech_feature_extractor] 45 | 46 | 47 | 48 | ## ---------------------------------------------------------------- 49 | ## The final part of the config specifies which stages are run in each of the modes 50 | ## "train" and "runtime" (and optionally extra, specialised, modes): 51 | 52 | train_stages = [single_stage] 53 | 54 | runtime_stages = [single_stage] 55 | 56 | -------------------------------------------------------------------------------- /rules/en/textnorm/rules/abbrevlist: -------------------------------------------------------------------------------- 1 | Adj. 2 | Adm. 3 | Adv. 4 | Ala. 5 | Alex. 6 | Apr. 7 | Ariz. 8 | Ark. 9 | Assn. 10 | Asst. 11 | Aug. 12 | Ave. 13 | B.A.IT. 14 | B.A.T. 15 | B.Sc. 16 | Bart. 17 | Bhd. 18 | Bldg. 19 | Blvd. 20 | Brig. 21 | Bros. 22 | Ca. 23 | Cal. 24 | Calif. 25 | Capt. 26 | Cdr. 27 | Ch. 28 | Cie. 29 | Cmdr. 30 | Co. 31 | Col. 32 | Colo. 33 | Comdr. 34 | Con. 35 | Conn. 36 | Corp. 37 | Cos. 38 | Cpl. 39 | Cpt. 40 | Ct. 41 | D.Phil. 42 | D.Sc. 43 | DR. 44 | Dec. 45 | Del. 46 | Dept. 47 | dept. 48 | Dr. 49 | Drs. 50 | Ens. 51 | Etc. 52 | Feb. 53 | Fla. 54 | Fr. 55 | Fri. 56 | Ft. 57 | Ga. 58 | Gen. 59 | Gov. 60 | Hon. 61 | Hosp. 62 | Hr. 63 | Hwy. 64 | Ill. 65 | Inc. 66 | Ind. 67 | Insp. 68 | Jan. 69 | Jl. 70 | Jr. 71 | Jul. 72 | Jun. 73 | Kan. 74 | Ky. 75 | La. 76 | Lt. 77 | Ltd. 78 | Ltda. 79 | M.Phil. 
80 | M.Sc. 81 | MCorp. 82 | MM. 83 | MR. 84 | MRS. 85 | MS. 86 | Maj. 87 | Mar. 88 | Mass. 89 | Md. 90 | Me. 91 | Me.T.A. 92 | Messrs. 93 | Mfg. 94 | Mich. 95 | Minn. 96 | Miss. 97 | Mlle. 98 | Mme. 99 | Mo. 100 | Mohd. 101 | Mont. 102 | Mr. 103 | Mrs. 104 | Ms. 105 | Msgr. 106 | Mt. 107 | Muhd. 108 | Neb. 109 | Nev. 110 | Nov. 111 | Oct. 112 | Okla. 113 | Ont. 114 | Op. 115 | Ord. 116 | Ore. 117 | Oreg. 118 | Pa. 119 | Penn. 120 | Pfc. 121 | Ph. 122 | Ph.D. 123 | PhD. 124 | Pkwy. 125 | Prof. 126 | Prop. 127 | Pte. 128 | Pty. 129 | Pvt. 130 | Qtr. 131 | Rd. 132 | Rep. 133 | Reps. 134 | Res. 135 | Rev. 136 | Rt. 137 | S.p.A. 138 | Sen. 139 | Sens. 140 | Sep. 141 | Sept. 142 | Sfc. 143 | Sgt. 144 | Spc. 145 | Sr. 146 | St. 147 | Ste. 148 | Supt. 149 | Surg. 150 | Tel. 151 | Tenn. 152 | Tex. 153 | Tk. 154 | U.K. 155 | U.S. 156 | U.S.A. 157 | U.Conn. 158 | U-Conn. 159 | U.Mass. 160 | U-Mass. 161 | U.Md. 162 | U-Md. 163 | U.Penn. 164 | U-Penn. 165 | U.Va. 166 | U-Va. 167 | Va. 168 | Vt. 169 | W.Va. 170 | Wash. 171 | Wis. 172 | Wisc. 173 | Wyo. 174 | Yr. 175 | 176 | D-Ala. 177 | D-Ariz. 178 | D-Ark. 179 | D-Cal. 180 | D-Calif. 181 | D-Colo. 182 | D-Conn. 183 | D-Ct. 184 | D-D.C. 185 | D-Del. 186 | D-Fla. 187 | D-Ga. 188 | D-Ia. 189 | D-Ida. 190 | D-Ill. 191 | D-Ind. 192 | D-Kan. 193 | D-Ky. 194 | D-La. 195 | D-Mass. 196 | D-Md. 197 | D-Mich. 198 | D-Minn. 199 | D-Miss. 200 | D-Mo. 201 | D-Mont. 202 | D-N.C. 203 | D-N.D. 204 | D-N.H. 205 | D-N.J. 206 | D-N.M. 207 | D-N.Y. 208 | D-Neb. 209 | D-Nev. 210 | D-Okla. 211 | D-Ore. 212 | D-Oreg. 213 | D-Pa. 214 | D-Penn. 215 | D-R.I. 216 | D-S.C. 217 | D-S.D. 218 | D-Tenn. 219 | D-Tex. 220 | D-Va. 221 | D-Vt. 222 | D-W.Va. 223 | D-Wash. 224 | D-Wis. 225 | D-Wisc. 226 | D-Wyo. 227 | R-Ala. 228 | R-Ariz. 229 | R-Ark. 230 | R-Cal. 231 | R-Calif. 232 | R-Colo. 233 | R-Conn. 234 | R-Ct. 235 | R-D.C. 236 | R-Del. 237 | R-Fla. 238 | R-Ga. 239 | R-Ia. 240 | R-Ida. 241 | R-Ill. 242 | R-Ind. 243 | R-Kan. 244 | R-Ky. 245 | R-La. 
246 | R-Mass. 247 | R-Md. 248 | R-Mich. 249 | R-Minn. 250 | R-Miss. 251 | R-Mo. 252 | R-Mont. 253 | R-N.C. 254 | R-N.D. 255 | R-N.H. 256 | R-N.J. 257 | R-N.M. 258 | R-N.Y. 259 | R-Neb. 260 | R-Nev. 261 | R-Okla. 262 | R-Ore. 263 | R-Oreg. 264 | R-Pa. 265 | R-Penn. 266 | R-R.I. 267 | R-S.C. 268 | R-S.D. 269 | R-Tenn. 270 | R-Tex. 271 | R-Va. 272 | R-Vt. 273 | R-W.Va. 274 | R-Wash. 275 | R-Wis. 276 | R-Wisc. 277 | R-Wyo. 278 | 279 | a.d. 280 | a.m. 281 | b.c. 282 | co. 283 | cu. 284 | dlrs. 285 | e.g. 286 | etc. 287 | ft. 288 | i.e. 289 | inc. 290 | km. 291 | lb. 292 | lbs. 293 | mfg. 294 | mg. 295 | mi. 296 | mm. 297 | p.m. 298 | r.p.m. 299 | sq. 300 | rev. 301 | v. 302 | vs. 303 | 304 | o.tel.o 305 | O.tel.o 306 | 307 | A. 308 | B. 309 | C. 310 | D. 311 | E. 312 | F. 313 | G. 314 | H. 315 | I. 316 | J. 317 | K. 318 | L. 319 | M. 320 | N. 321 | O. 322 | P. 323 | Q. 324 | R. 325 | S. 326 | T. 327 | U. 328 | V. 329 | W. 330 | X. 331 | Y. 332 | Z. 333 | 334 | D-W. 335 | D-N. 336 | D-S. 337 | R-W. 338 | R-N. 339 | R-S. 340 | 341 | Art. #number 342 | No. #number 343 | no. #number 344 | Nos. #number 345 | Nr. #number 346 | p. #number 347 | pp. #number 348 | -------------------------------------------------------------------------------- /rules/en/textnorm/rules/hyphenated: -------------------------------------------------------------------------------- 1 | U-Haul 2 | U-Hauls 3 | U-Hauls' 4 | U-Haul's 5 | U-Conn. 6 | U-Mass. 7 | U-Md. 8 | U-Penn. 9 | U-Va. 
10 | U-Conn 11 | U-MASS 12 | U-Mass 13 | U-Md 14 | U-Penn 15 | U-Tapao 16 | U-Texas 17 | U-VA 18 | U-Va 19 | U-Conn.'s 20 | U-Mass.'s 21 | U-Md.'s 22 | U-Penn.'s 23 | U-Va.'s 24 | U-Conn's 25 | U-MASS's 26 | U-Mass's 27 | U-Md's 28 | U-Penn's 29 | U-Tapao's 30 | U-Texas's 31 | U-VA's 32 | U-Va's 33 | vis-a-vis 34 | vis-à-vis 35 | -------------------------------------------------------------------------------- /rules/en/textnorm/rules/tldlist: -------------------------------------------------------------------------------- 1 | # Top-level domain names from http://data.iana.org/TLD/tlds-alpha-by-domain.txt 2 | # and lowercased, with the internationalized TLDs like xn--45brj9c removed. 3 | # Version 2012070501, Last Updated Fri Jul 6 07:07:01 2012 UTC 4 | ac 5 | ad 6 | ae 7 | aero 8 | af 9 | ag 10 | ai 11 | al 12 | am 13 | an 14 | ao 15 | aq 16 | ar 17 | arpa 18 | as 19 | asia 20 | at 21 | au 22 | aw 23 | ax 24 | az 25 | ba 26 | bb 27 | bd 28 | be 29 | bf 30 | bg 31 | bh 32 | bi 33 | biz 34 | bj 35 | bm 36 | bn 37 | bo 38 | br 39 | bs 40 | bt 41 | bv 42 | bw 43 | by 44 | bz 45 | ca 46 | cat 47 | cc 48 | cd 49 | cf 50 | cg 51 | ch 52 | ci 53 | ck 54 | cl 55 | cm 56 | cn 57 | co 58 | com 59 | coop 60 | cr 61 | cu 62 | cv 63 | cw 64 | cx 65 | cy 66 | cz 67 | de 68 | dj 69 | dk 70 | dm 71 | do 72 | dz 73 | ec 74 | edu 75 | ee 76 | eg 77 | er 78 | es 79 | et 80 | eu 81 | fi 82 | fj 83 | fk 84 | fm 85 | fo 86 | fr 87 | ga 88 | gb 89 | gd 90 | ge 91 | gf 92 | gg 93 | gh 94 | gi 95 | gl 96 | gm 97 | gn 98 | gov 99 | gp 100 | gq 101 | gr 102 | gs 103 | gt 104 | gu 105 | gw 106 | gy 107 | hk 108 | hm 109 | hn 110 | hr 111 | ht 112 | hu 113 | id 114 | ie 115 | il 116 | im 117 | in 118 | info 119 | int 120 | io 121 | iq 122 | ir 123 | is 124 | it 125 | je 126 | jm 127 | jo 128 | jobs 129 | jp 130 | ke 131 | kg 132 | kh 133 | ki 134 | km 135 | kn 136 | kp 137 | kr 138 | kw 139 | ky 140 | kz 141 | la 142 | lb 143 | lc 144 | li 145 | lk 146 | lr 147 | ls 148 | lt 149 | lu 150 | lv 
151 | ly 152 | ma 153 | mc 154 | md 155 | me 156 | mg 157 | mh 158 | mil 159 | mk 160 | ml 161 | mm 162 | mn 163 | mo 164 | mobi 165 | mp 166 | mq 167 | mr 168 | ms 169 | mt 170 | mu 171 | museum 172 | mv 173 | mw 174 | mx 175 | my 176 | mz 177 | na 178 | name 179 | nc 180 | ne 181 | net 182 | nf 183 | ng 184 | ni 185 | nl 186 | no 187 | np 188 | nr 189 | nu 190 | nz 191 | om 192 | org 193 | pa 194 | pe 195 | pf 196 | pg 197 | ph 198 | pk 199 | pl 200 | pm 201 | pn 202 | pr 203 | pro 204 | ps 205 | pt 206 | pw 207 | py 208 | qa 209 | re 210 | ro 211 | rs 212 | ru 213 | rw 214 | sa 215 | sb 216 | sc 217 | sd 218 | se 219 | sg 220 | sh 221 | si 222 | sj 223 | sk 224 | sl 225 | sm 226 | sn 227 | so 228 | sr 229 | st 230 | su 231 | sv 232 | sx 233 | sy 234 | sz 235 | tc 236 | td 237 | tel 238 | tf 239 | tg 240 | th 241 | tj 242 | tk 243 | tl 244 | tm 245 | tn 246 | to 247 | tp 248 | tr 249 | travel 250 | tt 251 | tv 252 | tw 253 | tz 254 | ua 255 | ug 256 | uk 257 | us 258 | uy 259 | uz 260 | va 261 | vc 262 | ve 263 | vg 264 | vi 265 | vn 266 | vu 267 | wf 268 | ws 269 | xxx 270 | ye 271 | yt 272 | za 273 | zm 274 | zw 275 | -------------------------------------------------------------------------------- /rules/en/textnorm/scripts/final_cleanup.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Modified by Fergus McInnes (FRM), 2013 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | use strict; 21 | use Unicode::Normalize; 22 | use open ':encoding(utf8)'; 23 | # use feature 'unicode_strings'; 24 | 25 | # Make sure we are reading and writing in UTF-8. 26 | binmode(STDIN, ":encoding(utf8)"); 27 | binmode(STDOUT, ":encoding(utf8)"); 28 | binmode(STDERR, ":encoding(utf8)"); 29 | 30 | my $help_message="USAGE: final_cleanup.pl < in > out\n"; 31 | 32 | while () { 33 | chomp; 34 | $_ = NFD($_); # UTF8 decompose 35 | 36 | s/^/ /; 37 | s/$/ /; 38 | 39 | # Heuristics by FRM to distinguish article "A" from initial "A.": 40 | s/ A\&/ A. and /g; 41 | s/ A (ROD|Rod|BAT) / A. $1 /g; 42 | s/ A A / A. A. /g; 43 | s/ A A / A. A. /g; 44 | s/A\. A\. A /A. A. A. /g; 45 | s/ ([B-Z]) A / $1 A. /g; 46 | s/ ([B-Z]) A / $1 A. /g; 47 | s/ A ([A-Z][A-Z])/ a $1/g; # prevent conversion to "A." in capitalised text 48 | # Exceptions and default for title case (correct for "Dial A Book" etc, but 49 | # not for names like "David A Cardona"): 50 | s/ (Single|Double|Triple|Type|Class|Serie|Series|Group|Model|Avenue|The|An) A / $1 A. /g; 51 | s/ ([A-Z][a-z\']+) A ([A-Z][a-z])/ $1 a $2/g; 52 | # Default for "A" in lower-case context: 53 | s/([a-z\&]) A /$1 A. /g; 54 | # After a comma, "A Title" keeps "A", but "A P Herbert" or "A to Z" gets "A.": 55 | s/, A ([^A-Z])/, A. $1/g; 56 | s/, A ([A-Z] )/, A. $1/g; 57 | # After any other punctuation, retain "A" with no dot 58 | 59 | # Convert email and Twitter notation: 60 | s/\@/ at /g; 61 | #s/ \#(\p{L}+) )/ hashtag $1 /g; # judged not worth doing for ASR LM 62 | # "#" on its own is sometimes "number", but not always 63 | 64 | # Remove punctuation: 65 | s/( [\"\'\-\.\?\!\,\:\;]+)+ / /g; 66 | s/([^\.])\&/$1 and /g; # this leaves "A.T.&T." 
etc unchanged 67 | s/[\|\$\#\%\*\+\,\^\:\;\?\~\\\/\!]/ /g; # will handle a**holes incorrectly 68 | s/\.{2,}/ /g; 69 | 70 | s/\p{M}//g; # Remove diacritics 71 | s/[\x{007F}-\x{00BF}]/ /g; # Remove some other unicode junk 72 | s/[\x{2190}-\x{21FF}]/ /g; # Remove arrows 73 | 74 | $_ = lc($_); 75 | 76 | s/(^| )(b|c|d|e|f|g|h|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)( |$)/ $2. /g; 77 | s/(^| )(b|c|d|e|f|g|h|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)( |$)/ $2. /g; 78 | 79 | s/^\s*//; s/\s*$//; s/\s+/ /g; # Normalize spaces 80 | next if /^$/; # Skip empty lines 81 | print NFC($_), "\n"; # UTF8 recompose & reorder canonically 82 | } 83 | -------------------------------------------------------------------------------- /rules/en/textnorm/scripts/remove_dups.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | # use open ':encoding(utf8)'; 5 | # use feature 'unicode_strings'; 6 | 7 | # Make sure we are reading and writing in UTF-8. 8 | binmode(STDIN, ":encoding(utf8)"); 9 | binmode(STDOUT, ":encoding(utf8)"); 10 | binmode(STDERR, ":encoding(utf8)"); 11 | 12 | my %seen_lines = (); 13 | 14 | while () { 15 | if (!defined($seen_lines{$_})) { 16 | $seen_lines{$_} = 1; 17 | print; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/build_MDL_trees.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | #---------------------------------------------------------------------- 7 | 8 | INDIR=$1 9 | OUTDIR=$2 10 | MDLWEIGHT=$3 11 | QUESTIONS=$4 12 | BIN=$5 13 | 14 | [ $# -ne 5 ] && echo "build_MDL_trees.sh: Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z 
$VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 26 | 27 | 28 | 29 | 30 | 31 | 32 | endstate=$[$NSTATE + 1] 33 | 34 | 35 | 36 | 37 | ## Make edit file for building ALL cmp trees: 38 | 39 | echo "RO 0 $INDIR/stat.cmp" > $OUTDIR/cluster_cmp.hed 40 | echo "TR 1" >> $OUTDIR/cluster_cmp.hed 41 | cat $QUESTIONS >> $OUTDIR/cluster_cmp.hed 42 | echo "TR 1" >> $OUTDIR/cluster_cmp.hed 43 | for STREAM in $STREAMS ; do 44 | for STATE in `seq 2 $endstate` ; do 45 | NAME="stream-${STREAM}-state-${STATE}" 46 | echo "TB 0 ${NAME}_ {*.state[${STATE}].stream[${STREAM}]}" >> $OUTDIR/cluster_cmp.hed 47 | done 48 | #echo "ST $OUTDIR/tree_cmp_str_${STREAM}.txt" >> $OUTDIR/cluster_cmp.hed 49 | done 50 | echo "TR 1" >> $OUTDIR/cluster_cmp.hed 51 | echo "ST $OUTDIR/tree_cmp.txt" >> $OUTDIR/cluster_cmp.hed 52 | 53 | 54 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -T 1 \ 55 | -i -m -a $MDLWEIGHT -H $INDIR/cmp.mmf \ 56 | -p -r 1 -s -w $OUTDIR/cmp.mmf $OUTDIR/cluster_cmp.hed $OUTDIR/data/modellist.full 57 | 58 | 59 | 60 | 61 | ## Duration: 62 | 63 | echo "RO 0 $INDIR/stat.dur" > $OUTDIR/cluster_dur.hed 64 | echo "TR 1" >> $OUTDIR/cluster_dur.hed 65 | cat $QUESTIONS >> $OUTDIR/cluster_dur.hed 66 | echo "TR 1" >> $OUTDIR/cluster_dur.hed 67 | echo "TB 0 duration_ {*.state[2].stream[1-${NSTATE}]}" >> $OUTDIR/cluster_dur.hed 68 | echo "TR 1" >> $OUTDIR/cluster_dur.hed 69 | echo "ST $OUTDIR/tree_dur.txt" >> $OUTDIR/cluster_dur.hed 70 | 71 | 72 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -T 1 \ 73 | -i -m -a $MDLWEIGHT -H $INDIR/dur.mmf \ 74 | -p -r 1 -s -w $OUTDIR/dur.mmf $OUTDIR/cluster_dur.hed $OUTDIR/data/modellist.full 75 | 76 | 77 | 78 | 79 | 80 | 81 | ## ------------------------ check success ---------------------------- 82 | if [ -z 
`grep -l QS $OUTDIR/tree_cmp.txt` ] ; then 83 | echo "Building emission trees failed: no QS lines in $OUTDIR/tree_cmp.txt" 84 | exit 1 85 | fi 86 | if [ -z `grep -l QS $OUTDIR/tree_dur.txt` ] ; then 87 | echo "Building duration trees failed: no QS lines in $OUTDIR/tree_dur.txt" 88 | exit 1 89 | fi 90 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 91 | echo "Building duration trees failed: no $OUTDIR/cmp.mmf" 92 | exit 1 93 | fi 94 | if [ ! -e $OUTDIR/dur.mmf ] ; then 95 | echo "Building duration trees failed: no $OUTDIR/dur.mmf" 96 | exit 1 97 | fi 98 | ## ------------------------------------------------------------------- 99 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/clone_monophone_to_fullcontext.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | if [ -z $VOICE_BUILD_CONFIG ] ; then 18 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' 
; exit 1 19 | fi 20 | source $VOICE_BUILD_CONFIG 21 | 22 | #---------------------------------------------------------------------- 23 | 24 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 25 | 26 | 27 | 28 | 29 | ## hardcoded: TODO 30 | NSTATE=5 31 | BINMOD="" 32 | 33 | 34 | 35 | endstate=$[$NSTATE + 1] 36 | 37 | 38 | ## cmp 39 | echo "TI \"SWeight\" { *.state[2-${endstate}].weights }" > $OUTDIR/clone_cmp.hed 40 | echo "MM \"trP\" { *.transP }" >> $OUTDIR/clone_cmp.hed 41 | echo "CL \"$OUTDIR/data/modellist.full\"" >> $OUTDIR/clone_cmp.hed 42 | 43 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/cmp.mmf -s -p -i -w $OUTDIR/cmp.mmf $OUTDIR/clone_cmp.hed $INDIR/data/modellist.mono 44 | 45 | 46 | ## dur 47 | echo "MM \"trP\" { *.transP }" > $OUTDIR/clone_dur.hed 48 | echo "CL \"$OUTDIR/data/modellist.full\"" >> $OUTDIR/clone_dur.hed 49 | 50 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/dur.mmf -s -p -i -w $OUTDIR/dur.mmf $OUTDIR/clone_dur.hed $INDIR/data/modellist.mono -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/increase_mixture_components.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | NCOMPONENTS=$3 12 | BIN=$4 13 | 14 | 15 | [ $# -ne 4 ] && echo "Wrong number of arguments supplied" && exit 1 ; 16 | 17 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 18 | 19 | if [ -z $VOICE_BUILD_CONFIG ] ; then 20 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' 
; exit 1 21 | fi 22 | source $VOICE_BUILD_CONFIG 23 | 24 | #---------------------------------------------------------------------- 25 | 26 | ## no label prune!!!! 27 | 28 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 29 | 30 | LAST_STATE=$[${NSTATE}+2] 31 | 32 | echo "MU $NCOMPONENTS {*.state[2-${LAST_STATE}].stream[${MIXUP_STREAMS}].mix}" > $OUTDIR/mixup.hed 33 | 34 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -T 1 -H $INDIR/cmp.mmf -M $OUTDIR/ $OUTDIR/mixup.hed $OUTDIR/data/modellist.mono 35 | 36 | ## ------------------------ check success ---------------------------- 37 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 38 | echo "Reestimation failed: cmp.mmf not made" 39 | exit 1 40 | fi 41 | ## ------------------------------------------------------------------- 42 | 43 | 44 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/initial_alignment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | #---------------------------------------------------------------------- 18 | 19 | 20 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 21 | 22 | 23 | $UTIL/make_proto_hsmm.pl $OUTDIR/proto.txt 24 | 25 | 26 | 27 | ## -------- floor variance ---------: 28 | ## cmp: 29 | $BIN/HCompV -A -C $OUTDIR/config/general.conf -D -V -S $OUTDIR/data/uttlist.cmp -T 1 -M $OUTDIR $OUTDIR/proto.txt 30 | if [ $? 
-gt 0 ] ; then echo "Floor variance failed" ;exit 1 ; fi 31 | head -n 1 $OUTDIR/proto.txt | cat - $OUTDIR/vFloors > $OUTDIR/floor_cmp.mmf 32 | 33 | 34 | 35 | 36 | ## dur -- floor variance to 1.0: 37 | rm $OUTDIR/floor_dur.mmf 38 | for i in 1 2 3 4 5 ; do 39 | echo "~v varFloor${i}" >> $OUTDIR/floor_dur.mmf 40 | echo " 1" >> $OUTDIR/floor_dur.mmf 41 | echo "1.0" >> $OUTDIR/floor_dur.mmf 42 | done 43 | 44 | 45 | 46 | ## ------- segmental K-means & EM-based estimation of monophones: ------ 47 | mkdir $OUTDIR/hinit 48 | mkdir $OUTDIR/hrest_cmp 49 | mkdir $OUTDIR/hrest_dur 50 | 51 | for phone in `cat $OUTDIR/data/modellist.mono`; do 52 | echo $phone 53 | $BIN/HInit -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \ 54 | -I $OUTDIR/data/mlf.mono -M $OUTDIR/hinit -o $phone -S $OUTDIR/data/uttlist.cmp \ 55 | -T 1 -l $phone -m 1 -u tmvw -w 3 $OUTDIR/proto.txt 56 | $BIN/HRest -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \ 57 | -I $OUTDIR/data/mlf.mono -M $OUTDIR/hrest_cmp -o $phone -S $OUTDIR/data/uttlist.cmp \ 58 | -T 1 -l $phone -g $OUTDIR/hrest_dur/$phone -m 1 -u tmvw -w 3 $OUTDIR/hinit/$phone 59 | done 60 | 61 | 62 | ## ------ join individual monophone files together -------- 63 | mkdir $OUTDIR/joined_0 64 | 65 | ## cmp: 66 | echo "FV $OUTDIR/floor_cmp.mmf" > $OUTDIR/join_cmp.hed ## make hed file 67 | 68 | arg="" 69 | for phone in `cat $OUTDIR/data/modellist.mono`; do 70 | arg="$arg -H $OUTDIR/hrest_cmp/$phone" 71 | done 72 | 73 | $BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/cmp.mmf $OUTDIR/join_cmp.hed $OUTDIR/data/modellist.mono 74 | 75 | 76 | ## dur: 77 | echo "FV $OUTDIR/floor_dur.mmf" > $OUTDIR/join_dur.hed ## make hed file 78 | 79 | arg="" 80 | for phone in `cat $OUTDIR/data/modellist.mono`; do 81 | arg="$arg -H $OUTDIR/hrest_dur/$phone" 82 | done 83 | 84 | $BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/dur.mmf $OUTDIR/join_dur.hed 
$OUTDIR/data/modellist.mono 85 | 86 | 87 | 88 | NREEST=5 89 | ## ------ embedded reestimation -------- 90 | for new in `seq ${NREEST}` ; do 91 | old=$[$new - 1] 92 | mkdir $OUTDIR/joined_${new} 93 | $BIN/HERest -A -B -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/joined_${old}/cmp.mmf \ 94 | -N $OUTDIR/joined_${old}/dur.mmf -e 2 -I $OUTDIR/data/mlf.mono -M $OUTDIR/joined_${new} \ 95 | -R $OUTDIR/joined_${new} -S $OUTDIR/data/uttlist.cmp -T 1 -m 1 -t 5000 5000 10000 \ 96 | -u mvwtdmv -w 3 $OUTDIR/data/modellist.mono $OUTDIR/data/modellist.mono 97 | done 98 | cp $OUTDIR/joined_${NREEST}/cmp.mmf $OUTDIR/cmp.mmf 99 | cp $OUTDIR/joined_${NREEST}/dur.mmf $OUTDIR/dur.mmf 100 | 101 | 102 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_alignment_lexicon.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | BIN=$2 11 | 12 | [ $# -ne 2 ] && echo "Wrong number of arguments supplied" && exit 1 ; 13 | 14 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 15 | 16 | if [ -z $VOICE_BUILD_CONFIG ] ; then 17 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 18 | fi 19 | source $VOICE_BUILD_CONFIG 20 | 21 | #---------------------------------------------------------------------- 22 | 23 | if [ ! -e $INDIR/data/modellist.mono ] ; then 24 | echo "$INDIR/data/modellist.mono doesn't exist" ; exit 1 ; 25 | fi 26 | 27 | echo "" > $INDIR/data/lexicon.txt ## clear existing lexicon data 28 | echo "" > $INDIR/data/lexicon.tmp 29 | 30 | if [ ! 
-z $EXTRA_SUBSTITUTIONS ] ; then 31 | cat $EXTRA_SUBSTITUTIONS > $INDIR/data/lexicon.txt 32 | fi 33 | 34 | 35 | for MODELNAME in `cat $INDIR/data/modellist.mono | sort ` ; do 36 | case $MODELNAME in 37 | _SPACE_ ) 38 | ## Note the order of skip and sil -- the first provides initial 39 | ## expansion; after models are initialised like this, both options 40 | ## are allowed. 41 | echo '_SPACE_ skip' >> $INDIR/data/lexicon.tmp 42 | echo '_SPACE_ sil' >> $INDIR/data/lexicon.tmp 43 | ;; 44 | _PUNC_ ) 45 | echo '_PUNC_ sil' >> $INDIR/data/lexicon.tmp 46 | echo '_PUNC_ skip' >> $INDIR/data/lexicon.tmp 47 | ;; 48 | * ) 49 | echo "$MODELNAME $MODELNAME" >> $INDIR/data/lexicon.tmp ;; 50 | esac 51 | done 52 | 53 | for ENTRY in `awk '{print $1}' $INDIR/data/lexicon.txt` ; do 54 | grep -v "^$ENTRY " $INDIR/data/lexicon.tmp > $INDIR/data/lexicon.tmp2 55 | mv $INDIR/data/lexicon.tmp2 $INDIR/data/lexicon.tmp 56 | done 57 | 58 | cat $INDIR/data/lexicon.tmp >> $INDIR/data/lexicon.txt 59 | rm $INDIR/data/lexicon.tmp 60 | 61 | 62 | #--------- initial expansion of labels using this lexicon ------------- 63 | cp $INDIR/data/mlf.full $INDIR/data/mlf.words 64 | 65 | echo "EX" > $INDIR/expand_labels.hed 66 | 67 | $BIN/HLEd -I $INDIR/data/mlf.words -i $INDIR/data/mlf.mono -l '*' -d $INDIR/data/lexicon.txt $INDIR/expand_labels.hed $INDIR/data/mlf.words 68 | 69 | #--------- re-make monophone list (to add e.g. 
skip) ------------------ 70 | awk '{print $2}' $INDIR/data/lexicon.txt | sort -u > $INDIR/data/modellist.mono 71 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_alignment_monophone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | 14 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | 26 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 27 | 28 | 29 | 30 | $UTIL/make_proto_hsmm.py $OUTDIR/proto.txt $VOICE_BUILD_CONFIG 31 | $UTIL/make_proto_skip_hsmm.py $OUTDIR/proto_skip.txt $VOICE_BUILD_CONFIG 32 | 33 | 34 | 35 | 36 | echo "Floor variance..." 37 | $BIN/HCompV -A -C $OUTDIR/config/general.conf -D -V -f 0.01 -m -S $OUTDIR/data/uttlist.cmp -T 1 -M $OUTDIR $OUTDIR/proto.txt 38 | if [ $? -gt 0 ] ; then echo "Floor variance failed" ; exit 1 ; fi 39 | 40 | 41 | 42 | echo "Generate models..." 43 | mkdir -p $OUTDIR/hcompv/ 44 | for m in `cat $OUTDIR/data/modellist.mono` ; do 45 | echo "phone $m" 46 | if [ "$m" != "skip" ] ; then 47 | grep -v "~h" $OUTDIR/proto > $OUTDIR/hcompv/$m 48 | else 49 | cp $OUTDIR/proto_skip.txt $OUTDIR/hcompv/$m # null topol for skip 50 | fi 51 | done 52 | echo "models made OK" 53 | 54 | echo "Combine models into single file..." 
55 | echo " " > $OUTDIR/null.hed 56 | $BIN/HHEd -d $OUTDIR/hcompv/ -w $OUTDIR/cmp.mmf $OUTDIR/null.hed $OUTDIR/data/modellist.mono 57 | if [ $? -gt 0 ] ; then echo "Model combination failed" ; exit 1 ; fi 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_engine_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | if [ -z $VOICE_BUILD_CONFIG ] ; then 18 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 19 | fi 20 | source $VOICE_BUILD_CONFIG 21 | 22 | #---------------------------------------------------------------------- 23 | 24 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 25 | 26 | 27 | mkdir -p $OUTDIR/engine/ 28 | 29 | 30 | 31 | 32 | cp $INDIR/tree* $OUTDIR/ 33 | python $UTIL/separate_trees.py -treefile $OUTDIR/tree_cmp.txt 34 | 35 | #$UTIL/make_config.sh $OUTDIR/config/ 36 | 37 | for STREAM in $SHORT_STREAM_NAMES ; do 38 | 39 | echo "LT $OUTDIR/tree_cmp.txt_${STREAM}" > $OUTDIR/engine.hed 40 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 41 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 42 | 43 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/cmp.mmf \ 44 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 45 | 46 | if [ $? 
-gt 0 ] ; then echo "Convert to engine format failed for stream $STREAM" ; exit 1 ; fi 47 | 48 | done 49 | 50 | 51 | ## Rename final models -- this is hard-coded for SPTK / STRAIGHT stream-names, 52 | ## need to generalise for GlottHMM etc.:-- 53 | i=1 54 | for TYPE in $STREAM_NAMES; do 55 | mv $OUTDIR/engine/pdf.${i} $OUTDIR/engine/${TYPE}.pdf 56 | mv $OUTDIR/engine/trees.${i} $OUTDIR/engine/tree-${TYPE}.inf 57 | i=$((i+1)) 58 | done 59 | 60 | ## hardcoded for WORLD 61 | mv $OUTDIR/engine/trees.5 $OUTDIR/engine/tree-bap.inf 62 | mv $OUTDIR/engine/pdf.5 $OUTDIR/engine/bap.pdf 63 | 64 | 65 | 66 | ## dur 67 | echo "LT $OUTDIR/tree_dur.txt" > $OUTDIR/engine.hed 68 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 69 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 70 | 71 | 72 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/dur.mmf \ 73 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 74 | 75 | if [ $? -gt 0 ] ; then echo "Convert to engine format failed for duration " ; exit 1 ; fi 76 | 77 | 78 | mv $OUTDIR/engine/pdf.1 $OUTDIR/engine/duration.pdf ## $OUTDIR/engine/duration-2.3.pdf 79 | mv $OUTDIR/engine/trees.1 $OUTDIR/engine/tree-duration.inf 80 | 81 | 82 | 83 | ## ------------------------ check success ---------------------------- 84 | #for fname in tree-duration.inf duration.pdf tree-mcep.inf mcep.pdf \ 85 | # logF0.pdf tree-logF0.inf ; do 86 | # if [ ! 
-e $OUTDIR/engine/$fname ] ; then 87 | # echo "Making engine files failed: no $OUTDIR/engine/$fname" 88 | # exit 1 89 | # fi 90 | #done 91 | 92 | ## ------------------------------------------------------------------- 93 | 94 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_engine_model.sh.OLD: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 14 | 15 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 16 | 17 | if [ -z $VOICE_BUILD_CONFIG ] ; then 18 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 19 | fi 20 | source $VOICE_BUILD_CONFIG 21 | 22 | #---------------------------------------------------------------------- 23 | 24 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 25 | 26 | 27 | mkdir -p $OUTDIR/engine/ 28 | 29 | 30 | 31 | 32 | cp $INDIR/tree* $OUTDIR/ 33 | python $UTIL/separate_trees.py -treefile $OUTDIR/tree_cmp.txt 34 | 35 | #$UTIL/make_config.sh $OUTDIR/config/ 36 | 37 | for STREAM in $SHORT_STREAM_NAMES ; do 38 | 39 | echo "LT $OUTDIR/tree_cmp.txt_${STREAM}" > $OUTDIR/engine.hed 40 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 41 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 42 | 43 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/cmp.mmf \ 44 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 45 | 46 | if [ $? 
-gt 0 ] ; then echo "Convert to engine format failed for stream $STREAM" ; exit 1 ; fi 47 | 48 | done 49 | 50 | 51 | ## Rename final models -- this is hard-coded for SPTK / STRAIGHT stream-names, 52 | ## need to generalise for GlottHMM etc.:-- 53 | 54 | mv $OUTDIR/engine/pdf.1 $OUTDIR/engine/mcep.pdf 55 | mv $OUTDIR/engine/pdf.2 $OUTDIR/engine/logF0.pdf 56 | ## For STRAIGHT case: 57 | if [ -e $OUTDIR/engine/pdf.5 ] ; then 58 | mv $OUTDIR/engine/pdf.5 $OUTDIR/engine/bndap.pdf 59 | fi 60 | 61 | 62 | mv $OUTDIR/engine/trees.1 $OUTDIR/engine/tree-mcep.inf 63 | mv $OUTDIR/engine/trees.2 $OUTDIR/engine/tree-logF0.inf 64 | ## For STRAIGHT case: 65 | if [ -e $OUTDIR/engine/trees.5 ] ; then 66 | mv $OUTDIR/engine/trees.5 $OUTDIR/engine/tree-bndap.inf 67 | fi 68 | 69 | 70 | 71 | ## dur 72 | echo "LT $OUTDIR/tree_dur.txt" > $OUTDIR/engine.hed 73 | echo "CT $OUTDIR/engine/" >> $OUTDIR/engine.hed 74 | echo "CM $OUTDIR/engine/" >> $OUTDIR/engine.hed 75 | 76 | $BIN/HHEd -A -B -C $OUTDIR/config/engine_convert.conf -D -V -T 1 -H $INDIR/dur.mmf \ 77 | -s -p -i $OUTDIR/engine.hed $OUTDIR/data/modellist.full 78 | 79 | if [ $? -gt 0 ] ; then echo "Convert to engine format failed for duration " ; exit 1 ; fi 80 | 81 | 82 | mv $OUTDIR/engine/pdf.1 $OUTDIR/engine/duration.pdf ## $OUTDIR/engine/duration-2.3.pdf 83 | mv $OUTDIR/engine/trees.1 $OUTDIR/engine/tree-duration.inf 84 | 85 | 86 | 87 | ## ------------------------ check success ---------------------------- 88 | for fname in tree-duration.inf duration.pdf tree-mcep.inf mcep.pdf \ 89 | logF0.pdf tree-logF0.inf ; do 90 | if [ ! 
-e $OUTDIR/engine/$fname ] ; then 91 | echo "Making engine files failed: no $OUTDIR/engine/$fname" 92 | exit 1 93 | fi 94 | done 95 | ## ------------------------------------------------------------------- 96 | 97 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/make_monophone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | 8 | #---------------------------------------------------------------------- 9 | 10 | INDIR=$1 11 | OUTDIR=$2 12 | BIN=$3 13 | 14 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | set -e 26 | 27 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 28 | 29 | 30 | 31 | 32 | $UTIL/make_proto_hsmm.py $OUTDIR/proto.txt $VOICE_BUILD_CONFIG 33 | 34 | 35 | 36 | ## -------- floor variance ---------: 37 | ## cmp: 38 | $BIN/HCompV -A -C $OUTDIR/config/general.conf -D -V -S $OUTDIR/data/uttlist.cmp -T 1 -M $OUTDIR $OUTDIR/proto.txt 39 | if [ $? 
-gt 0 ] ; then echo "Floor variance failed" ;exit 1 ; fi
head -n 1 $OUTDIR/proto.txt | cat - $OUTDIR/vFloors > $OUTDIR/floor_cmp.mmf




## dur -- floor variance to 1.0:
rm -f $OUTDIR/floor_dur.mmf
for i in `seq $NSTATE` ; do
    echo "~v varFloor${i}" >> $OUTDIR/floor_dur.mmf
    ## NOTE(review): the next line looks like an HTK tag (e.g. <VARIANCE>) was
    ## stripped by text extraction -- confirm against the original repository:
    echo " 1" >> $OUTDIR/floor_dur.mmf
    echo "1.0" >> $OUTDIR/floor_dur.mmf
done


## ------- segmental K-means & EM-based estimation of monophones: ------
mkdir $OUTDIR/hinit
mkdir $OUTDIR/hrest_cmp
mkdir $OUTDIR/hrest_dur
i=1

for phone in `cat $OUTDIR/data/modellist.mono`; do
    echo $phone
    $BIN/HInit -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \
                 -I $OUTDIR/data/mlf.mono -M $OUTDIR/hinit -o $phone -S $OUTDIR/data/uttlist.cmp \
                 -T 1 -l $phone -m 1 -u tmvw -w 3 $OUTDIR/proto.txt
    ## FIX: this status check previously sat after the 'i=$[$i + 1]' increment
    ## at the bottom of the loop, so $? was the (always-zero) status of the
    ## arithmetic assignment and the check could never fire. Check each HTK
    ## tool immediately after it runs instead ('set -e' above would abort
    ## anyway, but silently -- these give a useful message):
    if [ $? -gt 0 ] ; then echo "HInit failed for $phone" ;exit 1 ; fi
    $BIN/HRest -A -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/floor_cmp.mmf \
                 -I $OUTDIR/data/mlf.mono -M $OUTDIR/hrest_cmp -o $phone -S $OUTDIR/data/uttlist.cmp \
                 -T 1 -l $phone -g $OUTDIR/hrest_dur/$phone -m 1 -u tmvw -w 3 $OUTDIR/hinit/$phone
    if [ $? -gt 0 ] ; then echo "HRest failed for $phone" ;exit 1 ; fi
    ## Rename per-phone output to a numeric index so the join step below can
    ## rebuild the ordered -H argument list:
    mv $OUTDIR/hrest_cmp/$phone $OUTDIR/hrest_cmp/$i-mmf
    mv $OUTDIR/hrest_dur/$phone $OUTDIR/hrest_dur/$i-mmf
    i=$[$i + 1]
done


## ------ join individual monophone files together --------
mkdir $OUTDIR/joined_0

## cmp:
echo "FV $OUTDIR/floor_cmp.mmf" > $OUTDIR/join_cmp.hed ## make hed file

arg=""
i=1
for phone in `cat $OUTDIR/data/modellist.mono`; do
    arg="$arg -H $OUTDIR/hrest_cmp/$i-mmf"
    i=$[$i + 1]
done

$BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/cmp.mmf $OUTDIR/join_cmp.hed $OUTDIR/data/modellist.mono


## dur:
echo "FV $OUTDIR/floor_dur.mmf" > $OUTDIR/join_dur.hed ## make hed file

arg=""
i=1
for phone in `cat $OUTDIR/data/modellist.mono`; do
    arg="$arg -H $OUTDIR/hrest_dur/$i-mmf"
    i=$[$i + 1]
done

$BIN/HHEd -A -B -C $OUTDIR/config/general.conf -D -V -T 1 $arg -s -p -i -w $OUTDIR/joined_0/dur.mmf $OUTDIR/join_dur.hed $OUTDIR/data/modellist.mono


NREEST=5
## ------ embedded reestimation --------
for new in `seq ${NREEST}` ; do
    old=$[$new - 1]
    mkdir $OUTDIR/joined_${new}
    $BIN/HERest -A -B -C $OUTDIR/config/general.conf -D -V -H $OUTDIR/joined_${old}/cmp.mmf \
          -N $OUTDIR/joined_${old}/dur.mmf -e 2 -I $OUTDIR/data/mlf.mono -M $OUTDIR/joined_${new} \
          -R $OUTDIR/joined_${new} -S $OUTDIR/data/uttlist.cmp -T 1 -m 1 -t 5000 5000 10000 \
          -u mvwtdmv -w 3 $OUTDIR/data/modellist.mono $OUTDIR/data/modellist.mono
done

cp $OUTDIR/joined_${NREEST}/cmp.mmf $OUTDIR/cmp.mmf
cp $OUTDIR/joined_${NREEST}/dur.mmf $OUTDIR/dur.mmf


-------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/realign.sh: --------------------------------------------------------------------------------
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver
Watts - owatts@staffmail.ed.ac.uk


#----------------------------------------------------------------------

INDIR=$1
OUTDIR=$2
BIN=$3

[ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ;

UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script

## FIX (consistency with the sibling step scripts, e.g. reestimate.sh):
## sourcing an unset $VOICE_BUILD_CONFIG silently sources nothing and the
## script then runs with empty settings -- fail loudly instead:
if [ -z $VOICE_BUILD_CONFIG ] ; then
    echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1
fi
source $VOICE_BUILD_CONFIG
#----------------------------------------------------------------------



source $UTIL/setup_directory.sh $INDIR $OUTDIR

## copy models:
cp $INDIR/cmp.mmf $OUTDIR/
cp $INDIR/dur.mmf $OUTDIR/

## Copy decision trees if present:
if [ -e $INDIR/tree_dur.txt ] ; then
    cp $INDIR/tree* $OUTDIR
fi


endstate=$[$NSTATE + 1]


## Realign the training data with the current models, writing per-utterance
## labels, then merge those labels back into a single MLF:
mkdir -p $OUTDIR/data/newlab

$BIN/HSMMAlign -A -C $OUTDIR/config/general.conf -D -V $RELAXED_LABEL_PRUNE -H $INDIR/cmp.mmf -N $INDIR/dur.mmf \
      -I $OUTDIR/data/mlf.full -S $OUTDIR/data/uttlist.cmp -T 1 -t 4000 -w 1.0 \
      -m $OUTDIR/data/newlab $OUTDIR/data/modellist.full $OUTDIR/data/modellist.full
## FIX (consistency with sibling scripts): check the aligner actually succeeded:
if [ $? -gt 0 ] ; then echo "Alignment failed" ; exit 1 ; fi
rm -f $OUTDIR/data/newlab_list
find $OUTDIR/data/newlab/ -name '*.lab' -print > $OUTDIR/data/newlab_list
echo " " > $OUTDIR/null.hed
## Keep the previous MLF around, then rebuild mlf.full from the new labels:
mv $OUTDIR/data/mlf.full $OUTDIR/data/mlf.full.OLD
$BIN/HLEd -A -D -T 1 -V -l '*' -i $OUTDIR/data/mlf.full -S $OUTDIR/data/newlab_list $OUTDIR/null.hed
-------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/realign_to_labels.sh: --------------------------------------------------------------------------------
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk


#----------------------------------------------------------------------

INDIR=$1
OUTDIR=$2
BIN=$3


[ $# -ne 3 ] && echo "Wrong number of
arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | 26 | #------------- 27 | # Hard coded: 28 | HVITE_BEAM=" 1000 100000 1000000 " 29 | #------------- 30 | 31 | 32 | ## no label prune!!!! 33 | 34 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 35 | 36 | 37 | $BIN/HVite -l \* -A -C $OUTDIR/config/general.conf -D -V -T 1 -a -m -I $OUTDIR/data/mlf.words \ 38 | -H $INDIR/cmp.mmf -i $OUTDIR/data/mlf.mono.NEW -o SW \ 39 | -t $HVITE_BEAM -S $OUTDIR/data/uttlist.cmp -y lab $OUTDIR/data/lexicon.txt \ 40 | $INDIR/data/modellist.mono 41 | if [ $? -gt 0 ] ; then echo "Alignment failed" ; exit 1 ; fi 42 | 43 | ## ------------------------ check success ---------------------------- 44 | if [ ! 
-e $OUTDIR/data/mlf.mono.NEW ] ; then 45 | echo "Alignment failed: cmp.mmf not made" 46 | exit 1 47 | fi 48 | ## ------------------------------------------------------------------- 49 | 50 | 51 | ## rename new alignment so it will be used in future: 52 | mv $OUTDIR/data/mlf.mono.NEW $OUTDIR/data/mlf.mono 53 | 54 | ## remove names of utts for which no label has been found from training list: 55 | $UTIL/update_train_list.py -mlf $OUTDIR/data/mlf.mono -trainlist $OUTDIR/data/uttlist.cmp 56 | 57 | ## copy models (which weren't updated): 58 | cp $INDIR/cmp.mmf $OUTDIR/cmp.mmf 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/reestimate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | FLOOR=$4 13 | 14 | [ $# -ne 4 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | 26 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 27 | 28 | 29 | 30 | 31 | ## FLOOR=0 is used to perturb untied models before clustering, 1 used for clustered training. 
32 | if [ $FLOOR == 0 ] ; then 33 | OPT=" -C $OUTDIR/config/general-unfloor.conf -w 0.0 " # -w: set mix weight floor to f*MINMIX 34 | else 35 | OPT=" -C $OUTDIR/config/general.conf -w 3.0 " 36 | fi 37 | 38 | $BIN/HERest -A $BINMOD -D -T 1 -S $OUTDIR/data/uttlist.cmp \ 39 | -I $OUTDIR/data/mlf.full -m 1 -u tmvwdmv -t $BEAM \ 40 | -H $INDIR/cmp.mmf -N $INDIR/dur.mmf \ 41 | -M $OUTDIR/ -R $OUTDIR/ \ 42 | $OPT -s $OUTDIR/stat.cmp $STRICT_LABEL_PRUNE \ 43 | $OUTDIR/data/modellist.full $OUTDIR/data/modellist.full 44 | 45 | ## Make duration stats file: 46 | awk '{print $1 " " $2 " " $3 " " $3 }' $OUTDIR/stat.cmp > $OUTDIR/stat.dur 47 | 48 | ## Copy decision trees if present: 49 | if [ -e $INDIR/tree_dur.txt ] ; then 50 | cp $INDIR/tree* $OUTDIR 51 | fi 52 | 53 | 54 | ## ------------------------ check success ---------------------------- 55 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 56 | echo "Reestimation failed: cmp.mmf not made" 57 | exit 1 58 | fi 59 | if [ ! -e $OUTDIR/dur.mmf ] ; then 60 | echo "Reestimation failed: dur.mmf not made" 61 | exit 1 62 | fi 63 | ## ------------------------------------------------------------------- 64 | 65 | 66 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/reestimate_alignment_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | BIN=$3 12 | 13 | 14 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 15 | 16 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 17 | 18 | if [ -z $VOICE_BUILD_CONFIG ] ; then 19 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' 
; exit 1 20 | fi 21 | source $VOICE_BUILD_CONFIG 22 | 23 | #---------------------------------------------------------------------- 24 | 25 | #----------- 26 | # Hard coded: 27 | ALIGN_BEAM=" -t 250.0 500.0 1000.0 " 28 | #----------- 29 | 30 | 31 | ## no label prune!!!! 32 | 33 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 34 | 35 | ## strip skip models from input MLF as an easy way to avoid 'ERROR [+7332] CreateInsts: Cannot have successive Tee models' 36 | grep -v skip $OUTDIR/data/mlf.mono > $OUTDIR/data/mlf.mono.noskip 37 | 38 | $BIN/HERest -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -S $OUTDIR/data/uttlist.cmp \ 39 | -I $OUTDIR/data/mlf.mono.noskip -m 1 -u tmvw $LABEL_PRUNE $ALIGN_BEAM \ 40 | -H $INDIR/cmp.mmf -M $OUTDIR/ $OUTDIR/data/modellist.mono 41 | 42 | 43 | ## ------------------------ check success ---------------------------- 44 | sleep 1 45 | if [ ! -e $OUTDIR/cmp.mmf ] ; then 46 | echo "Reestimation failed: cmp.mmf not made" 47 | exit 1 48 | fi 49 | ## ------------------------------------------------------------------- 50 | 51 | 52 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/subset_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import re 8 | import os 9 | 10 | from argparse import ArgumentParser 11 | 12 | ## find location of util relative to current script: 13 | loc = os.path.abspath(os.path.join( __file__, os.pardir, os.pardir, 'util') ) 14 | sys.path.append(loc) 15 | 16 | ''' 17 | Choose subset of data randomly -- either x minutes or at least x or each monophone. 
18 | ''' 19 | from util import * 20 | 21 | def main_work(): 22 | 23 | ################################################# 24 | 25 | # ======== Get stuff from command line ========== 26 | 27 | a = ArgumentParser() 28 | a.add_argument('-indir', required=True, help= "...") 29 | a.add_argument('-outdir', required=True, help= "...") 30 | a.add_argument('-bindir', required=True, help= "...") 31 | a.add_argument('-choose', required=True, help= "e.g. 30minutes / 5examples (=> 5 of each monophone)") 32 | 33 | opts = a.parse_args() 34 | BIN=opts.bindir 35 | 36 | ## parse 'choose': 37 | if not re.search('(\d+)(minutes|examples)', opts.choose): 38 | sys.exit('bad value for "choose" option') 39 | 40 | s = re.search('(\d+)(min|examples)', opts.choose) 41 | #(quantity, kind) = s.groups 42 | print s.groups() # (quantity, kind) 43 | sys.exit('www') 44 | # =============================================== 45 | 46 | if not os.path.isdir(opts.outdir + '/data/'): 47 | os.makedirs(opts.outdir + '/data/') 48 | 49 | ## find 50 | 51 | cmplist = os.path.join(opts.outdir, 'data', 'uttlist.cmp') 52 | lablist = os.path.join(opts.outdir, 'data', 'uttlist.lab') 53 | monolist = os.path.join(opts.outdir, 'data', 'modellist.mono') 54 | fulllist = os.path.join(opts.outdir, 'data', 'modellist.full') 55 | monomlf = os.path.join(opts.outdir, 'data', 'mlf.mono') 56 | fullmlf = os.path.join(opts.outdir, 'data', 'mlf.full') 57 | 58 | 59 | ## 1) Make lists of .cmp and .lab (acoustic and linguistic feature) files: 60 | lab_ext = os.listdir(opts.labdir)[0].split('.')[-1] 61 | cmp = [re.sub('\.cmp\Z', '', fname) for fname in os.listdir(opts.cmpdir) \ 62 | if fname.endswith('.cmp')] 63 | lab = [re.sub('\.'+lab_ext+'\Z', '', fname) for fname in os.listdir(opts.labdir) ] 64 | intersect = [name for name in lab if name in cmp] ## only where both are present 65 | if intersect == []: 66 | sys.exit('set_up_data.py: No matching data files found in %s and %s'%( \ 67 | opts.labdir, opts.cmpdir)) 68 | cmp = 
[os.path.join(opts.cmpdir, name + '.cmp') for name in intersect] 69 | lab = [os.path.join(opts.labdir, name + '.' + lab_ext) for name in intersect] 70 | writelist(cmp, cmplist) 71 | writelist(lab, lablist) 72 | 73 | 74 | ## 2) Make mlfs and model lists for monophones and fullcontext phones: 75 | comm=BIN+"""/HLEd -A -D -T 1 -V -l '*' -n %s -i %s -S %s %s 76 | """%(fulllist, fullmlf, lablist, nullhed(opts.outdir)) 77 | print comm 78 | os.system(comm) 79 | 80 | comm=BIN+"""/HLEd -A -D -T 1 -V -l '*' -n %s -i %s -S %s -m %s %s 81 | """%(monolist, monomlf, lablist, nullhed(opts.outdir), fullmlf) 82 | print comm 83 | os.system(comm) 84 | 85 | 86 | if __name__=="__main__": 87 | 88 | main_work() 89 | 90 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/steps/untie_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | #---------------------------------------------------------------------- 7 | 8 | INDIR=$1 9 | OUTDIR=$2 10 | BIN=$3 11 | 12 | [ $# -ne 3 ] && echo "Wrong number of arguments supplied" && exit 1 ; 13 | 14 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 15 | 16 | if [ -z $VOICE_BUILD_CONFIG ] ; then 17 | echo 'Environment variable VOICE_BUILD_CONFIG has not been set!' ; exit 1 18 | fi 19 | source $VOICE_BUILD_CONFIG 20 | #---------------------------------------------------------------------- 21 | 22 | 23 | source $UTIL/setup_directory.sh $INDIR $OUTDIR 24 | 25 | ## Use grep to get number of last stream from $STREAMS (which looks like e.g. 
"1 2-4"): 26 | LAST_STREAM=`grep -E -o "[0-9]+" <<<$STREAMS | tail -1` 27 | 28 | 29 | 30 | 31 | endstate=$[$NSTATE + 1] 32 | 33 | 34 | 35 | ## cmp 36 | 37 | echo "LT $INDIR/tree_cmp.txt" > $OUTDIR/untie_cmp.hed 38 | echo "AU $OUTDIR/data/modellist.full" >> $OUTDIR/untie_cmp.hed 39 | 40 | echo "UT {*.state[2-$endstate].stream[1-$LAST_STREAM]}" >> $OUTDIR/untie_cmp.hed 41 | echo "UT {*.state[2-$endstate] }" >> $OUTDIR/untie_cmp.hed 42 | echo "TI \"SWeight\" { *.state[2-$endstate].weights }" >> $OUTDIR/untie_cmp.hed 43 | echo "UT {*.transP}" >> $OUTDIR/untie_cmp.hed 44 | echo "TI TrP {*.transP}" >> $OUTDIR/untie_cmp.hed 45 | 46 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/cmp.mmf -s -p -i -w $OUTDIR/cmp.mmf $OUTDIR/untie_cmp.hed $INDIR/data/modellist.full 47 | # 48 | [ $? -gt 0 ] && echo "HHEd untie cmp failed" && exit 1 ; 49 | 50 | 51 | 52 | 53 | 54 | ## dur 55 | 56 | echo "LT $INDIR/tree_dur.txt" > $OUTDIR/untie_dur.hed 57 | echo "AU $OUTDIR/data/modellist.full" >> $OUTDIR/untie_dur.hed 58 | 59 | echo "UT {*.state[2].stream[1-${NSTATE}] }" >> $OUTDIR/untie_dur.hed 60 | echo "UT {*.state[2] }" >> $OUTDIR/untie_dur.hed 61 | echo "UT {*.transP}" >> $OUTDIR/untie_dur.hed 62 | echo "TI TrP {*.transP}" >> $OUTDIR/untie_dur.hed 63 | 64 | $BIN/HHEd -A $BINMOD -C $OUTDIR/config/general.conf -D -V -T 1 -H $INDIR/dur.mmf -s -p -i -w $OUTDIR/dur.mmf $OUTDIR/untie_dur.hed $INDIR/data/modellist.full 65 | # 66 | [ $? 
-gt 0 ] && echo "HHEd untie dur failed" && exit 1 ;




-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/quick_voicebuild_01.cfg: --------------------------------------------------------------------------------


MONOPHONE_SUBSET="1examples"
SUBSETS="5minutes 1minutes 2minutes " # all"
SHORT_QUESTION_PATT="segment_is"


STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight
SHORT_STREAM_NAMES="1 2" # first digit only! No 5 for SPTK 2_3_4 5 ; do
## FIX: the original line read 'STREAM_NAMES = "mcep logF0"' -- whitespace
## around '=' is not a valid shell assignment, so sourcing this cfg tried to
## run a command called STREAM_NAMES and left the variable unset
## (make_engine_model.sh relies on $STREAM_NAMES to rename pdfs/trees):
STREAM_NAMES="mcep logF0"


NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.


BEAM=" 5000 5000 10000 "
STRICT_LABEL_PRUNE=" -e 2 "
RELAXED_LABEL_PRUNE=" -e 10 "


NRECLUSTER=5
NREEST=5


-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/quick_voicebuild_01.cfg.OLD: --------------------------------------------------------------------------------


MONOPHONE_SUBSET="1examples"
SUBSETS="5minutes 1minutes 2minutes " # all"
SHORT_QUESTION_PATT="segment_is"


STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight
SHORT_STREAM_NAMES="1 2" # first digit only!
No 5 for SPTK 2_3_4 5 ; do



NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.


BEAM=" 5000 5000 10000 "
STRICT_LABEL_PRUNE=" -e 2 "
RELAXED_LABEL_PRUNE=" -e 10 "


NRECLUSTER=5
NREEST=5


-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_alignment.cfg: --------------------------------------------------------------------------------



STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.

MIXTURE_SCHEDULE="0 0 0 2 4 "

NREEST=3

MIXUP_STREAMS=1 ## nasty thing happen if we try to increase mixtures of MSD streams

LABEL_PRUNE=" -e 2 " # empty string to turn this off " -e 2 "
-------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild.cfg: --------------------------------------------------------------------------------



STATIC_STREAM_SIZES="25 1 1 1"
MSD_STREAM_INFO="0 1 1 1"
STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment
VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01"


STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight
SHORT_STREAM_NAMES="1 2" # first digit only! No 5 for SPTK 2_3_4 5 ; do
## FIX: the original line read 'STREAM_NAMES ="mcep logF0"' -- the space
## before '=' is not a valid shell assignment, so sourcing this cfg errored
## and left the variable unset (make_engine_model.sh relies on $STREAM_NAMES
## to rename the converted pdfs/trees):
STREAM_NAMES="mcep logF0"


NSTATE=5

BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no.
18 | 19 | 20 | BEAM=" 5000 5000 10000 " 21 | STRICT_LABEL_PRUNE=" -e 2 " 22 | RELAXED_LABEL_PRUNE=" -e 20 " 23 | 24 | 25 | NRECLUSTER=2 26 | NREEST=5 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild.cfg.OLD: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | STATIC_STREAM_SIZES="25 1 1 1" 5 | MSD_STREAM_INFO="0 1 1 1" 6 | STREAM_WEIGHTS="1.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment 7 | VFLOORSCALESTR="Vector 4 0.01 0.01 0.01 0.01" 8 | 9 | 10 | STREAMS="1 2-4" ## <--sptk, ; "1 2-4 4" <- straight 11 | SHORT_STREAM_NAMES="1 2" # first digit only! No 5 for SPTK 2_3_4 5 ; do 12 | 13 | 14 | 15 | NSTATE=5 16 | 17 | BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no. 18 | 19 | 20 | BEAM=" 5000 5000 10000 " 21 | STRICT_LABEL_PRUNE=" -e 2 " 22 | RELAXED_LABEL_PRUNE=" -e 20 " 23 | 24 | 25 | NRECLUSTER=2 26 | NREEST=5 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/config_template/standard_voicebuild_STRAIGHT.cfg: -------------------------------------------------------------------------------- 1 | ### Alternative configuration file for standard_voicebuild subrecipe. The only differences 2 | ### are to handle the extra aperiodicity stream, and to change 25 -> 60 for spectral stream 3 | ### size. 4 | ### 5 | ### This config file is intended for use from commandline external to Ossian. 
6 | 7 | 8 | 9 | STATIC_STREAM_SIZES="60 1 1 1 25" 10 | MSD_STREAM_INFO="0 1 1 1 0" 11 | STREAM_WEIGHTS="1.0 0.0 0.0 0.0 0.0" ## best to keep F0 streams turned off for initial alignment 12 | VFLOORSCALESTR="Vector 5 0.01 0.01 0.01 0.01 0.01" 13 | 14 | 15 | STREAMS="1 2-4 5" 16 | SHORT_STREAM_NAMES="1 2 5" 17 | 18 | 19 | 20 | NSTATE=5 21 | 22 | BINMOD=" " ## should we write models in binary format? " -B " = yes, " " = no. 23 | 24 | 25 | BEAM=" 5000 5000 10000 " 26 | STRICT_LABEL_PRUNE=" -e 2 " 27 | RELAXED_LABEL_PRUNE=" -e 20 " 28 | 29 | 30 | NRECLUSTER=2 31 | NREEST=5 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/subrecipes/script/extend_standard_alignment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | #---------------------------------------------------------------------- 7 | 8 | CMPDIR=$1 9 | LABDIR=$2 10 | BIN=$3 11 | OUT=$4 12 | CONFIG=$5 13 | EXISTING_MODEL=$6 14 | 15 | [ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ; 16 | 17 | ## location of directory 2 above that the script is in: 18 | TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )" 19 | 20 | #---------------------------------------------------------------------- 21 | 22 | export VOICE_BUILD_CONFIG=$CONFIG 23 | 24 | source $VOICE_BUILD_CONFIG 25 | 26 | 27 | STEPS=$TOPDIR/steps/ 28 | 29 | function check_step { 30 | ## use global $? and $STEPNUM 31 | if [ $? -gt 0 ] ; then 32 | echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" 
; echo ; exit 1 ; 33 | fi 34 | } 35 | 36 | 37 | STEPNUM=1 38 | 39 | start_time=$(date +"%s") 40 | 41 | ## ------ preparation ------ 42 | 43 | 44 | 45 | python $STEPS/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN -subset $DATA_SUBSET 46 | check_step ; 47 | 48 | $STEPS/make_alignment_lexicon.sh $OUT/$STEPNUM $BIN 49 | check_step 50 | 51 | #$STEPS/make_alignment_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN 52 | #check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 53 | 54 | cp $EXISTING_MODEL/final_model/cmp.mmf $OUT/$STEPNUM 55 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 56 | 57 | 58 | ## TODO -- check all nec, models exist 59 | 60 | ## ------ training ----- 61 | 62 | for NMIX in $MIXTURE_SCHEDULE ; do 63 | echo "$NMIX ==== " 64 | if [ ! $NMIX == 0 ] ; then 65 | $STEPS/increase_mixture_components.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $NMIX $BIN 66 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 67 | fi 68 | ## --- reestimation --- 69 | for i in `seq $NREEST` ; do 70 | $STEPS/reestimate_alignment_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 71 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 72 | done 73 | ## --- realignment --- 74 | $STEPS/realign_to_labels.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 75 | check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1] 76 | done 77 | 78 | rm -rf $OUT/final_model/ 79 | cp -r $OUT/$PREVIOUS/ $OUT/final_model/ 80 | 81 | end_time=$(date +"%s") 82 | time_diff=$(($end_time-$start_time)) 83 | 84 | echo "Aligner training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run." 
## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/extend_standard_alignment_external_lexicon.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Extend an existing alignment model using an externally supplied lexicon:
## set up data, reuse the acoustic models from EXISTING_MODEL, expand word
## labels to monophones with the external lexicon via HLEd, then run the
## mixture-up / reestimation / realignment schedule given by the config.

#----------------------------------------------------------------------

CMPDIR=$1           ## acoustic feature (cmp) directory
LABDIR=$2           ## label directory
BIN=$3              ## directory containing HTK binaries (HLEd etc.)
OUT=$4              ## output directory; numbered step subdirectories made here
CONFIG=$5           ## voice build config, sourced below (MIXTURE_SCHEDULE, NREEST, ...)
EXISTING_MODEL=$6   ## previously trained model dir (contains final_model/)
LEXICON=$7          ## external lexicon file

[ $# -ne 7 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")

## ------ preparation ------

python $STEPS/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN ## -subset $DATA_SUBSET
check_step ;

#$STEPS/make_alignment_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
#check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

## Reuse previously trained models instead of flat-starting monophones:
mkdir -p $OUT/$STEPNUM/data/
cp $EXISTING_MODEL/final_model/cmp.mmf $OUT/$STEPNUM
cp $EXISTING_MODEL/final_model/data/modellist.mono $OUT/$STEPNUM/data/modellist.mono


#$STEPS/make_alignment_lexicon.sh $OUT/$STEPNUM $BIN
#check_step

## copy existing lexicon -- must contain _PUNC_ etc.
cp $LEXICON $OUT/${STEPNUM}/data/lexicon.txt

## juncture/punctuation symbols may be realised as silence or skipped entirely:
echo '_SPACE_ skip' >> $OUT/${STEPNUM}/data/lexicon.txt
echo '_SPACE_ sil' >> $OUT/${STEPNUM}/data/lexicon.txt
echo '_PUNC_ sil' >> $OUT/${STEPNUM}/data/lexicon.txt
echo '_PUNC_ skip' >> $OUT/${STEPNUM}/data/lexicon.txt
echo 'sil sil' >> $OUT/${STEPNUM}/data/lexicon.txt

# initial expansion of labels using this lexicon
cp $OUT/${STEPNUM}/data/mlf.full $OUT/${STEPNUM}/data/mlf.words

echo "EX" > $OUT/${STEPNUM}/expand_labels.hed

## HLEd EX command: expand each word in the word-level MLF to phones via
## the lexicon, writing the monophone MLF to mlf.mono:
$BIN/HLEd -I $OUT/${STEPNUM}/data/mlf.words -i $OUT/${STEPNUM}/data/mlf.mono -l '*' -d $OUT/${STEPNUM}/data/lexicon.txt $OUT/${STEPNUM}/expand_labels.hed $OUT/${STEPNUM}/data/mlf.words

check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

## TODO -- check all nec. models exist

## ------ training -----

for NMIX in $MIXTURE_SCHEDULE ; do
    echo "$NMIX ==== "
    ## A schedule entry of 0 means: no mixing-up this round.  (Quoted
    ## comparison: the previous unquoted '[ ! $NMIX == 0 ]' form breaks
    ## when NMIX is empty.)
    if [ "$NMIX" != 0 ] ; then
        $STEPS/increase_mixture_components.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $NMIX $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi
    ## --- reestimation ---
    for i in `seq $NREEST` ; do
        $STEPS/reestimate_alignment_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done
    ## --- realignment ---
    $STEPS/realign_to_labels.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
done

rm -rf $OUT/final_model/
cp -r $OUT/$PREVIOUS/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Aligner training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."
## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/quick_voicebuild_01.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

# speed up voice building by using subset of questions until final clustering.

#----------------------------------------------------------------------

CMPDIR=$1      ## acoustic feature (cmp) directory
LABDIR=$2      ## label directory
QUESTIONS=$3   ## full question set for tree building
BIN=$4         ## HTK binaries
OUT=$5         ## output directory
CONFIG=$6      ## voice build config (NRECLUSTER, NREEST, ...)

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

## defaults for configured values:
SHORT_QUESTION_PATT="segment_is"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/
UTIL=$TOPDIR/util/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


start_time=$(date +"%s")

## prepare SHORT_QUESTIONS: a cheap question subset used for all but the
## final clustering round:
mkdir -p $OUT/
SHORT_QUESTIONS=$OUT/short_questions.hed
grep $SHORT_QUESTION_PATT $QUESTIONS > $SHORT_QUESTIONS


STEPNUM=1

python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

FIRST_ITER=1
for j in `seq $NRECLUSTER` ; do

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ; ### this gave fail even when ran ok...
        PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    ## use the cheap subset except on the final clustering round:
    QUESTIONS_TO_USE=$SHORT_QUESTIONS
    if [ $j -eq $NRECLUSTER ] ; then
        QUESTIONS_TO_USE=$QUESTIONS
    fi

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done

    ## realign only after the first clustering round:
    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi
    FIRST_ITER=0
done

$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."

## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/quick_voicebuild_01.sh.OLD
## Historical copy kept verbatim (identical to quick_voicebuild_01.sh as it
## stood before this revision; retains the deprecated $[ ] arithmetic).
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

# speed up voice building by using subset of questions until final clustering.

#----------------------------------------------------------------------

CMPDIR=$1
LABDIR=$2
QUESTIONS=$3
BIN=$4
OUT=$5
CONFIG=$6

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

## defaults for configured values:
SHORT_QUESTION_PATT="segment_is"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/
UTIL=$TOPDIR/util/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


start_time=$(date +"%s")

## prepare SHORT_QUESTIONS:
mkdir -p $OUT/
SHORT_QUESTIONS=$OUT/short_questions.hed
grep $SHORT_QUESTION_PATT $QUESTIONS > $SHORT_QUESTIONS


STEPNUM=1

python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

FIRST_ITER=1
for j in `seq $NRECLUSTER` ; do

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ; ### this gave fail even when ran ok...
        PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    QUESTIONS_TO_USE=$SHORT_QUESTIONS
    if [ $j -eq $NRECLUSTER ] ; then
        QUESTIONS_TO_USE=$QUESTIONS
    fi

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    done

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi
    FIRST_ITER=0
done

$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."

## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/quick_voicebuild_02.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Like quick_voicebuild_01.sh, but additionally grows the training set
## through a schedule of data subsets, starting monophones on a tiny subset.

#----------------------------------------------------------------------

CMPDIR=$1
LABDIR=$2
QUESTIONS=$3
BIN=$4
OUT=$5
CONFIG=$6

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

## defaults for configured values:
MONOPHONE_SUBSET="1examples"
SUBSETS="5minutes 10minutes 20minutes all"
SHORT_QUESTION_PATT="segment_is"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/
UTIL=$TOPDIR/util/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


start_time=$(date +"%s")

## prepare SHORT_QUESTIONS:
mkdir -p $OUT/
SHORT_QUESTIONS=$OUT/short_questions.hed
grep $SHORT_QUESTION_PATT $QUESTIONS > $SHORT_QUESTIONS


STEPNUM=1

## Make small data set for training monophone models.
## ('python' prefix added for consistency with the sibling scripts, which
## do not rely on the .py file's executable bit; check_step added --
## previously a failure here went undetected.)
python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/data_${MONOPHONE_SUBSET} \
    -bindir $BIN -subset $MONOPHONE_SUBSET
check_step ;

$STEPS/make_monophone.sh $OUT/data_${MONOPHONE_SUBSET} $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))


FIRST_ITER=1
for SUBSET in $SUBSETS ; do

    ## Move up to more data -- make it in $OUT/$STEPNUM where future models are trained:
    python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/$STEPNUM \
        -bindir $BIN -subset $SUBSET
    check_step ;

    if [ $FIRST_ITER -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ; ### this gave fail even when ran ok...
        PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    ## use the full question set only once all data is in play:
    QUESTIONS_TO_USE=$SHORT_QUESTIONS
    if [ "$SUBSET" == "all" ] ; then
        QUESTIONS_TO_USE=$QUESTIONS
    fi

    echo "$STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN"
    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS_TO_USE $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done

#    if [ $FIRST_ITER -eq 1 ] ; then
#        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
#        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
#    fi
    FIRST_ITER=0
done

$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."
## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/standard_alignment.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Train an alignment model from scratch: set up data, build an alignment
## lexicon and flat-start monophones, then run the mixture-up /
## reestimation / realignment schedule given by the config.

#----------------------------------------------------------------------

CMPDIR=$1   ## acoustic feature (cmp) directory
LABDIR=$2   ## label directory
BIN=$3      ## HTK binaries
OUT=$4      ## output directory
CONFIG=$5   ## voice build config (MIXTURE_SCHEDULE, NREEST, ...)

[ $# -ne 5 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------

export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/

function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")

## ------ preparation ------

python $STEPS/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ;
$STEPS/make_alignment_lexicon.sh $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

$STEPS/make_alignment_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))


## ------ training -----

for NMIX in $MIXTURE_SCHEDULE ; do
    echo "$NMIX ==== "
    ## A schedule entry of 0 means: no mixing-up this round.  (Quoted
    ## comparison: the previous unquoted '[ ! $NMIX == 0 ]' form breaks
    ## when NMIX is empty.)
    if [ "$NMIX" != 0 ] ; then
        $STEPS/increase_mixture_components.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $NMIX $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi
    ## --- reestimation ---
    for i in `seq $NREEST` ; do
        $STEPS/reestimate_alignment_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done
    ## --- realignment ---
    $STEPS/realign_to_labels.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
done

rm -rf $OUT/final_model/
cp -r $OUT/$PREVIOUS/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Aligner training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."


## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/standard_voicebuild.sh
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk
##
## Standard voice build: flat-start monophones, then NRECLUSTER rounds of
## full-context cloning/untying, reestimation, MDL tree clustering, and a
## final engine model conversion.

#----------------------------------------------------------------------

CMPDIR=$1      ## acoustic feature (cmp) directory
LABDIR=$2      ## label directory
QUESTIONS=$3   ## question set for tree building
BIN=$4         ## HTK binaries
OUT=$5         ## output directory
CONFIG=$6      ## voice build config (NRECLUSTER, NREEST, ...)

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------


##!!!!! temp: !!!!!

#SPTK=/Users/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin
#OLDHTS=/Users/owatts/simple4all/hts_on_speed/code/hts2_2/bin ## ~/repos/simple4all/CSTRVoiceClone/trunk/bin/


export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/


function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")

## ('python' prefix added for consistency with standard_alignment.sh, which
## does not rely on the .py file's executable bit.)
python $TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

for j in `seq $NRECLUSTER` ; do

    if [ $j -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ;
        PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    done

    ## realign only after the first clustering round:
    if [ $j -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$(($STEPNUM + 1))
    fi

done


$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."

## ==========================================================================
## File: scripts/acoustic_model_training/subrecipes/script/standard_voicebuild.sh.OLD
## Historical copy kept verbatim (identical to standard_voicebuild.sh as it
## stood before this revision; retains the deprecated $[ ] arithmetic).
## ==========================================================================
#!/bin/bash
##
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

#----------------------------------------------------------------------

CMPDIR=$1
LABDIR=$2
QUESTIONS=$3
BIN=$4
OUT=$5
CONFIG=$6

[ $# -ne 6 ] && echo "Wrong number of arguments supplied" && exit 1 ;

## location of directory 2 above that the script is in:
TOPDIR="$( cd "$( dirname "$0" )"/../../ && pwd )"

#----------------------------------------------------------------------


##!!!!! temp: !!!!!

#SPTK=/Users/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin
#OLDHTS=/Users/owatts/simple4all/hts_on_speed/code/hts2_2/bin ## ~/repos/simple4all/CSTRVoiceClone/trunk/bin/


export VOICE_BUILD_CONFIG=$CONFIG

source $VOICE_BUILD_CONFIG


STEPS=$TOPDIR/steps/


function check_step {
    ## use global $? and $STEPNUM
    if [ $? -gt 0 ] ; then
        echo ; echo "Step ${STEPNUM} in script $0 failed, aborted!" ; echo ; exit 1 ;
    fi
}


STEPNUM=1

start_time=$(date +"%s")


$TOPDIR/steps/set_up_data.py -labdir $LABDIR -cmpdir $CMPDIR -outdir $OUT/${STEPNUM} -bindir $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

$STEPS/make_monophone.sh $OUT/$PREVIOUS $OUT/$STEPNUM $BIN
check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

for j in `seq $NRECLUSTER` ; do

    if [ $j -eq 1 ] ; then
        $STEPS/clone_monophone_to_fullcontext.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    else
        $STEPS/untie_models.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        #check_step ;
        PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi

    $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 0
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    $STEPS/build_MDL_trees.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM 1.0 $QUESTIONS $BIN
    check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]

    for i in `seq $NREEST` ; do
        $STEPS/reestimate.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN 1
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    done

    if [ $j -eq 1 ] ; then
        $STEPS/realign.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
        check_step ; PREVIOUS=$STEPNUM ; STEPNUM=$[$STEPNUM + 1]
    fi

done


$STEPS/make_engine_model.sh $OUT/$PREVIOUS/ $OUT/$STEPNUM $BIN
check_step

rm -rf $OUT/final_model/
cp -r $OUT/$STEPNUM/ $OUT/final_model/

end_time=$(date +"%s")
time_diff=$(($end_time-$start_time))

echo "Model training took $(($time_diff / 60)) minutes and $(($time_diff % 60)) seconds to run."
## ==========================================================================
## File: scripts/acoustic_model_training/util/filter_questions.py
## ==========================================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
## Project: Simple4All - November 2013 - www.simple4all.org
## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk

"""Filter an HTK question file, keeping only questions that split the model
set non-trivially: a question is kept iff it matches between -percent % and
(100 - percent) % of the models listed in the -models file."""

import sys
import re
import os
import random

from argparse import ArgumentParser

## find location of util relative to current script:
loc = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir, 'util'))
sys.path.append(loc)


from util import *


def main_work():

    #################################################

    # ======== Get stuff from command line ==========

    a = ArgumentParser()
    a.add_argument('-infile', required=True, help="question file to filter")
    a.add_argument('-outfile', required=True, help="where to write the kept questions")
    a.add_argument('-models', required=True, help="file listing model names, one per line")
    a.add_argument('-percent', required=True, type=int,
                   help="keep questions matching between this %% and (100 - this) %% of models")

    opts = a.parse_args()

    # ===============================================

    questions = readlist(opts.infile)
    models = readlist(opts.models)

    if not models:
        sys.exit('No models found in %s' % (opts.models))

    nmod = float(len(models))

    filtered = []

    for line in questions:
        line = line.strip()
        if line != '':

            (QS, name, patt) = re.split(r'\s+', line)
            regex_patt = htk_wildcard_pattern_to_regex(patt)
            ## Count how many models this question matches.  NB: the counter
            ## must be initialised OUTSIDE the model loop -- previously it was
            ## reset on every iteration, so only the final model's result
            ## survived.
            count = 0
            for mod in models:
                if re.match(regex_patt, mod):
                    count += 1
            ## Scale to a 0-100 percentage so it is comparable with the
            ## integer -percent option.  (Previously a 0-1 fraction was
            ## compared against a percentage, discarding almost everything.)
            percent_matched = 100.0 * count / nmod
            if percent_matched < opts.percent or percent_matched > (100.0 - opts.percent):
                pass  ## too lopsided a split -- discard the question
            else:
                filtered.append(line)

    writelist(filtered, opts.outfile)


if __name__ == "__main__":

    main_work()
-------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/make_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | DIR=$1 10 | 11 | [ $# -ne 1 ] && echo "Wrong number of arguments supplied" && exit 1 ; 12 | 13 | source $VOICE_BUILD_CONFIG 14 | 15 | #---------------------------------------------------------------------- 16 | 17 | 18 | 19 | 20 | 21 | mkdir -p $DIR 22 | 23 | cat > $DIR/general.conf< $DIR/engine_convert.conf< $DIR/general-unfloor.conf< $DIR/clust.conf< $DIR/clust-dur.conf< Junichi's script -> Reima made stream independent -> Oliver 12 | ## put in separate script and moved from perl to python 13 | 14 | # sub routine for generating proto-type model (Copy from HTS-2.1) 15 | 16 | # Made stream-independent 23/4/2012 rk 17 | 18 | 19 | proto_out = sys.argv[1] 20 | config_in = sys.argv[2] 21 | 22 | 23 | config = ConfigObj(config_in) 24 | 25 | static_stream_sizes = config.get('STATIC_STREAM_SIZES', default='25 1 1 1') ### defaults for SPTK 26 | MSD_stream_info = config.get('MSD_STREAM_INFO', default='0 1 1 1') 27 | stream_weights = config.get('STREAM_WEIGHTS', default='1.0 1.0 1.0 0.9') 28 | NSTATE = int(config.get('NSTATE', default=5)) 29 | 30 | 31 | ## string -> numeric list conversion: 32 | def int_list(string): 33 | seq = re.split('\s+', string.strip()) 34 | return [int(item) for item in seq] 35 | 36 | static_stream_sizes = int_list(static_stream_sizes) 37 | MSD_stream_info = int_list(MSD_stream_info) 38 | 39 | 40 | 41 | n_weights = len(re.split('\s+', stream_weights.strip())) 42 | num_stream = len(static_stream_sizes) 43 | if (len(MSD_stream_info) != num_stream) or (n_weights!= num_stream): 44 | sys.exit('stream info not 
same: %s %s %s'%(static_stream_sizes, MSD_stream_info, stream_weights)) 45 | 46 | 47 | stream_indexes = range(1, num_stream+1) 48 | 49 | 50 | 51 | total_stream_sizes = [] 52 | for (MSD,size) in zip(MSD_stream_info, static_stream_sizes): 53 | if MSD: 54 | total_stream_sizes.append(size) 55 | else: 56 | total_stream_sizes.append(size * 3) 57 | 58 | vsize = sum(total_stream_sizes) 59 | 60 | 61 | 62 | 63 | d = '' 64 | 65 | ## ----- HEADER ----- 66 | d += '~o %s '%(vsize) 67 | 68 | d += ' %s '%(num_stream) 69 | d += ' '.join([str(val) for val in MSD_stream_info]) 70 | d += '\n' 71 | 72 | d += ' %s '%(num_stream) 73 | d += ' '.join([str(val) for val in total_stream_sizes]) 74 | d += '\n' 75 | 76 | ## ----- output HMMs ------ 77 | d += "\n" 78 | d += " %d\n"%(NSTATE+2) 79 | 80 | # output HMM states 81 | for i in range(2, NSTATE+2): 82 | 83 | # output state information 84 | d += " %s\n"%(i) 85 | 86 | # output stream weight 87 | d += ' %d '%(num_stream) 88 | 89 | d += stream_weights 90 | d += '\n' 91 | 92 | 93 | for (i, MSD, size) in zip(stream_indexes, MSD_stream_info, total_stream_sizes): 94 | d += " %d\n"%(i) 95 | 96 | if not MSD: 97 | d += " %d\n "%(size) 98 | for j in range(size): 99 | d += "0.0 " 100 | d += '\n' 101 | 102 | d += " %d\n "%(size) 103 | for j in range(size): 104 | d += "1.0 " 105 | d += '\n' 106 | 107 | else: 108 | 109 | d += " 2\n" 110 | 111 | # output 1st space (non 0-dimensional space) 112 | d += " 1 0.5000\n" 113 | d += " 1 0.0 \n" 114 | d += " 1 1.0 \n" 115 | 116 | # output 2nd space (0-dimensional space) 117 | d += " 2 0.5000\n" 118 | d += " 0 \n" 119 | d += " 0 \n" 120 | 121 | # output state transition matrix 122 | d += ' %d\n'%(NSTATE+2) 123 | d += " " 124 | d += "0.000e+0 1.000e+0 " + " ".join(["0.000e+0"] * (NSTATE)) 125 | 126 | d += "\n " 127 | 128 | for i in range(2, NSTATE+2): 129 | for j in range(1, NSTATE+3): 130 | if i==j: 131 | d += "6.000e-1 " 132 | elif i == j-1: 133 | d += "4.000e-1 " 134 | else: 135 | d += "0.000e+0 " 136 | d += "\n 
" 137 | 138 | for j in range(NSTATE+2): 139 | d += "0.000e+0 " 140 | 141 | d += "\n\n" 142 | 143 | f = open(proto_out, 'w') 144 | for line in d: 145 | f.write(line) 146 | f.close() 147 | 148 | 149 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/make_proto_skip_hsmm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | import sys 8 | import re 9 | from configobj import ConfigObj 10 | 11 | ## History: public HTS -> Junichi's script -> Reima made stream independent -> Oliver 12 | ## put in separate script and moved from perl to python 13 | 14 | # sub routine for generating proto-type model (Copy from HTS-2.1) 15 | 16 | # Made stream-independent 23/4/2012 rk 17 | 18 | 19 | proto_out = sys.argv[1] 20 | config_in = sys.argv[2] 21 | 22 | 23 | config = ConfigObj(config_in) 24 | 25 | static_stream_sizes = config.get('STATIC_STREAM_SIZES', default='25 1 1 1') ### defaults for SPTK 26 | MSD_stream_info = config.get('MSD_STREAM_INFO', default='0 1 1 1') 27 | stream_weights = config.get('STREAM_WEIGHTS', default='1.0 1.0 1.0 0.9') 28 | 29 | #static_stream_sizes = config.get('static_stream_sizes', default='25 1 1 1') ### defaults for SPTK 30 | #MSD_stream_info = config.get('MSD_stream_info', default='0 1 1 1') 31 | #stream_weights = config.get('stream_weights', default='1.0 1.0 1.0 0.9') 32 | 33 | 34 | NSTATE = 1 ## fixed for skip model 35 | 36 | 37 | ## string -> numeric list conversion: 38 | def int_list(string): 39 | seq = re.split('\s+', string.strip()) 40 | return [int(item) for item in seq] 41 | 42 | static_stream_sizes = int_list(static_stream_sizes) 43 | MSD_stream_info = int_list(MSD_stream_info) 44 | 45 | 46 | 47 | n_weights = len(re.split('\s+', stream_weights.strip())) 48 | num_stream = 
len(static_stream_sizes) 49 | if (len(MSD_stream_info) != num_stream) or (n_weights!= num_stream): 50 | sys.exit('stream info not same: %s %s %s'%(static_stream_sizes, MSD_stream_info, stream_weights)) 51 | 52 | 53 | stream_indexes = range(1, num_stream+1) 54 | 55 | 56 | 57 | total_stream_sizes = [] 58 | for (MSD,size) in zip(MSD_stream_info, static_stream_sizes): 59 | if MSD: 60 | total_stream_sizes.append(size) 61 | else: 62 | total_stream_sizes.append(size * 3) 63 | 64 | vsize = sum(total_stream_sizes) 65 | 66 | 67 | 68 | 69 | d = '' 70 | 71 | ## ----- HEADER ----- 72 | d += '~o %s '%(vsize) 73 | 74 | d += ' %s '%(num_stream) 75 | d += ' '.join([str(val) for val in MSD_stream_info]) 76 | d += '\n' 77 | 78 | d += ' %s '%(num_stream) 79 | d += ' '.join([str(val) for val in total_stream_sizes]) 80 | d += '\n' 81 | 82 | ## ----- output HMMs ------ 83 | d += "\n" 84 | d += " %d\n"%(NSTATE+2) 85 | 86 | # output HMM states 87 | for i in range(2, NSTATE+2): 88 | 89 | # output state information 90 | d += " %s\n"%(i) 91 | 92 | # output stream weight 93 | d += ' %d '%(num_stream) 94 | 95 | d += stream_weights 96 | d += '\n' 97 | 98 | 99 | for (i, MSD, size) in zip(stream_indexes, MSD_stream_info, total_stream_sizes): 100 | d += " %d\n"%(i) 101 | 102 | if not MSD: 103 | d += " %d\n "%(size) 104 | for j in range(size): 105 | d += "0.0 " 106 | d += '\n' 107 | 108 | d += " %d\n "%(size) 109 | for j in range(size): 110 | d += "1.0 " 111 | d += '\n' 112 | 113 | else: 114 | 115 | d += " 2\n" 116 | 117 | # output 1st space (non 0-dimensional space) 118 | d += " 1 0.5000\n" 119 | d += " 1 0.0 \n" 120 | d += " 1 1.0 \n" 121 | 122 | # output 2nd space (0-dimensional space) 123 | d += " 2 0.5000\n" 124 | d += " 0 \n" 125 | d += " 0 \n" 126 | 127 | 128 | # output state transition matrix 129 | d += ' %d\n'%(NSTATE+2) 130 | d += " 0.0 0.0 1.0 \n" 131 | d += " 0.0 0.5 0.5 \n" 132 | d += " 0.0 0.0 0.0 \n" 133 | d += "\n\n" 134 | 135 | f = open(proto_out, 'w') 136 | for line in d: 137 | 
f.write(line) 138 | f.close() 139 | 140 | 141 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/separate_trees.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import re 8 | import os 9 | 10 | from argparse import ArgumentParser 11 | 12 | from util import * 13 | 14 | def main_work(): 15 | 16 | ################################################# 17 | 18 | # ======== Get stuff from command line ========== 19 | 20 | a = ArgumentParser() 21 | a.add_argument('-treefile', required=True, help= "...") 22 | 23 | opts = a.parse_args() 24 | # =============================================== 25 | 26 | f = open(opts.treefile, 'r') 27 | data = f.read() 28 | f.close() 29 | trees = re.split('\n\s*\n', data) 30 | trees = [t for t in trees if t != ''] 31 | 32 | ## first block is questions: 33 | questions = trees[0] + '\n\n' 34 | assert questions[:2] == 'QS' 35 | 36 | tree_dict = {} 37 | for tree in trees[1:]: 38 | stream = re.search('(?<=stream\[)[^\]]+(?=\])', tree).group() 39 | if ',' in stream: 40 | stream = stream.split(',')[0] 41 | if stream not in tree_dict: 42 | tree_dict[stream] = [] 43 | tree_dict[stream].append(tree + '\n\n') 44 | 45 | for (stream, trees) in tree_dict.items(): 46 | writelist([questions] + trees, opts.treefile+'_'+stream) 47 | 48 | if __name__=="__main__": 49 | 50 | main_work() 51 | 52 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/setup_directory.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | 7 | 
#---------------------------------------------------------------------- 8 | 9 | INDIR=$1 10 | OUTDIR=$2 11 | 12 | [ $# -ne 2 ] && echo "Wrong number of arguments supplied" && exit 1 ; 13 | 14 | UTIL="$( cd "$( dirname "$0" )/../util" && pwd )" ## location of util script 15 | 16 | #---------------------------------------------------------------------- 17 | 18 | 19 | ### Don't overwrite existing data! Allows bigger data etc to be copied in before this is called. 20 | mkdir -p $OUTDIR/data 21 | for datafile in uttlist.cmp uttlist.lab modellist.mono modellist.full mlf.mono mlf.full ; do 22 | if [ ! -e $OUTDIR/data/$datafile ] ; then 23 | cp $INDIR/data/$datafile $OUTDIR/data/$datafile 24 | fi 25 | done 26 | 27 | for optional_file in mlf.words lexicon.txt ; do 28 | if [ -e $INDIR/data/$optional_file ] ; then 29 | cp $INDIR/data/$optional_file $OUTDIR/data/$optional_file ; 30 | fi 31 | done 32 | 33 | $UTIL/make_config.sh $OUTDIR/config/ 34 | -------------------------------------------------------------------------------- /scripts/acoustic_model_training/util/update_train_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - November 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import re 8 | import os 9 | 10 | from argparse import ArgumentParser 11 | 12 | from util import * 13 | 14 | def main_work(): 15 | 16 | ################################################# 17 | 18 | # ======== Get stuff from command line ========== 19 | 20 | a = ArgumentParser() 21 | a.add_argument('-mlf', required=True, help= "...") 22 | a.add_argument('-trainlist', required=True, help= "...") 23 | 24 | opts = a.parse_args() 25 | # =============================================== 26 | 27 | mlf = readlist(opts.mlf) 28 | trainlist = readlist(opts.trainlist) 29 | 30 | mlf_files = [line for line in mlf if 
def htk_to_sec(htk_time):
    """
    Convert a duration in HTK units (1 unit = 100 ns) to seconds.

    Accepts a number or a numeric string (HTK label files store times as
    text) and always returns a float.
    """
    ## float() uniformly handles int, float and string input; the previous
    ## check (type(htk_time) == type("string")) missed unicode strings, which
    ## then raised a TypeError on division.
    return float(htk_time) / 10000000.0
'|'.join(chunks) + ')' 45 | reg = re.compile(new_chunks) 46 | return reg -------------------------------------------------------------------------------- /scripts/default/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/default/__init__.py -------------------------------------------------------------------------------- /scripts/default/const.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 5 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 6 | 7 | PI = 3 8 | 9 | 10 | #directory constants 11 | CONFIG = "config" 12 | PROCESSOR = "processors" 13 | MODEL = "models" 14 | TRAIN = "train" 15 | VOICE = "voice" 16 | CORPUS = "corpus" 17 | RULES = "rules" 18 | CONTEXT_FEATS = "context_feats" 19 | SCRIPT = "scripts" 20 | SPEAKER ="speakers" 21 | COMPONENT="components" 22 | LANG = "lang" 23 | BIN = "bin" 24 | HTS = "htk" 25 | EST = "speech_tools" 26 | SPTK = "bin" 27 | 28 | ACOUSTIC_MODELLING_SCRIPT = "acoustic_modelling_script" 29 | ACOUSTIC_MODELLING_CONFIG = "acoustic_modelling_config" 30 | 31 | 32 | #resource types 33 | FILE = "file" 34 | DIRECTORY = "dir" 35 | STRING = "string" 36 | FLAG = "flag" 37 | 38 | 39 | # file manipulation 40 | 41 | CREATE = "create" 42 | REPLACE = "replace" 43 | APPEND = "append" 44 | DELETE = "delete" 45 | BACKUP = "backup" 46 | 47 | #possible units in utterance hierarchy from bottom 48 | 49 | STATE = "state" 50 | PHONE = "segment" 51 | LETTER = "letter" 52 | SYLLABLE = "syl" 53 | MORPH = "morph" 54 | SUBWORD = "subword" 55 | WORD = "word" 56 | TOKEN = "token" 57 | XP = "xp" 58 | PHRASE = "phrase" 59 | UTTERANCE = "utt" 60 | PARAGRAPH = "paragraph" 61 | CHAPTER = "chapter" 62 | TEXT = "text" 
#!/bin/bash
## Download a 1-hour Tundra corpus subset for one language and unpack it
## under corpus/<lang>/speakers/tundra_v1_1hour/ with the txt/ and wav/
## subdirectory layout Ossian expects.

LANG_CODE=$1

USAGE="Please supply a single language code, from the set: bg, de, en, fi, hu, it, pl "

if [ $# -ne 1 ] ; then
    echo "$USAGE" ;
    exit 1 ;
fi

URLSTEM="http://tundra.simple4all.org/data"
DIR="$( cd "$( dirname "$0" )" && pwd )"    ## location of this script
CORPUS_OUT="$DIR/../corpus/$LANG_CODE/speakers/tundra_v1_1hour/"

echo "$CORPUS_OUT"
echo "$LANG_CODE"

## Map the language code to its archive name:
case $LANG_CODE in
    bg )
        DATA_ARCHIVE=BG_zhetvariat_1hr.zip
        ;;
    de )
        DATA_ARCHIVE=DE_doriangray.zip
        ;;
    en )
        DATA_ARCHIVE=EN_livingalone_1hr.zip
        ;;
    fi )
        DATA_ARCHIVE=FI_rautatie_1hr.zip
        ;;
    hu )
        DATA_ARCHIVE=HU_egri_1hr.zip
        ;;
    it )
        DATA_ARCHIVE=IT_galatea_1hr.zip
        ;;
    pl )
        DATA_ARCHIVE=PL_siedem_1hr.zip
        ;;
    *)
        echo "$USAGE" ; exit 1 ;;
esac

mkdir -p "$CORPUS_OUT"

echo "Download Tundra 1 hour subset for language $LANG_CODE..."
HERE=`pwd`
cd "$CORPUS_OUT" || exit 1

## Fail early instead of mangling the corpus directory on a bad download:
wget "$URLSTEM/$DATA_ARCHIVE" || { echo "Download of $DATA_ARCHIVE failed" ; exit 1 ; }
unzip "$DATA_ARCHIVE" || { echo "Extraction of $DATA_ARCHIVE failed" ; exit 1 ; }

## Flatten the archive layout to txt/ + wav/:
mv */train/text/ ./txt/
mv */train/wav/ ./wav/

## Return to the original working directory ($HERE was previously captured
## but never used, leaving the caller's cwd changed):
cd "$HERE"
Change the following line to the top level of your copy of the Ossian code: 7 | OSSIAN: __INSERT_PATH_TO_OSSIAN_HERE__ 8 | LANGUAGE: __INSERT_LANGUAGE_HERE__ 9 | SPEAKER: __INSERT_SPEAKER_HERE__ 10 | RECIPE: __INSERT_RECIPE_HERE__ 11 | 12 | 13 | ## This line should point to the language/data/recipe combination you are working on: 14 | TOP: %(OSSIAN)s/train/%(LANGUAGE)s/speakers/%(SPEAKER)s/%(RECIPE)s/ 15 | 16 | ## spot for putting things in training -- not the final stored model: 17 | WORKDIR: %(TOP)s/dnn_training_ACOUST/ 18 | DATADIR: %(TOP)s/cmp/ 19 | 20 | [Paths] 21 | 22 | work: %(WORKDIR)s/ 23 | data: %(DATADIR)s/ 24 | 25 | plot: %(WORKDIR)s/plots 26 | 27 | file_id_list: __INSERT_FILELIST_HERE__ 28 | 29 | log_config_file: %(OSSIAN)s/tools/merlin/egs/slt_arctic/s1/conf/logging_config.conf 30 | log_file: %(WORKDIR)s/log/log.txt 31 | log_path: %(WORKDIR)s/log/ 32 | 33 | ## You won't need these -- just leave the placeholder paths here: 34 | sptk : /this/path/does/not/exist 35 | straight : /this/path/does/not/exist 36 | 37 | in_mgc_dir: %(DATADIR)s/ 38 | in_lf0_dir: %(DATADIR)s/ 39 | in_bap_dir: %(DATADIR)s/ 40 | 41 | [Labels] 42 | 43 | question_file_name : %(TOP)s/questions_dnn.hed.cont 44 | silence_pattern: ['*-sil+*'] 45 | label_type: state_align 46 | label_align: %(TOP)s/lab_dnn 47 | add_frame_features: True 48 | subphone_feats: full 49 | 50 | 51 | [Extensions] 52 | 53 | lab_ext: .lab_dnn 54 | mgc_ext: .mgc 55 | bap_ext: .bap 56 | lf0_ext: .lf0 57 | 58 | [Outputs] 59 | ## mgc, bap and lf0 need to be the same sizes as the static streams used when calling 60 | ## split_cmp.py previously; the corresponding variables starting d* are just the static 61 | ## value multiplied by 3: 62 | mgc : __INSERT_MGC_DIM_HERE__ 63 | dmgc : __INSERT_DELTA_MGC_DIM_HERE__ 64 | bap : __INSERT_BAP_DIM_HERE__ 65 | dbap : __INSERT_DELTA_BAP_DIM_HERE__ 66 | lf0 : __INSERT_LF0_DIM_HERE__ 67 | dlf0 : __INSERT_DELTA_LF0_DIM_HERE__ 68 | 69 | 70 | [Waveform] 71 | 72 | ## This won't be used -- 
but keep it here as a placeholder: 73 | vocoder_type : WORLD 74 | framelength : 2048 75 | 76 | [Architecture] 77 | 78 | ## Adjust the number and size of hidden layers here: 79 | 80 | hidden_layer_size : [1024, 1024, 1024, 1024, 1024, 1024] 81 | hidden_layer_type : ['TANH', 'TANH', 'TANH', 'TANH', 'TANH', 'TANH'] 82 | 83 | ## if RNN or sequential training is used, please set sequential_training to True. For 84 | ## use with Ossian, we will only train DNNs, so don't alter this. 85 | sequential_training : False 86 | 87 | ## You might want to experiment with different learning rates, batch sizes, and maximum 88 | ## number of training epochs: 89 | learning_rate : 0.002 90 | batch_size : 256 91 | training_epochs : 12 92 | ## set warmup_epoch to a number larger than training_epochs to effectively disable it 93 | warmup_epoch : 1000 94 | 95 | L1_regularization: 0.0 96 | L2_regularization: 0.0 97 | hidden_activation: tanh 98 | output_activation: linear 99 | warmup_momentum : 0.0 100 | private_l2_reg : 0.0 101 | 102 | [Streams] 103 | # which feature to be used in the output 104 | output_features : ['mgc', 'lf0', 'vuv', 'bap'] 105 | 106 | 107 | [Data] 108 | ## We need to divide the files available up into train/validation/test data. We don't need 109 | ## to do any testing, but set test_file_number to 1 to keep the tools happy. Split the remaining 110 | ## files between train and validation. Using about 5% or 10% of the data for validation is 111 | ## pretty standard. This is how you might divide up 28 files: 112 | train_file_number: __INSERT_NUMBER_OF_TRAINING_FILES_HERE__ 113 | valid_file_number: __INSERT_NUMBER_OF_VALIDATION_FILES_HERE__ 114 | test_file_number : __INSERT_NUMBER_OF_TEST_FILES_HERE__ 115 | #buffer size of each block of data to 116 | buffer_size: 100000 117 | 118 | [Utility] 119 | 120 | plot : True 121 | 122 | [Processes] 123 | ## For use with Ossian, just keep the first 4 set to True -- we will generate speech later 124 | ## within Ossian itself. 
You can run each of the 4 steps individually if you like: 125 | NORMLAB : True 126 | MAKECMP : True 127 | NORMCMP : True 128 | TRAINDNN : True 129 | DNNGEN : False 130 | GENWAV : False 131 | CALMCD : False 132 | 133 | 134 | -------------------------------------------------------------------------------- /scripts/merlin_interface/feed_forward_dnn_ossian_duration_model.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | 3 | ## The DEFAULT section just gives a few global variables -- this is designed to reduce the 4 | ## number of paths you have to change when modifying this config file. 5 | 6 | ##!!! Change the following line to the top level of your copy of the Ossian code: 7 | OSSIAN: __INSERT_PATH_TO_OSSIAN_HERE__ 8 | LANGUAGE: __INSERT_LANGUAGE_HERE__ 9 | SPEAKER: __INSERT_SPEAKER_HERE__ 10 | RECIPE: __INSERT_RECIPE_HERE__ 11 | 12 | 13 | ## This line should point to the language/data/recipe combination you are working on: 14 | TOP: %(OSSIAN)s/train/%(LANGUAGE)s/speakers/%(SPEAKER)s/%(RECIPE)s/ 15 | 16 | 17 | ## spot for putting things in training -- not the final stored model: 18 | WORKDIR: %(TOP)s/dnn_training_DUR/ 19 | DATADIR: %(TOP)s/ 20 | 21 | 22 | 23 | 24 | [Paths] 25 | 26 | work: %(WORKDIR)s/ 27 | data: %(DATADIR)s/ 28 | 29 | plot: %(WORKDIR)s/plots 30 | 31 | file_id_list: __INSERT_FILELIST_HERE__ 32 | 33 | log_config_file: %(OSSIAN)s/tools/merlin/egs/slt_arctic/s1/conf/logging_config.conf 34 | log_file: %(WORKDIR)s/log/log.txt 35 | log_path: %(WORKDIR)s/log/ 36 | 37 | ## You won't need these -- just leave the placeholder paths here: 38 | sptk : /this/path/does/not/exist 39 | straight : /this/path/does/not/exist 40 | 41 | 42 | in_dur_dir: %(DATADIR)s/dur 43 | 44 | 45 | 46 | 47 | 48 | 49 | [Labels] 50 | 51 | 52 | question_file_name : %(TOP)s/questions_dur.hed.cont 53 | silence_pattern: ['*/THIS-STRING-DOESNT-APPEAR-IN-LABELS/*'] 54 | label_type: phone_align 55 | label_align: %(TOP)s/lab_dur 56 | 
add_frame_features: False 57 | subphone_feats: none 58 | 59 | [Extensions] 60 | 61 | lab_ext: .lab_dur 62 | dur_ext: .dur 63 | 64 | [Outputs] 65 | ## This says that we are predicting 5 state durations per example (letter/phone) 66 | dur : 5 67 | 68 | 69 | [Waveform] 70 | 71 | ## This won't be used -- but keep it here as a placeholder: 72 | vocoder_type : WORLD 73 | framelength : 2048 74 | 75 | [Architecture] 76 | 77 | ## Adjust the number and size of hidden layers here: 78 | hidden_layer_size : [512, 512, 512] 79 | hidden_layer_type : ['TANH', 'TANH', 'TANH'] 80 | 81 | 82 | ## if RNN or sequential training is used, please set sequential_training to True. For 83 | ## use with Ossian, we will only train DNNs, so don't alter this. 84 | sequential_training : False 85 | 86 | ## You might want to experiment with different learning rates, batch sizes, and maximum 87 | ## number of training epochs: 88 | learning_rate : 0.002 89 | batch_size : 256 90 | training_epochs : 6 91 | ## set warmup_epoch to a number larger than training_epochs to effectively disable it 92 | warmup_epoch : 1000 93 | 94 | L1_regularization: 0.0 95 | L2_regularization: 0.0 96 | hidden_activation: tanh 97 | output_activation: linear 98 | warmup_momentum : 0.0 99 | private_l2_reg : 0.0 100 | 101 | [Streams] 102 | # which feature to be used in the output 103 | output_features : ['dur'] 104 | 105 | 106 | [Data] 107 | ## We need to divide the files available up into train/validation/test data. We don't need 108 | ## to do any testing, but set test_file_number to 1 to keep the tools happy. Split the remaining 109 | ## files between train and validation. Using about 5% or 10% of the data for validation is 110 | ## pretty standard. 
class GenericProcessor(UtteranceProcessor):

    '''
    Utterance processor whose behaviour is chosen at load time: it looks up
    a function by name in util.NodeProcessors and applies that function to
    every node matched by the configured "target_nodes" xpath.
    '''

    def load(self):

        ## Resolve the configured function name against util.NodeProcessors,
        ## failing early if no such node-processing function exists:
        function_name = self.config["function_name"]
        assert function_name in dir(util.NodeProcessors)
        self.function = getattr(util.NodeProcessors, function_name)

        ## NOTE: passing extra arguments to the function (e.g. from a
        ## "function_args" config section) is deliberately unsupported for
        ## now -- keep it simple.

    def process_utterance(self, utt):

        ## Apply the resolved function to each matching node in turn:
        target_xpath = self.config["target_nodes"]
        for node in utt.xpath(target_xpath):
            self.function(node)

    def do_training(self, speech_corpus, text_corpus):
        print('GenericProcessor requires no training')
        return
class TextPrinter(UtteranceProcessor):

    '''
    Reassemble the plain text of an utterance from its token nodes and write
    it (UTF-8 encoded) to the utterance's 'txt_punc' file. Spaces and
    punctuation contribute text only where silence was found, so the output
    reflects the phrasing actually realised in the audio.
    '''

    def load(self):

        self.target_nodes = '//token'


    def process_utterance(self, utt):

        pieces = []

        for token in utt.xpath(self.target_nodes):
            token_class = token.get('token_class')
            if token_class == '_END_':
                continue  ## terminal markers contribute no text
            silent = (token.get('has_silence') == 'yes')
            if token_class == 'space':
                ## A space realised with silence becomes a comma pause:
                pieces.append(', ' if silent else ' ')
            elif token_class == 'punctuation':
                ## Punctuation is kept only when realised as a pause:
                pieces.append(token.get('text') if silent else ' ')
            else:
                pieces.append(token.get('text'))

        outf = utt.get_filename('txt_punc')
        f = codecs.open(outf, 'w', encoding='utf-8')
        f.write(''.join(pieces))
        f.close()

    def do_training(self, speech_corpus, text_corpus):
        print('TextPrinter requires no training')
        return
class NodeEnricher(UtteranceProcessor):
    """
    Specialised UtteranceProcessor that decorates nodes in place: for every
    node matched by target_nodes it reads input_attribute, passes the value
    through enriching_function, and stores the result on the same node under
    output_attribute.

    Subclasses must supply the actual enriching_function.
    """

    def load(self):

        ## Pull settings from the processor config, with defaults:
        self.target_nodes = self.config.get('target_nodes', '//')
        self.input_attribute = self.config.get('input_attribute', 'text')
        self.output_attribute = self.config.get('output_attribute', 'some_attribute')


    def process_utterance(self, utt):
        for node in utt.xpath(self.target_nodes):
            ## Every matched node must carry the attribute we enrich from:
            assert node.has_attribute(self.input_attribute)
            raw_value = node.get(self.input_attribute)
            node.set(self.output_attribute, self.enriching_function(raw_value))

    def enriching_function(self, input):
        raise NotImplementedError('Please provide an enriching_function when subclassing NodeEnricher')


    def do_training(self, speech_corpus, text_corpus):
        ## Nothing to train in the base class.
        return
54 | ''' 55 | def load(self): 56 | NodeEnricher.load(self) 57 | self.output_value = self.config.get('output_value', 'some_value') 58 | 59 | def enriching_function(self, input): 60 | return self.output_value 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /scripts/processors/NodeRemover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/processors/NodeRemover.py -------------------------------------------------------------------------------- /scripts/processors/NodeSplitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 6 | 7 | from UtteranceProcessor import * 8 | 9 | class NodeSplitter(UtteranceProcessor): 10 | """ 11 | Split contents of node's parent_attribute on delimiter, make children 12 | of node with tag child_tag and add split contents of parent_attribute 13 | as child_attribute, one chunk per child. 14 | 15 | Using the defaults, this provides very crude tokenisation on whitespace. 
16 | """ 17 | 18 | 19 | 20 | def load(self): 21 | 22 | ## get attributes from config, converting type and supplying defaults: 23 | self.target_nodes = self.config.get('target_nodes', '//') 24 | self.split_attribute = self.config.get('split_attribute', 'some_attribute') 25 | self.child_node_type = self.config.get('child_node_type', 'some_child_type') 26 | 27 | 28 | # #print dir(self.shared_models[self.my_model]) 29 | # #func_name = self.config['function_to_apply'] 30 | # 31 | # #self.my_function = getattr(self.shared_models[self.my_model], self.config['function_to_apply']) 32 | # #print self.my_function 33 | # #sys.exit(1) 34 | 35 | def process_utterance(self, utt): 36 | # print "-----" 37 | # print "-----" 38 | # utt.pretty_print() 39 | # print "-----" 40 | # print self.target_nodes 41 | # print utt.xpath(self.target_nodes) 42 | for node in utt.xpath(self.target_nodes): 43 | assert node.has_attribute(self.split_attribute) 44 | to_split = node.get(self.split_attribute) 45 | 46 | child_chunks = self.splitting_function(to_split) 47 | 48 | for chunk in child_chunks: 49 | 50 | child = Element(self.child_node_type) 51 | child.set(self.split_attribute, chunk) 52 | node.add_child(child) 53 | 54 | # utt.pretty_print() 55 | 56 | def splitting_function(self, instring): 57 | ## Default -- burst into list. Replace this in subclasses. 
class NaivePhonetiser(SUtteranceProcessor):
    '''
    Add 'phonetic' segments consisting of standard orthography characters,
    converted into an ASCII-safe 'safetext' form.

    Word tokens get one child segment per (lower-cased, safetexted) letter;
    punctuation/terminal tokens get a probable-pause child and space tokens
    a possible-pause child.
    '''
    def __init__(self, processor_name='naive_phonetiser', target_nodes="//token", \
                target_attribute='text', child_node_type='segment', output_attribute='pronunciation', \
                class_attribute='token_class', word_classes=['word'], probable_pause_classes=['punctuation', c.TERMINAL], \
                possible_pause_classes=['space']):

        ## NOTE(review): the list-valued defaults are shared mutable objects
        ## (classic Python pitfall); they are only read here, but callers and
        ## subclasses must not mutate them in place.
        self.processor_name = processor_name
        self.target_nodes = target_nodes
        self.target_attribute = target_attribute
        self.child_node_type = child_node_type
        self.output_attribute = output_attribute
        self.class_attribute = class_attribute
        self.word_classes = word_classes
        self.probable_pause_classes = probable_pause_classes
        self.possible_pause_classes = possible_pause_classes

        super(NaivePhonetiser, self).__init__()

    def process_utterance(self, utt):
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.class_attribute)
            assert node.has_attribute(self.target_attribute)

            current_class = node.attrib[self.class_attribute]

            if current_class in self.word_classes:
                word = node.attrib[self.target_attribute]
                children = self.get_phonetic_segments(word)
            elif current_class in self.probable_pause_classes:
                children = [c.PROB_PAUSE]
            elif current_class in self.possible_pause_classes:
                children = [c.POSS_PAUSE]
            else:
                ## BUGFIX: the original called sys.exit with an unfilled %s
                ## placeholder, and 'sys' is not imported in this module (the
                ## import is commented out), so this path raised NameError.
                ## SystemExit is the builtin exception sys.exit() raises.
                raise SystemExit('Class "%s" not in any of word_classes, probable_pause_classes, possible_pause_classes' % current_class)
            for chunk in children:
                child = Element(self.child_node_type)
                child.set(self.output_attribute, chunk)
                node.add_child(child)

    def get_phonetic_segments(self, word):
        ## Lower-case the word and convert every character to its ASCII-safe
        ## 'safetext' representation -- one segment per character.
        safetext_letters = []
        for letter in list(word.lower()):
            safetext_letters.append(naive_util.safetext(letter))
        return safetext_letters

    def do_training(self, speech_corpus, text_corpus):
        print("NaivePhonetiser requires no training")
are delimited by silence; to find silence, look for 16 | ## nodes with attribute segment_name having value sil under each token: 17 | 18 | ''' 19 | 20 | def __init__(self, processor_name='phrase_maker', node_type_to_regroup='token', parent_node_type='phrase', \ 21 | attribute_with_silence='segment_name', silence_symbol='sil'): 22 | 23 | self.processor_name = processor_name 24 | self.node_type_to_regroup = node_type_to_regroup 25 | self.parent_node_type = parent_node_type 26 | self.attribute_with_silence = attribute_with_silence 27 | self.silence_symbol = silence_symbol 28 | 29 | ## derived attribute: 30 | self.target_xpath='//' + self.node_type_to_regroup 31 | 32 | super(PhraseMaker, self).__init__() 33 | 34 | def process_utterance(self, utt): 35 | 36 | ### Perform 2 'atomic' operations on the utterance: 37 | 38 | ## add phrase start / end attributes on tokens (True/False values): 39 | add_phrase_tags(utt, target_xpath=self.target_xpath, silence_symbol=self.silence_symbol, \ 40 | attribute_with_silence=self.attribute_with_silence) 41 | 42 | ## Use those attributes to restructure the utterance using a generic 43 | ## restructuring function: 44 | restructure(utt, regroup_nodes_of_type=self.node_type_to_regroup, 45 | start_criterion="phrase_start", end_criterion="phrase_end", 46 | new_parent_type="phrase") 47 | 48 | 49 | -------------------------------------------------------------------------------- /scripts/processors/SimpleChildAdder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 6 | 7 | from UtteranceProcessor import * 8 | 9 | class SimpleChildAdder(UtteranceProcessor): 10 | """ 11 | Simplest kind of manipulation, no model. 
For each node in target nodes, add a child 12 | with tag child_tag, and child_attribute as child_attribute_value. The xpath given for target nodes 13 | can be tailored to match the desired set of nodes. 14 | 15 | TODO: doc 16 | """ 17 | 18 | def load(self): 19 | 20 | ## get attributes from config, converting type and supplying defaults: 21 | self.target_nodes = self.config.get('target_nodes', '//') 22 | self.child_tag = self.config.get('child_tag', 'some_tag') 23 | self.child_attribute = self.config.get('child_attribute', 'some_attribute') 24 | self.child_attribute_value = self.config.get('child_attribute_value', 'some_value') 25 | 26 | 27 | def process_utterance(self, utt): 28 | 29 | for node in utt.xpath(self.target_nodes): 30 | child = Element(self.child_tag) 31 | child.set(self.child_attribute, self.child_attribute_value) 32 | node.add_child(child) 33 | 34 | 35 | 36 | def do_training(self, speech_corpus, text_corpus): 37 | return 38 | 39 | 40 | -------------------------------------------------------------------------------- /scripts/processors/Syllabifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from configobj import ConfigObj 5 | from UtteranceProcessor import * 6 | #import util.NodeProcessors 7 | import logging 8 | import os 9 | import re 10 | import default.const as c 11 | class Syllabifier(UtteranceProcessor): 12 | 13 | 14 | 15 | 16 | def load(self): 17 | self.parent_node_type = self.config.get('parent_node_type', '//token') 18 | self.target_nodes = self.config.get('target_nodes', "//token[@token_class='word']/descendant::segment") 19 | ## read phonetic classes, either unsupervised or human produced 20 | self.phoneclass_filename = os.path.join(self.get_location()+"/../phonetic_classifier", self.config['phone_classes']) 21 | #filename = os.path.join(self.voice_resources.get_path(c.LANG), self.config['phone_classes']) 22 | if 
os.path.isfile(self.phoneclass_filename): 23 | self.phones = ConfigObj(self.phoneclass_filename, encoding='utf8') 24 | # culcurate legexprs on init 25 | 26 | self.regexps = self._compile_syllable_regexps() 27 | self.trained = True 28 | else: 29 | self.trained = False 30 | 31 | 32 | 33 | 34 | 35 | def do_training(self, speech_corpus, text_corpus): 36 | 37 | self.load() ## because phoneclass_filename prob. didn't exist when processor was first loaded. 38 | 39 | if self.trained == True: 40 | return 41 | if self.phones: 42 | self.regexps = self._compile_syllable_regexps() 43 | self.trained = True 44 | 45 | 46 | 47 | 48 | 49 | 50 | def _compile_syllable_regexps(self): 51 | # should only be letters, but better quote_meta anyway 52 | quoted_cons = [re.escape(c) for c in self.phones['consonant']] 53 | quoted_vow = [re.escape(c) for c in self.phones['vowel']] 54 | quoted_legal = [re.escape(c) for c in self.phones['legal']] 55 | cons = u'|'.join(quoted_cons) 56 | vow = u'|'.join(quoted_vow) 57 | #cons = u'|'.join(self.phones['consonant']) 58 | #vow = u'|'.join(self.phones['vowel']) 59 | MAX_ONSET = 20 60 | legal_cons=[""]*MAX_ONSET 61 | 62 | #make regexp from legal onsets 63 | #for l in self.phones['legal']: 64 | for l in quoted_legal: 65 | if legal_cons[len(l)] == "": 66 | legal_cons[len(l)]= l 67 | else: 68 | legal_cons[len(l)]= legal_cons[len(l)]+'|' + l 69 | 70 | regexps = [] 71 | # legality principle with max onset 72 | for i in range(len(legal_cons)-1, 0, -1): 73 | if len(legal_cons[i]) > 0: 74 | #regexps.append(re.compile('((?:%s) (?:%s|\s)*)((?:%s) (?:%s))'% (vow,cons,legal_cons[i],vow), re.UNICODE)) # max onset for frequent legal 75 | regexps.append(re.compile('((?:%s) (?:%s|\s)*) ((?:%s) (?:%s))'% (vow,cons,legal_cons[i],vow), re.UNICODE)) # max onset for frequent legal 76 | #defaults 77 | # V.CV 78 | regexps.append(re.compile('(%s) ((?:%s) (?:%s))'% (vow, cons, vow), re.UNICODE)) 79 | # VC+.CV 80 | regexps.append(re.compile('((?:%s) (?:%s|\s)+) ((?:%s) (?:%s))'% 
(vow,cons,cons,vow), re.UNICODE)) # at least one consonant before 81 | #for r in regexps: 82 | # print r.pattern 83 | 84 | # finally hiatus 85 | for h in self.phones['non_diphthongs']: 86 | (h1, h2) = h.split() 87 | regexps.append(re.compile('(%s+) (%s+)'% (h1,h2), re.UNICODE)) 88 | 89 | return regexps 90 | 91 | 92 | # TODO: remove hard-coding, morph level? 93 | def process_utterance(self, utt): 94 | 95 | for node in utt.xpath('//token[@token_class=\"word\"]'): 96 | 97 | segments = [s.get('text') for s in node.xpath('./segment')] 98 | if len(segments) == 0: 99 | continue 100 | text = u" ".join(segments).lower() 101 | syllables = self._syllabify(text) 102 | 103 | # add syllable level between token and letter 104 | # TODO: maybe apply Oliver's generic transform 105 | segments = node.xpath('./segment') 106 | for s in syllables: 107 | syl_node = Element('syllable', text=u"".join(s.split(' '))) 108 | node.add_child(syl_node) 109 | for p in s.split(' '): 110 | phone_node = segments.pop(0) 111 | phone_node.getparent().remove(phone_node) 112 | syl_node.add_child(phone_node) 113 | 114 | 115 | 116 | 117 | 118 | def _syllabify(self, word): 119 | 120 | for regex in (self.regexps): 121 | 122 | while re.search(regex, word): 123 | word = re.sub(regex, '\\1 ||| \\2', word) 124 | 125 | # some regexp produces additional space ... 
126 | word = word.replace(' ',' ') 127 | 128 | return word.split(' ||| ') 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /scripts/processors/WaveSynthesiser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 6 | 7 | from UtteranceProcessor import * 8 | from util.NodeProcessors import * 9 | 10 | from distutils.spawn import find_executable 11 | 12 | class WaveSynthesiser(UtteranceProcessor): 13 | 14 | ''' 15 | As with AcousticModel, this class needs to be generalised to glottHMM etc. 16 | ''' 17 | 18 | def load(self): 19 | pass 20 | # ## Check necessary binaries are on system path: 21 | # for tool in ["synthesis_fft", "x2x", "mgc2sp"]: 22 | # if not find_executable(tool): 23 | # sys.exit("Binary %s must be on system path"%(tool)) 24 | 25 | def process_utterance(self, utt): 26 | 27 | if utt.has_attribute("waveform"): 28 | print "Utt has a natural waveform -- don't synthesise" 29 | return 30 | 31 | ## Check we've got everything to synthesise with: 32 | for filetype in ["gen_f0", "gen_mcep", "gen_bndap"]: 33 | if not utt.has_external_data(filetype): 34 | print 'Utterance does not have filetype %s associated with it -- cannot synthesise a wave'%(filetype) 35 | return 36 | 37 | fzero = utt.get_filename("gen_f0") 38 | mcep = utt.get_filename("gen_mcep") 39 | bndap = utt.get_filename("gen_bndap") 40 | 41 | ## TODO: !!! fix hardcoded values here !!! 42 | shift = 5 43 | rate = 48000 44 | alpha = "0.77" ## Assume 48kH and Bark cepstrum (Julius) <<-- this should be shared from vocoder config!! 
45 | gamma = "0" ## for mcep 46 | order = "59" 47 | fft_len = "2048" 48 | 49 | ## convert params: 50 | comm = "x2x +fd %s > %s.double"%( bndap, bndap) 51 | #print comm 52 | os.system(comm) 53 | comm = "x2x +fa %s > %s.txt"%(fzero, fzero) 54 | #print comm 55 | os.system(comm) 56 | comm = "mgc2sp -a %s -g %s -m %s -l %s -o 2 %s | x2x +fd > %s.spec.double"%(alpha, gamma, order, fft_len, mcep, mcep) 57 | 58 | #print comm 59 | os.system(comm) 60 | 61 | gen_wav = utt.get_filename("gen_wav") 62 | 63 | comm = "%s "%("synthesis_fft") # self.RESYNTH_BIN) 64 | comm += " -f %s "%(rate) 65 | comm += " -fftl %s "%(fft_len) 66 | comm += " -spec " 67 | comm += " -order %s "%(order) 68 | comm += " -shift %s "%(shift) 69 | comm += " -sigp %s "%(1.2) 70 | comm += " -sd %s "%(0.5) 71 | comm += " -cornf %s "%(4000) 72 | comm += " -bw %s "%(70.0) 73 | comm += " -delfrac %s "%(0.2) 74 | comm += " -bap " 75 | comm += " -apfile %s.double "%(bndap) 76 | comm += " %s.txt "%(fzero) 77 | comm += " %s.spec.double "%(mcep) 78 | comm += " %s > %s"%(gen_wav, gen_wav.replace(".wav", ".log")) 79 | 80 | #print comm 81 | os.system(comm) 82 | 83 | assert os.path.isfile(gen_wav) 84 | 85 | 86 | 87 | ## def train -- not necessary for vocoder (yet). 
88 | 89 | 90 | -------------------------------------------------------------------------------- /scripts/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/processors/__init__.py -------------------------------------------------------------------------------- /scripts/shell/combine_lsf_and_gain.pl: -------------------------------------------------------------------------------- 1 | #args 2 | # 1: lsf 2: lsf coeff count 3: gain 3 | 4 | 5 | $DIM = $ARGV[1]; 6 | open GAIN, $ARGV[2]; 7 | @gain = ; 8 | close GAIN; 9 | $i = 1; 10 | open LSF, "$ARGV[0]"; 11 | while (){ 12 | 13 | print; 14 | if ($i % $DIM == 0) { 15 | print shift @gain; 16 | } 17 | $i++; 18 | 19 | } 20 | close LSF; 21 | -------------------------------------------------------------------------------- /scripts/shell/make_hts_training_lists.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cmp_dir=$1 4 | lab_dir=$2 5 | outfile_stem=$3 6 | 7 | 8 | lab_out=$outfile_stem.lab 9 | cmp_out=$outfile_stem.cmp 10 | 11 | rm $lab_out $cmp_out 12 | 13 | for file in $lab_dir/* ; do 14 | base=`basename $file .lab` ; 15 | cmpfile=$cmp_dir/$base.cmp ; 16 | if [ -e $cmpfile ] ; then 17 | echo $cmpfile >> $cmp_out ; 18 | echo $file >> $lab_out ; 19 | fi 20 | done 21 | -------------------------------------------------------------------------------- /scripts/shell/split_cmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Natural Speech Technology - February 2015 - www.natural-speech-technology.org 4 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 5 | 6 | import sys 7 | import os 8 | import struct 9 | import glob 10 | import numpy 11 | from numpy import array 12 | from argparse import ArgumentParser 13 | 
def main_work():
    """Command-line entry point: split each HTK .cmp file found under -cmp
    into per-stream static-parameter files under -out/<stream>/."""

    # ======== Get stuff from command line ==========
    a = ArgumentParser()
    a.add_argument('-cmp', dest='cmpdir', required=True)
    a.add_argument('-out', dest='outdir', required=True, \
        help= "Put output here: make it if it doesn't exist")
    a.add_argument('-streams', default='LSF,LSFsource,HNR,Gain,F0')
    a.add_argument('-widths', default='30,10,5,1,1')
    a.add_argument('-deltas', default=3, type=int, \
        help='e.g. 3 for static + delta + deltadelta')
    opts = a.parse_args()

    # ===============================================
    streams = opts.streams.split(',')
    widths = [int(val) for val in opts.widths.split(',')]

    assert len(streams) == len(widths)

    ## Make output directories, one per stream:
    streams_out = [os.path.join(opts.outdir, stream) for stream in streams]
    for direc in [opts.outdir] + streams_out:
        if not os.path.isdir(direc):
            os.makedirs(direc)

    ## each stream is stored as static followed by its delta copies:
    total_dim = sum(widths) * opts.deltas

    for cmp in glob.glob(os.path.join(opts.cmpdir, '*.cmp')):
        junkpath, base = os.path.split(cmp)
        base = base.replace('.cmp', '')
        data = get_speech(cmp, total_dim, remove_htk_header=True)
        start = 0
        print(base)
        for (stream, width) in zip(streams, widths):
            outfile = os.path.join(opts.outdir, stream, base + '.' + stream)
            end = start + width
            stream_data = data[:, start:end]   ## statics only
            put_speech(stream_data, outfile)
            start = start + (width * opts.deltas)   ## skip this stream's deltas


def get_speech(infile, dim, remove_htk_header=False):
    """Read 32-bit floats from infile and reshape them to a (frames, dim) array.

    If remove_htk_header is set, the first 3 floats (12-byte HTK header) are dropped.
    """
    data = read_floats(infile)
    if remove_htk_header:
        data = data[3:]  ## 3 floats correspond to 12 byte htk header
    assert len(data) % float(dim) == 0, "Bad dimension!"
    m = len(data) // dim   ## floor division: frame count must be an int (py3 fix)
    data = array(data).reshape((m, dim))
    return data


def put_speech(data, outfile):
    """Write a 2-D array of floats to outfile as raw native float32."""
    ## .flatten().tolist() yields plain python floats; the original built a
    ## list of 1-element arrays, which struct.pack only tolerated by accident
    flat_data = data.flatten().tolist()
    write_floats(flat_data, outfile)


def write_floats(data, outfile):
    """Pack a list of floats as native float32 and write them in binary mode."""
    packed = struct.pack(str(len(data)) + "f", *data)
    ## "wb": text mode corrupts data on Windows and rejects bytes on python 3
    with open(outfile, "wb") as f:
        f.write(packed)


def read_floats(infile):
    """Read the whole of infile as a list of native float32 values."""
    with open(infile, "rb") as f:   ## binary mode: see write_floats
        data = f.read()
    m = len(data) // 4   ## 4 bytes per float32
    unpacked = struct.unpack(str(m) + "f", data)
    return list(unpacked)


if __name__ == "__main__":

    main_work()
23 | VOICE_DIR=$NAIVE_DIR/train/$LNG/speakers/$SPEAKER/$RECIPE/ ## location of the voice being trained 24 | WAV_DIR=$NAIVE_DIR/corpus/$LNG/speakers/$SPEAKER/wav/ ## 25 | 26 | 27 | 28 | ## Make paths absolute: 29 | VOICE_DIR=`greadlink -fn $VOICE_DIR` 30 | WAV_DIR=`greadlink -fn $WAV_DIR` 31 | NAIVE_DIR=`greadlink -fn $NAIVE_DIR` 32 | VCDIR=`greadlink -fn $VCDIR` 33 | 34 | echo "Voice dir: ${VOICE_DIR}" 35 | echo "Wav dir: ${WAV_DIR}" 36 | echo "Naive dir: ${NAIVE_DIR}" 37 | echo "VC dir dir: ${VCDIR}" 38 | 39 | 40 | 41 | ## make a place to put synth training features: 42 | FEATURE_DIR=$VOICE_DIR/synth_feats 43 | mkdir $FEATURE_DIR 44 | 45 | 46 | ## make a place to put synth model: 47 | SYNTH_DIR=$VOICE_DIR/processors/acoustic_model/ 48 | if [ ! -e $SYNTH_DIR ] ; then 49 | echo "$SYNTH_DIR does not exist!" ; 50 | exit 1 ; 51 | fi 52 | 53 | 54 | ## ============================= 55 | ## 1) extract STRAIGHT features: 56 | 57 | ## Get template config file: 58 | STRAIGHT_CONF=$FEATURE_DIR/straight_config.txt 59 | echo $NAIVE_DIR/ 60 | cp $NAIVE_DIR/recipes/straight_config_template.txt $STRAIGHT_CONF 61 | 62 | ## Make some substitutions in the config file (this should really be done with 63 | ## proper string interpolation in config): 64 | 65 | echo $STRAIGHT_CONF 66 | 67 | sed "s@VCDIR@${VCDIR}@" $STRAIGHT_CONF > ${STRAIGHT_CONF}_1 68 | sed "s@FEATDIR@${FEATURE_DIR}@" ${STRAIGHT_CONF}_1 > ${STRAIGHT_CONF}_2 69 | sed "s@ESTDIR@${ESTDIR}@" ${STRAIGHT_CONF}_2 > ${STRAIGHT_CONF}_3 70 | sed "s@WAVDIR@${WAV_DIR}@" ${STRAIGHT_CONF}_3 > ${STRAIGHT_CONF}_4 71 | 72 | mv ${STRAIGHT_CONF}_4 ${STRAIGHT_CONF} 73 | 74 | ## Use the config to do feature extraction: 75 | HERE=`pwd` 76 | cd $VCDIR/trunk/Research-Demo/fa-tts/STRAIGHT-TTS/ 77 | ./fa-tts.sh $STRAIGHT_CONF 78 | cd $HERE 79 | 80 | 81 | 82 | ## ============================= 83 | ## 2) make training lists: 84 | 85 | 86 | ## Make training lists, exluding utts for which there are not both cmp and lab files: 87 | echo 
"$NAIVE_DIR/scripts/util//make_hts_training_lists.sh $FEATURE_DIR/cmp $VOICE_DIR/lab/ $SYNTH_DIR/training_list" 88 | $NAIVE_DIR/scripts/util//make_hts_training_lists.sh $FEATURE_DIR/cmp $VOICE_DIR/lab/ $SYNTH_DIR/training_list 89 | 90 | 91 | 92 | ## ============================= 93 | ## 3) train voice on a single machine: 94 | 95 | HERE=`pwd` 96 | cd $VCDIR/trunk/HMM-Training/ 97 | 98 | ## In the script HTS2011-Training.pl, fix this variable to match the first sentence-level 99 | ## (typically 3rd from last) feature in the labels produced: 100 | 101 | ## SENTENCE_LEVEL_DELIMITER=/51: 102 | 103 | ## I added ./run-hts2011_general.sh to the repository -- as the name says, it's 104 | ## a more general version of the VCTK script -- more things are specified on 105 | ## command line so it is less geared to the specifics of VCTK directory structure. 106 | ## 107 | 108 | echo "Train HTS model, output log to $SYNTH_DIR/train_log.txt..." 109 | 110 | ./run-hts2011_general.sh \ 111 | -feature_list $SYNTH_DIR/training_list.cmp \ 112 | -label_list $SYNTH_DIR/training_list.lab \ 113 | -question_file $VOICE_DIR/questions.hed \ 114 | -out $SYNTH_DIR/ \ 115 | | tee $SYNTH_DIR/train_log.txt 116 | 117 | cd $HERE 118 | 119 | ### copy GV and window parameters for use in synthesis later: 120 | cp $FEATURE_DIR/gv/* $SYNTH_DIR/hmm/hts_engine/ 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /scripts/shell/train_cart.R: -------------------------------------------------------------------------------- 1 | ### R script 2 | 3 | 4 | ## Command line arguments 5 | # trailingOnly=TRUE means that only arguments after --args are returned 6 | args = commandArgs(trailingOnly = TRUE) 7 | print("Script train_cart.R called with arguments:") 8 | print(args) 9 | 10 | 11 | data_fn = args[1] 12 | outfile = args[2] 13 | 14 | 15 | library(rpart) 16 | 17 | 18 | my_data <- read.csv(file=data_fn,head=TRUE,sep=",") 19 | ## my_data$break_type <- 
as.factor(my_data$break_type) ## <-- make sure predictee is category 20 | summary(my_data) 21 | 22 | 23 | my_control=rpart.control(minsplit=1, minbucket=1, xval=10, cp=0.0) 24 | my_rpart <- rpart(response~., data=my_data, control=my_control ) 25 | 26 | ### tree before pruning: 27 | print(my_rpart) 28 | printcp(my_rpart) 29 | 30 | ## find smallest model with cp with 1SE of cross validation error: 31 | min_error = min(my_rpart$cptable[,"xerror"]) 32 | min_error_std = min(my_rpart$cptable[,"xstd"]) 33 | thresh = min_error + min_error_std 34 | for (i in seq(nrow(my_rpart$cptable))) { 35 | print(my_rpart$cptable[i,"xerror"]) 36 | if (my_rpart$cptable[i,"xerror"] < thresh) { 37 | best_cp=my_rpart$cptable[i,"CP"] 38 | break 39 | } 40 | } 41 | print(best_cp) 42 | my_rpart <- prune(my_rpart, cp=best_cp) 43 | print(my_rpart) 44 | 45 | save(my_rpart, file=outfile) 46 | -------------------------------------------------------------------------------- /scripts/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSTR-Edinburgh/Ossian/fd01c8f9e1e5fa4f4f00dd444a565b714973b7a9/scripts/tools/__init__.py -------------------------------------------------------------------------------- /scripts/util/Environment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ## Project: Simple4All - January 2013 - www.simple4all.org 4 | ## Contact: Antti Suni - Antti.Suni@helsinki.fi 5 | ## Contact: Oliver Watts - owatts@staffmail.ed.ac.uk 6 | 7 | import default.fnames as fname 8 | import default.const as c 9 | import os 10 | import sys 11 | 12 | 13 | 14 | 15 | #def make(dirs, lang, speaker, version): 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /scripts/util/__init__.py: 
import math
from scipy import stats
import numpy as np

def get_subsections(trajectory):
    '''
    Slice a trajectory into named subsections, as in Murray et al. (2006):
    the whole thing, both halves, and all four quarters.
    '''
    new_features = {}
    t = len(trajectory)
    ## // (floor division): these are sequence indices, so they must stay
    ## integers under python 3 as well as python 2
    new_features['whole'] = trajectory
    new_features['half1'] = trajectory[:(t // 2)]
    new_features['half2'] = trajectory[(t // 2):]
    new_features['quarter1'] = trajectory[:(t // 4)]
    new_features['quarter2'] = trajectory[(t // 4):(t // 2)]
    new_features['quarter3'] = trajectory[(t // 2):(t - (t // 4))]
    new_features['quarter4'] = trajectory[(t - (t // 4)):]
    return new_features


### feature functions: each maps a 1-D sequence to a scalar statistic
def feature_mean(seq):
    return np.mean(seq)

def feature_std(seq):
    return np.std(seq)

def feature_min(seq):
    return np.min(seq)

def feature_max(seq):
    return np.max(seq)

def feature_range(seq):
    return feature_max(seq) - feature_min(seq)

def feature_slope(seq):
    ## gradient of a least-squares line fitted to the sequence
    gradient, intercept, r_value, p_value, std_err, fit_line = fit_lm(seq)
    return gradient

def fit_lm(y):
    '''Fit a straight line to y against its indices; return the regression
    statistics and the fitted line itself.'''
    x = np.array(range(len(y)))
    gradient, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    fit_line = [(x_val * gradient) + intercept for x_val in x]
    return gradient, intercept, r_value, p_value, std_err, fit_line

def get_stats_over_subsections(data):
    '''
    Compute several statistics over several subsections of the given data,
    return in a dictionary whose keys indicate the statistic and subsection
    '''
    subsections = get_subsections(data)
    results = {}   ## renamed from "stats", which shadowed the scipy.stats import
    for (subsection, subdata) in subsections.items():
        for feat_func in [feature_mean, feature_std, feature_range, feature_min, feature_max, feature_slope]:
            results["%s_%s" % (subsection, feat_func.__name__)] = feat_func(subdata)
    return results
28 | 29 | try: 30 | 31 | voice_config = sys.argv[1] 32 | voice_components = sys.argv[2] 33 | ENGINE_BIN = sys.argv[3] 34 | RESYNTH_BIN = sys.argv[4] 35 | trained_model_dir = sys.argv[5] 36 | 37 | 38 | except: 39 | 40 | usage() 41 | 42 | 43 | 44 | ################################################# 45 | sys.path.append("/afs/inf.ed.ac.uk/user/o/owatts/naive/script/") 46 | ################################################# 47 | 48 | 49 | ## Lots of these paths should be interpolated from system-wide options (e.g. bin dir etc). 50 | ## Absolute paths for now. 51 | context_file_location = "/afs/inf.ed.ac.uk/user/o/owatts/naive/context_files/" 52 | ESTDIR = "/group/project/nlp-speech/bin/" 53 | HTSDIR = "/afs/inf.ed.ac.uk/user/o/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin/" 54 | SCRIPT = "/afs/inf.ed.ac.uk/user/o/owatts/naive/script" 55 | GENSIM_LOCATION = "%s/gensim-0.5.0/src/"%(SCRIPT) 56 | ################################################# 57 | 58 | sys.path.append( GENSIM_LOCATION ) ## add gensim to path 59 | from VSMTagger import VSMTagger 60 | 61 | 62 | 63 | print " -- Open the existing voice" 64 | 65 | voice = Voice(config_file=voice_config) 66 | 67 | 68 | print " -- Make an utterance processor from a (trained) acoustic model " 69 | 70 | ### This will only perform work where an utt does not have a wavefile attached: 71 | parameter_generator = AcousticModel(config_file=voice_components + "/parameter_generator.cfg", 72 | processor_name = "parameter_generator", 73 | ENGINE_BIN=ENGINE_BIN, 74 | model_location = trained_model_dir, 75 | HTSDIR=HTSDIR ) 76 | parameter_generator.save() 77 | 78 | 79 | 80 | ### WAVESYNTH 81 | waveform_synthesiser = WaveSynthesiser(config_file=voice_components + "/waveform_synthesiser.cfg", 82 | processor_name = "waveform_synthesiser", 83 | RESYNTH_BIN=RESYNTH_BIN, 84 | HTSDIR=HTSDIR ) 85 | waveform_synthesiser.save() 86 | 87 | 88 | ### WAVE PLAYER (call e.g. 
sox etc) 89 | wave_player = WavePlayer(config_file=voice_components + "/wave_player.cfg", 90 | processor_name = "wave_player" 91 | ) 92 | wave_player.save() 93 | 94 | voice.add_processor(voice_components + "/parameter_generator.cfg") 95 | voice.add_processor(voice_components + "/waveform_synthesiser.cfg") 96 | voice.add_processor(voice_components + "/wave_player.cfg") 97 | 98 | print " -- Save voice" 99 | voice.save() 100 | 101 | print " -- Synthesize a test utterance (from some Spanish text...)" 102 | ## Use the voice to synth a test utterance: 103 | voice.synth_utterance("Esto es: una prueba.") 104 | 105 | 106 | 107 | 108 | if __name__=="__main__": 109 | 110 | main_work() 111 | -------------------------------------------------------------------------------- /scripts/util/cwt_utils.py: -------------------------------------------------------------------------------- 1 | 2 | #import matplotlib 3 | #matplotlib.use('macosx') 4 | import numpy as np 5 | 6 | #from matplotlib import pyplot as pylab 7 | #import pylab 8 | 9 | ## osw: unused import of old package? 
def plot_labels(labels, shift=0, fig="", text=True):
    '''Draw vertical boundary lines (and optionally the token text) for each
    (start, end, token) label on a matplotlib axes / pylab.'''
    import pylab
    if fig == "":
        fig = pylab
    for (start, end, token) in labels:
        if token:
            fig.axvline(x=start, color='black')
            fig.axvline(x=end, color='black')
            if text:
                fig.text(start + 1 - shift, 0, token)
    fig.legend()


def plot_prom_labels(labels, prominences, shift=0, fig=""):
    '''Annotate labelled regions with their (rounded) prominence values.'''
    import pylab
    if fig == "":
        fig = pylab
    for i in range(len(labels)):
        (start, end, token) = labels[i]
        ## fixed off-by-one: the original tested i <= len(prominences), which
        ## allowed an out-of-range prominences[i]
        if token and i < len(prominences):
            fig.text(start + 3, shift, (round(prominences[i], 1)))


def get_peaks(params):
    '''Return a 2-row array [peak_values, peak_indices] of the local maxima of
    a 1-D numpy array.'''
    ## a local maximum is where the first-difference sign flips from + to -
    indices = (np.diff(np.sign(np.diff(params))) < 0).nonzero()[0] + 1
    peaks = params[indices]
    return np.array([peaks, indices])


def get_valleys(params):
    '''Local minima, computed as the peaks of the negated signal.'''
    return get_peaks(-params)


def get_best_scale(wavelet_matrix, num_units):
    '''Return the row index of the wavelet scale whose peak count is closest
    to num_units (first such row wins on ties).'''
    best_i = 0
    best = 999
    for i in range(0, wavelet_matrix.shape[0]):
        num_peaks = len(get_peaks(wavelet_matrix[i])[0])
        dist = abs(num_peaks - num_units)
        if dist < best:
            best = dist
            best_i = i
    return best_i


def normalize(params, std=0):
    '''Zero-mean normalise and divide by std; std=0 means "use the sample
    standard deviation of params".'''
    if std == 0:
        std = np.std(params)
    mean = np.mean(params)
    return (params - mean) / std


def unnormalize(params, mean, std):
    '''Invert normalize(): rescale params to the given mean and std.'''
    return mean + (params - np.mean(params)) * (std / (np.std(params)))


def scale_for_reconstruction(wavelet_matrix, scale_dist=1.0, s0=3):
    '''Weight each wavelet scale (row) for signal reconstruction.'''
    scaled = np.array(wavelet_matrix)
    for i in range(0, wavelet_matrix.shape[0]):
        scaled[i] *= 2 ** (-(i + s0 - 1) * scale_dist / 2)
    return scaled


def calc_prominence(params, labels, func=np.max, use_peaks=True):
    '''For each non-empty (start, end, word) label span, return a prominence
    value: the highest local peak inside the span (use_peaks=True), or func
    applied over the span (use_peaks=False).'''
    labelled = []
    for (start, end, word) in labels:
        if end - start == 0:
            continue   ## skip zero-length spans
        if use_peaks:
            (peaks, indices) = get_peaks(params[start:end])
            if len(peaks) > 0:
                labelled.append(np.max(peaks))
            else:
                labelled.append(0.0)   ## no local maximum in the span
        else:
            labelled.append(func(params[start:end]))
    return labelled
# Conversion notes (continuing the file header): rule-based
# alpha-syllabic to alphabetic conversion.  A virama deletes the default
# (inherent) vowel, a vowel sign replaces the default vowel, and
# anusvara/candrabindu append a nasal 'm' feature to the current letter.


def latinise_indian_script_string(l):
    """Latinise one string of Indic script.

    l: a unicode string (typically one line of text).

    Returns a flat list of single latin "letters": syllable characters
    are split into consonant + inherent-vowel parts, latin characters
    pass through unchanged, and code points without a unicode name are
    skipped.
    """
    prev_letter = ""
    letters = []
    for i in range(0, len(l)):
        try:
            u_name = unicodedata.name(l[i])
        # bugfix: was a bare `except:`; unicodedata.name raises
        # ValueError for code points that have no name
        except ValueError:
            continue

        # latin letters: emit any pending letter, then pass through
        if re.match('.*LATIN.*', u_name):
            if prev_letter:
                letters.append(prev_letter)
                # bugfix: prev_letter was not reset here, so the pending
                # letter could be emitted a second time later on
                prev_letter = ""
            letters.append(l[i])
            continue

        # e.g. assamese 'WITH LOWER DIAGONAL' etc.
        u_name = re.sub(' WITH .+', '', u_name)

        # syllable and independent vowel characters
        # (CANDRA, VOCALIC and such are skipped for simplicity)
        m = re.match('.*LETTER( .+)? (.+)$', u_name)
        if m:
            letter = m.group(2)
            if prev_letter:
                letters.append(prev_letter)
            # syllables and independent vowels are not distinguished in
            # unicode names; treat any name ending in A (except AA) as a
            # syllable and split off its inherent vowel with a space
            if letter != "AA" and re.match(".+A$", letter):
                prev_letter = letter[:-1] + " " + letter[-1]
            else:
                prev_letter = letter
            continue

        # modifiers:

        # vowel sign: replace the default (inherent) vowel
        m = re.match('.*VOWEL SIGN( .+)? (.+)$', u_name)
        if m:
            prev_letter = prev_letter[:-1] + m.group(2)
            continue

        # virama: delete the default vowel (and its separator)
        if re.match('.*VIRAMA', u_name):
            prev_letter = prev_letter[:-2]
            continue

        # anusvara: nasalise the current letter
        if re.match('.*ANUSVARA', u_name):
            prev_letter += "m"
            continue

        # candrabindu: 'usually means that the previous vowel is nasalized'
        if re.match('.*CANDRABINDU', u_name):
            prev_letter += "m"
            continue

        # anything else: flush the pending letter, keep the char as-is
        if prev_letter:
            letters.append(prev_letter)
            prev_letter = ""
        letters.append(l[i])

    # bugfix: only flush a non-empty pending letter -- the previous
    # unconditional append produced a spurious '' token at the end of
    # every result (confirm no caller relied on that trailing token)
    if prev_letter:
        letters.append(prev_letter)

    # flatten: split multi-part entries ("K A") into single letters
    final_letters = []
    for letter in letters:
        if len(letter) == 1:
            final_letters.append(letter)
        else:
            final_letters.extend(letter.split(' '))
    return final_letters


def main_work():
    """Latinise the UTF-8 text file named by sys.argv[1] and print the
    result, one space-separated line of letters per input line."""
    f = codecs.open(sys.argv[1], "r", encoding='utf-8')
    lines = f.readlines()
    f.close()

    for l in lines:
        letters = latinise_indian_script_string(l)
        # parenthesised so the file at least parses under python 3
        # while behaving identically under python 2
        print(u" ".join(letters).encode('utf-8'))


if __name__ == "__main__":

    main_work()
./train/sw/speakers/pm_balanced/naive_01_nn/utt/ ./train/sw/speakers/pm_balanced/naive_01_nn/clickable_audio 12 | 13 | ''' 14 | import os 15 | import sys 16 | import glob 17 | from lxml import etree 18 | import multiprocessing 19 | 20 | uttdir = sys.argv[1] 21 | outdir = sys.argv[2] 22 | 23 | 24 | outdir = os.path.abspath(outdir) 25 | 26 | 27 | assert not os.path.isdir(outdir), '%s already exists'%(outdir) 28 | 29 | audiodir = os.path.join(outdir, 'audio') 30 | os.makedirs(outdir) 31 | os.makedirs(audiodir) 32 | 33 | 34 | 35 | max_cores = '30' 36 | 37 | 38 | html_lines = [] 39 | 40 | 41 | 42 | 43 | 44 | 45 | # Using all available CPU cores unless defined otherwise 46 | if max_cores is not None and max_cores.isdigit(): 47 | n_cores = int(max_cores) 48 | else: 49 | n_cores = multiprocessing.cpu_count() 50 | 51 | 52 | 53 | def proc_utt(uttfile): 54 | 55 | path, base = os.path.split(uttfile) 56 | base = base.replace('.utt', '') 57 | print base 58 | utt = etree.parse(uttfile) 59 | wavfile = utt.getroot().attrib['waveform'] 60 | i = 1 61 | html_line = '' 62 | os.makedirs(audiodir + '/clickable_%s/'%(base)) 63 | for token in utt.xpath('//token'): 64 | text = token.attrib['text'] 65 | if 'start' in token.attrib: 66 | 67 | 68 | 69 | start_sec = float(token.attrib['start']) / 1000.0 70 | end_sec = float(token.attrib['end']) / 1000.0 71 | dur_sec = end_sec - start_sec 72 | 73 | 74 | 75 | ## chop wav extract: 76 | outwave = audiodir + '/clickable_%s/clickable_%s_%s.ogg'%(base, base, i) 77 | comm = 'sox %s %s trim %s %s'%(wavfile, outwave, start_sec, dur_sec) 78 | os.system(comm) 79 | #print comm 80 | 81 | #html_line += ''%(text, outwave) 82 | #html_line += '%s'%(outwave, text) 83 | html_line += '%s'%(outwave, text) 84 | 85 | 86 | 87 | #html_line += '