├── .gitignore ├── Cargo.toml ├── src ├── voices │ ├── mod.rs │ └── generic.rs ├── languages │ └── mod.rs └── lib.rs ├── LICENCE_MIT ├── README.md ├── examples ├── interactive.rs └── cli.rs └── LICENCE-APACHE /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | *.wav -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "grail-rs" 3 | description = "A simple formant speech synthesizer" 4 | version = "0.0.0" 5 | license = "MIT OR Apache-2.0" 6 | repository = "https://github.com/Dimev/grail-rs" 7 | documentation = "https://docs.rs/grail-rs" 8 | keywords = ["speech", "audio"] 9 | categories = ["multimedia"] 10 | edition = "2021" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | # no dependencies needed 16 | 17 | [dev-dependencies] 18 | # but we do need this for examples 19 | cpal = "0.13.5" 20 | -------------------------------------------------------------------------------- /src/voices/mod.rs: -------------------------------------------------------------------------------- 1 | //! All voices given with grail, along with functions to get them 2 | //! 
3 | use crate::{SynthesisElem, NUM_FORMANTS}; 4 | 5 | // helper for making phonemes 6 | // if you're porting this, put this in a seperate file somewhere so you don't include all voices when including a single voice 7 | pub const MKPHON: fn( 8 | freq: [f32; NUM_FORMANTS], 9 | bw: [f32; NUM_FORMANTS], 10 | smooth: [f32; NUM_FORMANTS], 11 | turb: [f32; NUM_FORMANTS], 12 | breath: [f32; NUM_FORMANTS], 13 | amp: [f32; NUM_FORMANTS], 14 | ) -> SynthesisElem = SynthesisElem::new_phoneme; 15 | 16 | // include the voices we made 17 | pub mod generic; 18 | 19 | // and use it so we can easily get it 20 | pub use generic::generic; 21 | -------------------------------------------------------------------------------- /src/languages/mod.rs: -------------------------------------------------------------------------------- 1 | //! languages 2 | use crate::{Language, Phoneme, TranscriptionRule}; 3 | 4 | pub const fn generic() -> Language<'static> { 5 | Language { 6 | case_sensitive: false, 7 | rules: &[ 8 | TranscriptionRule { 9 | string: "a", 10 | phonemes: &[Phoneme::A], 11 | }, 12 | TranscriptionRule { 13 | string: "e", 14 | phonemes: &[Phoneme::E], 15 | }, 16 | TranscriptionRule { 17 | string: "i", 18 | phonemes: &[Phoneme::A], 19 | }, 20 | TranscriptionRule { 21 | string: "ii", 22 | phonemes: &[Phoneme::E, Phoneme::A], 23 | }, 24 | TranscriptionRule { 25 | string: "oui", 26 | phonemes: &[Phoneme::A, Phoneme::E, Phoneme::A], 27 | }, 28 | TranscriptionRule { 29 | string: "p", 30 | phonemes: &[Phoneme::Silence], 31 | }, 32 | ], 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /LICENCE_MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Dimas Leenman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/voices/generic.rs: -------------------------------------------------------------------------------- 1 | //! 
generic voice 2 | use crate::voices::MKPHON; 3 | use crate::*; 4 | 5 | pub fn generic() -> Voice { 6 | Voice { 7 | sample_rate: DEFAULT_SAMPLE_RATE, 8 | phonemes: VoiceStorage { 9 | a: MKPHON( 10 | [ 11 | 910.0, 1271.0, 2851.0, 3213.0, 1200.0, 2000.0, 3000.0, 4000.0, 12 | ], 13 | [60.0, 160.0, 180.0, 200.0, 100.0, 100.0, 100.0, 100.0], 14 | [ 15 | 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 16 | ], 17 | [0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0], 18 | [0.5, 0.2, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0], 19 | [0.3, 0.3, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], 20 | ), 21 | e: MKPHON( 22 | [ 23 | 910.0, 1871.0, 2851.0, 3213.0, 1200.0, 2000.0, 3000.0, 4000.0, 24 | ], 25 | [80.0, 180.0, 180.0, 200.0, 100.0, 100.0, 100.0, 100.0], 26 | [ 27 | 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 28 | ], 29 | [0.2, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4], 30 | [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1, 0.1], 31 | [0.5, 0.4, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0], 32 | ), 33 | }, 34 | center_frequency: 120.0 / DEFAULT_SAMPLE_RATE as f32, 35 | jitter_frequency: 16.0 / DEFAULT_SAMPLE_RATE as f32, 36 | jitter_delta_frequency: 6.0 / DEFAULT_SAMPLE_RATE as f32, 37 | jitter_delta_formant_frequency: 6.0 / DEFAULT_SAMPLE_RATE as f32, 38 | jitter_delta_amplitude: 0.2, 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grail-rs (Work in progress) 2 | Grail, A simple formant speech synthesizer, built for portability 3 | This is the rust version 4 | 5 | The goal of this synthesizer is to be as simple as possible, and easy to port to C and other languages if needed (I'll make a C port when this one is in a more complete state) 6 | 7 | Still heavy WIP 8 | 9 | # Roadmap: 10 | - Get the output to be roughly normalized by default (Almost done, ModFM does wonders here) 11 | - Move everything back to the 2000 line file, that was nicer to work with IMO 12 | - 
Use a different carrier for ModFM, so it's closer to what an actual speech sound wave looks like 13 | - Finish the example, get rid of rodio and dev dependencies 14 | - Make a full voice 15 | - Complete the intonator, can see a few items into the future and adjusts voice based on that (also a ruleset for this?) 16 | - Complete the text->phoneme transcription, via a find-and-replace ruleset 17 | - Make a macro to generate a language from a language file (and do sorting automatically) 18 | - Make a better way to make voices 19 | - (later) add a way to send commands to change the intonation 20 | - (later, maybe) Automatic voice replication 21 | 22 | # License 23 | Licensed under either of 24 | 25 | * Apache License, Version 2.0 26 | ([LICENSE-APACHE](LICENCE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 27 | * MIT license 28 | ([LICENSE-MIT](LICENCE_MIT) or http://opensource.org/licenses/MIT) 29 | 30 | at your option. 31 | 32 | # Contribution 33 | Unless you explicitly state otherwise, any contribution intentionally submitted 34 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 35 | dual licensed as above, without any additional terms or conditions. 
-------------------------------------------------------------------------------- /examples/interactive.rs: -------------------------------------------------------------------------------- 1 | use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; 2 | use grail_rs::{ 3 | IntoIntonator, IntoJitter, IntoSelector, IntoSequencer, IntoSynthesize, IntoTranscriber, 4 | }; 5 | use std::sync::mpsc::channel; 6 | 7 | fn main() { 8 | // get cpal's host and output device 9 | let host = cpal::default_host(); 10 | let device = host.default_output_device().expect("No audio device found"); 11 | 12 | // get a config for the stream 13 | let config = device 14 | .default_output_config() 15 | .expect("Failed to get output config"); 16 | 17 | println!( 18 | "Output device: {}, {:?}, {}", 19 | device.name().unwrap(), 20 | config.sample_rate(), 21 | config.channels() 22 | ); 23 | 24 | // num channels 25 | let num_channels = config.channels() as usize; 26 | 27 | // make the channels 28 | let (sender, receiver) = channel(); 29 | 30 | // create the audio iterator 31 | let mut iterator = std::iter::repeat_with(move || receiver.try_recv().unwrap_or(' ')) 32 | .transcribe(grail_rs::languages::generic()) 33 | .intonate(grail_rs::languages::generic(), grail_rs::voices::generic()) 34 | .select(grail_rs::voices::generic()) 35 | .sequence(grail_rs::voices::generic()) 36 | .jitter(0, grail_rs::voices::generic()) 37 | .synthesize() 38 | .flat_map(move |x| std::iter::repeat(x).take(num_channels)); 39 | 40 | // make a stream to play audio with 41 | let stream = match config.sample_format() { 42 | cpal::SampleFormat::F32 => device.build_output_stream( 43 | &config.into(), 44 | move |data: &mut [f32], _| { 45 | for i in data { 46 | *i = iterator.next().unwrap_or(0.0); 47 | } 48 | }, 49 | move |err| println!("Error: {:?}", err), 50 | ), 51 | cpal::SampleFormat::U16 => device.build_output_stream( 52 | &config.into(), 53 | move |data: &mut [u16], _| { 54 | for i in data { 55 | *i = 
((iterator.next().unwrap_or(0.0) * 0.5 + 0.5) * u16::MAX as f32) as u16; 56 | } 57 | }, 58 | move |err| println!("Error: {:?}", err), 59 | ), 60 | cpal::SampleFormat::I16 => device.build_output_stream( 61 | &config.into(), 62 | move |data: &mut [i16], _| { 63 | for i in data { 64 | *i = (iterator.next().unwrap_or(0.0) * i16::MAX as f32) as i16; 65 | } 66 | }, 67 | move |err| println!("Error: {:?}", err), 68 | ), 69 | } 70 | .expect("Failed to make stream"); 71 | 72 | // play 73 | // can't move the expect here, as stream needs to be alive long enough 74 | stream.play().expect("Failed to play audio"); 75 | 76 | // read input 77 | for line in std::io::stdin().lines().map(|x| x.unwrap()) { 78 | for character in line.trim().chars().chain(Some(' ').into_iter()) { 79 | sender.send(character).expect("Failed to send audio"); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /examples/cli.rs: -------------------------------------------------------------------------------- 1 | use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; 2 | use grail_rs::{ 3 | IntoIntonator, IntoJitter, IntoSelector, IntoSequencer, IntoSynthesize, IntoTranscriber, 4 | }; 5 | 6 | use std::env; 7 | use std::fs::File; 8 | use std::io::prelude::*; 9 | 10 | // helps to check if there's an argument 11 | fn has_argument(args: &[String], short: &str, long: &str) -> bool { 12 | // looks through the array if there is any 13 | args.contains(&short.into()) || args.contains(&long.into()) 14 | } 15 | 16 | // helps find a value in a switch, if any 17 | fn find_argument(args: &[String], short: &str, long: &str) -> Option { 18 | // find the switch, if the first value is the right flag, the value after that is the one we need 19 | args.windows(2) 20 | .find(|x| match x { 21 | [switch, value] => switch.as_str() == short || switch.as_str() == long, 22 | _ => false, 23 | }) 24 | .map(|x| x[1].clone()) 25 | } 26 | 27 | // save a wav file 28 | fn save_wav(path: 
&str, data: &[f32], sample_rate: u32) { 29 | // open a file 30 | let mut file = std::fs::File::create(path).expect("Failed to create file"); 31 | 32 | // create a wav file 33 | let bytes = &[ 34 | &b"RIFF"[..], // riff header 35 | &((36 + data.len() * 2) as u32).to_le_bytes()[..], // file size 36 | &b"WAVE"[..], // wave header 37 | &b"fmt "[..], // format 38 | &(16 as u32).to_le_bytes()[..], // sub chunk size, aka how long is this chunk 39 | &(1 as u16).to_le_bytes()[..], // format, pcm 40 | &(1 as u16).to_le_bytes()[..], // 1 channel 41 | &(sample_rate as u32).to_le_bytes()[..], // sample rate 42 | &(sample_rate as u32 * 2).to_le_bytes()[..], // byte rate (sample rate * channels * bytes per sample) 43 | &(2 as u16).to_le_bytes()[..], // block align, num channels * bytes per sample 44 | &(16 as u16).to_le_bytes()[..], // bits per sample 45 | &b"data"[..], // data header 46 | &(data.len() as u32 * 2).to_le_bytes()[..], // section size 47 | &data.iter() 48 | .map(|x| { 49 | ((x * std::i16::MAX as f32) as i16) 50 | .to_le_bytes() 51 | .into_iter() 52 | }) 53 | .flatten() 54 | .collect::>()[..], 55 | ] 56 | .iter() 57 | .map(|x| *x) 58 | .flatten() 59 | .map(|x| *x) 60 | .collect::>(); 61 | 62 | // write file 63 | file.write(&bytes).expect("failed to store"); 64 | 65 | // and store 66 | file.flush().expect("Failed to write"); 67 | } 68 | 69 | fn main() { 70 | // get the command line args 71 | let args: Vec = env::args().collect(); 72 | 73 | // figure out what to do, no args, -h or --help is print help 74 | // -v or --voice is to set the voice 75 | // -o or --output to set the output file path 76 | // -l or --langauge sets the language ruleset 77 | // -r or --resample to change the sample rate REALLY NEEDED? 
78 | // -i or --input to read from a file 79 | // -s or --silent to not play back any sound 80 | // -V or --version to display the version 81 | // anything not preceeded by a switch is assumed to be speech 82 | 83 | // the parameters we want to use as default 84 | let mut voice = String::from("sbinotto"); 85 | let mut language = String::from(""); 86 | let mut sample_rate = grail_rs::DEFAULT_SAMPLE_RATE; 87 | let mut input_file = String::new(); 88 | let mut output_file = String::new(); 89 | let mut play_sound = true; 90 | 91 | // check what we need to do 92 | if has_argument(&args, "-h", "--help") || args.len() < 2 { 93 | // print help menu 94 | println!("Grail, a rust speech synthesizer"); 95 | println!("The last argument is interpreted as text to be spoken"); 96 | println!( 97 | "So 'grail -v bob hello' will say 'hello'. -v is to set the voice, bob in this case" 98 | ); 99 | 100 | // flag descriptions 101 | println!("Flags:"); 102 | println!("-v or --voice is to set the voice"); 103 | println!("-o or --output to set the output file path"); 104 | println!("-l or --langauge sets the language ruleset"); 105 | println!("-r or --resample to change the sample rate"); 106 | println!("-i or --input to read from a file"); 107 | println!("-s or --silent to not play back any sound"); 108 | println!("-V or --version to display the version"); 109 | 110 | // list of voices 111 | println!("Voices:"); 112 | 113 | // list of languages 114 | println!("Languages:"); 115 | 116 | // stop 117 | return; 118 | } else if has_argument(&args, "-V", "--version") { 119 | // print the version 120 | println!("Grail-rs version {}", env!("CARGO_PKG_VERSION")); 121 | 122 | // stop 123 | return; 124 | } 125 | 126 | // now, parse the arguments with values 127 | if let Some(path) = find_argument(&args, "-i", "--input") { 128 | // open the file if it exists 129 | if let Ok(mut file) = File::open(path.as_str()) { 130 | // read the in file 131 | file.read_to_string(&mut input_file) 132 | .expect("Failed 
to read file"); 133 | } else { 134 | // give an error that we couldn't open the file 135 | println!("Could not open file \"{}\"", path); 136 | return; 137 | } 138 | } 139 | 140 | // set the output file, if any 141 | if let Some(path) = find_argument(&args, "-o", "--output") { 142 | output_file = path; 143 | } 144 | 145 | // do we need to be silent? 146 | if has_argument(&args, "-s", "--silent") { 147 | play_sound = false; 148 | } 149 | 150 | // what voice do we use? 151 | if let Some(speaker) = find_argument(&args, "-v", "--voice") { 152 | voice = speaker; 153 | } 154 | 155 | // figure out what to say, this is simply the last argument, if nothing is to be read from a file 156 | let to_say = if input_file != String::new() { 157 | // file was already read to here 158 | input_file 159 | } else { 160 | // read the last argument 161 | args.last().unwrap().clone() 162 | }; 163 | 164 | // Display what to say 165 | println!("\"{}\"", to_say); 166 | println!(" -- {}", voice); 167 | 168 | // synthesize the speech 169 | let mut generated_audio = Vec::with_capacity(sample_rate as usize * 4); 170 | 171 | // measure the time it takes to synthesize the audio 172 | let start = std::time::Instant::now(); 173 | 174 | // and extend the sound part with it 175 | generated_audio.extend( 176 | to_say 177 | .chars() 178 | .transcribe(grail_rs::languages::generic()) 179 | .intonate(grail_rs::languages::generic(), grail_rs::voices::generic()) 180 | .select(grail_rs::voices::generic()) 181 | .sequence(grail_rs::voices::generic()) 182 | .jitter(0, grail_rs::voices::generic()) 183 | .synthesize(), 184 | ); 185 | 186 | let duration = start.elapsed().as_micros(); 187 | 188 | // display info on how long the audio file is 189 | println!( 190 | "{:.2} seconds of audio, generated in {} microseconds", 191 | generated_audio.len() as f32 / sample_rate as f32, 192 | duration 193 | ); 194 | 195 | // if there's an output file, write to it 196 | if output_file != String::new() { 197 | println!("Writing 
generated sound to {}", output_file); 198 | 199 | // and save the file 200 | save_wav(&output_file, &generated_audio, sample_rate as u32); 201 | } 202 | 203 | // and play it back, if needed 204 | // TODO: clean this up a bit and move CPAL to a generic func 205 | if play_sound { 206 | // get cpal's host and output device 207 | let host = cpal::default_host(); 208 | let device = host.default_output_device().expect("No audio device found"); 209 | 210 | println!("Output device: {}", device.name().unwrap()); 211 | 212 | // get a config for the stream 213 | let config = device 214 | .supported_output_configs() 215 | .expect("No configs found") 216 | .next() 217 | .expect("Failed to get config") 218 | .with_sample_rate(cpal::SampleRate(sample_rate as u32)); 219 | 220 | // save audio length 221 | let audio_len = generated_audio.len(); 222 | 223 | // num channels 224 | let num_channels = config.channels() as usize; 225 | 226 | // consumer iterator to read the generated audio 227 | let mut consumer = generated_audio 228 | .into_iter() 229 | .flat_map(move |x| std::iter::repeat(x).take(num_channels)); 230 | 231 | // make a stream to play audio with 232 | let stream = match config.sample_format() { 233 | cpal::SampleFormat::F32 => device.build_output_stream( 234 | &config.into(), 235 | move |data: &mut [f32], _| { 236 | for i in data { 237 | *i = consumer.next().unwrap_or(0.0); 238 | } 239 | }, 240 | move |err| println!("Error: {:?}", err), 241 | ), 242 | cpal::SampleFormat::U16 => device.build_output_stream( 243 | &config.into(), 244 | move |data: &mut [u16], _| { 245 | for i in data { 246 | *i = 247 | ((consumer.next().unwrap_or(0.0) * 0.5 + 0.5) * u16::MAX as f32) as u16; 248 | } 249 | }, 250 | move |err| println!("Error: {:?}", err), 251 | ), 252 | cpal::SampleFormat::I16 => device.build_output_stream( 253 | &config.into(), 254 | move |data: &mut [i16], _| { 255 | for i in data { 256 | *i = (consumer.next().unwrap_or(0.0) * i16::MAX as f32) as i16; 257 | } 258 | }, 259 | 
move |err| println!("Error: {:?}", err), 260 | ), 261 | } 262 | .expect("Failed to make stream"); 263 | 264 | // play 265 | // can't move the expect here, as stream needs to be alive long enough 266 | stream.play().expect("Failed to play audio"); 267 | 268 | // wait till the sound stops playing 269 | std::thread::sleep(std::time::Duration::from_secs_f32( 270 | (audio_len as f32 / sample_rate as f32) + 0.5, 271 | )); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /LICENCE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Dimas Leenman 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![forbid(unsafe_code)] 3 | 4 | // TODO: move phoneme related stuff into phoneme, and language related stuff into either language or transcribe 5 | // TODO: consider const generics (when done ofc)? 6 | // TODO: make most of the order easy to read, so keep the explanation 7 | // TODO: maybe have a CPAL feature to allow easy playback? 
8 | // TODO: move back to svf filters, as they are easier and probably result in better audio 9 | 10 | // we'll want to allow voices to be used from this library 11 | pub mod voices; 12 | 13 | // and languages 14 | pub mod languages; 15 | 16 | // The main file the synth is in 17 | // first, define some constants 18 | 19 | /// default sample rate all voices use 20 | /// Resampling to a different sample rate is possible 21 | pub const DEFAULT_SAMPLE_RATE: f32 = 44100.0; 22 | 23 | /// the number of formants to synthesize 24 | pub const NUM_FORMANTS: usize = 8; 25 | 26 | // we'll want to implement these for arrays 27 | use core::ops::{Add, AddAssign, Div, Mul, Neg, Sub}; 28 | 29 | use core::iter::Peekable; 30 | 31 | // We'll need some helper functions 32 | // random number generation 33 | 34 | /// generates a random float in the range [-1, 1], and changes the state after doing so 35 | #[inline] 36 | pub fn random_f32(state: &mut u32) -> f32 { 37 | // here we change the state with a regular integer rng 38 | // This is the lehmer random number generator: https://en.wikipedia.org/wiki/Lehmer_random_number_generator 39 | // 16807 here is a magic number. 
In theory this could be any coprime, but there are some numbers that work better 40 | *state = state.wrapping_mul(16807).wrapping_add(1); 41 | 42 | // https://experilous.com/1/blog/post/perfect-fast-random-floating-point-numbers 43 | // and here we get the right part of the integer to generate our float from 44 | // this abuses IEE 754 floats (and works with doubles too) 45 | // the first 9 bits of the float are the sign bit, and the exponent 46 | // numbers from 1 - 2 in this have the same exponent (which the | 0x3F800000 sets) 47 | // then we can set the mantissa with the state 48 | // we shift that to the right so the first 9 bits become 0, and don't affect our exponent 49 | // for doubles (f64) we need to shift by 12, due to the sign and exponent taking up 12 bits, and set these to 0x3FF0000000000000 instead 50 | let res = (*state >> 9) | 0x3F800000; 51 | 52 | // and here we get the float number 53 | // we have a range of 1-2, but we want -1 to 1 54 | (f32::from_bits(res) - 1.5) * 2.0 55 | } 56 | 57 | // and some arithmatic functions 58 | // these are approximations to help speed things up 59 | // hyperbolic tangent, x is multiplied by pi 60 | /// Approximation of the hyperbolic tangent, tan(pi*x). 
61 | /// Approximation is good for x = [0.0; 0.5] 62 | #[inline] 63 | pub fn tan_approx(x: f32) -> f32 { 64 | // tan(x) = sin(x) / cos(x) 65 | // we can approximate sin and x with the bhaskara I approximation quite well 66 | // which is 16x(pi - x) / 5pi^2 - 4x(pi - x) for sin 67 | // if we fill it in, multiply pi by and rewrite it, we get this: 68 | ((1.0 - x) * x * (5.0 - 4.0 * (x + 0.5) * (0.5 - x))) 69 | / ((x + 0.5) * (5.0 - 4.0 * (1.0 - x) * x) * (0.5 - x)) 70 | } 71 | 72 | /// Approximation of -exp(TAU * x) 73 | /// accurate for the range [0, 1] 74 | #[inline] 75 | pub fn exp_approx(x: f32) -> f32 { 76 | // exp(-2 * pi * x) ~ (1 - x) ^ 5 77 | let o = 1.0 - x; 78 | let o2 = o * o; 79 | 80 | // (1 - x) ^ 5 81 | o2 * o2 * o 82 | } 83 | 84 | // next, let's make a struct to help storing arrays, and do operations on them 85 | 86 | /// Array, containing NUM_FORMANTS floats. Used to store per-formant data 87 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 88 | pub struct Array([f32; NUM_FORMANTS]); 89 | 90 | impl Array { 91 | /// makes a new Array from a given array 92 | #[inline] 93 | pub fn new(arr: [f32; NUM_FORMANTS]) -> Self { 94 | Self(arr) 95 | } 96 | 97 | /// make a new array from a given function 98 | #[inline] 99 | pub fn from_func f32>(mut f: F) -> Self { 100 | Self([(); NUM_FORMANTS].map(|_| f())) 101 | } 102 | 103 | /// makes a new array and fills it with a single element 104 | #[inline] 105 | pub fn splat(val: f32) -> Self { 106 | Self([val; NUM_FORMANTS]) 107 | } 108 | 109 | /// do something for every value in the array 110 | #[inline] 111 | pub fn map f32>(self, f: F) -> Self { 112 | Self(self.0.map(f)) 113 | } 114 | 115 | /// do something for every value in this array and the other 116 | #[inline] 117 | pub fn map2 f32>(self, other: Self, f: F) -> Self { 118 | Self(core::array::from_fn(|i| f(self.0[i], other.0[i]))) 119 | } 120 | 121 | /// sums all elements in an array together 122 | #[inline] 123 | pub fn sum(self) -> f32 { 124 | 
self.0.iter().sum::() 125 | } 126 | 127 | /// take the min of 2 arrays, element wise 128 | #[inline] 129 | pub fn min(self, other: Self) -> Self { 130 | self.map2(other, f32::min) 131 | } 132 | 133 | /// blend two arrays, based on some blend value 134 | #[inline] 135 | pub fn blend(self, other: Self, alpha: f32) -> Self { 136 | self.map2(other, |a, b| a * (1.0 - alpha) + b * alpha) 137 | } 138 | 139 | /// blend two arrays, based on some blend array 140 | #[inline] 141 | pub fn blend_multiple(self, other: Self, alpha: Array) -> Self { 142 | self * (Array::splat(1.0) - alpha) + other * alpha 143 | } 144 | 145 | /// hyperbolic tangent approximation 146 | #[inline] 147 | pub fn tan_approx(self) -> Self { 148 | self.map(tan_approx) 149 | } 150 | 151 | /// exp(-tau * x) approximation 152 | #[inline] 153 | pub fn exp_approx(self) -> Self { 154 | self.map(exp_approx) 155 | } 156 | } 157 | 158 | // and arithmatic 159 | // using the Op traits to make life easier here, this way we can just do +, - * and / 160 | impl Add for Array { 161 | type Output = Self; 162 | /// adds the values in two arrays together 163 | #[inline] 164 | fn add(self, other: Self) -> Self { 165 | self.map2(other, |a, b| a + b) 166 | } 167 | } 168 | 169 | impl AddAssign for Array { 170 | /// adds the other array to this array 171 | #[inline] 172 | fn add_assign(&mut self, other: Self) { 173 | *self = *self + other; 174 | } 175 | } 176 | 177 | impl Sub for Array { 178 | type Output = Self; 179 | /// subtracts the values in an array from another 180 | #[inline] 181 | fn sub(self, other: Self) -> Self { 182 | self.map2(other, |a, b| a - b) 183 | } 184 | } 185 | 186 | impl Mul for Array { 187 | type Output = Self; 188 | /// multiplies the values in two arrays together 189 | #[inline] 190 | fn mul(self, other: Self) -> Self { 191 | self.map2(other, |a, b| a * b) 192 | } 193 | } 194 | 195 | impl Div for Array { 196 | type Output = Self; 197 | /// divides the values of one array with another 198 | #[inline] 199 
| fn div(self, other: Self) -> Self { 200 | self.map2(other, |a, b| a / b) 201 | } 202 | } 203 | 204 | impl Neg for Array { 205 | type Output = Self; 206 | /// negates all values in the array 207 | #[inline] 208 | fn neg(self) -> Self { 209 | self.map(|x| -x) 210 | } 211 | } 212 | 213 | // As well as a few utils to do better random generation, we want to make a few structs to help with generating noise 214 | /// Value noise 215 | /// This noise works by generating two values, and interpolates between them to generate the noise 216 | /// once it has gone too far, it generates a new next point to interpolate to 217 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 218 | pub struct ValueNoise { 219 | current: f32, 220 | next: f32, 221 | phase: f32, 222 | state: u32, 223 | } 224 | 225 | impl ValueNoise { 226 | /// make a new value noise generator, from the given seed 227 | pub fn new(state: &mut u32) -> Self { 228 | let current = random_f32(state); 229 | let next = random_f32(state); 230 | 231 | Self { 232 | current, 233 | next, 234 | phase: 0.0, 235 | state: *state, 236 | } 237 | } 238 | 239 | /// generate the next value, and update the internal state 240 | pub fn next(&mut self, increment: f32) -> f32 { 241 | // increment the state 242 | self.phase += increment; 243 | 244 | // wrap it around if needed 245 | if self.phase > 1.0 { 246 | self.phase -= 1.0; 247 | 248 | // also update the noise 249 | self.current = self.next; 250 | self.next = random_f32(&mut self.state); 251 | } 252 | 253 | // and blend between the current and next 254 | self.current * (1.0 - self.phase) + self.next * self.phase 255 | } 256 | } 257 | 258 | // and for arrays too 259 | /// Value noise, for arrays 260 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 261 | pub struct ArrayValueNoise { 262 | current: Array, 263 | next: Array, 264 | phase: f32, 265 | state: u32, 266 | } 267 | 268 | impl ArrayValueNoise { 269 | /// generate a new value noise generator, from the given seed 270 | pub 
fn new(state: &mut u32) -> Self { 271 | let mut current = [0.0; NUM_FORMANTS]; 272 | let mut next = [0.0; NUM_FORMANTS]; 273 | 274 | // write to the arrays 275 | for i in 0..NUM_FORMANTS { 276 | current[i] = random_f32(state); 277 | next[i] = random_f32(state); 278 | } 279 | 280 | Self { 281 | current: Array::new(current), 282 | next: Array::new(next), 283 | phase: 0.0, 284 | state: *state, 285 | } 286 | } 287 | 288 | /// generate the next value, and update the internal state 289 | pub fn next(&mut self, increment: f32) -> Array { 290 | // increment the state 291 | self.phase += increment; 292 | 293 | // wrap it around if needed 294 | if self.phase > 1.0 { 295 | self.phase -= 1.0; 296 | 297 | // also update the noise 298 | self.current = self.next; 299 | 300 | // next noise step 301 | self.next = Array::from_func(|| random_f32(&mut self.state)); 302 | } 303 | 304 | // and blend between the current and next 305 | self.current * Array::splat(1.0 - self.phase) + self.next * Array::splat(self.phase) 306 | } 307 | } 308 | 309 | // next up, let's go to the audio part 310 | // we'll want a way to represent what to synthesize 311 | 312 | /// Synthesis element, describes what to synthesize 313 | /// This describes the frequency and formants to synthesize 314 | /// All frequency values are normalized to 0-1, where 0 is 0 hz, and 1 is the sample frequency 315 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 316 | pub struct SynthesisElem { 317 | /// base frequency, normalized to sample rate 318 | pub frequency: f32, 319 | 320 | /// formant frequencies, normalized to sample rate 321 | pub formant_freq: Array, 322 | 323 | /// formant bandwidth frequencies, normalized to sample rate 324 | pub formant_bw: Array, 325 | 326 | /// formant smoothness, how much the higher frequencies are filtered out 327 | pub formant_smooth: Array, 328 | 329 | /// how breathy each formant is. 
0 means fully voiced, 1 means full breath 330 | pub formant_breath: Array, 331 | 332 | /// formant turbulence, affects how much noise is blended through when the glottis is open 333 | pub formant_turb: Array, 334 | 335 | /// formant amplitudes. If these sum up to one, the output amplitude will also be one 336 | pub formant_amp: Array, 337 | } 338 | 339 | // next, make some functions for the element 340 | // we want to make one from some sample rate, make one with the given sample rate, and blend them 341 | impl SynthesisElem { 342 | /// make a new synthesis element. For unit gain, formant_amp should sum up to 1 343 | pub fn new( 344 | sample_rate: f32, 345 | frequency: f32, 346 | formant_freq: [f32; NUM_FORMANTS], 347 | formant_smooth: [f32; NUM_FORMANTS], 348 | formant_bw: [f32; NUM_FORMANTS], 349 | formant_breath: [f32; NUM_FORMANTS], 350 | formant_turb: [f32; NUM_FORMANTS], 351 | formant_amp: [f32; NUM_FORMANTS], 352 | ) -> Self { 353 | // make a new element, and then resample it to the appropriate sample rate 354 | Self { 355 | frequency, 356 | formant_freq: Array::new(formant_freq), 357 | formant_bw: Array::new(formant_bw), 358 | formant_smooth: Array::new(formant_smooth), 359 | formant_breath: Array::new(formant_breath), 360 | formant_turb: Array::new(formant_turb), 361 | formant_amp: Array::new(formant_amp), 362 | } 363 | .resample(1.0, sample_rate) 364 | } 365 | 366 | /// create a new silent item 367 | pub fn silent() -> Self { 368 | Self { 369 | frequency: 0.25, 370 | formant_freq: Array::splat(0.25), 371 | formant_bw: Array::splat(0.25), 372 | formant_smooth: Array::splat(0.25), 373 | formant_breath: Array::splat(0.0), 374 | formant_turb: Array::splat(0.0), 375 | formant_amp: Array::splat(0.0), 376 | } 377 | } 378 | 379 | /// Make a new one with the default sample rate 380 | /// Also ensure that the formant amplitudes sum up to 1 to get unit gain 381 | pub fn new_phoneme( 382 | formant_freq: [f32; NUM_FORMANTS], 383 | formant_bw: [f32; NUM_FORMANTS], 384 
| formant_smooth: [f32; NUM_FORMANTS], 385 | formant_turb: [f32; NUM_FORMANTS], 386 | formant_breath: [f32; NUM_FORMANTS], 387 | formant_amp: [f32; NUM_FORMANTS], 388 | ) -> Self { 389 | Self { 390 | frequency: 0.0, 391 | formant_freq: Array::new(formant_freq), 392 | formant_bw: Array::new(formant_bw), 393 | formant_smooth: Array::new(formant_smooth), 394 | formant_breath: Array::new(formant_breath), 395 | formant_turb: Array::new(formant_turb), 396 | 397 | // divide it by the sum of the entire amplitudes, that way we get unit gain 398 | formant_amp: Array::new(formant_amp) / Array::splat(Array::new(formant_amp).sum()), 399 | } 400 | .resample(1.0, DEFAULT_SAMPLE_RATE) 401 | } 402 | /// blend between this synthesis element and another one 403 | #[inline] 404 | pub fn blend(self, other: Self, alpha: f32) -> Self { 405 | Self { 406 | frequency: self.frequency * (1.0 - alpha) + other.frequency * alpha, 407 | formant_freq: self.formant_freq.blend(other.formant_freq, alpha), 408 | formant_smooth: self.formant_smooth.blend(other.formant_smooth, alpha), 409 | formant_bw: self.formant_bw.blend(other.formant_bw, alpha), 410 | formant_turb: self.formant_turb.blend(other.formant_turb, alpha), 411 | formant_breath: self.formant_breath.blend(other.formant_breath, alpha), 412 | formant_amp: self.formant_amp.blend(other.formant_amp, alpha), 413 | } 414 | } 415 | 416 | /// resample the synthesis element to a new sample rate 417 | #[inline] 418 | pub fn resample(self, old_sample_rate: f32, new_sample_rate: f32) -> Self { 419 | // scale factor for the sample rate 420 | let scale = old_sample_rate / new_sample_rate; 421 | 422 | // get the new frequency 423 | let formant_freq = self.formant_freq * Array::splat(scale); 424 | 425 | Self { 426 | // make sure it doesn't go above nyquist 427 | frequency: (self.frequency * scale).min(0.5), 428 | formant_freq: (self.formant_freq * Array::splat(scale)).min(Array::splat(0.5)), 429 | formant_bw: self.formant_bw * Array::splat(scale), 430 | 
formant_smooth: self.formant_smooth * Array::splat(scale), 431 | 432 | // drop all values above nyquist 433 | formant_amp: self 434 | .formant_amp 435 | .map2(formant_freq, |amp, freq| if freq > 0.5 { 0.0 } else { amp }), 436 | 437 | // leave the rest intact 438 | ..self 439 | } 440 | } 441 | 442 | /// copy it with a different frequency 443 | /// frequency is already divided by the sample rate here 444 | #[inline] 445 | pub fn copy_with_frequency(self, frequency: f32) -> Self { 446 | Self { 447 | frequency: frequency.min(0.5), 448 | ..self 449 | } 450 | } 451 | 452 | /// copy it without any sound 453 | #[inline] 454 | pub fn copy_silent(self) -> Self { 455 | Self { 456 | formant_amp: Array::splat(0.0), 457 | ..self 458 | } 459 | } 460 | } 461 | 462 | // next we'll want to synthesize some audio 463 | // for that, we'll use an iterator 464 | // it keeps track of the filter states, and the underlying iterator to get synthesis elements from 465 | // if iterators aren't available in the language you are porting to, use a function to get the next item from some state instead 466 | 467 | /// Synthesizer for synthesizing actual audio samples from synthesis elements 468 | /// this is created by calling .synthesize() on an iterator that produces synthesis elements 469 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 470 | pub struct Synthesize> { 471 | /// underlying iterator 472 | iter: T, 473 | 474 | /// phase of the carrier 475 | phase: f32, 476 | 477 | /// lowpass filter state 478 | filter_state_a: Array, 479 | 480 | /// svf filter state 1 (ic1eq) 481 | filter_state_b: Array, 482 | 483 | /// svf filter statee 2 (ic2eq) 484 | filter_state_c: Array, 485 | 486 | /// noise state 487 | seed: u32, 488 | } 489 | 490 | // TODO: voice here? 
// needed because we probably want jitter to read it's parameters from voice, but we can do that later if really needed, and just pass voice.param_a in there

// next up, implement iterator for the synthesizer, which takes care of synthesizing sound (in samples) from synthesis elements
impl<T: Iterator<Item = SynthesisElem>> Iterator for Synthesize<T> {
    type Item = f32;

    /// generate the next audio sample, or None once the underlying
    /// iterator of synthesis elements runs out
    fn next(&mut self) -> Option<Self::Item> {
        // get the item from the underlying iterator, or return None if we can't
        let elem = self.iter.next()?;

        // generate an anti-aliased saw wave
        // polyblep offset, to do antialiasing: smooths the saw's discontinuity
        // over the two samples around the phase wrap point
        let polyblep = if self.phase < elem.frequency {
            // if we're at the first sample, smooth it a bit
            let t = self.phase / elem.frequency;
            2.0 * t - (t * t) - 1.0
        } else if self.phase > (1.0 - elem.frequency) {
            // same for last sample
            let t = (self.phase - 1.0) / elem.frequency;
            (t * t) + 2.0 * t + 1.0
        } else {
            // otherwise, no smoothing needed
            0.0
        };

        // saw wave, splatted so the same carrier drives every formant
        let saw_wave = Array::splat((2.0 * self.phase - 1.0) - polyblep);

        // increment phase
        self.phase += elem.frequency;

        // wrap around, as this can't be above nyquist we can avoid fract
        if self.phase >= 1.0 {
            self.phase -= 1.0;
        }

        // [-1, 1] range
        let noise = Array::splat(random_f32(&mut self.seed));

        // apply turbulence and noise: blend saw and noise per formant,
        // according to how breathy each formant is
        let noise_wave = saw_wave.blend_multiple(noise, elem.formant_breath);

        // get the filter alpha
        // we can get the parameter for the filter from the cutoff frequency with exp(-2*pi*x), which is exp_approx!
        let alpha = elem.formant_smooth.exp_approx();

        // apply a low pass filter, single pole
        self.filter_state_a += (Array::splat(1.0) - alpha) * (noise_wave - self.filter_state_a);

        // get the result from the filter
        let glottal_wave = self.filter_state_a;

        // apply turbulence noise: modulate the glottal wave by noise, per formant
        let turbulence_wave =
            glottal_wave * Array::splat(1.0).blend_multiple(noise, elem.formant_turb);

        // apply amplitude and scale it
        // makes sure it's the right amplitude to not make the filter go out of the [-1, 1] range
        // TODO: how
        let v0 = turbulence_wave * elem.formant_amp;

        // state variable filter
        // https://cytomic.com/files/dsp/SvfLinearTrapOptimised2.pdf
        // set the parameters
        let g = elem.formant_freq.tan_approx();

        // k = 1 / Q, and Q = f_r / delta_f, where f_r is the resonant frequency, and delta_f is the bandwidth
        let k = elem.formant_bw / elem.formant_freq;

        let a1 = Array::splat(1.0) / (Array::splat(1.0) + g * (g + k));
        let a2 = g * a1;
        let a3 = g * a2;

        // step the filter forwards to get the next state
        let v3 = v0 - self.filter_state_c;
        let v1 = a1 * self.filter_state_b + a2 * v3;
        let v2 = self.filter_state_c + a2 * self.filter_state_b + a3 * v3;

        // update actual state
        self.filter_state_b = Array::splat(2.0) * v1 - self.filter_state_b;
        self.filter_state_c = Array::splat(2.0) * v2 - self.filter_state_c;

        // and the bandpass result: sum the bands of all formants, scaled down
        let res = v1.sum() * 0.5;

        // and return the wave, scaled by amplitude
        Some(res)
    }
}

// and we want to be able to easily make a synthesizer, so make a trait for it
pub trait IntoSynthesize
where
    Self: IntoIterator<Item = SynthesisElem> + Sized,
{
    /// creates a new synthesizer from this iterator
    fn synthesize(self) -> Synthesize<Self::IntoIter> {
        Synthesize {
            iter:
self.into_iter(),
            phase: 0.0,
            filter_state_a: Array::splat(0.0),
            filter_state_b: Array::splat(0.0),
            filter_state_c: Array::splat(0.0),
            seed: 0,
        }
    }
}

// implement it for anything that can become the right iterator
impl<T> IntoSynthesize for T where T: IntoIterator<Item = SynthesisElem> + Sized {}

// ensure the peak values don't exceed 1.0
// TODO(review): test body still needs to be written
#[test]
fn synthesize_normalized() {}

// ensure resampling gives a similar output
// TODO(review): test body still needs to be written
#[test]
fn synthesize_resampled() {}

// that's it, sound synthesis done
// before we continue, we'd like to set up the internal representation for voices
// a voice consists of a number of synthesis elements, each assigned to a phoneme
// a phoneme is the smallest sound in speech, so we can use that for the internal representation nicely
// the downside is that there are quite a few

// first, set up the enum for all phonemes
// TODO: IPA or some reduced set?
// a reduced set makes it easier to make voices

// macro to generate the phonemes
// takes a list of (Uppercase lowercase example) triples, one per phoneme,
// then generates the Phoneme enum, the VoiceStorage struct and its functions
macro_rules! make_phonemes {
    ($($upper:ident $lower:ident $example:ident,)*) => {

        /// Represents all phonemes.
        /// This is a subset of the IPA,
        /// with a few extra special phonemes to help with properly converting the sounds represented in grail to actual sounds
        /// Most notably grail can't properly represent diphthongs and plosives with a single synthesis element,
        /// so it's required to add a few special marker phonemes to add these
        #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
        pub enum Phoneme {
            /// Silence, will fade in/fade out any other phonemes surrounding it
            /// use when silence is intended
            Silence,

            /// glottal stop, behaves similarly to silence,
            /// but should be used when a glottal stop is intended.
            /// This is required for some phonemes to sound correct
            Stop,

            /// Blend the next phoneme into the other seamlessly, useful for indicating diphthongs
            Glide,
            // insert all uppercase phonemes into the enum, with the examples as in the documentation
            $(
                #[doc = concat!("as in ", stringify!($example))]
                $upper,
            )*
        }

        /// Stores all synthesis elements for the phonemes that have an associated sound
        #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
        pub struct VoiceStorage {
            // insert all lowercase phonemes as a field
            $(
                #[doc = concat!("as in ", stringify!($example))]
                pub $lower: SynthesisElem,
            )*
        }

        impl VoiceStorage {

            /// retrieve a synthesis elem based on the given phoneme,
            /// or None for the special marker phonemes (Silence, Stop, Glide)
            pub fn get(self, phoneme: Phoneme) -> Option<SynthesisElem> {
                match phoneme {
                    Phoneme::Silence | Phoneme::Stop | Phoneme::Glide => None,
                    $(
                        Phoneme::$upper => Some(self.$lower),
                    )*
                }
            }

            /// run a function on all phonemes
            pub fn for_all(&mut self, func: fn(Phoneme, &mut SynthesisElem)) {
                $(
                    func(Phoneme::$upper, &mut self.$lower);
                )*
            }
        }
    }
}

// make the phoneme structs
684 | // see https://en.wikipedia.org/wiki/Help:IPA 685 | // TODO! 686 | make_phonemes!( 687 | A a test, 688 | E e test, 689 | ); 690 | 691 | // and next, the full voice 692 | // which is just the voice storage + extra parameters for intonation 693 | 694 | /// A voice containing all needed parameters to synthesize sound from some given phonemes 695 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 696 | pub struct Voice { 697 | /// sample rate this voice is at 698 | pub sample_rate: f32, 699 | 700 | /// phonemes, to generate sound 701 | pub phonemes: VoiceStorage, 702 | 703 | /// center frequency for the voice 704 | pub center_frequency: f32, 705 | 706 | /// frequency at which to jitter things, to improve voice naturalness 707 | pub jitter_frequency: f32, 708 | 709 | /// how much to jitter the base frequency 710 | pub jitter_delta_frequency: f32, 711 | 712 | /// how much to jitter the formant frequencies 713 | pub jitter_delta_formant_frequency: f32, 714 | 715 | /// how much to jitter the formant amplitudes 716 | pub jitter_delta_amplitude: f32, 717 | } 718 | 719 | // We also want to jitter all frequencies a bit for more realism, so let's do that next 720 | 721 | // now we can make our jitter work, as getting random numbers is now easier 722 | // all frequencies are in normalized form, so 1.0 is the sample frequency 723 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 724 | pub struct Jitter> { 725 | /// underlying iterator 726 | iter: T, 727 | 728 | /// noise for the frequency 729 | freq_noise: ValueNoise, 730 | 731 | /// noise for the formant frequency 732 | formant_freq_noise: ArrayValueNoise, 733 | 734 | /// noise for the formant amplitude 735 | formant_amp_noise: ArrayValueNoise, 736 | 737 | /// noise frequency 738 | frequency: f32, 739 | 740 | /// frequency deviation 741 | delta_frequency: f32, 742 | 743 | /// formant deviation 744 | delta_formant_freq: f32, 745 | 746 | /// amplitude deviation 747 | delta_amplitude: f32, 748 | } 749 | 750 | impl> 
Iterator for Jitter { 751 | type Item = SynthesisElem; 752 | 753 | fn next(&mut self) -> Option { 754 | // get the next element from the underlying iterator 755 | let mut elem = self.iter.next()?; 756 | 757 | // gather all next noises 758 | let freq = self.freq_noise.next(self.frequency); 759 | let formant_freq = self.formant_freq_noise.next(self.frequency); 760 | let formant_amp = self.formant_amp_noise.next(self.frequency); 761 | 762 | // change them in the element 763 | elem.frequency += freq * self.delta_frequency; 764 | elem.formant_freq += formant_freq * Array::splat(self.delta_formant_freq); 765 | 766 | // we don't want it to get *louder*, so make sure it only becomes softer by doing (1 + [-1, 1]) / 2, which results in [0, 1] 767 | // we'll then multiply it by the appropriate amplitude so we can't end up with negative amplitudes for some sounds 768 | let formant_amp_delta = 769 | (formant_amp + Array::splat(1.0)) * Array::splat(0.5 * self.delta_amplitude); 770 | 771 | // multiplier is 1 - x, so that it doesn't become very soft 772 | let formant_amp_mul = Array::splat(1.0) - formant_amp_delta; 773 | elem.formant_amp = elem.formant_amp * formant_amp_mul; 774 | 775 | // and return the modified element 776 | Some(elem) 777 | } 778 | } 779 | 780 | // and we want to be able to easily make the jitter iterator 781 | pub trait IntoJitter 782 | where 783 | Self: IntoIterator + Sized, 784 | { 785 | /// creates a new synthesizer from this iterator 786 | fn jitter(self, mut seed: u32, voice: Voice) -> Jitter { 787 | Jitter { 788 | iter: self.into_iter(), 789 | freq_noise: ValueNoise::new(&mut seed), 790 | formant_freq_noise: ArrayValueNoise::new(&mut seed), 791 | formant_amp_noise: ArrayValueNoise::new(&mut seed), 792 | frequency: voice.jitter_frequency, 793 | delta_frequency: voice.jitter_delta_frequency, 794 | delta_formant_freq: voice.jitter_delta_formant_frequency, 795 | delta_amplitude: voice.jitter_delta_amplitude, 796 | } 797 | } 798 | } 799 | 800 | // implement 
it for anything that can become the right iterator
impl<T> IntoJitter for T where T: IntoIterator<Item = SynthesisElem> + Sized {}

// ensure it doesn't exceed the parameter bounds
// TODO(review): test body still needs to be written
#[test]
fn jitter_within_bounds() {}

// we now have a way to synthesize sound, and add random variations to it.
// However, generating the individual samples is kinda a hassle to do, so it would be nicer if we can give each synthesis element a length
// and then generate the right sequence from that
// so, we'll create a sequencer that does this

// for this, we'll first need a struct to help with adding the time
/// A synthesis element (or silence) paired with its timing information
#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
pub struct SequenceElem {
    /// the synthesis element
    /// Some if there, None if silent
    pub elem: Option<SynthesisElem>,

    /// time this element lasts for
    pub length: f32,

    /// time the blending lasts for
    pub blend_length: f32,
}

impl SequenceElem {
    /// make a new element
    pub fn new(elem: Option<SynthesisElem>, length: f32, blend_length: f32) -> Self {
        Self {
            elem,
            length,
            blend_length,
        }
    }
}

/// Sequencer, given a time and blend time, it generates the right amount of samples
#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
pub struct Sequencer<T: Iterator<Item = SequenceElem>> {
    /// underlying iterator
    iter: T,

    /// current element
    cur_elem: Option<SequenceElem>,

    /// next element
    next_elem: Option<SequenceElem>,

    /// time remaining for this element
    time: f32,

    /// sample time, how long a sample lasts (1 / sample rate)
    delta_time: f32,
}

impl<T: Iterator<Item = SequenceElem>> Iterator for Sequencer<T> {
    type Item = SynthesisElem;

    /// produce one synthesis element per output sample, blending between
    /// the current and next sequence elements over the blend length
    fn next(&mut self) -> Option<Self::Item> {
        // decrease the amount of remaining time
        self.time -= self.delta_time;

        // if this is now below 0, we go to the next pair
        if self.time < 0.0 {
            // figure out what to do next
            match (self.cur_elem, self.next_elem) {
                // we have both, get a new one
                (Some(_), Some(a)) => {
                    self.cur_elem = self.next_elem;
                    self.next_elem = self.iter.next();

                    // set the time (adding, so leftover negative time carries over)
                    self.time += a.length;
                }
                // we have none, fetch new ones
                (None, None) => {
                    self.cur_elem = self.iter.next();
                    self.next_elem = self.iter.next();

                    // if we have the current one, set the time
                    if let Some(a) = self.cur_elem {
                        self.time += a.length;
                    }
                }
                // for the rest, we can simply exit early
                _ => return None,
            }
        }

        // and match on what to do, based on which elements carry sound
        match (
            self.cur_elem,
            self.cur_elem.and_then(|x| x.elem),
            self.next_elem.and_then(|x| x.elem),
        ) {
            // both elements, all are on
            (Some(a), Some(b), Some(c)) => {
                // get the blend amount
                let alpha = (self.time / a.blend_length).min(1.0);

                // and blend the 2, because alpha goes from 1 to 0, we need to blend in the other order
                Some(c.blend(b, alpha))
            }

            // only the first one has sound, blend to silence
            (Some(a), Some(b), None) => {
                // get the blend amount
                let alpha = (self.time / a.blend_length).min(1.0);

                // and blend with a silent one
                Some(b.copy_silent().blend(b, alpha))
            }

            // only the second one has sound, blend from silence
            (Some(a), None, Some(c)) => {
                // get the blend amount
                let alpha = (self.time / a.blend_length).min(1.0);

                // and blend with a silent one
                Some(c.blend(c.copy_silent(), alpha))
            }

            // both silent
            (Some(_), None, None) => {
                // just return silence
                Some(SynthesisElem::silent())
            }

            // nothing else, return none
            _ => None,
        }
    }
}

// and implement an easy way to get the iterator
pub trait IntoSequencer
where
    Self:
IntoIterator + Sized, 939 | { 940 | /// creates a new sequencer, from a given voice 941 | fn sequence(self, voice: Voice) -> Sequencer { 942 | Sequencer { 943 | iter: self.into_iter(), 944 | delta_time: 1.0 / voice.sample_rate, 945 | cur_elem: None, 946 | next_elem: None, 947 | time: 0.0, 948 | } 949 | } 950 | } 951 | 952 | // implement it for anything that can become the right iterator 953 | impl IntoSequencer for T where T: IntoIterator + Sized {} 954 | 955 | // TODO: how test 956 | 957 | // next up, we'll want to go from time + phoneme info to a sequence element, so let's do that 958 | // first, we'll want a new struct to also store timing info with phonemes 959 | 960 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 961 | pub struct PhonemeElem { 962 | /// the phoneme 963 | pub phoneme: Phoneme, 964 | 965 | /// total length 966 | pub length: f32, 967 | 968 | /// length of blending 969 | pub blend_length: f32, 970 | 971 | /// the base frequency, normalized, so 1.0 is the sample frequency 972 | pub frequency: f32, 973 | } 974 | 975 | // and we'll want to make the selector next. 976 | // this simply selects the right synthesis elem from a voice 977 | // as well as makes sure a silence is blended correctly 978 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 979 | pub struct Selector> { 980 | /// underlying iterator 981 | iter: T, 982 | 983 | /// underlying voice storage to get voice data from 984 | voice_storage: VoiceStorage, 985 | } 986 | 987 | impl> Iterator for Selector { 988 | type Item = SequenceElem; 989 | 990 | fn next(&mut self) -> Option { 991 | // get the next item if we can 992 | let phoneme = self.iter.next()?; 993 | 994 | // get the right phoneme, or none if it's silent. 
995 | // this allows correct blending later on 996 | let elem = self.voice_storage.get(phoneme.phoneme); 997 | 998 | // and put it in a sequence element 999 | Some(SequenceElem::new( 1000 | // if there is any, copy it with the right frequency 1001 | elem.map(|x| x.copy_with_frequency(phoneme.frequency)), 1002 | phoneme.length, 1003 | phoneme.blend_length, 1004 | )) 1005 | } 1006 | } 1007 | 1008 | pub trait IntoSelector 1009 | where 1010 | Self: IntoIterator + Sized, 1011 | { 1012 | /// creates a selector from the given voice 1013 | fn select(self, voice: Voice) -> Selector { 1014 | Selector { 1015 | iter: self.into_iter(), 1016 | voice_storage: voice.phonemes, 1017 | } 1018 | } 1019 | } 1020 | 1021 | // implement it for anything that can become the right iterator 1022 | impl IntoSelector for T where T: IntoIterator + Sized {} 1023 | 1024 | // now, we need to do some more complex stuff again. 1025 | // so far we got most of the sound generating "backend" done, now time for the "frontend" 1026 | // this needs to take in text and convert it into phonemes + timing. 1027 | // let's first make the rules we use for text -> phoneme 1028 | 1029 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] 1030 | pub struct TranscriptionRule<'a> { 1031 | /// string to compare agains 1032 | pub string: &'a str, 1033 | 1034 | /// phonemes to generate from this 1035 | pub phonemes: &'a [Phoneme], 1036 | } 1037 | 1038 | // now make the actual language, which is just a set of transcription rules 1039 | pub struct Language<'a> { 1040 | /// rules for the language to transcribe phonemes 1041 | pub rules: &'a [TranscriptionRule<'a>], 1042 | 1043 | /// whether the language is case-sensitive 1044 | pub case_sensitive: bool, 1045 | } 1046 | 1047 | // next up, the intonator. 
1048 | // this will add intonation to any phoneme sequence 1049 | pub struct Intonator> { 1050 | /// underlying iterator 1051 | iter: T, 1052 | 1053 | /// center frequency for the voice 1054 | center_frequency: f32, 1055 | } 1056 | 1057 | impl> Iterator for Intonator { 1058 | type Item = PhonemeElem; 1059 | fn next(&mut self) -> Option { 1060 | let phon = self.iter.next()?; 1061 | 1062 | // TODO: apply intonation 1063 | 1064 | // TODO: speaking rate 1065 | 1066 | // TODO: give certain phonemes a length 1067 | 1068 | Some(PhonemeElem { 1069 | phoneme: phon, 1070 | length: 0.5, 1071 | blend_length: 0.5, 1072 | frequency: self.center_frequency, 1073 | }) 1074 | } 1075 | } 1076 | 1077 | pub trait IntoIntonator 1078 | where 1079 | Self: IntoIterator + Sized, 1080 | { 1081 | fn intonate(self, _language: Language, voice: Voice) -> Intonator { 1082 | Intonator { 1083 | iter: self.into_iter(), 1084 | center_frequency: voice.center_frequency, 1085 | } 1086 | } 1087 | } 1088 | 1089 | impl IntoIntonator for T where T: IntoIterator + Sized {} 1090 | 1091 | // TODO: how test? 1092 | 1093 | // now we want to convert text into phonemes 1094 | // we're going to do this with a find-and-replace ruleset, as defined in language. 
1095 | // this is assumed to be sorted, so we can binary search with the prefix, 1096 | // to figure out the range we need to search in and see if it's too low or too high 1097 | 1098 | pub struct Transcriber<'a, T: Iterator> { 1099 | /// underlying iterator 1100 | iter: Peekable, 1101 | 1102 | /// ruleset to use 1103 | ruleset: &'a [TranscriptionRule<'a>], 1104 | 1105 | /// whether we are case sensitive to match 1106 | // TODO: replace this with a better ruleset, as in: each rule becomes lowercase 1107 | case_sensitive: bool, 1108 | 1109 | /// buffer for the phonemes we have now 1110 | buffer: &'a [Phoneme], 1111 | } 1112 | 1113 | // silent buffer 1114 | const SILENCE: &[Phoneme] = &[Phoneme::Silence]; 1115 | 1116 | impl<'a, T: Iterator> Iterator for Transcriber<'a, T> { 1117 | type Item = Phoneme; 1118 | fn next(&mut self) -> Option { 1119 | // initial state 1120 | let mut search_min = 0; 1121 | let mut search_max = self.ruleset.len(); 1122 | let mut index = 0; 1123 | 1124 | // search as long as we haven't found a match 1125 | while self.buffer.is_empty() { 1126 | // get the current character 1127 | let character = self.iter.peek().map(|x| { 1128 | if self.case_sensitive { 1129 | *x 1130 | } else { 1131 | x.to_ascii_lowercase() 1132 | } 1133 | })?; 1134 | 1135 | // find the new search range 1136 | // now that we have a new item, we can reduce the search range 1137 | // this is binary search, where the left half is where the lower range is lexiographically lower than the current buffer content 1138 | // because we only get one char at a time, we can assume that the previous N characters were already found and reduced the range 1139 | // so no need to keep those around anymore 1140 | let new_min = self.ruleset[search_min..search_max] 1141 | .partition_point(|x| x.string.chars().nth(index).map_or(true, |x| x < character)) 1142 | + search_min; 1143 | 1144 | // same for the upper range, but now it's lower or equal 1145 | let new_max = 
self.ruleset[search_min..search_max].partition_point(|x| { 1146 | x.string 1147 | .chars() 1148 | .nth(index) 1149 | .map_or(false, |x| x <= character) 1150 | }) + search_min; 1151 | 1152 | // now decide on where to go 1153 | if new_min >= new_max && self.ruleset[search_min].string.chars().count() == index { 1154 | // if the new range is invalid, but the old search range lower bound matched, grab that 1155 | self.buffer = self.ruleset[search_min].phonemes; 1156 | } else if new_min >= new_max { 1157 | // if they are not equal but no previous search range matched, return silence 1158 | self.buffer = SILENCE; 1159 | 1160 | // advance, as we don't need this character for the next iteration, because it was part of a garbled sequence anyway 1161 | self.iter.next(); 1162 | } else { 1163 | // otherwise, we are still running 1164 | search_min = new_min; 1165 | search_max = new_max; 1166 | index += 1; 1167 | 1168 | // advance so we can check the next character 1169 | self.iter.next(); 1170 | 1171 | // if this fails, we won't be able to peek next iteration, so see if we can emit the final rule 1172 | if self.iter.peek().is_none() 1173 | && self.ruleset[search_min].string.chars().count() == index 1174 | { 1175 | self.buffer = self.ruleset[search_min].phonemes; 1176 | } else if self.iter.peek().is_none() { 1177 | // if no match could be found, indicate so by emitting a silence 1178 | self.buffer = SILENCE; 1179 | } 1180 | } 1181 | } 1182 | // try and get the first item if we have that 1183 | let result = self.buffer.get(0); 1184 | 1185 | // remove the first item if we can, and set the buffer to the rest of the remaining buffer 1186 | self.buffer = self.buffer.get(1..).unwrap_or(&[]); 1187 | 1188 | // return the result 1189 | result.copied() 1190 | } 1191 | } 1192 | 1193 | pub trait IntoTranscriber 1194 | where 1195 | Self: IntoIterator + Sized, 1196 | { 1197 | fn transcribe(self, language: Language) -> Transcriber { 1198 | Transcriber { 1199 | iter: self.into_iter().peekable(), 
1200 | ruleset: language.rules, 1201 | buffer: SILENCE, 1202 | case_sensitive: language.case_sensitive, 1203 | } 1204 | } 1205 | } 1206 | 1207 | impl IntoTranscriber for T where T: IntoIterator + Sized {} 1208 | 1209 | // test the transcriber for correct behaviour 1210 | #[test] 1211 | fn transcribe_unique() { 1212 | let mut transcriber = Transcriber { 1213 | iter: "abc".chars().peekable(), 1214 | ruleset: &[ 1215 | TranscriptionRule { 1216 | string: "ab", 1217 | phonemes: &[Phoneme::A], 1218 | }, 1219 | TranscriptionRule { 1220 | string: "c", 1221 | phonemes: &[Phoneme::E], 1222 | }, 1223 | ], 1224 | buffer: &[], 1225 | case_sensitive: false, 1226 | }; 1227 | 1228 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1229 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1230 | assert_eq!(transcriber.next(), None); 1231 | } 1232 | 1233 | #[test] 1234 | fn transcribe_same_start() { 1235 | let mut transcriber = Transcriber { 1236 | iter: "abacab".chars().peekable(), 1237 | ruleset: &[ 1238 | TranscriptionRule { 1239 | string: "ab", 1240 | phonemes: &[Phoneme::A], 1241 | }, 1242 | TranscriptionRule { 1243 | string: "ac", 1244 | phonemes: &[Phoneme::E], 1245 | }, 1246 | ], 1247 | buffer: &[], 1248 | case_sensitive: false, 1249 | }; 1250 | 1251 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1252 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1253 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1254 | assert_eq!(transcriber.next(), None); 1255 | } 1256 | 1257 | #[test] 1258 | fn transcribe_same_char_different_length() { 1259 | let mut transcriber = Transcriber { 1260 | iter: "aaa".chars().peekable(), 1261 | ruleset: &[ 1262 | TranscriptionRule { 1263 | string: "a", 1264 | phonemes: &[Phoneme::A], 1265 | }, 1266 | TranscriptionRule { 1267 | string: "aa", 1268 | phonemes: &[Phoneme::E], 1269 | }, 1270 | ], 1271 | buffer: &[], 1272 | case_sensitive: false, 1273 | }; 1274 | 1275 | // should match the longest rule when possible, so aa first, then a 1276 | 
assert_eq!(transcriber.next(), Some(Phoneme::E)); 1277 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1278 | assert_eq!(transcriber.next(), None); 1279 | } 1280 | 1281 | // same as above, but now it cuts off early so it has to find the shortest when there's a character following 1282 | #[test] 1283 | fn transcribe_same_char_different_length_cutoff() { 1284 | let mut transcriber = Transcriber { 1285 | iter: "ae".chars().peekable(), 1286 | ruleset: &[ 1287 | TranscriptionRule { 1288 | string: "a", 1289 | phonemes: &[Phoneme::A], 1290 | }, 1291 | TranscriptionRule { 1292 | string: "aa", 1293 | phonemes: &[Phoneme::E], 1294 | }, 1295 | TranscriptionRule { 1296 | string: "e", 1297 | phonemes: &[Phoneme::E], 1298 | }, 1299 | ], 1300 | buffer: &[], 1301 | case_sensitive: false, 1302 | }; 1303 | 1304 | // should match the longest rule when possible, so aa first, then a 1305 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1306 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1307 | assert_eq!(transcriber.next(), None); 1308 | } 1309 | 1310 | #[test] 1311 | fn transcribe_skip_no_matches() { 1312 | let mut transcriber = Transcriber { 1313 | iter: "abuac".chars().peekable(), 1314 | ruleset: &[ 1315 | TranscriptionRule { 1316 | string: "ab", 1317 | phonemes: &[Phoneme::A], 1318 | }, 1319 | TranscriptionRule { 1320 | string: "ac", 1321 | phonemes: &[Phoneme::E], 1322 | }, 1323 | ], 1324 | buffer: &[], 1325 | case_sensitive: false, 1326 | }; 1327 | 1328 | // should match the longest rule when possible, so aa first, then a 1329 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1330 | assert_eq!(transcriber.next(), Some(Phoneme::Silence)); 1331 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1332 | assert_eq!(transcriber.next(), None); 1333 | } 1334 | 1335 | #[test] 1336 | fn transcribe_skip_partial_match_at_end() { 1337 | let mut transcriber = Transcriber { 1338 | iter: "abaca".chars().peekable(), 1339 | ruleset: &[ 1340 | TranscriptionRule { 1341 | string: "ab", 
1342 | phonemes: &[Phoneme::A], 1343 | }, 1344 | TranscriptionRule { 1345 | string: "ac", 1346 | phonemes: &[Phoneme::E], 1347 | }, 1348 | ], 1349 | buffer: &[], 1350 | case_sensitive: false, 1351 | }; 1352 | 1353 | // should match the longest rule when possible, so aa first, then a 1354 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1355 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1356 | assert_eq!(transcriber.next(), Some(Phoneme::Silence)); 1357 | assert_eq!(transcriber.next(), None); 1358 | } 1359 | 1360 | // Here's how it will work 1361 | // synthesizer iterator to generate sound 1362 | // jitter iterator to add randomness to the frequencies 1363 | // sequencer iterator to blend phonemes 1364 | // intonator to add intonation 1365 | // transcriber to transcribe between text and phoneme 1366 | // parser to parse text and handle potential commands 1367 | --------------------------------------------------------------------------------