├── .gitignore ├── Cargo.toml ├── src ├── voices │ ├── mod.rs │ └── generic.rs ├── languages │ └── mod.rs └── lib.rs ├── LICENCE_MIT ├── README.md ├── examples ├── interactive.rs └── cli.rs └── LICENCE-APACHE /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | *.wav -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "grail-rs" 3 | description = "A simple formant speech synthesizer" 4 | version = "0.0.0" 5 | license = "MIT OR Apache-2.0" 6 | repository = "https://github.com/Dimev/grail-rs" 7 | documentation = "https://docs.rs/grail-rs" 8 | keywords = ["speech", "audio"] 9 | categories = ["multimedia"] 10 | edition = "2021" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | # no dependencies needed 16 | 17 | [dev-dependencies] 18 | # but we do need this for examples 19 | cpal = "0.13.5" 20 | -------------------------------------------------------------------------------- /src/voices/mod.rs: -------------------------------------------------------------------------------- 1 | //! All voices given with grail, along with functions to get them 2 | //! 
3 | use crate::{SynthesisElem, NUM_FORMANTS}; 4 | 5 | // helper for making phonemes 6 | // if you're porting this, put this in a seperate file somewhere so you don't include all voices when including a single voice 7 | pub const MKPHON: fn( 8 | freq: [f32; NUM_FORMANTS], 9 | bw: [f32; NUM_FORMANTS], 10 | smooth: [f32; NUM_FORMANTS], 11 | turb: [f32; NUM_FORMANTS], 12 | breath: [f32; NUM_FORMANTS], 13 | amp: [f32; NUM_FORMANTS], 14 | ) -> SynthesisElem = SynthesisElem::new_phoneme; 15 | 16 | // include the voices we made 17 | pub mod generic; 18 | 19 | // and use it so we can easily get it 20 | pub use generic::generic; 21 | -------------------------------------------------------------------------------- /src/languages/mod.rs: -------------------------------------------------------------------------------- 1 | //! languages 2 | use crate::{Language, Phoneme, TranscriptionRule}; 3 | 4 | pub const fn generic() -> Language<'static> { 5 | Language { 6 | case_sensitive: false, 7 | rules: &[ 8 | TranscriptionRule { 9 | string: "a", 10 | phonemes: &[Phoneme::A], 11 | }, 12 | TranscriptionRule { 13 | string: "e", 14 | phonemes: &[Phoneme::E], 15 | }, 16 | TranscriptionRule { 17 | string: "i", 18 | phonemes: &[Phoneme::A], 19 | }, 20 | TranscriptionRule { 21 | string: "ii", 22 | phonemes: &[Phoneme::E, Phoneme::A], 23 | }, 24 | TranscriptionRule { 25 | string: "oui", 26 | phonemes: &[Phoneme::A, Phoneme::E, Phoneme::A], 27 | }, 28 | TranscriptionRule { 29 | string: "p", 30 | phonemes: &[Phoneme::Silence], 31 | }, 32 | ], 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /LICENCE_MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Dimas Leenman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/voices/generic.rs: -------------------------------------------------------------------------------- 1 | //! 
generic voice 2 | use crate::voices::MKPHON; 3 | use crate::*; 4 | 5 | pub fn generic() -> Voice { 6 | Voice { 7 | sample_rate: DEFAULT_SAMPLE_RATE, 8 | phonemes: VoiceStorage { 9 | a: MKPHON( 10 | [ 11 | 910.0, 1271.0, 2851.0, 3213.0, 1200.0, 2000.0, 3000.0, 4000.0, 12 | ], 13 | [60.0, 160.0, 180.0, 200.0, 100.0, 100.0, 100.0, 100.0], 14 | [ 15 | 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 16 | ], 17 | [0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0], 18 | [0.5, 0.2, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0], 19 | [0.3, 0.3, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], 20 | ), 21 | e: MKPHON( 22 | [ 23 | 910.0, 1871.0, 2851.0, 3213.0, 1200.0, 2000.0, 3000.0, 4000.0, 24 | ], 25 | [80.0, 180.0, 180.0, 200.0, 100.0, 100.0, 100.0, 100.0], 26 | [ 27 | 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 1600.0, 28 | ], 29 | [0.2, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4], 30 | [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1, 0.1], 31 | [0.5, 0.4, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0], 32 | ), 33 | }, 34 | center_frequency: 120.0 / DEFAULT_SAMPLE_RATE as f32, 35 | jitter_frequency: 16.0 / DEFAULT_SAMPLE_RATE as f32, 36 | jitter_delta_frequency: 6.0 / DEFAULT_SAMPLE_RATE as f32, 37 | jitter_delta_formant_frequency: 6.0 / DEFAULT_SAMPLE_RATE as f32, 38 | jitter_delta_amplitude: 0.2, 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grail-rs (Work in progress) 2 | Grail, A simple formant speech synthesizer, built for portability 3 | This is the rust version 4 | 5 | The goal of this synthesizer is to be as simple as possible, and easy to port to C and other languages if needed (I'll make a C port when this one is in a more complete state) 6 | 7 | Still heavy WIP 8 | 9 | # Roadmap: 10 | - Get the output to be roughly normalized by default (Almost done, ModFM does wonders here) 11 | - Move everything back to the 2000 line file, that was nicer to work with IMO 12 | - 
Use a different carrier for ModFM, so it's closer to what an actual speech sound wave looks like 13 | - Finish the example, get rid of rodio and dev dependencies 14 | - Make a full voice 15 | - Complete the intonator, can see a few items into the future and adjusts voice based on that (also a ruleset for this?) 16 | - Complete the text->phoneme transcription, via a find-and-replace ruleset 17 | - Make a macro to generate a language from a language file (and do sorting automatically) 18 | - Make a better way to make voices 19 | - (later) add a way to send commands to change the intonation 20 | - (later, maybe) Automatic voice replication 21 | 22 | # License 23 | Licensed under either of 24 | 25 | * Apache License, Version 2.0 26 | ([LICENSE-APACHE](LICENCE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 27 | * MIT license 28 | ([LICENSE-MIT](LICENCE_MIT) or http://opensource.org/licenses/MIT) 29 | 30 | at your option. 31 | 32 | # Contribution 33 | Unless you explicitly state otherwise, any contribution intentionally submitted 34 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 35 | dual licensed as above, without any additional terms or conditions. 
-------------------------------------------------------------------------------- /examples/interactive.rs: -------------------------------------------------------------------------------- 1 | use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; 2 | use grail_rs::{ 3 | IntoIntonator, IntoJitter, IntoSelector, IntoSequencer, IntoSynthesize, IntoTranscriber, 4 | }; 5 | use std::sync::mpsc::channel; 6 | 7 | fn main() { 8 | // get cpal's host and output device 9 | let host = cpal::default_host(); 10 | let device = host.default_output_device().expect("No audio device found"); 11 | 12 | // get a config for the stream 13 | let config = device 14 | .default_output_config() 15 | .expect("Failed to get output config"); 16 | 17 | println!( 18 | "Output device: {}, {:?}, {}", 19 | device.name().unwrap(), 20 | config.sample_rate(), 21 | config.channels() 22 | ); 23 | 24 | // num channels 25 | let num_channels = config.channels() as usize; 26 | 27 | // make the channels 28 | let (sender, receiver) = channel(); 29 | 30 | // create the audio iterator 31 | let mut iterator = std::iter::repeat_with(move || receiver.try_recv().unwrap_or(' ')) 32 | .transcribe(grail_rs::languages::generic()) 33 | .intonate(grail_rs::languages::generic(), grail_rs::voices::generic()) 34 | .select(grail_rs::voices::generic()) 35 | .sequence(grail_rs::voices::generic()) 36 | .jitter(0, grail_rs::voices::generic()) 37 | .synthesize() 38 | .flat_map(move |x| std::iter::repeat(x).take(num_channels)); 39 | 40 | // make a stream to play audio with 41 | let stream = match config.sample_format() { 42 | cpal::SampleFormat::F32 => device.build_output_stream( 43 | &config.into(), 44 | move |data: &mut [f32], _| { 45 | for i in data { 46 | *i = iterator.next().unwrap_or(0.0); 47 | } 48 | }, 49 | move |err| println!("Error: {:?}", err), 50 | ), 51 | cpal::SampleFormat::U16 => device.build_output_stream( 52 | &config.into(), 53 | move |data: &mut [u16], _| { 54 | for i in data { 55 | *i = 
((iterator.next().unwrap_or(0.0) * 0.5 + 0.5) * u16::MAX as f32) as u16; 56 | } 57 | }, 58 | move |err| println!("Error: {:?}", err), 59 | ), 60 | cpal::SampleFormat::I16 => device.build_output_stream( 61 | &config.into(), 62 | move |data: &mut [i16], _| { 63 | for i in data { 64 | *i = (iterator.next().unwrap_or(0.0) * i16::MAX as f32) as i16; 65 | } 66 | }, 67 | move |err| println!("Error: {:?}", err), 68 | ), 69 | } 70 | .expect("Failed to make stream"); 71 | 72 | // play 73 | // can't move the expect here, as stream needs to be alive long enough 74 | stream.play().expect("Failed to play audio"); 75 | 76 | // read input 77 | for line in std::io::stdin().lines().map(|x| x.unwrap()) { 78 | for character in line.trim().chars().chain(Some(' ').into_iter()) { 79 | sender.send(character).expect("Failed to send audio"); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /examples/cli.rs: -------------------------------------------------------------------------------- 1 | use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; 2 | use grail_rs::{ 3 | IntoIntonator, IntoJitter, IntoSelector, IntoSequencer, IntoSynthesize, IntoTranscriber, 4 | }; 5 | 6 | use std::env; 7 | use std::fs::File; 8 | use std::io::prelude::*; 9 | 10 | // helps to check if there's an argument 11 | fn has_argument(args: &[String], short: &str, long: &str) -> bool { 12 | // looks through the array if there is any 13 | args.contains(&short.into()) || args.contains(&long.into()) 14 | } 15 | 16 | // helps find a value in a switch, if any 17 | fn find_argument(args: &[String], short: &str, long: &str) -> Option { 18 | // find the switch, if the first value is the right flag, the value after that is the one we need 19 | args.windows(2) 20 | .find(|x| match x { 21 | [switch, value] => switch.as_str() == short || switch.as_str() == long, 22 | _ => false, 23 | }) 24 | .map(|x| x[1].clone()) 25 | } 26 | 27 | // save a wav file 28 | fn save_wav(path: 
&str, data: &[f32], sample_rate: u32) { 29 | // open a file 30 | let mut file = std::fs::File::create(path).expect("Failed to create file"); 31 | 32 | // create a wav file 33 | let bytes = &[ 34 | &b"RIFF"[..], // riff header 35 | &((36 + data.len() * 2) as u32).to_le_bytes()[..], // file size 36 | &b"WAVE"[..], // wave header 37 | &b"fmt "[..], // format 38 | &(16 as u32).to_le_bytes()[..], // sub chunk size, aka how long is this chunk 39 | &(1 as u16).to_le_bytes()[..], // format, pcm 40 | &(1 as u16).to_le_bytes()[..], // 1 channel 41 | &(sample_rate as u32).to_le_bytes()[..], // sample rate 42 | &(sample_rate as u32 * 2).to_le_bytes()[..], // byte rate (sample rate * channels * bytes per sample) 43 | &(2 as u16).to_le_bytes()[..], // block align, num channels * bytes per sample 44 | &(16 as u16).to_le_bytes()[..], // bits per sample 45 | &b"data"[..], // data header 46 | &(data.len() as u32 * 2).to_le_bytes()[..], // section size 47 | &data.iter() 48 | .map(|x| { 49 | ((x * std::i16::MAX as f32) as i16) 50 | .to_le_bytes() 51 | .into_iter() 52 | }) 53 | .flatten() 54 | .collect::>()[..], 55 | ] 56 | .iter() 57 | .map(|x| *x) 58 | .flatten() 59 | .map(|x| *x) 60 | .collect::>(); 61 | 62 | // write file 63 | file.write(&bytes).expect("failed to store"); 64 | 65 | // and store 66 | file.flush().expect("Failed to write"); 67 | } 68 | 69 | fn main() { 70 | // get the command line args 71 | let args: Vec = env::args().collect(); 72 | 73 | // figure out what to do, no args, -h or --help is print help 74 | // -v or --voice is to set the voice 75 | // -o or --output to set the output file path 76 | // -l or --langauge sets the language ruleset 77 | // -r or --resample to change the sample rate REALLY NEEDED? 
78 | // -i or --input to read from a file 79 | // -s or --silent to not play back any sound 80 | // -V or --version to display the version 81 | // anything not preceeded by a switch is assumed to be speech 82 | 83 | // the parameters we want to use as default 84 | let mut voice = String::from("sbinotto"); 85 | let mut language = String::from(""); 86 | let mut sample_rate = grail_rs::DEFAULT_SAMPLE_RATE; 87 | let mut input_file = String::new(); 88 | let mut output_file = String::new(); 89 | let mut play_sound = true; 90 | 91 | // check what we need to do 92 | if has_argument(&args, "-h", "--help") || args.len() < 2 { 93 | // print help menu 94 | println!("Grail, a rust speech synthesizer"); 95 | println!("The last argument is interpreted as text to be spoken"); 96 | println!( 97 | "So 'grail -v bob hello' will say 'hello'. -v is to set the voice, bob in this case" 98 | ); 99 | 100 | // flag descriptions 101 | println!("Flags:"); 102 | println!("-v or --voice is to set the voice"); 103 | println!("-o or --output to set the output file path"); 104 | println!("-l or --langauge sets the language ruleset"); 105 | println!("-r or --resample to change the sample rate"); 106 | println!("-i or --input to read from a file"); 107 | println!("-s or --silent to not play back any sound"); 108 | println!("-V or --version to display the version"); 109 | 110 | // list of voices 111 | println!("Voices:"); 112 | 113 | // list of languages 114 | println!("Languages:"); 115 | 116 | // stop 117 | return; 118 | } else if has_argument(&args, "-V", "--version") { 119 | // print the version 120 | println!("Grail-rs version {}", env!("CARGO_PKG_VERSION")); 121 | 122 | // stop 123 | return; 124 | } 125 | 126 | // now, parse the arguments with values 127 | if let Some(path) = find_argument(&args, "-i", "--input") { 128 | // open the file if it exists 129 | if let Ok(mut file) = File::open(path.as_str()) { 130 | // read the in file 131 | file.read_to_string(&mut input_file) 132 | .expect("Failed 
to read file"); 133 | } else { 134 | // give an error that we couldn't open the file 135 | println!("Could not open file \"{}\"", path); 136 | return; 137 | } 138 | } 139 | 140 | // set the output file, if any 141 | if let Some(path) = find_argument(&args, "-o", "--output") { 142 | output_file = path; 143 | } 144 | 145 | // do we need to be silent? 146 | if has_argument(&args, "-s", "--silent") { 147 | play_sound = false; 148 | } 149 | 150 | // what voice do we use? 151 | if let Some(speaker) = find_argument(&args, "-v", "--voice") { 152 | voice = speaker; 153 | } 154 | 155 | // figure out what to say, this is simply the last argument, if nothing is to be read from a file 156 | let to_say = if input_file != String::new() { 157 | // file was already read to here 158 | input_file 159 | } else { 160 | // read the last argument 161 | args.last().unwrap().clone() 162 | }; 163 | 164 | // Display what to say 165 | println!("\"{}\"", to_say); 166 | println!(" -- {}", voice); 167 | 168 | // synthesize the speech 169 | let mut generated_audio = Vec::with_capacity(sample_rate as usize * 4); 170 | 171 | // measure the time it takes to synthesize the audio 172 | let start = std::time::Instant::now(); 173 | 174 | // and extend the sound part with it 175 | generated_audio.extend( 176 | to_say 177 | .chars() 178 | .transcribe(grail_rs::languages::generic()) 179 | .intonate(grail_rs::languages::generic(), grail_rs::voices::generic()) 180 | .select(grail_rs::voices::generic()) 181 | .sequence(grail_rs::voices::generic()) 182 | .jitter(0, grail_rs::voices::generic()) 183 | .synthesize(), 184 | ); 185 | 186 | let duration = start.elapsed().as_micros(); 187 | 188 | // display info on how long the audio file is 189 | println!( 190 | "{:.2} seconds of audio, generated in {} microseconds", 191 | generated_audio.len() as f32 / sample_rate as f32, 192 | duration 193 | ); 194 | 195 | // if there's an output file, write to it 196 | if output_file != String::new() { 197 | println!("Writing 
generated sound to {}", output_file); 198 | 199 | // and save the file 200 | save_wav(&output_file, &generated_audio, sample_rate as u32); 201 | } 202 | 203 | // and play it back, if needed 204 | // TODO: clean this up a bit and move CPAL to a generic func 205 | if play_sound { 206 | // get cpal's host and output device 207 | let host = cpal::default_host(); 208 | let device = host.default_output_device().expect("No audio device found"); 209 | 210 | println!("Output device: {}", device.name().unwrap()); 211 | 212 | // get a config for the stream 213 | let config = device 214 | .supported_output_configs() 215 | .expect("No configs found") 216 | .next() 217 | .expect("Failed to get config") 218 | .with_sample_rate(cpal::SampleRate(sample_rate as u32)); 219 | 220 | // save audio length 221 | let audio_len = generated_audio.len(); 222 | 223 | // num channels 224 | let num_channels = config.channels() as usize; 225 | 226 | // consumer iterator to read the generated audio 227 | let mut consumer = generated_audio 228 | .into_iter() 229 | .flat_map(move |x| std::iter::repeat(x).take(num_channels)); 230 | 231 | // make a stream to play audio with 232 | let stream = match config.sample_format() { 233 | cpal::SampleFormat::F32 => device.build_output_stream( 234 | &config.into(), 235 | move |data: &mut [f32], _| { 236 | for i in data { 237 | *i = consumer.next().unwrap_or(0.0); 238 | } 239 | }, 240 | move |err| println!("Error: {:?}", err), 241 | ), 242 | cpal::SampleFormat::U16 => device.build_output_stream( 243 | &config.into(), 244 | move |data: &mut [u16], _| { 245 | for i in data { 246 | *i = 247 | ((consumer.next().unwrap_or(0.0) * 0.5 + 0.5) * u16::MAX as f32) as u16; 248 | } 249 | }, 250 | move |err| println!("Error: {:?}", err), 251 | ), 252 | cpal::SampleFormat::I16 => device.build_output_stream( 253 | &config.into(), 254 | move |data: &mut [i16], _| { 255 | for i in data { 256 | *i = (consumer.next().unwrap_or(0.0) * i16::MAX as f32) as i16; 257 | } 258 | }, 259 | 
move |err| println!("Error: {:?}", err), 260 | ), 261 | } 262 | .expect("Failed to make stream"); 263 | 264 | // play 265 | // can't move the expect here, as stream needs to be alive long enough 266 | stream.play().expect("Failed to play audio"); 267 | 268 | // wait till the sound stops playing 269 | std::thread::sleep(std::time::Duration::from_secs_f32( 270 | (audio_len as f32 / sample_rate as f32) + 0.5, 271 | )); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /LICENCE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Dimas Leenman 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![forbid(unsafe_code)] 3 | 4 | // TODO: move phoneme related stuff into phoneme, and language related stuff into either language or transcribe 5 | // TODO: consider const generics (when done ofc)? 6 | // TODO: make most of the order easy to read, so keep the explanation 7 | // TODO: maybe have a CPAL feature to allow easy playback? 
8 | // TODO: move back to svf filters, as they are easier and probably result in better audio 9 | 10 | // we'll want to allow voices to be used from this library 11 | pub mod voices; 12 | 13 | // and languages 14 | pub mod languages; 15 | 16 | // The main file the synth is in 17 | // first, define some constants 18 | 19 | /// default sample rate all voices use 20 | /// Resampling to a different sample rate is possible 21 | pub const DEFAULT_SAMPLE_RATE: f32 = 44100.0; 22 | 23 | /// the number of formants to synthesize 24 | pub const NUM_FORMANTS: usize = 8; 25 | 26 | // we'll want to implement these for arrays 27 | use core::ops::{Add, AddAssign, Div, Mul, Neg, Sub}; 28 | 29 | use core::iter::Peekable; 30 | 31 | // We'll need some helper functions 32 | // random number generation 33 | 34 | /// generates a random float in the range [-1, 1], and changes the state after doing so 35 | #[inline] 36 | pub fn random_f32(state: &mut u32) -> f32 { 37 | // here we change the state with a regular integer rng 38 | // This is the lehmer random number generator: https://en.wikipedia.org/wiki/Lehmer_random_number_generator 39 | // 16807 here is a magic number. 
In theory this could be any coprime, but there are some numbers that work better 40 | *state = state.wrapping_mul(16807).wrapping_add(1); 41 | 42 | // https://experilous.com/1/blog/post/perfect-fast-random-floating-point-numbers 43 | // and here we get the right part of the integer to generate our float from 44 | // this abuses IEE 754 floats (and works with doubles too) 45 | // the first 9 bits of the float are the sign bit, and the exponent 46 | // numbers from 1 - 2 in this have the same exponent (which the | 0x3F800000 sets) 47 | // then we can set the mantissa with the state 48 | // we shift that to the right so the first 9 bits become 0, and don't affect our exponent 49 | // for doubles (f64) we need to shift by 12, due to the sign and exponent taking up 12 bits, and set these to 0x3FF0000000000000 instead 50 | let res = (*state >> 9) | 0x3F800000; 51 | 52 | // and here we get the float number 53 | // we have a range of 1-2, but we want -1 to 1 54 | (f32::from_bits(res) - 1.5) * 2.0 55 | } 56 | 57 | // and some arithmatic functions 58 | // these are approximations to help speed things up 59 | // hyperbolic tangent, x is multiplied by pi 60 | /// Approximation of the hyperbolic tangent, tan(pi*x). 
61 | /// Approximation is good for x = [0.0; 0.5] 62 | #[inline] 63 | pub fn tan_approx(x: f32) -> f32 { 64 | // tan(x) = sin(x) / cos(x) 65 | // we can approximate sin and x with the bhaskara I approximation quite well 66 | // which is 16x(pi - x) / 5pi^2 - 4x(pi - x) for sin 67 | // if we fill it in, multiply pi by and rewrite it, we get this: 68 | ((1.0 - x) * x * (5.0 - 4.0 * (x + 0.5) * (0.5 - x))) 69 | / ((x + 0.5) * (5.0 - 4.0 * (1.0 - x) * x) * (0.5 - x)) 70 | } 71 | 72 | /// Approximation of -exp(TAU * x) 73 | /// accurate for the range [0, 1] 74 | #[inline] 75 | pub fn exp_approx(x: f32) -> f32 { 76 | // exp(-2 * pi * x) ~ (1 - x) ^ 5 77 | let o = 1.0 - x; 78 | let o2 = o * o; 79 | 80 | // (1 - x) ^ 5 81 | o2 * o2 * o 82 | } 83 | 84 | // next, let's make a struct to help storing arrays, and do operations on them 85 | 86 | /// Array, containing NUM_FORMANTS floats. Used to store per-formant data 87 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 88 | pub struct Array([f32; NUM_FORMANTS]); 89 | 90 | impl Array { 91 | /// makes a new Array from a given array 92 | #[inline] 93 | pub fn new(arr: [f32; NUM_FORMANTS]) -> Self { 94 | Self(arr) 95 | } 96 | 97 | /// make a new array from a given function 98 | #[inline] 99 | pub fn from_func f32>(mut f: F) -> Self { 100 | Self([(); NUM_FORMANTS].map(|_| f())) 101 | } 102 | 103 | /// makes a new array and fills it with a single element 104 | #[inline] 105 | pub fn splat(val: f32) -> Self { 106 | Self([val; NUM_FORMANTS]) 107 | } 108 | 109 | /// do something for every value in the array 110 | #[inline] 111 | pub fn map f32>(self, f: F) -> Self { 112 | Self(self.0.map(f)) 113 | } 114 | 115 | /// do something for every value in this array and the other 116 | #[inline] 117 | pub fn map2 f32>(self, other: Self, f: F) -> Self { 118 | Self(core::array::from_fn(|i| f(self.0[i], other.0[i]))) 119 | } 120 | 121 | /// sums all elements in an array together 122 | #[inline] 123 | pub fn sum(self) -> f32 { 124 | 
self.0.iter().sum::() 125 | } 126 | 127 | /// take the min of 2 arrays, element wise 128 | #[inline] 129 | pub fn min(self, other: Self) -> Self { 130 | self.map2(other, f32::min) 131 | } 132 | 133 | /// blend two arrays, based on some blend value 134 | #[inline] 135 | pub fn blend(self, other: Self, alpha: f32) -> Self { 136 | self.map2(other, |a, b| a * (1.0 - alpha) + b * alpha) 137 | } 138 | 139 | /// blend two arrays, based on some blend array 140 | #[inline] 141 | pub fn blend_multiple(self, other: Self, alpha: Array) -> Self { 142 | self * (Array::splat(1.0) - alpha) + other * alpha 143 | } 144 | 145 | /// hyperbolic tangent approximation 146 | #[inline] 147 | pub fn tan_approx(self) -> Self { 148 | self.map(tan_approx) 149 | } 150 | 151 | /// exp(-tau * x) approximation 152 | #[inline] 153 | pub fn exp_approx(self) -> Self { 154 | self.map(exp_approx) 155 | } 156 | } 157 | 158 | // and arithmatic 159 | // using the Op traits to make life easier here, this way we can just do +, - * and / 160 | impl Add for Array { 161 | type Output = Self; 162 | /// adds the values in two arrays together 163 | #[inline] 164 | fn add(self, other: Self) -> Self { 165 | self.map2(other, |a, b| a + b) 166 | } 167 | } 168 | 169 | impl AddAssign for Array { 170 | /// adds the other array to this array 171 | #[inline] 172 | fn add_assign(&mut self, other: Self) { 173 | *self = *self + other; 174 | } 175 | } 176 | 177 | impl Sub for Array { 178 | type Output = Self; 179 | /// subtracts the values in an array from another 180 | #[inline] 181 | fn sub(self, other: Self) -> Self { 182 | self.map2(other, |a, b| a - b) 183 | } 184 | } 185 | 186 | impl Mul for Array { 187 | type Output = Self; 188 | /// multiplies the values in two arrays together 189 | #[inline] 190 | fn mul(self, other: Self) -> Self { 191 | self.map2(other, |a, b| a * b) 192 | } 193 | } 194 | 195 | impl Div for Array { 196 | type Output = Self; 197 | /// divides the values of one array with another 198 | #[inline] 199 
| fn div(self, other: Self) -> Self { 200 | self.map2(other, |a, b| a / b) 201 | } 202 | } 203 | 204 | impl Neg for Array { 205 | type Output = Self; 206 | /// negates all values in the array 207 | #[inline] 208 | fn neg(self) -> Self { 209 | self.map(|x| -x) 210 | } 211 | } 212 | 213 | // As well as a few utils to do better random generation, we want to make a few structs to help with generating noise 214 | /// Value noise 215 | /// This noise works by generating two values, and interpolates between them to generate the noise 216 | /// once it has gone too far, it generates a new next point to interpolate to 217 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 218 | pub struct ValueNoise { 219 | current: f32, 220 | next: f32, 221 | phase: f32, 222 | state: u32, 223 | } 224 | 225 | impl ValueNoise { 226 | /// make a new value noise generator, from the given seed 227 | pub fn new(state: &mut u32) -> Self { 228 | let current = random_f32(state); 229 | let next = random_f32(state); 230 | 231 | Self { 232 | current, 233 | next, 234 | phase: 0.0, 235 | state: *state, 236 | } 237 | } 238 | 239 | /// generate the next value, and update the internal state 240 | pub fn next(&mut self, increment: f32) -> f32 { 241 | // increment the state 242 | self.phase += increment; 243 | 244 | // wrap it around if needed 245 | if self.phase > 1.0 { 246 | self.phase -= 1.0; 247 | 248 | // also update the noise 249 | self.current = self.next; 250 | self.next = random_f32(&mut self.state); 251 | } 252 | 253 | // and blend between the current and next 254 | self.current * (1.0 - self.phase) + self.next * self.phase 255 | } 256 | } 257 | 258 | // and for arrays too 259 | /// Value noise, for arrays 260 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 261 | pub struct ArrayValueNoise { 262 | current: Array, 263 | next: Array, 264 | phase: f32, 265 | state: u32, 266 | } 267 | 268 | impl ArrayValueNoise { 269 | /// generate a new value noise generator, from the given seed 270 | pub 
fn new(state: &mut u32) -> Self { 271 | let mut current = [0.0; NUM_FORMANTS]; 272 | let mut next = [0.0; NUM_FORMANTS]; 273 | 274 | // write to the arrays 275 | for i in 0..NUM_FORMANTS { 276 | current[i] = random_f32(state); 277 | next[i] = random_f32(state); 278 | } 279 | 280 | Self { 281 | current: Array::new(current), 282 | next: Array::new(next), 283 | phase: 0.0, 284 | state: *state, 285 | } 286 | } 287 | 288 | /// generate the next value, and update the internal state 289 | pub fn next(&mut self, increment: f32) -> Array { 290 | // increment the state 291 | self.phase += increment; 292 | 293 | // wrap it around if needed 294 | if self.phase > 1.0 { 295 | self.phase -= 1.0; 296 | 297 | // also update the noise 298 | self.current = self.next; 299 | 300 | // next noise step 301 | self.next = Array::from_func(|| random_f32(&mut self.state)); 302 | } 303 | 304 | // and blend between the current and next 305 | self.current * Array::splat(1.0 - self.phase) + self.next * Array::splat(self.phase) 306 | } 307 | } 308 | 309 | // next up, let's go to the audio part 310 | // we'll want a way to represent what to synthesize 311 | 312 | /// Synthesis element, describes what to synthesize 313 | /// This describes the frequency and formants to synthesize 314 | /// All frequency values are normalized to 0-1, where 0 is 0 hz, and 1 is the sample frequency 315 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 316 | pub struct SynthesisElem { 317 | /// base frequency, normalized to sample rate 318 | pub frequency: f32, 319 | 320 | /// formant frequencies, normalized to sample rate 321 | pub formant_freq: Array, 322 | 323 | /// formant bandwidth frequencies, normalized to sample rate 324 | pub formant_bw: Array, 325 | 326 | /// formant smoothness, how much the higher frequencies are filtered out 327 | pub formant_smooth: Array, 328 | 329 | /// how breathy each formant is. 
0 means fully voiced, 1 means full breath 330 | pub formant_breath: Array, 331 | 332 | /// formant turbulence, affects how much noise is blended through when the glottis is open 333 | pub formant_turb: Array, 334 | 335 | /// formant amplitudes. If these sum up to one, the output amplitude will also be one 336 | pub formant_amp: Array, 337 | } 338 | 339 | // next, make some functions for the element 340 | // we want to make one from some sample rate, make one with the given sample rate, and blend them 341 | impl SynthesisElem { 342 | /// make a new synthesis element. For unit gain, formant_amp should sum up to 1 343 | pub fn new( 344 | sample_rate: f32, 345 | frequency: f32, 346 | formant_freq: [f32; NUM_FORMANTS], 347 | formant_smooth: [f32; NUM_FORMANTS], 348 | formant_bw: [f32; NUM_FORMANTS], 349 | formant_breath: [f32; NUM_FORMANTS], 350 | formant_turb: [f32; NUM_FORMANTS], 351 | formant_amp: [f32; NUM_FORMANTS], 352 | ) -> Self { 353 | // make a new element, and then resample it to the appropriate sample rate 354 | Self { 355 | frequency, 356 | formant_freq: Array::new(formant_freq), 357 | formant_bw: Array::new(formant_bw), 358 | formant_smooth: Array::new(formant_smooth), 359 | formant_breath: Array::new(formant_breath), 360 | formant_turb: Array::new(formant_turb), 361 | formant_amp: Array::new(formant_amp), 362 | } 363 | .resample(1.0, sample_rate) 364 | } 365 | 366 | /// create a new silent item 367 | pub fn silent() -> Self { 368 | Self { 369 | frequency: 0.25, 370 | formant_freq: Array::splat(0.25), 371 | formant_bw: Array::splat(0.25), 372 | formant_smooth: Array::splat(0.25), 373 | formant_breath: Array::splat(0.0), 374 | formant_turb: Array::splat(0.0), 375 | formant_amp: Array::splat(0.0), 376 | } 377 | } 378 | 379 | /// Make a new one with the default sample rate 380 | /// Also ensure that the formant amplitudes sum up to 1 to get unit gain 381 | pub fn new_phoneme( 382 | formant_freq: [f32; NUM_FORMANTS], 383 | formant_bw: [f32; NUM_FORMANTS], 384 
| formant_smooth: [f32; NUM_FORMANTS], 385 | formant_turb: [f32; NUM_FORMANTS], 386 | formant_breath: [f32; NUM_FORMANTS], 387 | formant_amp: [f32; NUM_FORMANTS], 388 | ) -> Self { 389 | Self { 390 | frequency: 0.0, 391 | formant_freq: Array::new(formant_freq), 392 | formant_bw: Array::new(formant_bw), 393 | formant_smooth: Array::new(formant_smooth), 394 | formant_breath: Array::new(formant_breath), 395 | formant_turb: Array::new(formant_turb), 396 | 397 | // divide it by the sum of the entire amplitudes, that way we get unit gain 398 | formant_amp: Array::new(formant_amp) / Array::splat(Array::new(formant_amp).sum()), 399 | } 400 | .resample(1.0, DEFAULT_SAMPLE_RATE) 401 | } 402 | /// blend between this synthesis element and another one 403 | #[inline] 404 | pub fn blend(self, other: Self, alpha: f32) -> Self { 405 | Self { 406 | frequency: self.frequency * (1.0 - alpha) + other.frequency * alpha, 407 | formant_freq: self.formant_freq.blend(other.formant_freq, alpha), 408 | formant_smooth: self.formant_smooth.blend(other.formant_smooth, alpha), 409 | formant_bw: self.formant_bw.blend(other.formant_bw, alpha), 410 | formant_turb: self.formant_turb.blend(other.formant_turb, alpha), 411 | formant_breath: self.formant_breath.blend(other.formant_breath, alpha), 412 | formant_amp: self.formant_amp.blend(other.formant_amp, alpha), 413 | } 414 | } 415 | 416 | /// resample the synthesis element to a new sample rate 417 | #[inline] 418 | pub fn resample(self, old_sample_rate: f32, new_sample_rate: f32) -> Self { 419 | // scale factor for the sample rate 420 | let scale = old_sample_rate / new_sample_rate; 421 | 422 | // get the new frequency 423 | let formant_freq = self.formant_freq * Array::splat(scale); 424 | 425 | Self { 426 | // make sure it doesn't go above nyquist 427 | frequency: (self.frequency * scale).min(0.5), 428 | formant_freq: (self.formant_freq * Array::splat(scale)).min(Array::splat(0.5)), 429 | formant_bw: self.formant_bw * Array::splat(scale), 430 | 
formant_smooth: self.formant_smooth * Array::splat(scale), 431 | 432 | // drop all values above nyquist 433 | formant_amp: self 434 | .formant_amp 435 | .map2(formant_freq, |amp, freq| if freq > 0.5 { 0.0 } else { amp }), 436 | 437 | // leave the rest intact 438 | ..self 439 | } 440 | } 441 | 442 | /// copy it with a different frequency 443 | /// frequency is already divided by the sample rate here 444 | #[inline] 445 | pub fn copy_with_frequency(self, frequency: f32) -> Self { 446 | Self { 447 | frequency: frequency.min(0.5), 448 | ..self 449 | } 450 | } 451 | 452 | /// copy it without any sound 453 | #[inline] 454 | pub fn copy_silent(self) -> Self { 455 | Self { 456 | formant_amp: Array::splat(0.0), 457 | ..self 458 | } 459 | } 460 | } 461 | 462 | // next we'll want to synthesize some audio 463 | // for that, we'll use an iterator 464 | // it keeps track of the filter states, and the underlying iterator to get synthesis elements from 465 | // if iterators aren't available in the language you are porting to, use a function to get the next item from some state instead 466 | 467 | /// Synthesizer for synthesizing actual audio samples from synthesis elements 468 | /// this is created by calling .synthesize() on an iterator that produces synthesis elements 469 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 470 | pub struct Synthesize> { 471 | /// underlying iterator 472 | iter: T, 473 | 474 | /// phase of the carrier 475 | phase: f32, 476 | 477 | /// lowpass filter state 478 | filter_state_a: Array, 479 | 480 | /// svf filter state 1 (ic1eq) 481 | filter_state_b: Array, 482 | 483 | /// svf filter statee 2 (ic2eq) 484 | filter_state_c: Array, 485 | 486 | /// noise state 487 | seed: u32, 488 | } 489 | 490 | // TODO: voice here? 
// needed because we probably want jitter to read it's parameters from voice, but we can do that later if really needed, and just pass voice.param_a in there

// next up, implement iterator for the synthesizer, which takes care of synthesizing sound (in samples) from synthesis elements
impl<T: Iterator<Item = SynthesisElem>> Iterator for Synthesize<T> {
    type Item = f32;

    /// generate the next audio sample, or None once the underlying
    /// iterator of synthesis elements runs out
    fn next(&mut self) -> Option<Self::Item> {
        // get the item from the underlying iterator, or return None if we can't
        let elem = self.iter.next()?;

        // generate an anti-aliased saw wave
        // polyblep offset, to do antialiasing: smooths the saw's discontinuity
        // over the two samples around the phase wrap point
        let polyblep = if self.phase < elem.frequency {
            // if we're at the first sample, smooth it a bit
            let t = self.phase / elem.frequency;
            2.0 * t - (t * t) - 1.0
        } else if self.phase > (1.0 - elem.frequency) {
            // same for last sample
            let t = (self.phase - 1.0) / elem.frequency;
            (t * t) + 2.0 * t + 1.0
        } else {
            // otherwise, no smoothing needed
            0.0
        };

        // saw wave, splatted so the same carrier drives every formant
        let saw_wave = Array::splat((2.0 * self.phase - 1.0) - polyblep);

        // increment phase
        self.phase += elem.frequency;

        // wrap around, as this can't be above nyquist we can avoid fract
        if self.phase >= 1.0 {
            self.phase -= 1.0;
        }

        // [-1, 1] range
        let noise = Array::splat(random_f32(&mut self.seed));

        // apply turbulence and noise: blend saw and noise per formant,
        // according to how breathy each formant is
        let noise_wave = saw_wave.blend_multiple(noise, elem.formant_breath);

        // get the filter alpha
        // we can get the parameter for the filter from the cutoff frequency with exp(-2*pi*x), which is exp_approx!
        let alpha = elem.formant_smooth.exp_approx();

        // apply a low pass filter, single pole
        self.filter_state_a += (Array::splat(1.0) - alpha) * (noise_wave - self.filter_state_a);

        // get the result from the filter
        let glottal_wave = self.filter_state_a;

        // apply turbulence noise: modulate the glottal wave by noise, per formant
        let turbulence_wave =
            glottal_wave * Array::splat(1.0).blend_multiple(noise, elem.formant_turb);

        // apply amplitude and scale it
        // makes sure it's the right amplitude to not make the filter go out of the [-1, 1] range
        // TODO: how
        let v0 = turbulence_wave * elem.formant_amp;

        // state variable filter
        // https://cytomic.com/files/dsp/SvfLinearTrapOptimised2.pdf
        // set the parameters
        let g = elem.formant_freq.tan_approx();

        // k = 1 / Q, and Q = f_r / delta_f, where f_r is the resonant frequency, and delta_f is the bandwidth
        let k = elem.formant_bw / elem.formant_freq;

        let a1 = Array::splat(1.0) / (Array::splat(1.0) + g * (g + k));
        let a2 = g * a1;
        let a3 = g * a2;

        // step the filter forwards to get the next state
        let v3 = v0 - self.filter_state_c;
        let v1 = a1 * self.filter_state_b + a2 * v3;
        let v2 = self.filter_state_c + a2 * self.filter_state_b + a3 * v3;

        // update actual state
        self.filter_state_b = Array::splat(2.0) * v1 - self.filter_state_b;
        self.filter_state_c = Array::splat(2.0) * v2 - self.filter_state_c;

        // and the bandpass result: sum the bands of all formants, scaled down
        let res = v1.sum() * 0.5;

        // and return the wave, scaled by amplitude
        Some(res)
    }
}

// and we want to be able to easily make a synthesizer, so make a trait for it
pub trait IntoSynthesize
where
    Self: IntoIterator<Item = SynthesisElem> + Sized,
{
    /// creates a new synthesizer from this iterator
    fn synthesize(self) -> Synthesize<Self::IntoIter> {
        Synthesize {
            iter:
self.into_iter(),
            phase: 0.0,
            filter_state_a: Array::splat(0.0),
            filter_state_b: Array::splat(0.0),
            filter_state_c: Array::splat(0.0),
            seed: 0,
        }
    }
}

// implement it for anything that can become the right iterator
impl<T> IntoSynthesize for T where T: IntoIterator<Item = SynthesisElem> + Sized {}

// ensure the peak values don't exceed 1.0
// TODO(review): test body still needs to be written
#[test]
fn synthesize_normalized() {}

// ensure resampling gives a similar output
// TODO(review): test body still needs to be written
#[test]
fn synthesize_resampled() {}

// that's it, sound synthesis done
// before we continue, we'd like to set up the internal representation for voices
// a voice consists of a number of synthesis elements, each assigned to a phoneme
// a phoneme is the smallest sound in speech, so we can use that for the internal representation nicely
// the downside is that there are quite a few

// first, set up the enum for all phonemes
// TODO: IPA or some reduced set?
// a reduced set makes it easier to make voices

// macro to generate the phonemes
// takes a list of (Uppercase lowercase example) triples, one per phoneme,
// then generates the Phoneme enum, the VoiceStorage struct and its functions
macro_rules! make_phonemes {
    ($($upper:ident $lower:ident $example:ident,)*) => {

        /// Represents all phonemes.
        /// This is a subset of the IPA,
        /// with a few extra special phonemes to help with properly converting the sounds represented in grail to actual sounds
        /// Most notably grail can't properly represent diphthongs and plosives with a single synthesis element,
        /// so it's required to add a few special marker phonemes to add these
        #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
        pub enum Phoneme {
            /// Silence, will fade in/fade out any other phonemes surrounding it
            /// use when silence is intended
            Silence,

            /// glottal stop, behaves similarly to silence,
            /// but should be used when a glottal stop is intended.
            /// This is required for some phonemes to sound correct
            Stop,

            /// Blend the next phoneme into the other seamlessly, useful for indicating diphthongs
            Glide,
            // insert all uppercase phonemes into the enum, with the examples as in the documentation
            $(
                #[doc = concat!("as in ", stringify!($example))]
                $upper,
            )*
        }

        /// Stores all synthesis elements for the phonemes that have an associated sound
        #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
        pub struct VoiceStorage {
            // insert all lowercase phonemes as a field
            $(
                #[doc = concat!("as in ", stringify!($example))]
                pub $lower: SynthesisElem,
            )*
        }

        impl VoiceStorage {

            /// retrieve a synthesis elem based on the given phoneme,
            /// or None for the special marker phonemes (Silence, Stop, Glide)
            pub fn get(self, phoneme: Phoneme) -> Option<SynthesisElem> {
                match phoneme {
                    Phoneme::Silence | Phoneme::Stop | Phoneme::Glide => None,
                    $(
                        Phoneme::$upper => Some(self.$lower),
                    )*
                }
            }

            /// run a function on all phonemes
            pub fn for_all(&mut self, func: fn(Phoneme, &mut SynthesisElem)) {
                $(
                    func(Phoneme::$upper, &mut self.$lower);
                )*
            }
        }
    }
}

// make the phoneme structs
684 | // see https://en.wikipedia.org/wiki/Help:IPA 685 | // TODO! 686 | make_phonemes!( 687 | A a test, 688 | E e test, 689 | ); 690 | 691 | // and next, the full voice 692 | // which is just the voice storage + extra parameters for intonation 693 | 694 | /// A voice containing all needed parameters to synthesize sound from some given phonemes 695 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 696 | pub struct Voice { 697 | /// sample rate this voice is at 698 | pub sample_rate: f32, 699 | 700 | /// phonemes, to generate sound 701 | pub phonemes: VoiceStorage, 702 | 703 | /// center frequency for the voice 704 | pub center_frequency: f32, 705 | 706 | /// frequency at which to jitter things, to improve voice naturalness 707 | pub jitter_frequency: f32, 708 | 709 | /// how much to jitter the base frequency 710 | pub jitter_delta_frequency: f32, 711 | 712 | /// how much to jitter the formant frequencies 713 | pub jitter_delta_formant_frequency: f32, 714 | 715 | /// how much to jitter the formant amplitudes 716 | pub jitter_delta_amplitude: f32, 717 | } 718 | 719 | // We also want to jitter all frequencies a bit for more realism, so let's do that next 720 | 721 | // now we can make our jitter work, as getting random numbers is now easier 722 | // all frequencies are in normalized form, so 1.0 is the sample frequency 723 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 724 | pub struct Jitter> { 725 | /// underlying iterator 726 | iter: T, 727 | 728 | /// noise for the frequency 729 | freq_noise: ValueNoise, 730 | 731 | /// noise for the formant frequency 732 | formant_freq_noise: ArrayValueNoise, 733 | 734 | /// noise for the formant amplitude 735 | formant_amp_noise: ArrayValueNoise, 736 | 737 | /// noise frequency 738 | frequency: f32, 739 | 740 | /// frequency deviation 741 | delta_frequency: f32, 742 | 743 | /// formant deviation 744 | delta_formant_freq: f32, 745 | 746 | /// amplitude deviation 747 | delta_amplitude: f32, 748 | } 749 | 750 | impl> 
Iterator for Jitter { 751 | type Item = SynthesisElem; 752 | 753 | fn next(&mut self) -> Option { 754 | // get the next element from the underlying iterator 755 | let mut elem = self.iter.next()?; 756 | 757 | // gather all next noises 758 | let freq = self.freq_noise.next(self.frequency); 759 | let formant_freq = self.formant_freq_noise.next(self.frequency); 760 | let formant_amp = self.formant_amp_noise.next(self.frequency); 761 | 762 | // change them in the element 763 | elem.frequency += freq * self.delta_frequency; 764 | elem.formant_freq += formant_freq * Array::splat(self.delta_formant_freq); 765 | 766 | // we don't want it to get *louder*, so make sure it only becomes softer by doing (1 + [-1, 1]) / 2, which results in [0, 1] 767 | // we'll then multiply it by the appropriate amplitude so we can't end up with negative amplitudes for some sounds 768 | let formant_amp_delta = 769 | (formant_amp + Array::splat(1.0)) * Array::splat(0.5 * self.delta_amplitude); 770 | 771 | // multiplier is 1 - x, so that it doesn't become very soft 772 | let formant_amp_mul = Array::splat(1.0) - formant_amp_delta; 773 | elem.formant_amp = elem.formant_amp * formant_amp_mul; 774 | 775 | // and return the modified element 776 | Some(elem) 777 | } 778 | } 779 | 780 | // and we want to be able to easily make the jitter iterator 781 | pub trait IntoJitter 782 | where 783 | Self: IntoIterator + Sized, 784 | { 785 | /// creates a new synthesizer from this iterator 786 | fn jitter(self, mut seed: u32, voice: Voice) -> Jitter { 787 | Jitter { 788 | iter: self.into_iter(), 789 | freq_noise: ValueNoise::new(&mut seed), 790 | formant_freq_noise: ArrayValueNoise::new(&mut seed), 791 | formant_amp_noise: ArrayValueNoise::new(&mut seed), 792 | frequency: voice.jitter_frequency, 793 | delta_frequency: voice.jitter_delta_frequency, 794 | delta_formant_freq: voice.jitter_delta_formant_frequency, 795 | delta_amplitude: voice.jitter_delta_amplitude, 796 | } 797 | } 798 | } 799 | 800 | // implement 
it for anything that can become the right iterator
impl<T> IntoJitter for T where T: IntoIterator<Item = SynthesisElem> + Sized {}

// ensure it doesn't exceed the parameter bounds
// TODO(review): test body still needs to be written
#[test]
fn jitter_within_bounds() {}

// we now have a way to synthesize sound, and add random variations to it.
// However, generating the individual samples is kinda a hassle to do, so it would be nicer if we can give each synthesis element a length
// and then generate the right sequence from that
// so, we'll create a sequencer that does this

// for this, we'll first need a struct to help with adding the time
/// A synthesis element (or silence) paired with its timing information
#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
pub struct SequenceElem {
    /// the synthesis element
    /// Some if there, None if silent
    pub elem: Option<SynthesisElem>,

    /// time this element lasts for
    pub length: f32,

    /// time the blending lasts for
    pub blend_length: f32,
}

impl SequenceElem {
    /// make a new element
    pub fn new(elem: Option<SynthesisElem>, length: f32, blend_length: f32) -> Self {
        Self {
            elem,
            length,
            blend_length,
        }
    }
}

/// Sequencer, given a time and blend time, it generates the right amount of samples
#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
pub struct Sequencer<T: Iterator<Item = SequenceElem>> {
    /// underlying iterator
    iter: T,

    /// current element
    cur_elem: Option<SequenceElem>,

    /// next element
    next_elem: Option<SequenceElem>,

    /// time remaining for this element
    time: f32,

    /// sample time, how long a sample lasts (1 / sample rate)
    delta_time: f32,
}

impl<T: Iterator<Item = SequenceElem>> Iterator for Sequencer<T> {
    type Item = SynthesisElem;

    /// produce one synthesis element per output sample, blending between
    /// the current and next sequence elements over the blend length
    fn next(&mut self) -> Option<Self::Item> {
        // decrease the amount of remaining time
        self.time -= self.delta_time;

        // if this is now below 0, we go to the next pair
        if self.time < 0.0 {
            // figure out what to do next
            match (self.cur_elem, self.next_elem) {
                // we have both, get a new one
                (Some(_), Some(a)) => {
                    self.cur_elem = self.next_elem;
                    self.next_elem = self.iter.next();

                    // set the time (adding, so leftover negative time carries over)
                    self.time += a.length;
                }
                // we have none, fetch new ones
                (None, None) => {
                    self.cur_elem = self.iter.next();
                    self.next_elem = self.iter.next();

                    // if we have the current one, set the time
                    if let Some(a) = self.cur_elem {
                        self.time += a.length;
                    }
                }
                // for the rest, we can simply exit early
                _ => return None,
            }
        }

        // and match on what to do, based on which elements carry sound
        match (
            self.cur_elem,
            self.cur_elem.and_then(|x| x.elem),
            self.next_elem.and_then(|x| x.elem),
        ) {
            // both elements, all are on
            (Some(a), Some(b), Some(c)) => {
                // get the blend amount
                let alpha = (self.time / a.blend_length).min(1.0);

                // and blend the 2, because alpha goes from 1 to 0, we need to blend in the other order
                Some(c.blend(b, alpha))
            }

            // only the first one has sound, blend to silence
            (Some(a), Some(b), None) => {
                // get the blend amount
                let alpha = (self.time / a.blend_length).min(1.0);

                // and blend with a silent one
                Some(b.copy_silent().blend(b, alpha))
            }

            // only the second one has sound, blend from silence
            (Some(a), None, Some(c)) => {
                // get the blend amount
                let alpha = (self.time / a.blend_length).min(1.0);

                // and blend with a silent one
                Some(c.blend(c.copy_silent(), alpha))
            }

            // both silent
            (Some(_), None, None) => {
                // just return silence
                Some(SynthesisElem::silent())
            }

            // nothing else, return none
            _ => None,
        }
    }
}

// and implement an easy way to get the iterator
pub trait IntoSequencer
where
    Self:
IntoIterator + Sized, 939 | { 940 | /// creates a new sequencer, from a given voice 941 | fn sequence(self, voice: Voice) -> Sequencer { 942 | Sequencer { 943 | iter: self.into_iter(), 944 | delta_time: 1.0 / voice.sample_rate, 945 | cur_elem: None, 946 | next_elem: None, 947 | time: 0.0, 948 | } 949 | } 950 | } 951 | 952 | // implement it for anything that can become the right iterator 953 | impl IntoSequencer for T where T: IntoIterator + Sized {} 954 | 955 | // TODO: how test 956 | 957 | // next up, we'll want to go from time + phoneme info to a sequence element, so let's do that 958 | // first, we'll want a new struct to also store timing info with phonemes 959 | 960 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 961 | pub struct PhonemeElem { 962 | /// the phoneme 963 | pub phoneme: Phoneme, 964 | 965 | /// total length 966 | pub length: f32, 967 | 968 | /// length of blending 969 | pub blend_length: f32, 970 | 971 | /// the base frequency, normalized, so 1.0 is the sample frequency 972 | pub frequency: f32, 973 | } 974 | 975 | // and we'll want to make the selector next. 976 | // this simply selects the right synthesis elem from a voice 977 | // as well as makes sure a silence is blended correctly 978 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] 979 | pub struct Selector> { 980 | /// underlying iterator 981 | iter: T, 982 | 983 | /// underlying voice storage to get voice data from 984 | voice_storage: VoiceStorage, 985 | } 986 | 987 | impl> Iterator for Selector { 988 | type Item = SequenceElem; 989 | 990 | fn next(&mut self) -> Option { 991 | // get the next item if we can 992 | let phoneme = self.iter.next()?; 993 | 994 | // get the right phoneme, or none if it's silent. 
995 | // this allows correct blending later on 996 | let elem = self.voice_storage.get(phoneme.phoneme); 997 | 998 | // and put it in a sequence element 999 | Some(SequenceElem::new( 1000 | // if there is any, copy it with the right frequency 1001 | elem.map(|x| x.copy_with_frequency(phoneme.frequency)), 1002 | phoneme.length, 1003 | phoneme.blend_length, 1004 | )) 1005 | } 1006 | } 1007 | 1008 | pub trait IntoSelector 1009 | where 1010 | Self: IntoIterator + Sized, 1011 | { 1012 | /// creates a selector from the given voice 1013 | fn select(self, voice: Voice) -> Selector { 1014 | Selector { 1015 | iter: self.into_iter(), 1016 | voice_storage: voice.phonemes, 1017 | } 1018 | } 1019 | } 1020 | 1021 | // implement it for anything that can become the right iterator 1022 | impl IntoSelector for T where T: IntoIterator + Sized {} 1023 | 1024 | // now, we need to do some more complex stuff again. 1025 | // so far we got most of the sound generating "backend" done, now time for the "frontend" 1026 | // this needs to take in text and convert it into phonemes + timing. 1027 | // let's first make the rules we use for text -> phoneme 1028 | 1029 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] 1030 | pub struct TranscriptionRule<'a> { 1031 | /// string to compare agains 1032 | pub string: &'a str, 1033 | 1034 | /// phonemes to generate from this 1035 | pub phonemes: &'a [Phoneme], 1036 | } 1037 | 1038 | // now make the actual language, which is just a set of transcription rules 1039 | pub struct Language<'a> { 1040 | /// rules for the language to transcribe phonemes 1041 | pub rules: &'a [TranscriptionRule<'a>], 1042 | 1043 | /// whether the language is case-sensitive 1044 | pub case_sensitive: bool, 1045 | } 1046 | 1047 | // next up, the intonator. 
1048 | // this will add intonation to any phoneme sequence 1049 | pub struct Intonator> { 1050 | /// underlying iterator 1051 | iter: T, 1052 | 1053 | /// center frequency for the voice 1054 | center_frequency: f32, 1055 | } 1056 | 1057 | impl> Iterator for Intonator { 1058 | type Item = PhonemeElem; 1059 | fn next(&mut self) -> Option { 1060 | let phon = self.iter.next()?; 1061 | 1062 | // TODO: apply intonation 1063 | 1064 | // TODO: speaking rate 1065 | 1066 | // TODO: give certain phonemes a length 1067 | 1068 | Some(PhonemeElem { 1069 | phoneme: phon, 1070 | length: 0.5, 1071 | blend_length: 0.5, 1072 | frequency: self.center_frequency, 1073 | }) 1074 | } 1075 | } 1076 | 1077 | pub trait IntoIntonator 1078 | where 1079 | Self: IntoIterator + Sized, 1080 | { 1081 | fn intonate(self, _language: Language, voice: Voice) -> Intonator { 1082 | Intonator { 1083 | iter: self.into_iter(), 1084 | center_frequency: voice.center_frequency, 1085 | } 1086 | } 1087 | } 1088 | 1089 | impl IntoIntonator for T where T: IntoIterator + Sized {} 1090 | 1091 | // TODO: how test? 1092 | 1093 | // now we want to convert text into phonemes 1094 | // we're going to do this with a find-and-replace ruleset, as defined in language. 
1095 | // this is assumed to be sorted, so we can binary search with the prefix, 1096 | // to figure out the range we need to search in and see if it's too low or too high 1097 | 1098 | pub struct Transcriber<'a, T: Iterator> { 1099 | /// underlying iterator 1100 | iter: Peekable, 1101 | 1102 | /// ruleset to use 1103 | ruleset: &'a [TranscriptionRule<'a>], 1104 | 1105 | /// whether we are case sensitive to match 1106 | // TODO: replace this with a better ruleset, as in: each rule becomes lowercase 1107 | case_sensitive: bool, 1108 | 1109 | /// buffer for the phonemes we have now 1110 | buffer: &'a [Phoneme], 1111 | } 1112 | 1113 | // silent buffer 1114 | const SILENCE: &[Phoneme] = &[Phoneme::Silence]; 1115 | 1116 | impl<'a, T: Iterator> Iterator for Transcriber<'a, T> { 1117 | type Item = Phoneme; 1118 | fn next(&mut self) -> Option { 1119 | // initial state 1120 | let mut search_min = 0; 1121 | let mut search_max = self.ruleset.len(); 1122 | let mut index = 0; 1123 | 1124 | // search as long as we haven't found a match 1125 | while self.buffer.is_empty() { 1126 | // get the current character 1127 | let character = self.iter.peek().map(|x| { 1128 | if self.case_sensitive { 1129 | *x 1130 | } else { 1131 | x.to_ascii_lowercase() 1132 | } 1133 | })?; 1134 | 1135 | // find the new search range 1136 | // now that we have a new item, we can reduce the search range 1137 | // this is binary search, where the left half is where the lower range is lexiographically lower than the current buffer content 1138 | // because we only get one char at a time, we can assume that the previous N characters were already found and reduced the range 1139 | // so no need to keep those around anymore 1140 | let new_min = self.ruleset[search_min..search_max] 1141 | .partition_point(|x| x.string.chars().nth(index).map_or(true, |x| x < character)) 1142 | + search_min; 1143 | 1144 | // same for the upper range, but now it's lower or equal 1145 | let new_max = 
self.ruleset[search_min..search_max].partition_point(|x| { 1146 | x.string 1147 | .chars() 1148 | .nth(index) 1149 | .map_or(false, |x| x <= character) 1150 | }) + search_min; 1151 | 1152 | // now decide on where to go 1153 | if new_min >= new_max && self.ruleset[search_min].string.chars().count() == index { 1154 | // if the new range is invalid, but the old search range lower bound matched, grab that 1155 | self.buffer = self.ruleset[search_min].phonemes; 1156 | } else if new_min >= new_max { 1157 | // if they are not equal but no previous search range matched, return silence 1158 | self.buffer = SILENCE; 1159 | 1160 | // advance, as we don't need this character for the next iteration, because it was part of a garbled sequence anyway 1161 | self.iter.next(); 1162 | } else { 1163 | // otherwise, we are still running 1164 | search_min = new_min; 1165 | search_max = new_max; 1166 | index += 1; 1167 | 1168 | // advance so we can check the next character 1169 | self.iter.next(); 1170 | 1171 | // if this fails, we won't be able to peek next iteration, so see if we can emit the final rule 1172 | if self.iter.peek().is_none() 1173 | && self.ruleset[search_min].string.chars().count() == index 1174 | { 1175 | self.buffer = self.ruleset[search_min].phonemes; 1176 | } else if self.iter.peek().is_none() { 1177 | // if no match could be found, indicate so by emitting a silence 1178 | self.buffer = SILENCE; 1179 | } 1180 | } 1181 | } 1182 | // try and get the first item if we have that 1183 | let result = self.buffer.get(0); 1184 | 1185 | // remove the first item if we can, and set the buffer to the rest of the remaining buffer 1186 | self.buffer = self.buffer.get(1..).unwrap_or(&[]); 1187 | 1188 | // return the result 1189 | result.copied() 1190 | } 1191 | } 1192 | 1193 | pub trait IntoTranscriber 1194 | where 1195 | Self: IntoIterator + Sized, 1196 | { 1197 | fn transcribe(self, language: Language) -> Transcriber { 1198 | Transcriber { 1199 | iter: self.into_iter().peekable(), 
1200 | ruleset: language.rules, 1201 | buffer: SILENCE, 1202 | case_sensitive: language.case_sensitive, 1203 | } 1204 | } 1205 | } 1206 | 1207 | impl IntoTranscriber for T where T: IntoIterator + Sized {} 1208 | 1209 | // test the transcriber for correct behaviour 1210 | #[test] 1211 | fn transcribe_unique() { 1212 | let mut transcriber = Transcriber { 1213 | iter: "abc".chars().peekable(), 1214 | ruleset: &[ 1215 | TranscriptionRule { 1216 | string: "ab", 1217 | phonemes: &[Phoneme::A], 1218 | }, 1219 | TranscriptionRule { 1220 | string: "c", 1221 | phonemes: &[Phoneme::E], 1222 | }, 1223 | ], 1224 | buffer: &[], 1225 | case_sensitive: false, 1226 | }; 1227 | 1228 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1229 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1230 | assert_eq!(transcriber.next(), None); 1231 | } 1232 | 1233 | #[test] 1234 | fn transcribe_same_start() { 1235 | let mut transcriber = Transcriber { 1236 | iter: "abacab".chars().peekable(), 1237 | ruleset: &[ 1238 | TranscriptionRule { 1239 | string: "ab", 1240 | phonemes: &[Phoneme::A], 1241 | }, 1242 | TranscriptionRule { 1243 | string: "ac", 1244 | phonemes: &[Phoneme::E], 1245 | }, 1246 | ], 1247 | buffer: &[], 1248 | case_sensitive: false, 1249 | }; 1250 | 1251 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1252 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1253 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1254 | assert_eq!(transcriber.next(), None); 1255 | } 1256 | 1257 | #[test] 1258 | fn transcribe_same_char_different_length() { 1259 | let mut transcriber = Transcriber { 1260 | iter: "aaa".chars().peekable(), 1261 | ruleset: &[ 1262 | TranscriptionRule { 1263 | string: "a", 1264 | phonemes: &[Phoneme::A], 1265 | }, 1266 | TranscriptionRule { 1267 | string: "aa", 1268 | phonemes: &[Phoneme::E], 1269 | }, 1270 | ], 1271 | buffer: &[], 1272 | case_sensitive: false, 1273 | }; 1274 | 1275 | // should match the longest rule when possible, so aa first, then a 1276 | 
assert_eq!(transcriber.next(), Some(Phoneme::E)); 1277 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1278 | assert_eq!(transcriber.next(), None); 1279 | } 1280 | 1281 | // same as above, but now it cuts off early so it has to find the shortest when there's a character following 1282 | #[test] 1283 | fn transcribe_same_char_different_length_cutoff() { 1284 | let mut transcriber = Transcriber { 1285 | iter: "ae".chars().peekable(), 1286 | ruleset: &[ 1287 | TranscriptionRule { 1288 | string: "a", 1289 | phonemes: &[Phoneme::A], 1290 | }, 1291 | TranscriptionRule { 1292 | string: "aa", 1293 | phonemes: &[Phoneme::E], 1294 | }, 1295 | TranscriptionRule { 1296 | string: "e", 1297 | phonemes: &[Phoneme::E], 1298 | }, 1299 | ], 1300 | buffer: &[], 1301 | case_sensitive: false, 1302 | }; 1303 | 1304 | // should match the longest rule when possible, so aa first, then a 1305 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1306 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1307 | assert_eq!(transcriber.next(), None); 1308 | } 1309 | 1310 | #[test] 1311 | fn transcribe_skip_no_matches() { 1312 | let mut transcriber = Transcriber { 1313 | iter: "abuac".chars().peekable(), 1314 | ruleset: &[ 1315 | TranscriptionRule { 1316 | string: "ab", 1317 | phonemes: &[Phoneme::A], 1318 | }, 1319 | TranscriptionRule { 1320 | string: "ac", 1321 | phonemes: &[Phoneme::E], 1322 | }, 1323 | ], 1324 | buffer: &[], 1325 | case_sensitive: false, 1326 | }; 1327 | 1328 | // should match the longest rule when possible, so aa first, then a 1329 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1330 | assert_eq!(transcriber.next(), Some(Phoneme::Silence)); 1331 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1332 | assert_eq!(transcriber.next(), None); 1333 | } 1334 | 1335 | #[test] 1336 | fn transcribe_skip_partial_match_at_end() { 1337 | let mut transcriber = Transcriber { 1338 | iter: "abaca".chars().peekable(), 1339 | ruleset: &[ 1340 | TranscriptionRule { 1341 | string: "ab", 
1342 | phonemes: &[Phoneme::A], 1343 | }, 1344 | TranscriptionRule { 1345 | string: "ac", 1346 | phonemes: &[Phoneme::E], 1347 | }, 1348 | ], 1349 | buffer: &[], 1350 | case_sensitive: false, 1351 | }; 1352 | 1353 | // should match the longest rule when possible, so aa first, then a 1354 | assert_eq!(transcriber.next(), Some(Phoneme::A)); 1355 | assert_eq!(transcriber.next(), Some(Phoneme::E)); 1356 | assert_eq!(transcriber.next(), Some(Phoneme::Silence)); 1357 | assert_eq!(transcriber.next(), None); 1358 | } 1359 | 1360 | // Here's how it will work 1361 | // synthesizer iterator to generate sound 1362 | // jitter iterator to add randomness to the frequencies 1363 | // sequencer iterator to blend phonemes 1364 | // intonator to add intonation 1365 | // transcriber to transcribe between text and phoneme 1366 | // parser to parse text and handle potential commands 1367 | --------------------------------------------------------------------------------