├── .gitignore
├── src
    ├── main.c
    ├── app_main.h
    ├── audio
    │   ├── pa_list_devices.h
    │   ├── wav_io.h
    │   ├── audio_buffer_test.c
    │   ├── audio_buffer.h
    │   ├── pa_list_devices_test.c
    │   ├── audio_buffer.c
    │   ├── wav_io.c
    │   ├── wav_io_test.c
    │   └── pa_list_devices.c
    ├── utils
    │   ├── trace.h
    │   ├── file_utils.h
    │   ├── yargs.h
    │   ├── string_utils.h
    │   ├── file_utils.c
    │   ├── string_utils.c
    │   ├── string_utils_test.c
    │   ├── file_utils_test.c
    │   ├── yargs.c
    │   └── yargs_test.c
    ├── settings.h
    ├── app_main_test.c
    ├── third_party
    │   └── termcolor-c.h
    ├── settings_test.c
    ├── settings.c
    ├── args.h
    └── app_main.c
├── .vscode
    ├── settings.json
    └── c_cpp_properties.json
├── scripts
    ├── generate_release_docker.sh
    ├── generate_release.sh
    ├── download_libs.sh
    ├── install_gh.sh
    ├── download_models.py
    ├── coqui_releases.csv
    └── create_deb_package.sh
├── .editorconfig
├── Makefile
├── notebooks
    └── install.ipynb
├── README.md
├── definitions.mk
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
1 | spchcat
2 | build
3 | /audio*


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
1 | #include "app_main.h"
2 | 
// Process entry point. All of the real work happens in app_main(), which is
// kept separate so that tests can call it directly.
int main(int argc, char** argv) {
  const int exit_code = app_main(argc, argv);
  return exit_code;
}


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "files.associations": {
 3 |         "algorithm": "c",
 4 |         "*.tcc": "c",
 5 |         "memory": "c",
 6 |         "new": "c",
 7 |         "numeric": "c",
 8 |         "system_error": "c"
 9 |     }
10 | }


--------------------------------------------------------------------------------
/src/app_main.h:
--------------------------------------------------------------------------------
1 | #ifndef INCLUDE_APP_MAIN_H
2 | #define INCLUDE_APP_MAIN_H
3 | 
4 | // Putting the main logic here allows us to call it separately for testing
5 | // purposes.
6 | int app_main(int argc, char** argv);
7 | 
8 | #endif  // INCLUDE_APP_MAIN_H


--------------------------------------------------------------------------------
/src/audio/pa_list_devices.h:
--------------------------------------------------------------------------------
1 | #ifndef INCLUDE_PA_LIST_DEVICES_H
2 | #define INCLUDE_PA_LIST_DEVICES_H
3 | 
4 | // Returns a list of the names of all input devices found on this system by
5 | // Pulse Audio.
6 | void get_input_devices(char*** devices, int* devices_length);
7 | 
8 | #endif  // INCLUDE_PA_LIST_DEVICES_H


--------------------------------------------------------------------------------
/src/audio/wav_io.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_WAV_IO_H
 2 | #define INCLUDE_WAV_IO_H
 3 | 
 4 | #include <stdbool.h>
 5 | 
 6 | #include "audio_buffer.h"
 7 | 
 8 | bool wav_io_load(const char* filename, AudioBuffer** result);
 9 | 
10 | bool wav_io_save(const char* filename, const AudioBuffer* buffer);
11 | 
12 | #endif  // INCLUDE_WAV_IO_H


--------------------------------------------------------------------------------
/scripts/generate_release_docker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | cd /spchcat/
 4 | 
 5 | # Install system dependencies.
 6 | apt-get -qq update
 7 | apt-get -qq install -y sox libsox-dev libpulse-dev make gcc g++ wget curl sudo
 8 | 
 9 | # Fetch the binary libraries distributed by Coqui.
10 | scripts/download_libs.sh
11 | 
12 | # Build the tool from source.
13 | make clean && make spchcat LINK_PATH_STT=-Lbuild/lib EXTRA_CFLAGS_STT=-Ibuild/lib
14 | 
15 | # Package into a Debian/Ubuntu installer.
16 | scripts/create_deb_package.sh


--------------------------------------------------------------------------------
/scripts/generate_release.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -xe
 2 | 
 3 | # Needs to happen outside Docker image, so we can reuse GitHub
 4 | # authentication. You can move this into the script if needed,
 5 | # but you'll have to figure out how to pass the right tokens.
 6 | #rm -rf build/models
 7 | #scripts/download_models.sh
 8 | 
 9 | # Run the rest of the script inside an Ubuntu 18.04 container,
10 | # so that we get a glibc that's widely compatible.
11 | sudo docker run -it -v`pwd`:/spchcat ubuntu:bionic /spchcat/scripts/generate_release_docker.sh


--------------------------------------------------------------------------------
/src/audio/audio_buffer_test.c:
--------------------------------------------------------------------------------
 1 | #include "acutest.h"
 2 | 
 3 | #include "audio_buffer.c"
 4 | 
 5 | static void test_audio_buffer_alloc() {
 6 |   AudioBuffer* buffer = audio_buffer_alloc(16000, 320, 2);
 7 |   TEST_CHECK(buffer != NULL);
 8 |   TEST_INTEQ(16000, buffer->sample_rate);
 9 |   TEST_INTEQ(320, buffer->samples_per_channel);
10 |   TEST_INTEQ(2, buffer->channels);
11 |   TEST_CHECK(buffer->data != NULL);
12 |   audio_buffer_free(buffer);
13 | }
14 | 
15 | TEST_LIST = {
16 |   {"audio_buffer_alloc", test_audio_buffer_alloc},
17 |   {NULL, NULL},
18 | };


--------------------------------------------------------------------------------
/src/audio/audio_buffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_AUDIO_BUFFER_H
 2 | #define INCLUDE_AUDIO_BUFFER_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | typedef struct AudioBufferStruct {
 7 |   int32_t sample_rate;
 8 |   int32_t samples_per_channel;
 9 |   int32_t channels;
10 |   // Convention is that samples are stored interleaved by channel, so 
11 |   // |C0|C1|C0|C1|...
12 |   int16_t* data;
13 | } AudioBuffer;
14 | 
15 | AudioBuffer* audio_buffer_alloc(int32_t sample_rate,
16 |   int32_t samples_per_channel, int32_t channels);
17 | void audio_buffer_free(AudioBuffer* buffer);
18 | 
19 | #endif  // INCLUDE_AUDIO_BUFFER_H


--------------------------------------------------------------------------------
/.vscode/c_cpp_properties.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "configurations": [
 3 |         {
 4 |             "name": "Linux",
 5 |             "includePath": [
 6 |                 "${workspaceFolder}/**",
 7 |                 "${workspaceFolder}/src",
 8 |                 "${workspaceFolder}/src/utils",
 9 |                 "${workspaceFolder}/src/audio"
10 |             ],
11 |             "defines": [],
12 |             "compilerPath": "/usr/bin/gcc",
13 |             "cStandard": "gnu17",
14 |             "cppStandard": "gnu++17",
15 |             "intelliSenseMode": "linux-gcc-x64"
16 |         }
17 |     ],
18 |     "version": 4
19 | }


--------------------------------------------------------------------------------
/src/utils/trace.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_UTILS_TRACE_H
 2 | #define INCLUDE_UTILS_TRACE_H
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdint.h>
 6 | 
 7 | #define TRACE_STR(variable) do { fprintf(stderr, __FILE__":%d "#variable"=%s\n", __LINE__, variable); } while (0)
 8 | #define TRACE_INT(variable) do { fprintf(stderr, __FILE__":%d "#variable"=%d\n", __LINE__, variable); } while (0)
 9 | #define TRACE_PTR(variable) do { fprintf(stderr, __FILE__":%d "#variable"=0x%016lx\n", __LINE__, (uint64_t)(variable)); } while (0)
10 | #define TRACE_SIZ(variable) do { fprintf(stderr, __FILE__":%d "#variable"=%zu\n", __LINE__, variable); } while (0)
11 | 
12 | #endif  // INCLUDE_UTILS_TRACE_H
13 | 


--------------------------------------------------------------------------------
/src/audio/pa_list_devices_test.c:
--------------------------------------------------------------------------------
 1 | #include "acutest.h"
 2 | 
 3 | #include "pa_list_devices.c"
 4 | 
 5 | static void test_get_input_devices() {
 6 |   char** devices = NULL;
 7 |   int devices_length = 0;
 8 |   get_input_devices(&devices, &devices_length);
 9 |   fprintf(stderr, "Pulse Audio input devices found: \n");
10 |   if (devices_length == 0) {
11 |     fprintf(stderr, "None\n");
12 |   }
13 |   else {
14 |     for (int i = 0; i < devices_length; ++i) {
15 |       fprintf(stderr, "%s\n", devices[i]);
16 |     }
17 |   }
18 |   string_list_free(devices, devices_length);
19 | }
20 | 
21 | TEST_LIST = {
22 |   {"get_input_devices", test_get_input_devices},
23 |   {NULL, NULL},
24 | };


--------------------------------------------------------------------------------
/src/audio/audio_buffer.c:
--------------------------------------------------------------------------------
 1 | #include "audio_buffer.h"
 2 | 
 3 | #include "stdlib.h"
 4 | 
 5 | AudioBuffer* audio_buffer_alloc(int32_t sample_rate,
 6 |   int32_t samples_per_channel, int32_t channels) {
 7 |   AudioBuffer* result = calloc(1, sizeof(AudioBuffer));
 8 |   result->sample_rate = sample_rate;
 9 |   result->channels = channels;
10 |   result->samples_per_channel = samples_per_channel;
11 |   const size_t byte_count = (samples_per_channel * channels * sizeof(int16_t));
12 |   result->data = calloc(1, byte_count);
13 |   return result;
14 | }
15 | 
16 | void audio_buffer_free(AudioBuffer* buffer) {
17 |   if (buffer == NULL) {
18 |     return;
19 |   }
20 |   free(buffer->data);
21 |   free(buffer);
22 | }
23 | 


--------------------------------------------------------------------------------
/scripts/download_libs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -xe
 2 | 
 3 | BUILD_DIR=build/
 4 | 
 5 | ARCHITECTURE=`uname -m`
 6 | if [[ ${ARCHITECTURE} = "armv7l" ]]
 7 | then
 8 | LIB_URL="https://github.com/coqui-ai/STT/releases/download/v1.1.0/native_client.tflite.linux.armv7.tar.xz"
 9 | else
10 | LIB_URL="https://github.com/coqui-ai/STT/releases/download/v1.1.0/native_client.tflite.Linux.tar.xz"
11 | fi
12 | 
13 | LIB_TMP_DIR=${BUILD_DIR}lib_tmp/
14 | LIB_DIR=${BUILD_DIR}lib/
15 | 
16 | # Download, extract and copy the libraries from Coqui's binary release.
17 | rm -rf ${LIB_TMP_DIR} && mkdir -p ${LIB_TMP_DIR}
18 | wget -q ${LIB_URL} -O ${LIB_TMP_DIR}native_client.tflite.Linux.tar.xz
19 | unxz ${LIB_TMP_DIR}native_client.tflite.Linux.tar.xz
20 | rm -rf ${LIB_DIR}  && mkdir -p ${LIB_DIR}
21 | tar -xf ${LIB_TMP_DIR}native_client.tflite.Linux.tar --directory ${LIB_DIR}
22 | 


--------------------------------------------------------------------------------
/scripts/install_gh.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # See https://github.com/cli/cli/blob/trunk/docs/install_linux.md#debian-ubuntu-linux-raspberry-pi-os-apt
 4 | curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
 5 | echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
 6 | sudo apt -qq update
 7 | sudo apt -qq install -y gh
 8 | 
 9 | echo "Go to https://github.com/settings/tokens/new to create a new token, and enter it below:"
10 | read GH_TOKEN
11 | echo "Enter your GitHub username:"
12 | read GH_USERNAME
13 | 
14 | mkdir -p ~/.config/gh/
15 | cat > ~/.config/gh/config.yml << EOM
16 | hosts: 
17 |     github.com:
18 |         oauth_token: ${GH_TOKEN}
19 |         user: ${GH_USERNAME}
20 | EOM
21 | 


--------------------------------------------------------------------------------
/src/utils/file_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_UTIL_FILE_UTILS_H
 2 | #define INCLUDE_UTIL_FILE_UTILS_H
 3 | 
 4 | #include <stdbool.h>
 5 | #include <stddef.h>
 6 | #include <sys/types.h>
 7 | 
 8 | #ifdef __CPLUSPLUS
 9 | extern "C" {
10 | #endif  // __CPLUSPLUS
11 | 
12 |   bool file_does_exist(const char* filename);
13 |   off_t file_size(const char* filename);
14 | 
15 |   // No order is guaranteed for these functions, so any matching file may be
16 |   // returned. The caller is responsible for freeing the memory after use.
17 |   char* file_find_one_with_prefix(const char* folder, const char* prefix);
18 |   char* file_find_one_with_suffix(const char* folder, const char* suffix);
19 | 
20 |   char* file_join_paths(const char* a, const char* b);
21 | 
22 |   bool file_read(const char* filename, char** contents, size_t* contents_length);
23 |   bool file_write(const char* filename, const char* contents,
24 |     size_t contents_length);
25 | 
26 |   bool file_list_dir(const char* folder, char*** list, int* list_length);
27 | 
28 | #ifdef __CPLUSPLUS
29 | }
30 | #endif  // __CPLUSPLUS
31 | 
32 | #endif  // INCLUDE_UTIL_FILE_UTILS_H
33 | 


--------------------------------------------------------------------------------
/src/settings.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_SETTINGS_H
 2 | #define INCLUDE_SETTINGS_H
 3 | 
 4 | #include <stdbool.h>
 5 | 
 6 | #include "yargs.h"
 7 | 
 8 | #ifdef __CPLUSPLUS
 9 | extern "C" {
10 | #endif  // __CPLUSPLUS
11 | 
12 |   typedef struct SettingsStruct {
13 |     char* language;
14 |     const char* language_from_args;
15 |     const char* source;
16 |     const char* languages_dir;
17 |     char* model;
18 |     char* scorer;
19 |     int source_buffer_size;
20 |     int beam_width;
21 |     bool set_alphabeta;
22 |     float lm_alpha;
23 |     float lm_beta;
24 |     bool show_times;
25 |     bool has_versions;
26 |     bool extended_metadata;
27 |     bool json_output;
28 |     int json_candidate_transcripts;
29 |     int stream_size;
30 |     int extended_stream_size;
31 |     const char* hot_words;
32 |     const char* stream_capture_file;
33 |     int stream_capture_duration;
34 |     char** files;
35 |     int files_count;
36 |   } Settings;
37 | 
38 |   Settings* settings_init_from_argv(int argc, char** argv);
39 |   void settings_free(Settings* settings);
40 | 
41 | #ifdef __CPLUSPLUS
42 | }
43 | #endif  // __CPLUSPLUS
44 | 
45 | #endif  // INCLUDE_SETTINGS_H


--------------------------------------------------------------------------------
/scripts/download_models.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import csv
 3 | import os
 4 | 
 5 | LANGUAGES_DIR = "build/models"
 6 | 
 7 | os.system("mkdir -p %s" % LANGUAGES_DIR)
 8 | 
 9 | with open('scripts/coqui_releases.csv') as csvfile:
10 |     reader = csv.reader(csvfile)
11 |     for line_index, row in enumerate(reader):
12 |         if line_index == 0:
13 |             continue
14 |         code = row[0]
15 |         release = row[1]
16 |         output_dir = "%s/%s" % (LANGUAGES_DIR, code)
17 |         print('Downloading release %s to %s' % (release, output_dir))
18 |         download_status = os.system(
19 |             "gh release download %s --repo=coqui-ai/STT-models --dir=%s" % (release, output_dir))
20 |         if download_status != 0:
21 |             print("Download failed")
22 |             exit(download_status)
23 |         # Delete large, unused files as soon as they're downloaded, to save space
24 |         # on size-limited systems like the Raspberry Pi.
25 |         # This logic should be kept in sync with the same in create_deb_package.sh.
26 |         os.system("find %s -type f -name '*.pb*' -delete" % output_dir)
27 |         os.system("find %s -iname '*.scorer' -size +150M -delete" % output_dir)


--------------------------------------------------------------------------------
/scripts/coqui_releases.csv:
--------------------------------------------------------------------------------
 1 | Code,Release
 2 | am_ET,amharic/itml/v0.1.0
 3 | bn_IN,bengali/twb/v0.1.0
 4 | br_FR,breton/itml/v0.1.1
 5 | ca_ES,catalan/ccoreilly/v0.14.0
 6 | cnh_MM,hakha-chin/itml/v0.1.1
 7 | cs_CZ,czech/comodoro/v0.2.0
 8 | cv_RU,chuvash/itml/v0.1.1
 9 | cy_GB,welsh/techiaith/v21.03
10 | de_DE,german/AASHISHAG/v0.9.0
11 | dv_MV,dhivehi/itml/v0.1.1
12 | el_GR,greek/itml/v0.1.1
13 | en_US,english/coqui/v1.0.0-large-vocab
14 | et_EE,estonian/itml/v0.1.1
15 | eu_ES,basque/itml/v0.1.1
16 | fi_FI,finnish/itml/v0.1.1
17 | fr_FR,french/commonvoice-fr/v0.6
18 | fy_NL,frisian/itml/v0.1.1
19 | ga_IE,irish/itml/v0.1.1
20 | hu_HU,hungarian/itml/v0.1.1
21 | id_ID,indonesian/itml/v0.1.1
22 | it_IT,italian/mozillaitalia/2020.8.7
23 | ka_GE,georgian/itml/v0.1.1
24 | ky_KG,kyrgyz/itml/v0.1.1
25 | lg_UG,luganda/itml/v0.1.1
26 | lt_LT,lithuanian/itml/v0.1.1
27 | lv_LV,latvian/itml/v0.1.1
28 | mn_MN,mongolian/itml/v0.1.1
29 | mt_MT,maltese/itml/v0.1.1
30 | nl_NL,dutch/acabunoc/v0.0.1
31 | or_IN,odia/itml/v0.1.1
32 | pt_PT,portuguese/itml/v0.1.1
33 | rm_CH,romansh-sursilvan/itml/v0.1.0
34 | ro_RO,romanian/itml/v0.1.1
35 | ru_RU,russian/jemeyer/v0.1.0
36 | rw_RW,kinyarwanda/digital-umuganda/v0.0.1
37 | sah_RU,sakha/itml/v0.1.1
38 | sb_DE,upper-sorbian/itml/v0.1.0
39 | sl_SI,slovenian/itml/v0.1.1
40 | sw_KE,swahili-congo/twb/v0.3.0
41 | ta_IN,tamil/itml/v0.1.0
42 | th_TH,thai/itml/v0.1.0
43 | tr_TR,turkish/itml/v0.1.0
44 | tt_RU,tatar/itml/v0.1.0
45 | uk_UK,ukrainian/robinhad/v0.4
46 | wo_SN,wolof/itml/v0.1.0
47 | yo_NG,yoruba/itml/v0.1.0


--------------------------------------------------------------------------------
/scripts/create_deb_package.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -xe
 2 | 
 3 | NAME=spchcat
 4 | VERSION=0.0-2
 5 | MAINTAINER=pete@petewarden.com
 6 | DESCRIPTION="Speech recognition tool to convert audio to text transcripts."
 7 | 
 8 | ARCHITECTURE=`uname -m`
 9 | if [[ ${ARCHITECTURE} = "armv7l" ]]
10 | then
11 | ARCH=armhf
12 | else
13 | ARCH=amd64
14 | fi
15 | 
16 | BUILD_DIR=build/
17 | MODELS_DIR=${BUILD_DIR}models/
18 | LIB_DIR=${BUILD_DIR}lib/
19 | DEB_DIR=${BUILD_DIR}${NAME}_${VERSION}_${ARCH}/
20 | DEB_MODELS_DIR=${DEB_DIR}/etc/spchcat/models/
21 | DEB_LIB_DIR=${DEB_DIR}/usr/lib/
22 | DEB_BIN_DIR=${DEB_DIR}/usr/local/bin/
23 | DEB_DEBIAN_DIR=${DEB_DIR}DEBIAN/
24 | DEB_CONTROL_FILE=${DEB_DEBIAN_DIR}control
25 | 
26 | rm -rf ${DEB_DIR}
27 | 
28 | # Copy over the language models. You'll need to run download_models.py
29 | # first to fetch the required files.
30 | mkdir -p ${DEB_MODELS_DIR}
31 | cp -r --verbose ${MODELS_DIR}* ${DEB_MODELS_DIR}
32 | # Remove the TensorFlow model graphs, since these are only used for training
33 | # and are quite large.
34 | find ${DEB_MODELS_DIR} -type f -name "*.pb*" -delete
35 | # Some scorers are also very large, so for convenience remove them too.
36 | find ${DEB_MODELS_DIR} -iname "*.scorer" -size +150M -delete
37 | 
38 | # Fetch the binary library release.
39 | scripts/download_libs.sh
40 | mkdir -p ${DEB_LIB_DIR}
41 | cp -r  --verbose ${LIB_DIR}*.so* ${DEB_LIB_DIR}
42 | 
43 | # Copy over the binary executable.
44 | mkdir -p ${DEB_BIN_DIR}
45 | cp -r --verbose ${NAME} ${DEB_BIN_DIR}
46 | 
47 | # Set up the metadata for the package.
48 | mkdir -p ${DEB_DEBIAN_DIR}
49 | cat << EOM > ${DEB_CONTROL_FILE}
50 | Package: ${NAME}
51 | Version: ${VERSION}
52 | Architecture: ${ARCH}
53 | Maintainer: ${MAINTAINER}
54 | Description: ${DESCRIPTION}
55 | EOM
56 | 
57 | # Build the package.
58 | dpkg-deb --build --root-owner-group ${DEB_DIR}


--------------------------------------------------------------------------------
/src/utils/yargs.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_YARGS_H
 2 | #define INCLUDE_YARGS_H
 3 | 
 4 | #include <stdbool.h>
 5 | #include <stddef.h>
 6 | #include <stdint.h>
 7 | 
 8 | #ifdef __CPLUSPLUS
 9 | extern "C" {
10 | #endif  // __CPLUSPLUS
11 | 
12 |   typedef enum YargsFlagTypeEnum {
13 |     FT_INT32 = 0,
14 |     FT_FLOAT = 1,
15 |     FT_BOOL = 2,
16 |     FT_STRING = 3,
17 |   } YargsFlagType;
18 | 
19 |   typedef struct YargsFlagStruct {
20 |     char* name;
21 |     char* short_name;
22 |     YargsFlagType type;
23 |     bool* bool_value;
24 |     float* float_value;
25 |     int32_t* int32_value;
26 |     const char** string_value;
27 |     char* description;
28 |   } YargsFlag;
29 | 
30 |   // Macros used to simplify adding flag entries.
31 | #define YARGS_BOOL(NAME, SHORT_NAME, VARIABLE_ADDR, DESCRIPTION) {\
32 |   NAME, SHORT_NAME, FT_BOOL, VARIABLE_ADDR, NULL, NULL, NULL, DESCRIPTION }
33 | 
34 | #define YARGS_FLOAT(NAME, SHORT_NAME, VARIABLE_ADDR, DESCRIPTION) {\
35 |   NAME, SHORT_NAME, FT_FLOAT, NULL, VARIABLE_ADDR, NULL, NULL, DESCRIPTION }
36 | 
37 | #define YARGS_INT32(NAME, SHORT_NAME, VARIABLE_ADDR, DESCRIPTION) {\
38 |   NAME, SHORT_NAME, FT_INT32, NULL, NULL, VARIABLE_ADDR, NULL, DESCRIPTION }
39 | 
40 | #define YARGS_STRING(NAME, SHORT_NAME, VARIABLE_ADDR, DESCRIPTION) {\
41 |   NAME, SHORT_NAME, FT_STRING, NULL, NULL, NULL, VARIABLE_ADDR, DESCRIPTION }
42 | 
43 |   bool yargs_init(const YargsFlag* flags, size_t flags_length,
44 |     const char* app_description, char** argv, int argc);
45 |   void yargs_free();
46 | 
47 |   void yargs_print_usage(const YargsFlag* flags, int flags_length,
48 |     const char* app_description);
49 | 
50 |   bool yargs_load_from_file(const YargsFlag* flags, int flags_length,
51 |     const char* filename);
52 |   bool yargs_save_to_file(const YargsFlag* flags, int flags_length,
53 |     const char* filename);
54 | 
55 |   int yargs_get_unnamed_length();
56 |   const char* yargs_get_unnamed(int index);
57 | 
58 |   const char* yargs_app_name();
59 | 
60 | #ifdef __CPLUSPLUS
61 | }
62 | #endif  // __CPLUSPLUS
63 | 
64 | #endif  // INCLUDE_YARGS_H


--------------------------------------------------------------------------------
/src/utils/string_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_UTIL_STRING_UTILS_H
 2 | #define INCLUDE_UTIL_STRING_UTILS_H
 3 | 
 4 | #include <stdbool.h>
 5 | 
 6 | #ifdef __CPLUSPLUS
 7 | extern "C" {
 8 | #endif  // __CPLUSPLUS
 9 | 
10 |   bool string_starts_with(const char* string, const char* ending);
11 |   bool string_ends_with(const char* string, const char* ending);
12 | 
13 |   // Returns a copy of the input. Caller owns and has to free() the result.
14 |   char* string_duplicate(const char* string);
15 | 
16 |   // Allocates a buffer of the right size and then calls sprintf to write into
17 |   // it, avoiding the risk of overwriting the end. Caller must free the result.
18 |   char* string_alloc_sprintf(const char* format, ...);
19 | 
20 |   char* string_append(const char* a, const char* b);
21 | 
22 |   char* string_append_in_place(char* a, const char* b);
23 | 
24 |   // Splits a string into multiple parts, based on the single-character separator.
25 |   // The `max_splits` arguments controls the maximum number of parts that will be
26 |   // produced, or -1 for no maximum. The caller is responsible for calling free() 
27 |   // on the `outputs` array, and for all of the entries in that array.
28 |   void string_split(const char* input, char separator, const int max_splits,
29 |     char*** outputs, int* outputs_length);
30 | 
31 |   // Convenience function to deallocate the memory allocated by the functions
32 |   // that produce a list of strings, like string_split.
33 |   void string_list_free(char** list, int list_length);
34 | 
35 |   // Appends a new string to the end of a list. A copy is made of the input
36 |   // string, so the original can modified or freed independently of the list.
37 |   void string_list_add(const char* new, char*** list, int* list_length);
38 | 
39 |   char* string_join(const char** list, int list_length, const char* separator);
40 | 
41 |   // Produces a new list that contains only the strings for which the callback
42 |   // function returns true.
43 |   typedef bool (*string_list_filter_funcptr)(const char* a, void* cookie);
44 |   void string_list_filter(const char** in_list, int in_list_length,
45 |     string_list_filter_funcptr should_keep_func, void* cookie, char*** out_list,
46 |     int* out_list_length);
47 | 
48 | #ifdef __CPLUSPLUS
49 | }
50 | #endif  // __CPLUSPLUS
51 | 
52 | #endif  // INCLUDE_UTIL_STRING_UTILS_H
53 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | [*]
 2 | indent_style = space
 3 | 
 4 | cpp_indent_braces=false
 5 | cpp_indent_multi_line_relative_to=innermost_parenthesis
 6 | cpp_indent_within_parentheses=indent
 7 | cpp_indent_preserve_within_parentheses=false
 8 | cpp_indent_case_labels=false
 9 | cpp_indent_case_contents=true
10 | cpp_indent_case_contents_when_block=false
11 | cpp_indent_lambda_braces_when_parameter=true
12 | cpp_indent_goto_labels=one_left
13 | cpp_indent_preprocessor=leftmost_column
14 | cpp_indent_access_specifiers=false
15 | cpp_indent_namespace_contents=true
16 | cpp_indent_preserve_comments=false
17 | cpp_new_line_before_open_brace_namespace=ignore
18 | cpp_new_line_before_open_brace_type=ignore
19 | cpp_new_line_before_open_brace_function=ignore
20 | cpp_new_line_before_open_brace_block=ignore
21 | cpp_new_line_before_open_brace_lambda=ignore
22 | cpp_new_line_scope_braces_on_separate_lines=false
23 | cpp_new_line_close_brace_same_line_empty_type=false
24 | cpp_new_line_close_brace_same_line_empty_function=false
25 | cpp_new_line_before_catch=true
26 | cpp_new_line_before_else=true
27 | cpp_new_line_before_while_in_do_while=false
28 | cpp_space_before_function_open_parenthesis=remove
29 | cpp_space_within_parameter_list_parentheses=false
30 | cpp_space_between_empty_parameter_list_parentheses=false
31 | cpp_space_after_keywords_in_control_flow_statements=true
32 | cpp_space_within_control_flow_statement_parentheses=false
33 | cpp_space_before_lambda_open_parenthesis=false
34 | cpp_space_within_cast_parentheses=false
35 | cpp_space_after_cast_close_parenthesis=false
36 | cpp_space_within_expression_parentheses=false
37 | cpp_space_before_block_open_brace=true
38 | cpp_space_between_empty_braces=false
39 | cpp_space_before_initializer_list_open_brace=false
40 | cpp_space_within_initializer_list_braces=true
41 | cpp_space_preserve_in_initializer_list=true
42 | cpp_space_before_open_square_bracket=false
43 | cpp_space_within_square_brackets=false
44 | cpp_space_before_empty_square_brackets=false
45 | cpp_space_between_empty_square_brackets=false
46 | cpp_space_group_square_brackets=true
47 | cpp_space_within_lambda_brackets=false
48 | cpp_space_between_empty_lambda_brackets=false
49 | cpp_space_before_comma=false
50 | cpp_space_after_comma=true
51 | cpp_space_remove_around_member_operators=true
52 | cpp_space_before_inheritance_colon=true
53 | cpp_space_before_constructor_colon=true
54 | cpp_space_remove_before_semicolon=true
55 | cpp_space_after_semicolon=false
56 | cpp_space_remove_around_unary_operator=true
57 | cpp_space_around_binary_operator=insert
58 | cpp_space_around_assignment_operator=insert
59 | cpp_space_pointer_reference_alignment=left
60 | cpp_space_around_ternary_operator=insert
61 | cpp_wrap_preserve_blocks=one_liners
62 | 
63 | # Tab indentation (no size specified)
64 | [Makefile]
65 | indent_style = tab
66 | 
67 | [*.{c,h,cpp,hpp}]
68 | indent_size = 2
69 | max_line_length = 80


--------------------------------------------------------------------------------
/src/utils/file_utils.c:
--------------------------------------------------------------------------------
  1 | #include "file_utils.h"
  2 | 
  3 | #include <dirent.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | #include <sys/stat.h>
  8 | 
  9 | #include "string_utils.h"
 10 | #include "trace.h"
 11 | 
 12 | // Helper type for callback functions.
 13 | typedef bool (*file_find_func_ptr)(const char*, void*);
 14 | 
 15 | static char* file_find_first_with_callback(const char* folder,
 16 |   file_find_func_ptr callback, void* cookie) {
 17 |   DIR* dir = opendir(folder);
 18 |   if (dir == NULL) {
 19 |     return NULL;
 20 |   }
 21 |   char* result = NULL;
 22 |   struct dirent* entry = readdir(dir);
 23 |   while (entry != NULL)
 24 |   {
 25 |     const char* filename = entry->d_name;
 26 |     if (callback(filename, cookie)) {
 27 |       result = string_duplicate(filename);
 28 |       break;
 29 |     }
 30 |     entry = readdir(dir);
 31 |   }
 32 |   closedir(dir);
 33 |   return result;
 34 | }
 35 | 
 36 | static bool file_has_prefix(const char* filename, void* cookie) {
 37 |   const char* prefix = (const char*)(cookie);
 38 |   return string_starts_with(filename, prefix);
 39 | }
 40 | 
 41 | static bool file_has_suffix(const char* filename, void* cookie) {
 42 |   const char* suffix = (const char*)(cookie);
 43 |   return string_ends_with(filename, suffix);
 44 | }
 45 | 
 46 | bool file_does_exist(const char* path) {
 47 |   struct stat sb;
 48 |   return (stat(path, &sb) == 0);
 49 | }
 50 | 
 51 | off_t file_size(const char* filename) {
 52 |   struct stat st;
 53 |   if (stat(filename, &st) == 0)
 54 |     return st.st_size;
 55 |   return -1;
 56 | }
 57 | 
 58 | char* file_find_one_with_prefix(const char* folder,
 59 |   const char* prefix) {
 60 |   return file_find_first_with_callback(folder, file_has_prefix, (void*)(prefix));
 61 | }
 62 | 
 63 | char* file_find_one_with_suffix(const char* folder,
 64 |   const char* suffix) {
 65 |   return file_find_first_with_callback(folder, file_has_suffix,
 66 |     (void*)(suffix));
 67 | }
 68 | 
 69 | char* file_join_paths(const char* a, const char* b) {
 70 |   const int a_length = strlen(a);
 71 |   const char* format;
 72 |   if (a[a_length - 1] == '/') {
 73 |     format = "%s%s";
 74 |   }
 75 |   else {
 76 |     format = "%s/%s";
 77 |   }
 78 |   return string_alloc_sprintf(format, a, b);
 79 | }
 80 | 
 81 | bool file_read(const char* filename, char** contents, size_t* contents_length) {
 82 |   *contents_length = file_size(filename);
 83 |   if (*contents_length == -1) {
 84 |     *contents = NULL;
 85 |     return false;
 86 |   }
 87 |   FILE* file = fopen(filename, "rb");
 88 |   if (file == NULL) {
 89 |     *contents = NULL;
 90 |     return false;
 91 |   }
 92 |   *contents = malloc(*contents_length);
 93 |   fread(*contents, 1, *contents_length, file);
 94 |   fclose(file);
 95 |   return true;
 96 | }
 97 | 
 98 | bool file_write(const char* filename, const char* contents, size_t contents_length) {
 99 |   FILE* file = fopen(filename, "wb");
100 |   if (file == NULL) {
101 |     return false;
102 |   }
103 |   fwrite(contents, 1, contents_length, file);
104 |   fclose(file);
105 |   return true;
106 | }
107 | 
// Lists the entries of `folder`.
//
// On success returns true and stores a newly-allocated array of
// newly-allocated entry names in `*list`, with the count in `*list_length`.
// The caller frees each name and the array itself. Every name readdir()
// reports is included; nothing is filtered out. On failure (the folder can't
// be opened, or memory runs out) returns false with `*list` NULL and
// `*list_length` 0.
bool file_list_dir(const char* folder, char*** list, int* list_length) {
  *list = NULL;
  *list_length = 0;
  DIR* dir = opendir(folder);
  if (dir == NULL) {
    return false;
  }

  char** entries = NULL;
  int entries_length = 0;
  bool succeeded = true;
  for (struct dirent* entry = readdir(dir); entry != NULL;
    entry = readdir(dir)) {
    // Grow through a temporary so the existing array is still reachable (and
    // can be freed) if realloc() fails.
    char** grown = realloc(entries,
      sizeof(char*) * ((size_t)(entries_length) + 1));
    if (grown == NULL) {
      succeeded = false;
      break;
    }
    entries = grown;
    // NOTE(review): assumes string_duplicate() signals failure with NULL --
    // confirm against string_utils.c.
    char* name = string_duplicate(entry->d_name);
    if (name == NULL) {
      succeeded = false;
      break;
    }
    entries[entries_length] = name;
    entries_length += 1;
  }
  closedir(dir);

  if (!succeeded) {
    for (int i = 0; i < entries_length; ++i) {
      free(entries[i]);
    }
    free(entries);
    return false;
  }

  *list = entries;
  *list_length = entries_length;
  return true;
}


--------------------------------------------------------------------------------
/src/app_main_test.c:
--------------------------------------------------------------------------------
  1 | #include "acutest.h"
  2 | 
  3 | #include "app_main.c"
  4 | 
  5 | #include "file_utils.h"
  6 | 
  7 | void test_plain_text_from_transcript() {
  8 |   TokenMetadata tokens1[] = {
  9 |     {"h", 50, 1.0f},
 10 |     {"e", 55, 1.1f},
 11 |     {"l", 60, 1.2f},
 12 |     {"l", 65, 1.3f},
 13 |     {"o", 70, 1.4f},
 14 |     {" ", 80, 1.6f},
 15 |     {"w", 85, 1.7f},
 16 |     {"o", 90, 1.8f},
 17 |     {"r", 95, 1.9f},
 18 |     {"l", 100, 2.0f},
 19 |     {"d", 105, 2.1f},
 20 |   };
 21 |   const int tokens1_length = sizeof(tokens1) / sizeof(tokens1[0]);
 22 |   CandidateTranscript transcript1 = {
 23 |     tokens1, tokens1_length, 1.0f,
 24 |   };
 25 |   char* result = plain_text_from_transcript(&transcript1);
 26 |   TEST_ASSERT(result != NULL);
 27 |   TEST_STREQ("hello world", result);
 28 |   free(result);
 29 | 
 30 |   TokenMetadata tokens2[] = {
 31 |     {"h", 50, 1.0f},
 32 |     {"e", 55, 1.1f},
 33 |     {"l", 60, 1.2f},
 34 |     {"l", 65, 1.3f},
 35 |     {"o", 70, 1.4f},
 36 |     {" ", 500, 10.0f},
 37 |     {"w", 505, 10.1f},
 38 |     {"o", 510, 10.2f},
 39 |     {"r", 515, 10.3f},
 40 |     {"l", 520, 10.4f},
 41 |     {"d", 525, 10.5f},
 42 |   };
 43 |   const int tokens2_length = sizeof(tokens2) / sizeof(tokens2[0]);
 44 |   CandidateTranscript transcript2 = {
 45 |     tokens2, tokens2_length, 1.0f,
 46 |   };
 47 |   result = plain_text_from_transcript(&transcript2);
 48 |   TEST_ASSERT(result != NULL);
 49 |   TEST_STREQ("hello\nworld", result);
 50 |   free(result);
 51 | 
 52 |   TokenMetadata tokens3[] = {
 53 |     {"h", 50, 1.0f},
 54 |     {"e", 55, 1.1f},
 55 |     {"l", 60, 1.2f},
 56 |     {"l", 65, 1.3f},
 57 |     {"o", 70, 1.4f},
 58 |     {" ", 75, 1.5f},
 59 |     {"w", 505, 10.1f},
 60 |     {"o", 510, 10.2f},
 61 |     {"r", 515, 10.3f},
 62 |     {"l", 520, 10.4f},
 63 |     {"d", 525, 10.5f},
 64 |   };
 65 |   const int tokens3_length = sizeof(tokens3) / sizeof(tokens3[0]);
 66 |   CandidateTranscript transcript3 = {
 67 |     tokens3, tokens3_length, 1.0f,
 68 |   };
 69 |   result = plain_text_from_transcript(&transcript3);
 70 |   TEST_ASSERT(result != NULL);
 71 |   TEST_STREQ("hello\nworld", result);
 72 |   free(result);
 73 | }
 74 | 
 75 | void test_print_changed_lines() {
 76 |   const char* test_filename = "/tmp/test_print_changed_lines.txt";
 77 |   FILE* test_file = fopen(test_filename, "wb");
 78 |   TEST_ASSERT(test_file != NULL);
 79 |   const char* previous1 = "Hello";
 80 |   const char* current1 = "Hello World";
 81 |   print_changed_lines(current1, previous1, test_file);
 82 |   fclose(test_file);
 83 | 
 84 |   const char* expected = "\rHello World        ";
 85 |   const int expected_length = strlen(expected);
 86 |   char* result = NULL;
 87 |   size_t result_length = 0;
 88 |   TEST_ASSERT(file_read(test_filename, &result, &result_length));
 89 |   TEST_ASSERT_((expected_length == result_length),
 90 |     "%d vs %zu", expected_length, result_length);
 91 |   TEST_MEMEQ(expected, result, strlen(expected));
 92 |   free(result);
 93 | 
 94 |   test_file = fopen(test_filename, "wb");
 95 |   TEST_ASSERT(test_file != NULL);
 96 |   const char* previous2 = "";
 97 |   const char* current2 = "Hello World\nThis is Pete";
 98 |   print_changed_lines(current2, previous2, test_file);
 99 |   fclose(test_file);
100 | 
101 |   const char* expected2 = "\rHello World\n\rThis is Pete        ";
102 |   const int expected2_length = strlen(expected2);
103 |   result = NULL;
104 |   result_length = 0;
105 |   TEST_ASSERT(file_read(test_filename, &result, &result_length));
106 |   TEST_CHECK(expected2_length == result_length);
107 |   TEST_MSG("%d vs %zu", expected2_length, result_length);
108 |   TEST_DUMP("result", result, result_length);
109 |   TEST_ASSERT(expected2_length == result_length);
110 |   TEST_MEMEQ(expected2, result, expected2_length);
111 |   free(result);
112 | 
113 |   test_file = fopen(test_filename, "wb");
114 |   TEST_ASSERT(test_file != NULL);
115 |   const char* previous3 = "\rHello World        ";
116 |   const char* current3 = "Hello World\nThis is Pete";
117 |   print_changed_lines(current3, previous3, test_file);
118 |   fclose(test_file);
119 | 
120 |   const char* expected3 = "\rHello World\n\rThis is Pete        ";
121 |   const int expected3_length = strlen(expected3);
122 |   result = NULL;
123 |   result_length = 0;
124 |   TEST_ASSERT(file_read(test_filename, &result, &result_length));
125 |   TEST_CHECK(expected3_length == result_length);
126 |   TEST_MSG("%d vs %zu", expected3_length, result_length);
127 |   TEST_DUMP("result", result, result_length);
128 |   TEST_ASSERT(expected3_length == result_length);
129 |   TEST_MEMEQ(expected3, result, expected3_length);
130 |   free(result);
131 | }
132 | 
// Table of tests in this file: each entry pairs a display name with the
// function to run, and the {NULL, NULL} entry marks the end of the table.
TEST_LIST = {
  {"plain_text_from_transcript", test_plain_text_from_transcript},
  {"print_changed_lines", test_print_changed_lines},
  {NULL, NULL},
};


--------------------------------------------------------------------------------
/src/third_party/termcolor-c.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * termcolor-c
  3 |  * -----------
  4 |  * A header-only ANSI C library for printing colored messages to the terminal
  5 |  * Based on termcolor for C++ by Ihor Kalnytskyi (3-clause BSD license)
  6 |  * -----------
  7 |  * Copyright (c) 2018, Alex Arslan
  8 |  * All rights reserved.
  9 |  *
 10 |  * Redistribution and use in source and binary forms, with or without
 11 |  * modification, are permitted provided that the following conditions are met:
 12 |  *
 13 |  * * Redistributions of source code must retain the above copyright notice, this
 14 |  *   list of conditions and the following disclaimer.
 15 |  *
 16 |  * * Redistributions in binary form must reproduce the above copyright notice,
 17 |  *   this list of conditions and the following disclaimer in the documentation
 18 |  *   and/or other materials provided with the distribution.
 19 |  *
 20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 21 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 23 |  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 24 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 25 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 26 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 27 |  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 28 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30 |  */
 31 | #ifndef _TERMCOLOR_C_H_
 32 | #define _TERMCOLOR_C_H_
 33 | 
 34 | #ifdef __cplusplus
 35 | extern "C" {
 36 | #endif
 37 | 
 38 | #include <unistd.h>
 39 | #include <stdio.h>
 40 | #include <stdbool.h>
 41 | 
/*
 * Escape sequences written to a stream by the functions below. Each one is
 * ESC (octal 033) followed by "[<number>m". Plain names change text
 * attributes or the foreground color; the BG_ names change the background.
 * GRAY and GREY are two spellings of the same code.
 */
#define ANSI_CODE_RESET      "\033[00m"
#define ANSI_CODE_BOLD       "\033[1m"
#define ANSI_CODE_DARK       "\033[2m"
#define ANSI_CODE_UNDERLINE  "\033[4m"
#define ANSI_CODE_BLINK      "\033[5m"
#define ANSI_CODE_REVERSE    "\033[7m"
#define ANSI_CODE_CONCEALED  "\033[8m"
#define ANSI_CODE_GRAY       "\033[30m"
#define ANSI_CODE_GREY       "\033[30m"
#define ANSI_CODE_RED        "\033[31m"
#define ANSI_CODE_GREEN      "\033[32m"
#define ANSI_CODE_YELLOW     "\033[33m"
#define ANSI_CODE_BLUE       "\033[34m"
#define ANSI_CODE_MAGENTA    "\033[35m"
#define ANSI_CODE_CYAN       "\033[36m"
#define ANSI_CODE_WHITE      "\033[37m"
#define ANSI_CODE_BG_GRAY    "\033[40m"
#define ANSI_CODE_BG_GREY    "\033[40m"
#define ANSI_CODE_BG_RED     "\033[41m"
#define ANSI_CODE_BG_GREEN   "\033[42m"
#define ANSI_CODE_BG_YELLOW  "\033[43m"
#define ANSI_CODE_BG_BLUE    "\033[44m"
#define ANSI_CODE_BG_MAGENTA "\033[45m"
#define ANSI_CODE_BG_CYAN    "\033[46m"
#define ANSI_CODE_BG_WHITE   "\033[47m"
 67 | 
 68 |   /* TODO: Maybe non-TTYs support color? idk */
 69 |   bool supports_color(FILE* stream) {
 70 | #if !(defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)))
 71 |     return false;
 72 | #else
 73 |     return true;
 74 | #endif
 75 |   }
 76 | 
 77 |   /**
 78 |    * These functions affect the appearance of text, i.e. the foreground, in the stream
 79 |    */
 80 | #define DEFUN_T(c, code) \
 81 |     FILE* text_##c(FILE* stream) { \
 82 |         if (supports_color(stream)) { \
 83 |             fputs(ANSI_CODE_##code, stream); \
 84 |         } \
 85 |         return stream; \
 86 |     }
 87 |   DEFUN_T(bold, BOLD);
 88 |   DEFUN_T(dark, DARK);
 89 |   DEFUN_T(underline, UNDERLINE);
 90 |   DEFUN_T(blink, BLINK);
 91 |   DEFUN_T(reverse, REVERSE);
 92 |   DEFUN_T(concealed, CONCEALED);
 93 |   DEFUN_T(gray, GRAY);
 94 |   DEFUN_T(grey, GREY);
 95 |   DEFUN_T(red, RED);
 96 |   DEFUN_T(green, GREEN);
 97 |   DEFUN_T(yellow, YELLOW);
 98 |   DEFUN_T(blue, BLUE);
 99 |   DEFUN_T(magenta, MAGENTA);
100 |   DEFUN_T(cyan, CYAN);
101 |   DEFUN_T(white, WHITE);
102 | #undef DEFUN_T
103 | 
104 |   /**
105 |    * These functions affect the appearance of the background of text in the stream
106 |    */
107 | #define DEFUN_B(c, code) \
108 |     FILE* background_##c(FILE* stream) { \
109 |         if (supports_color(stream)) { \
110 |             fputs(ANSI_CODE_BG_##code, stream); \
111 |         } \
112 |         return stream; \
113 |     }
114 |   DEFUN_B(gray, GRAY);
115 |   DEFUN_B(grey, GREY);
116 |   DEFUN_B(red, RED);
117 |   DEFUN_B(green, GREEN);
118 |   DEFUN_B(yellow, YELLOW);
119 |   DEFUN_B(blue, BLUE);
120 |   DEFUN_B(magenta, MAGENTA);
121 |   DEFUN_B(cyan, CYAN);
122 |   DEFUN_B(white, WHITE);
123 | #undef DEFUN_B
124 | 
125 |   void reset_colors(FILE* stream) {
126 |     if (supports_color(stream)) {
127 |       fputs(ANSI_CODE_RESET, stream);
128 |     }
129 |   }
130 | 
131 | #ifdef __cplusplus
132 | }
133 | #endif
134 | 
135 | #endif /* _TERMCOLOR_C_H_ */


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | CC := gcc
  2 | CCFLAGS := \
  3 |   -std=c99 \
  4 |   -Wall \
  5 |   -Werror \
  6 |   -g \
  7 |   -O0 \
  8 |   -Isrc \
  9 |   -Isrc/audio \
 10 |   -Isrc/third_party \
 11 |   -Isrc/utils \
 12 |   -Ibuild/lib
 13 | 
 14 | LDFLAGS := \
 15 |   -Lbuild/lib \
 16 |   -lstt \
 17 |   -ltensorflowlite \
 18 |   -ltflitedelegates \
 19 |   -lpulse \
 20 |   -lpulse-simple
 21 | 
 22 | TEST_CCFLAGS := \
 23 |   -fsanitize=address \
 24 |   -fsanitize=undefined \
 25 |   -fno-sanitize-recover=all \
 26 |   -fsanitize=float-divide-by-zero \
 27 |   -fsanitize=float-cast-overflow \
 28 |   -fno-sanitize=null \
 29 |   -fno-sanitize=alignment \
 30 |   -fno-omit-frame-pointer
 31 | 
 32 | DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)$*.d
 33 | TEST_DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)$*_test.d
 34 | 
 35 | BUILDDIR = build/
 36 | OBJDIR := $(BUILDDIR)obj/
 37 | BINDIR := $(BUILDDIR)bin/
 38 | DEPDIR := $(BUILDDIR)dep/
 39 | LIBDIR := $(BUILDDIR)lib/
 40 | 
# NOTE(review): ':=' expands immediately, but SRCS is only assigned near the
# end of this file and TEST_SRCS is never assigned here, so both lists are
# empty at this point. No rule below refers to OBJS or TEST_OBJS.
OBJS := $(addprefix $(OBJDIR),$(subst .c,.o,$(SRCS)))
TEST_OBJS := $(addprefix $(OBJDIR),$(subst .c,.o,$(TEST_SRCS)))
 43 | 
 44 | .PHONY: all clean test
 45 | 
 46 | all: \
 47 |   $(BINDIR)file_utils_test \
 48 |   $(BINDIR)string_utils_test \
 49 |   $(BINDIR)yargs_test \
 50 |   $(BINDIR)settings_test \
 51 |   $(BINDIR)app_main_test \
 52 |   $(BINDIR)spchcat
 53 | 
 54 | clean:
 55 | 	rm -rf $(BUILDDIR)
 56 | 
 57 | clean_src:
 58 | 	rm -rf $(OBJDIR)
 59 | 	rm -rf $(BINDIR)
 60 | 	rm -rf $(DEPDIR)
 61 | 
 62 | test: \
 63 |   run_file_utils_test \
 64 |   run_string_utils_test \
 65 |   run_yargs_test \
 66 |   run_settings_test \
 67 |   run_pa_list_devices_test \
 68 |   run_audio_buffer_test \
 69 |   run_wav_io_test \
 70 |   run_app_main_test
 71 | 
 72 | $(OBJDIR)%.o: %.c $(DEPDIR)/%.d | $(DEPDIR)
 73 | 	@mkdir -p $(dir $@)
 74 | 	@mkdir -p $(dir $(DEPDIR)$*_test.d)
 75 | 	$(CC) $(CCFLAGS) $(DEPFLAGS) -c $< -o $@
 76 | 
 77 | $(OBJDIR)%_test.o: %_test.c $(DEPDIR)/%.d | $(DEPDIR)
 78 | 	@mkdir -p $(dir $@)
 79 | 	@mkdir -p $(dir $(DEPDIR)$*.d)
 80 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $(TEST_DEPFLAGS) -c $< -o $@
 81 | 
 82 | $(BINDIR)file_utils_test: \
 83 |   $(OBJDIR)src/utils/file_utils_test.o \
 84 |   $(OBJDIR)src/utils/string_utils.o
 85 | 	@mkdir -p $(dir $@) 
 86 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@
 87 | 
 88 | run_file_utils_test: $(BINDIR)file_utils_test
 89 | 	$<
 90 | 
 91 | $(BINDIR)string_utils_test: \
 92 |   $(OBJDIR)src/utils/string_utils_test.o
 93 | 	@mkdir -p $(dir $@) 
 94 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@
 95 | 
 96 | run_string_utils_test: $(BINDIR)string_utils_test
 97 | 	$<
 98 | 
 99 | $(BINDIR)yargs_test: \
100 |   $(OBJDIR)src/utils/string_utils.o \
101 |   $(OBJDIR)src/utils/yargs_test.o
102 | 	@mkdir -p $(dir $@) 
103 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@
104 | 
105 | run_yargs_test: $(BINDIR)yargs_test
106 | 	$<
107 | 
# Libraries are listed after the objects that use them, matching the
# app_main_test and spchcat rules. A linker that only keeps libraries needed
# by earlier inputs would otherwise drop -lpulse and leave symbols unresolved.
$(BINDIR)pa_list_devices_test: \
  $(OBJDIR)src/utils/string_utils.o \
  $(OBJDIR)src/audio/pa_list_devices_test.o
	@mkdir -p $(dir $@)
	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@ -lpulse
113 | 
114 | run_pa_list_devices_test: $(BINDIR)pa_list_devices_test
115 | 	$<
116 | 
117 | $(BINDIR)audio_buffer_test: \
118 |   $(OBJDIR)src/audio/audio_buffer_test.o
119 | 	@mkdir -p $(dir $@) 
120 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@
121 | 
122 | run_audio_buffer_test: $(BINDIR)audio_buffer_test
123 | 	$<
124 | 
# Libraries are listed after the objects that use them, matching the
# app_main_test and spchcat rules. A linker that only keeps libraries needed
# by earlier inputs would otherwise drop -lm and leave symbols unresolved.
$(BINDIR)wav_io_test: \
  $(OBJDIR)src/utils/file_utils.o \
  $(OBJDIR)src/utils/string_utils.o \
  $(OBJDIR)src/audio/audio_buffer.o \
  $(OBJDIR)src/audio/wav_io_test.o
	@mkdir -p $(dir $@)
	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@ -lm
132 | 
133 | run_wav_io_test: $(BINDIR)wav_io_test
134 | 	$<
135 | 
136 | $(BINDIR)settings_test: \
137 |   $(OBJDIR)src/settings_test.o \
138 |   $(OBJDIR)src/utils/file_utils.o \
139 |   $(OBJDIR)src/utils/string_utils.o \
140 |   $(OBJDIR)src/utils/yargs.o
141 | 	@mkdir -p $(dir $@) 
142 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@
143 | 
144 | run_settings_test: $(BINDIR)settings_test
145 | 	$<
146 | 
147 | $(BINDIR)app_main_test: \
148 |  $(OBJDIR)src/app_main_test.o \
149 |  $(OBJDIR)src/settings.o \
150 |  $(OBJDIR)src/audio/audio_buffer.o \
151 |  $(OBJDIR)src/audio/pa_list_devices.o \
152 |  $(OBJDIR)src/audio/wav_io.o \
153 |  $(OBJDIR)src/utils/file_utils.o \
154 |  $(OBJDIR)src/utils/string_utils.o \
155 |  $(OBJDIR)src/utils/yargs.o
156 | 	@mkdir -p $(dir $@) 
157 | 	$(CC) $(CCFLAGS) $(TEST_CCFLAGS) $^ -o $@ $(LDFLAGS)
158 | 
159 | run_app_main_test: $(BINDIR)app_main_test
160 | 	$<
161 | 
162 | $(BINDIR)spchcat: \
163 |  $(OBJDIR)src/app_main.o \
164 |  $(OBJDIR)src/main.o \
165 |  $(OBJDIR)src/settings.o \
166 |  $(OBJDIR)src/audio/audio_buffer.o \
167 |  $(OBJDIR)src/audio/pa_list_devices.o \
168 |  $(OBJDIR)src/audio/wav_io.o \
169 |  $(OBJDIR)src/utils/file_utils.o \
170 |  $(OBJDIR)src/utils/string_utils.o \
171 |  $(OBJDIR)src/utils/yargs.o
172 | 	@mkdir -p $(dir $@) 
173 | 	$(CC) $^ -o $@ $(LDFLAGS)
174 | 
175 | $(DEPDIR): ; @mkdir -p $@
176 | 
177 | SRCS := $(shell find src/ -type f -name '*.c')
178 | DEPFILES := $(SRCS:%.c=$(DEPDIR)/%.d)
179 | $(DEPFILES):
180 | 
181 | include $(wildcard $(DEPFILES))


--------------------------------------------------------------------------------
/src/utils/string_utils.c:
--------------------------------------------------------------------------------
  1 | #include "string_utils.h"
  2 | 
  3 | #include <assert.h>
  4 | #include <stdarg.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | 
  8 | #include "trace.h"
  9 | 
 10 | bool string_starts_with(const char* string, const char* start) {
 11 |   return (strncmp(string, start, strlen(start)) == 0);
 12 | }
 13 | 
 14 | bool string_ends_with(const char* string, const char* ending) {
 15 |   const int string_length = strlen(string);
 16 |   const int ending_length = strlen(ending);
 17 |   if (string_length < ending_length) {
 18 |     return false;
 19 |   }
 20 | 
 21 |   for (int i = 0; i < ending_length; ++i) {
 22 |     const int string_index = (string_length - (i + 1));
 23 |     const int ending_index = (ending_length - (i + 1));
 24 |     const char string_char = string[string_index];
 25 |     const char ending_char = ending[ending_index];
 26 |     if (string_char != ending_char) {
 27 |       return false;
 28 |     }
 29 |   }
 30 | 
 31 |   return true;
 32 | }
 33 | 
 34 | char* string_duplicate(const char* source) {
 35 |   if (source == NULL) {
 36 |     return NULL;
 37 |   }
 38 |   const int length = strlen(source);
 39 |   char* result = malloc(length + 1);
 40 |   strncpy(result, source, length + 1);
 41 |   return result;
 42 | }
 43 | 
 44 | char* string_alloc_sprintf(const char* format, ...) {
 45 |   va_list args;
 46 |   va_start(args, format);
 47 |   va_list args_copy;
 48 |   va_copy(args_copy, args);
 49 |   size_t size = vsnprintf(NULL, 0, format, args) + 1;
 50 |   va_end(args);
 51 |   char* result = malloc(size);
 52 |   vsnprintf(result, size, format, args_copy);
 53 |   va_end(args_copy);
 54 |   return result;
 55 | }
 56 | 
 57 | // Splits a string into multiple parts, based on the single-character separator.
 58 | // The `max_splits` arguments controls the maximum number of parts that will be
 59 | // produced, or -1 for no maximum. The caller is responsible for calling free() 
 60 | // on the `outputs` array, and for all of the entries in that array.
 61 | void string_split(const char* input, char separator, const int max_splits,
 62 |   char*** outputs, int* outputs_length) {
 63 |   assert(input != NULL);
 64 |   const int input_length = strlen(input);
 65 |   *outputs = NULL;
 66 |   *outputs_length = 0;
 67 |   int last_split_index = 0;
 68 |   for (int i = 0; i < input_length; ++i) {
 69 |     const char current = input[i];
 70 |     if ((current == separator) &&
 71 |       ((max_splits == -1) || (*outputs_length < (max_splits - 1)))) {
 72 |       const int split_length = (i - last_split_index);
 73 |       char* split = malloc(split_length + 1);
 74 |       for (int j = 0; j < split_length; ++j) {
 75 |         split[j] = input[last_split_index + j];
 76 |       }
 77 |       split[split_length] = 0;
 78 |       *outputs = realloc(*outputs, (*outputs_length + 1) * sizeof(char*));
 79 |       (*outputs)[*outputs_length] = split;
 80 |       *outputs_length += 1;
 81 |       last_split_index = i + 1;
 82 |     }
 83 |   }
 84 |   const int split_length = (input_length - last_split_index);
 85 |   if (split_length > 0) {
 86 |     char* split = malloc(split_length + 1);
 87 |     for (int j = 0; j < split_length; ++j) {
 88 |       split[j] = input[last_split_index + j];
 89 |     }
 90 |     split[split_length] = 0;
 91 |     *outputs = realloc(*outputs, (*outputs_length + 1) * sizeof(char*));
 92 |     (*outputs)[*outputs_length] = split;
 93 |     *outputs_length += 1;
 94 |   }
 95 | }
 96 | 
 97 | void string_list_free(char** list, int list_length) {
 98 |   for (int i = 0; i < list_length; ++i) {
 99 |     free(list[i]);
100 |   }
101 |   free(list);
102 | }
103 | 
// Appends a copy of `new` (made with string_duplicate()) to the end of
// `*list`, growing the array by one slot and incrementing `*list_length`.
void string_list_add(const char* new, char*** list, int* list_length) {
  const int old_length = *list_length;
  char** grown = realloc(*list, (old_length + 1) * sizeof(char*));
  grown[old_length] = string_duplicate(new);
  *list = grown;
  *list_length = old_length + 1;
}
109 | 
// Returns a newly allocated string holding `a` followed by `b`. Neither
// input is modified or freed.
char* string_append(const char* a, const char* b) {
  char* combined = string_alloc_sprintf("%s%s", a, b);
  return combined;
}
113 | 
// Returns a newly allocated string holding `a` followed by `b`, and frees
// `a`. The caller must stop using `a` and keep the returned pointer instead.
char* string_append_in_place(char* a, const char* b) {
  char* combined = string_alloc_sprintf("%s%s", a, b);
  // `a` is only released once its contents have been copied into `combined`.
  free(a);
  return combined;
}
119 | 
// Concatenates the `list_length` strings in `list`, placing `separator`
// between consecutive entries (but not after the last). Returns a newly
// allocated string the caller must free(); an empty list gives "". Returns
// NULL if memory can't be allocated.
char* string_join(const char** list, int list_length, const char* separator) {
  // The separator is only looked at when it will actually be used.
  const size_t separator_length = (list_length > 1) ? strlen(separator) : 0;

  // First pass: measure, so the result can be allocated once instead of being
  // rebuilt for every entry.
  size_t total_length = 0;
  for (int i = 0; i < list_length; ++i) {
    total_length += strlen(list[i]);
    if (i < (list_length - 1)) {
      total_length += separator_length;
    }
  }

  char* result = malloc(total_length + 1);
  if (result == NULL) {
    return NULL;
  }

  // Second pass: copy each piece to the write position and advance it.
  char* cursor = result;
  for (int i = 0; i < list_length; ++i) {
    const size_t entry_length = strlen(list[i]);
    memcpy(cursor, list[i], entry_length);
    cursor += entry_length;
    if (i < (list_length - 1)) {
      memcpy(cursor, separator, separator_length);
      cursor += separator_length;
    }
  }
  *cursor = '\0';
  return result;
}
134 | 
135 | void string_list_filter(const char** in_list, int in_list_length,
136 |   string_list_filter_funcptr should_keep_func, void* cookie, char*** out_list,
137 |   int* out_list_length) {
138 |   *out_list = NULL;
139 |   *out_list_length = 0;
140 |   for (int i = 0; i < in_list_length; ++i) {
141 |     const char* in = in_list[i];
142 |     if (should_keep_func(in, cookie)) {
143 |       *out_list_length += 1;
144 |       *out_list = realloc(*out_list, sizeof(const char*) * (*out_list_length));
145 |       (*out_list)[(*out_list_length) - 1] = string_duplicate(in);
146 |     }
147 |   }
148 | }
149 | 


--------------------------------------------------------------------------------
/src/audio/wav_io.c:
--------------------------------------------------------------------------------
  1 | #include "wav_io.h"
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | #include "audio_buffer.h"
  8 | #include "string_utils.h"
  9 | #include "trace.h"
 10 | 
 11 | static bool expect_data(const char* expected, int expected_size,
 12 |   FILE* file) {
 13 |   uint8_t* data = calloc(1, expected_size);
 14 |   fread(data, expected_size, 1, file);
 15 |   const bool result = (memcmp(data, expected, expected_size) == 0);
 16 |   free(data);
 17 |   return result;
 18 | }
 19 | 
 20 | static void fread_and_discard(int size, FILE* file) {
 21 |   uint8_t* data = calloc(1, size);
 22 |   fread(data, size, 1, file);
 23 |   free(data);
 24 | }
 25 | 
 26 | static uint16_t fread_uint16(FILE* file) {
 27 |   uint16_t result;
 28 |   fread(&result, 2, 1, file);
 29 |   return result;
 30 | }
 31 | 
 32 | static void fwrite_uint16(const uint16_t value, FILE* file) {
 33 |   fwrite(&value, 2, 1, file);
 34 | }
 35 | 
 36 | static uint32_t fread_uint32(FILE* file) {
 37 |   uint32_t result;
 38 |   fread(&result, 4, 1, file);
 39 |   return result;
 40 | }
 41 | 
 42 | static void fwrite_uint32(const uint32_t value, FILE* file) {
 43 |   fwrite(&value, 4, 1, file);
 44 | }
 45 | 
 46 | bool wav_io_load(const char* filename, AudioBuffer** result) {
 47 |   *result = NULL;
 48 | 
 49 |   FILE* file = fopen(filename, "rb");
 50 |   if (file == NULL) {
 51 |     fprintf(stderr, "Couldn't load file '%s'\n", filename);
 52 |     return false;
 53 |   }
 54 | 
 55 |   if (!expect_data("RIFF", 4, file)) {
 56 |     fprintf(stderr, "'RIFF' wasn't found in header of WAV file '%s'\n",
 57 |       filename);
 58 |     fclose(file);
 59 |     return false;
 60 |   }
 61 |   fread_uint32(file);  // file_size_minus_eight
 62 |   if (!expect_data("WAVE", 4, file)) {
 63 |     fprintf(stderr, "'WAVE' wasn't found in header of WAV file '%s'\n",
 64 |       filename);
 65 |     fclose(file);
 66 |     return false;
 67 |   }
 68 | 
 69 |   uint8_t found_chunk_id[4];
 70 |   fread(found_chunk_id, 4, 1, file);
 71 |   while (memcmp(found_chunk_id, "fmt ", 4) != 0) {
 72 |     const uint32_t chunk_size = fread_uint32(file);
 73 |     fread_and_discard(chunk_size, file);
 74 |     fread(found_chunk_id, 4, 1, file);
 75 |   }
 76 |   const uint32_t format_chunk_size = fread_uint32(file);
 77 |   if ((format_chunk_size != 18) && (format_chunk_size != 16)) {
 78 |     fprintf(stderr,
 79 |       "Format chunk size was %d instead of 16 or 18 in WAV file '%s'\n",
 80 |       format_chunk_size, filename);
 81 |     return false;
 82 |   }
 83 |   const uint16_t format_type = fread_uint16(file);
 84 |   if (format_type != 1) {
 85 |     fprintf(stderr,
 86 |       "Format type was %d instead of 1 in WAV file '%s'\n",
 87 |       format_type, filename);
 88 |     return false;
 89 |   }
 90 |   const uint16_t channels = fread_uint16(file);
 91 |   const uint32_t sample_rate = fread_uint32(file);
 92 |   fread_uint32(file);  // bytes_per_second
 93 |   fread_uint16(file);  // bytes_per_sample
 94 |   const uint16_t bits_per_sample = fread_uint16(file);
 95 |   if (bits_per_sample != 16) {
 96 |     fprintf(stderr,
 97 |       "Bits per sample was %d instead of 16 in WAV file '%s'\n",
 98 |       bits_per_sample, filename);
 99 |     return false;
100 |   }
101 |   if (format_chunk_size == 18) {
102 |     fread_and_discard(2, file);
103 |   }
104 | 
105 |   fread(found_chunk_id, 4, 1, file);
106 |   while (memcmp(found_chunk_id, "data", 4) != 0) {
107 |     const uint32_t chunk_size = fread_uint32(file);
108 |     fread_and_discard(chunk_size, file);
109 |   }
110 | 
111 |   const uint32_t chunk_size = fread_uint32(file);
112 |   const int samples_per_channel = (chunk_size / channels) / 2;
113 |   *result = audio_buffer_alloc(sample_rate, samples_per_channel, channels);
114 |   fread((*result)->data, chunk_size, 1, file);
115 | 
116 |   fclose(file);
117 |   return true;
118 | }
119 | 
120 | bool wav_io_save(const char* filename, const AudioBuffer* buffer) {
121 |   const int header_byte_count = 44;
122 |   const int sample_bit_count = 16;
123 |   const int sample_byte_count = 2;
124 |   const int num_samples = buffer->samples_per_channel * buffer->channels;
125 |   const int bytes_per_second =
126 |     buffer->sample_rate * sample_byte_count * buffer->channels;
127 |   const int bytes_per_frame = sample_byte_count * buffer->channels;
128 |   const int data_byte_count = num_samples * sample_byte_count;
129 |   const int file_size = header_byte_count + data_byte_count;
130 | 
131 |   FILE* file = fopen(filename, "wb");
132 |   if (file == NULL) {
133 |     fprintf(stderr, "Couldn't open file '%s' for saving.\n", filename);
134 |     return false;
135 |   }
136 | 
137 |   fwrite("RIFF", 4, 1, file);
138 |   fwrite_uint32(file_size - 8, file);
139 |   fwrite("WAVE", 4, 1, file);
140 | 
141 |   fwrite("fmt ", 4, 1, file);
142 |   fwrite_uint32(16, file);
143 |   fwrite_uint16(1, file);
144 |   fwrite_uint16(buffer->channels, file);
145 |   fwrite_uint32(buffer->sample_rate, file);
146 |   fwrite_uint32(bytes_per_second, file);
147 |   fwrite_uint16(bytes_per_frame, file);
148 |   fwrite_uint16(sample_bit_count, file);
149 | 
150 |   fwrite("data", 4, 1, file);
151 |   fwrite_uint32(data_byte_count, file);
152 |   fwrite(buffer->data, data_byte_count, 1, file);
153 | 
154 |   fclose(file);
155 | 
156 |   return true;
157 | }
158 | 


--------------------------------------------------------------------------------
/notebooks/install.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"v_SgamvYQ1ML"},"source":["# Installing `spchcat`\n","\n","Example script showing how to install the `spchcat` tool from a .deb package installer.\n","\n","<table class=\"tfo-notebook-buttons\" align=\"left\">\n","  <td>\n","      <a target=\"_blank\" href=\"https://colab.research.google.com/github/petewarden/spchcat/blob/main/notebooks/install.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n","  </td>\n","  <td>\n","      <a target=\"_blank\" href=\"https://github.com/petewarden/spchcat/blob/main/notebooks/install.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n","  </td>\n","</table>"]},{"cell_type":"markdown","metadata":{"id":"2iPaesXqRGL4"},"source":["## Fetch the Package\n","\n","We'll install the `spchcat` speech recognition tool from a `.deb` package, downloaded from [github.com/petewarden/spchcat/releases](https://github.com/petewarden/spchcat/releases), so use `wget` to fetch the latest version."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":36614,"status":"ok","timestamp":1641235885457,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GiRDB9VwLsxruonSf2xY25P4vvEWzDmpmsfjNLf=s64","userId":"03508279103930495710"},"user_tz":480},"id":"xbUFTBZwRwji","outputId":"e0f57b0b-78fd-4610-bfd1-d624a061e301"},"outputs":[{"name":"stdout","output_type":"stream","text":["--2022-01-03 18:50:48--  https://github.com/petewarden/spchcat/releases/download/v0.0.1-alpha/spchcat_0.0-1_amd64.deb\n","Resolving github.com (github.com)... 140.82.114.3\n","Connecting to github.com (github.com)|140.82.114.3|:443... connected.\n","HTTP request sent, awaiting response... 
302 Found\n","Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/442925287/51c57b3e-b498-41e5-80e7-2edc8de94c07?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220103%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220103T185048Z&X-Amz-Expires=300&X-Amz-Signature=ff064e98c09215948b4ca1958adef26eeffe03aa0a47218e3b5f8c03706ec3c6&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=442925287&response-content-disposition=attachment%3B%20filename%3Dspchcat_0.0-1_amd64.deb&response-content-type=application%2Foctet-stream [following]\n","--2022-01-03 18:50:48--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/442925287/51c57b3e-b498-41e5-80e7-2edc8de94c07?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220103%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220103T185048Z&X-Amz-Expires=300&X-Amz-Signature=ff064e98c09215948b4ca1958adef26eeffe03aa0a47218e3b5f8c03706ec3c6&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=442925287&response-content-disposition=attachment%3B%20filename%3Dspchcat_0.0-1_amd64.deb&response-content-type=application%2Foctet-stream\n","Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n","Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.110.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 1084496196 (1.0G) [application/octet-stream]\n","Saving to: ‘spchcat_0.0-1_amd64.deb’\n","\n","spchcat_0.0-1_amd64 100%[===================>]   1.01G  35.8MB/s    in 36s     \n","\n","2022-01-03 18:51:25 (28.7 MB/s) - ‘spchcat_0.0-1_amd64.deb’ saved [1084496196/1084496196]\n","\n"]}],"source":["!wget -q https://github.com/petewarden/spchcat/releases/download/v0.0.2-alpha/spchcat_0.0-2_amd64.deb"]},{"cell_type":"markdown","metadata":{"id":"oBDTBC57R8uy"},"source":["## Install the Package\n","\n","On a desktop machine we could just double-click on the package to open and install it, but because we're in a notebook we'll use the `dpkg` tool instead."]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":116483,"status":"ok","timestamp":1641236030493,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GiRDB9VwLsxruonSf2xY25P4vvEWzDmpmsfjNLf=s64","userId":"03508279103930495710"},"user_tz":480},"id":"WPIGB-qPRhNA","outputId":"8b1b8226-b8c7-4056-d7cf-17729fcd4856"},"outputs":[{"name":"stdout","output_type":"stream","text":["Selecting previously unselected package spchcat.\n","(Reading database ... 
155222 files and directories currently installed.)\n","Preparing to unpack spchcat_0.0-1_amd64.deb ...\n","Unpacking spchcat (0.0-1) ...\n","Setting up spchcat (0.0-1) ...\n"]}],"source":["!dpkg -i spchcat_0.0-1_amd64.deb"]},{"cell_type":"markdown","metadata":{"id":"hQ6FCiUDSLHK"},"source":["## Test `spchcat`\n","\n","The tool needs an audio input to run on, and since this may be running in Colab or another environment without a microphone attached, we'll download some test audio and run it on that."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yVLUfQp_Seh5"},"outputs":[],"source":["!wget --quiet https://github.com/coqui-ai/STT/releases/download/v1.1.0/audio-1.1.0.tar.gz\n","!tar -xzf audio-1.1.0.tar.gz"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":377,"status":"ok","timestamp":1641236415781,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GiRDB9VwLsxruonSf2xY25P4vvEWzDmpmsfjNLf=s64","userId":"03508279103930495710"},"user_tz":480},"id":"p3hTA19dSJvN","outputId":"ac151cd6-d339-458e-f1a5-0c9c99942824"},"outputs":[{"name":"stdout","output_type":"stream","text":["spchcat: /usr/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.26' not found (required by spchcat)\n","spchcat: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by spchcat)\n","spchcat: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.33' not found (required by spchcat)\n"]}],"source":["!spchcat audio/4507-16021-0012.wav"]}],"metadata":{"colab":{"authorship_tag":"ABX9TyOUEFnuIlWvgI2yToO63QAv","name":"Installing spchat","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}
2 | 


--------------------------------------------------------------------------------
/src/audio/wav_io_test.c:
--------------------------------------------------------------------------------
  1 | #include "acutest.h"
  2 | 
  3 | #include "wav_io.c"
  4 | 
  5 | #include "file_utils.h"
  6 | 
  7 | void test_expect_data() {
  8 |   const char* test_filename = "/tmp/test_expect_data";
  9 |   file_write(test_filename, "Foo", 3);
 10 | 
 11 |   FILE* file = fopen(test_filename, "rb");
 12 |   TEST_CHECK(file != NULL);
 13 |   TEST_CHECK(expect_data("Foo", 3, file));
 14 |   fclose(file);
 15 | 
 16 |   file = fopen(test_filename, "rb");
 17 |   TEST_CHECK(file != NULL);
 18 |   TEST_CHECK(!expect_data("Bar", 3, file));
 19 |   fclose(file);
 20 | }
 21 | 
 22 | void test_fread_and_discard() {
 23 |   const char* test_filename = "/tmp/test_fread_and_discard";
 24 |   file_write(test_filename, "FooBarBaz", 9);
 25 | 
 26 |   FILE* file = fopen(test_filename, "rb");
 27 |   TEST_CHECK(file != NULL);
 28 | 
 29 |   TEST_CHECK(expect_data("Foo", 3, file));
 30 |   fread_and_discard(3, file);
 31 |   TEST_CHECK(expect_data("Baz", 3, file));
 32 | 
 33 |   fclose(file);
 34 | }
 35 | 
 36 | void test_fread_uint16() {
 37 |   const char* test_filename = "/tmp/test_fread_uint16";
 38 |   uint16_t value = 0x3123;
 39 |   file_write(test_filename, (char*)(&value), 2);
 40 | 
 41 |   FILE* file = fopen(test_filename, "rb");
 42 |   TEST_CHECK(file != NULL);
 43 |   TEST_INTEQ(0x3123, fread_uint16(file));
 44 |   fclose(file);
 45 | }
 46 | 
 47 | void test_fwrite_uint16() {
 48 |   const char* test_filename = "/tmp/test_fwrite_uint16";
 49 |   FILE* file = fopen(test_filename, "wb");
 50 |   TEST_CHECK(file != NULL);
 51 |   fwrite_uint16(0x3123, file);
 52 |   fclose(file);
 53 | 
 54 |   file = fopen(test_filename, "rb");
 55 |   TEST_CHECK(file != NULL);
 56 |   uint16_t value;
 57 |   fread(&value, 2, 1, file);
 58 |   TEST_INTEQ(0x3123, value);
 59 |   fclose(file);
 60 | }
 61 | 
 62 | void test_fread_uint32() {
 63 |   const char* test_filename = "/tmp/test_fread_uint32";
 64 |   uint32_t value = 0x12345678;
 65 |   file_write(test_filename, (char*)(&value), 4);
 66 | 
 67 |   FILE* file = fopen(test_filename, "rb");
 68 |   TEST_CHECK(file != NULL);
 69 |   TEST_INTEQ(0x12345678, fread_uint32(file));
 70 |   fclose(file);
 71 | }
 72 | 
 73 | void test_fwrite_uint32() {
 74 |   const char* test_filename = "/tmp/test_fwrite_uint16";
 75 |   FILE* file = fopen(test_filename, "wb");
 76 |   TEST_CHECK(file != NULL);
 77 |   fwrite_uint32(0x12345678, file);
 78 |   fclose(file);
 79 | 
 80 |   file = fopen(test_filename, "rb");
 81 |   TEST_CHECK(file != NULL);
 82 |   uint32_t value;
 83 |   fread(&value, 4, 1, file);
 84 |   TEST_INTEQ(0x12345678, value);
 85 |   fclose(file);
 86 | }
 87 | 
 88 | void test_wav_io_load() {
 89 |   const char* test_filename = "/tmp/test_wav_io_load.wav";
 90 |   unsigned char test_data[] = {
 91 |     'R', 'I', 'F', 'F',
 92 |     52, 0, 0, 0,
 93 |     'W', 'A', 'V', 'E',
 94 |     'f', 'm', 't', ' ',
 95 |     16, 0, 0, 0,  // Format chunk size.
 96 |     1, 0,  // Format type.
 97 |     2, 0,  // Channels.
 98 |     0x80, 0x3e, 0, 0,  // Sample rate.
 99 |     0x00, 0xfa, 0, 0,  // Bytes per second.
100 |     4, 0,  // Bytes per frame (#channels).
101 |     16, 0, // Bits per sample.
102 |     'd', 'a', 't', 'a',
103 |     16, 0, 0, 0,  // Data chunk size.
104 |     23, 33,  // Sample #1, Channel #1.
105 |     11, 77,  // Sample #1, Channel #2.
106 |     101, 89,  // Sample #2, Channel #1.
107 |     55, 91,  // Sample #2, Channel #2.
108 |     117, 18,  // Sample #3, Channel #1.
109 |     33, 212,  // Sample #3, Channel #2.
110 |     169, 134,  // Sample #4, Channel #1.
111 |     42, 121,  // Sample #4, Channel #2.
112 |   };
113 |   const size_t test_data_length = sizeof(test_data) / sizeof(test_data[0]);
114 |   file_write(test_filename, (char*)(test_data), test_data_length);
115 | 
116 |   AudioBuffer* buffer = NULL;
117 |   TEST_ASSERT(wav_io_load(test_filename, &buffer));
118 |   TEST_ASSERT(buffer != NULL);
119 |   TEST_INTEQ(2, buffer->channels);
120 |   TEST_INTEQ(16000, buffer->sample_rate);
121 |   TEST_INTEQ(4, buffer->samples_per_channel);
122 |   TEST_CHECK(buffer->data != NULL);
123 |   TEST_INTEQ(8471, buffer->data[0]);
124 |   TEST_INTEQ(19723, buffer->data[1]);
125 |   TEST_INTEQ(22885, buffer->data[2]);
126 |   TEST_INTEQ(23351, buffer->data[3]);
127 |   TEST_INTEQ(4725, buffer->data[4]);
128 |   TEST_INTEQ(-11231, buffer->data[5]);
129 |   TEST_INTEQ(-31063, buffer->data[6]);
130 |   TEST_INTEQ(31018, buffer->data[7]);
131 |   audio_buffer_free(buffer);
132 | }
133 | 
134 | void test_wav_io_save() {
135 |   const char* test_filename = "/tmp/test_wav_io_save.wav";
136 | 
137 |   AudioBuffer* buffer = audio_buffer_alloc(16000, 4, 2);
138 |   buffer->data[0] = 8471;
139 |   buffer->data[1] = 19723;
140 |   buffer->data[2] = 22885;
141 |   buffer->data[3] = 23351;
142 |   buffer->data[4] = 4725;
143 |   buffer->data[5] = -11231;
144 |   buffer->data[6] = -31063;
145 |   buffer->data[7] = 31018;
146 | 
147 |   TEST_CHECK(wav_io_save(test_filename, buffer));
148 |   audio_buffer_free(buffer);
149 | 
150 |   unsigned char expected_data[] = {
151 |     'R', 'I', 'F', 'F',  // #0
152 |     52, 0, 0, 0,  // #4
153 |     'W', 'A', 'V', 'E',  // #8
154 |     'f', 'm', 't', ' ',  // #12
155 |     16, 0, 0, 0,  // #16, Format chunk size.
156 |     1, 0,  // #20, Format type.
157 |     2, 0,  // #22, Channels.
158 |     0x80, 0x3e, 0, 0,  // #24, Sample rate.
159 |     0x00, 0xfa, 0, 0,  // #28, Bytes per second.
160 |     4, 0,  // #32, Bytes per frame.
161 |     16, 0,  // #34, Bits per sample.
162 |     'd', 'a', 't', 'a',  // #36
163 |     16, 0, 0, 0,  // #40, Data chunk size.
164 |     23, 33,  // #44, Sample #1, Channel #1.
165 |     11, 77,  // #46, Sample #1, Channel #2.
166 |     101, 89,  // #48, Sample #2, Channel #1.
167 |     55, 91,  // #50, Sample #2, Channel #2.
168 |     117, 18,  // #52, Sample #3, Channel #1.
169 |     33, 212,  // #54, Sample #3, Channel #2.
170 |     169, 134,  // #56, Sample #4, Channel #1.
171 |     42, 121,  // #58, Sample #4, Channel #2.
172 |   };
173 |   const size_t expected_data_length =
174 |     sizeof(expected_data) / sizeof(expected_data[0]);
175 | 
176 |   FILE* file = fopen(test_filename, "rb");
177 | 
178 |   uint8_t* found_data = calloc(1, expected_data_length);
179 |   fread(found_data, expected_data_length, 1, file);
180 |   fclose(file);
181 | 
182 |   for (int i = 0; i < expected_data_length; ++i) {
183 |     TEST_INTEQ(expected_data[i], found_data[i]);
184 |     TEST_MSG("At position %d", i);
185 |   }
186 | 
187 |   free(found_data);
188 | }
189 | 
190 | void test_wav_io_save_listenable() {
191 |   const char* test_filename = "/tmp/test_wav_io_save_listenable.wav";
192 | 
193 |   const int sample_rate = 16000;
194 |   const int sample_count = sample_rate * 2;
195 |   const int channels = 1;
196 | 
197 |   AudioBuffer* buffer = audio_buffer_alloc(sample_rate, sample_count, channels);
198 | 
199 |   for (int i = 0; i < sample_count; ++i) {
200 |     const float phase = (float)(i) / 10.0f;
201 |     buffer->data[i] = (int)(sinf(phase) * 32767);
202 |   }
203 | 
204 |   TEST_CHECK(wav_io_save(test_filename, buffer));
205 |   audio_buffer_free(buffer);
206 | }
207 | 
208 | TEST_LIST = {
209 |   {"expect_data", test_expect_data},
210 |   {"fread_and_discard", test_fread_and_discard},
211 |   {"fread_uint16", test_fread_uint16},
212 |   {"fwrite_uint16", test_fwrite_uint16},
213 |   {"fread_uint32", test_fread_uint32},
214 |   {"fwrite_uint32", test_fwrite_uint32},
215 |   {"wav_io_load", test_wav_io_load},
216 |   {"wav_io_save", test_wav_io_save},
217 |   {"wav_io_save_listenable", test_wav_io_save_listenable},
218 |   {NULL, NULL},
219 | };


--------------------------------------------------------------------------------
/src/utils/string_utils_test.c:
--------------------------------------------------------------------------------
  1 | #include "acutest.h"
  2 | 
  3 | #include "string_utils.c"
  4 | 
  5 | void test_string_starts_with() {
  6 | 
  7 |   const char* string = "foo.txt";
  8 |   const char* start = "foo";
  9 |   TEST_CHECK(string_starts_with(string, start));
 10 |   TEST_MSG("%s, %s", string, start);
 11 | 
 12 |   string = "foo.txt";
 13 |   start = "bar";
 14 |   TEST_CHECK(!string_starts_with(string, start));
 15 |   TEST_MSG("%s, %s", string, start);
 16 | 
 17 |   string = "foo.txt";
 18 |   start = "toolongtobeastarts";
 19 |   TEST_CHECK(!string_starts_with(string, start));
 20 |   TEST_MSG("%s, %s", string, start);
 21 | 
 22 |   string = "foo.txt";
 23 |   start = "";
 24 |   TEST_CHECK(string_starts_with(string, start));
 25 |   TEST_MSG("%s, %s", string, start);
 26 | 
 27 |   string = "";
 28 |   start = "";
 29 |   TEST_CHECK(string_starts_with(string, start));
 30 |   TEST_MSG("%s, %s", string, start);
 31 | 
 32 |   string = "";
 33 |   start = "foo";
 34 |   TEST_CHECK(!string_starts_with(string, start));
 35 |   TEST_MSG("%s, %s", string, start);
 36 | }
 37 | 
 38 | void test_string_ends_with() {
 39 | 
 40 |   const char* string = "foo.txt";
 41 |   const char* ending = ".txt";
 42 |   TEST_CHECK(string_ends_with(string, ending));
 43 |   TEST_MSG("%s, %s", string, ending);
 44 | 
 45 |   string = "short";
 46 |   ending = "longer";
 47 |   TEST_CHECK(!string_ends_with(string, ending));
 48 |   TEST_MSG("%s, %s", string, ending);
 49 | 
 50 |   string = "something";
 51 |   ending = "";
 52 |   TEST_CHECK(string_ends_with(string, ending));
 53 |   TEST_MSG("%s, %s", string, ending);
 54 | 
 55 |   string = "";
 56 |   ending = "something";
 57 |   TEST_CHECK(!string_ends_with(string, ending));
 58 |   TEST_MSG("%s, %s", string, ending);
 59 | 
 60 |   string = "foo.txt.old";
 61 |   ending = ".txt";
 62 |   TEST_CHECK(!string_ends_with(string, ending));
 63 |   TEST_MSG("%s, %s", string, ending);
 64 | 
 65 |   string = "";
 66 |   ending = "";
 67 |   TEST_CHECK(string_ends_with(string, ending));
 68 |   TEST_MSG("%s, %s", string, ending);
 69 | 
 70 |   string = "afile.txt";
 71 |   ending = "rfile.txt";
 72 |   TEST_CHECK(!string_ends_with(string, ending));
 73 |   TEST_MSG("%s, %s", string, ending);
 74 | }
 75 | 
 76 | void test_string_duplicate() {
 77 |   const char* original = "original";
 78 |   char* copy = string_duplicate(original);
 79 |   TEST_STREQ(original, copy);
 80 |   free(copy);
 81 | 
 82 |   original = "";
 83 |   copy = string_duplicate(original);
 84 |   TEST_STREQ(original, copy);
 85 |   free(copy);
 86 | 
 87 |   original = NULL;
 88 |   copy = string_duplicate(original);
 89 |   TEST_CHECK(copy == NULL);
 90 |   TEST_MSG("%p", copy);
 91 |   free(copy);
 92 | }
 93 | 
 94 | void test_string_alloc_sprintf() {
 95 | 
 96 |   char* result = string_alloc_sprintf("%s/%s", "foo", "bar");
 97 |   TEST_STREQ("foo/bar", result);
 98 |   free(result);
 99 | 
100 |   result = string_alloc_sprintf("%s/%d", "foo", 10);
101 |   TEST_STREQ("foo/10", result);
102 |   free(result);
103 | 
104 |   result = string_alloc_sprintf("%s/0x%08x", "foo", 10);
105 |   TEST_STREQ("foo/0x0000000a", result);
106 |   free(result);
107 | }
108 | 
// Checks string_split() with no separator present, several separators, a
// limit of two parts, and a trailing separator.
void test_string_split() {
  char** pieces = NULL;
  int piece_count = 0;

  // No separator in the input: one piece equal to the whole string.
  string_split("nosepshere", ':', -1, &pieces, &piece_count);
  TEST_INTEQ(1, piece_count);
  TEST_STREQ("nosepshere", pieces[0]);
  string_list_free(pieces, piece_count);
  pieces = NULL;
  piece_count = 0;

  // A limit of -1 splits at every separator.
  string_split("seps:r:us", ':', -1, &pieces, &piece_count);
  TEST_INTEQ(3, piece_count);
  TEST_STREQ("seps", pieces[0]);
  TEST_STREQ("r", pieces[1]);
  TEST_STREQ("us", pieces[2]);
  string_list_free(pieces, piece_count);
  pieces = NULL;
  piece_count = 0;

  // A limit of 2 leaves the remaining separators in the last piece.
  string_split("too-many-seps", '-', 2, &pieces, &piece_count);
  TEST_INTEQ(2, piece_count);
  TEST_STREQ("too", pieces[0]);
  TEST_STREQ("many-seps", pieces[1]);
  string_list_free(pieces, piece_count);
  pieces = NULL;
  piece_count = 0;

  // A trailing separator does not add an empty final piece.
  string_split("weird!trailing!sep!", '!', -1, &pieces, &piece_count);
  TEST_INTEQ(3, piece_count);
  TEST_STREQ("weird", pieces[0]);
  TEST_STREQ("trailing", pieces[1]);
  TEST_STREQ("sep", pieces[2]);
  string_list_free(pieces, piece_count);
}
143 | 
// Table-driven checks that string_append() returns the concatenation of its
// two arguments; each result is freed by the test.
void test_string_append() {
  const struct {
    const char* head;
    const char* tail;
    const char* expected;
  } cases[] = {
    { "a", "b", "ab" },
    { "", "", "" },
    { "something ", "else", "something else" },
  };
  const int cases_length = sizeof(cases) / sizeof(cases[0]);

  for (int i = 0; i < cases_length; ++i) {
    char* combined = string_append(cases[i].head, cases[i].tail);
    TEST_STREQ(cases[i].expected, combined);
    free(combined);
  }
}
157 | 
// Checks that string_append_in_place() extends a heap-allocated string. Only
// the returned pointer is freed; the input pointer is not used afterwards.
void test_string_append_in_place() {
  char* heap_string = string_duplicate("original");
  char* extended = string_append_in_place(heap_string, "b");
  TEST_STREQ("originalb", extended);
  free(extended);
}
164 | 
// Checks string_join() with multi-character, single-character and empty
// separators, and with a list that contains an empty element.
void test_string_join() {
  const char* list1[] = { "a", "b", "c" };
  const int list1_length = sizeof(list1) / sizeof(list1[0]);

  // TEST_STREQ is called as (expected, actual), matching the other tests in
  // this file.
  char* result = string_join(list1, list1_length, ", ");
  TEST_STREQ("a, b, c", result);
  free(result);

  result = string_join(list1, list1_length, ":");
  TEST_STREQ("a:b:c", result);
  free(result);

  result = string_join(list1, list1_length, "");
  TEST_STREQ("abc", result);
  free(result);

  const char* list2[] = { "foo", "", "bar", "baz" };
  const int list2_length = sizeof(list2) / sizeof(list2[0]);

  // An empty element still gets separators on both sides.
  result = string_join(list2, list2_length, "a");
  TEST_STREQ("fooaabarabaz", result);
  free(result);

  result = string_join(list2, list2_length, ", ");
  TEST_STREQ("foo, , bar, baz", result);
  free(result);
}
192 | 
// Filter predicate used by test_string_list_filter(): returns true when the
// substring passed through `cookie` does NOT occur in `a`.
bool test_string_list_filter_func(const char* a, void* cookie) {
  const char* needle = cookie;
  if (strstr(a, needle) != NULL) {
    return false;
  }
  return true;
}
197 | 
198 | void test_string_list_filter() {
199 |   const char* list[] = {
200 |     "foo",
201 |     "bar",
202 |     "baz",
203 |     "fish",
204 |   };
205 |   const int list_length = sizeof(list) / sizeof(list[0]);
206 | 
207 |   char** results;
208 |   int results_length;
209 |   string_list_filter(list, list_length, test_string_list_filter_func, "a",
210 |     &results, &results_length);
211 |   TEST_INTEQ(2, results_length);
212 |   TEST_STREQ("foo", results[0]);
213 |   TEST_STREQ("fish", results[1]);
214 |   string_list_free(results, results_length);
215 | 
216 |   string_list_filter(list, list_length, test_string_list_filter_func, "fo",
217 |     &results, &results_length);
218 |   TEST_INTEQ(3, results_length);
219 |   TEST_STREQ("bar", results[0]);
220 |   TEST_STREQ("baz", results[1]);
221 |   TEST_STREQ("fish", results[2]);
222 |   string_list_free(results, results_length);
223 | }
224 | 
// Checks that string_list_add() grows a list that starts as NULL/0 and keeps
// the entries in insertion order.
void test_string_list_add() {
  char** entries = NULL;
  int entry_count = 0;

  // First insertion into an empty list.
  string_list_add("foo", &entries, &entry_count);
  TEST_INTEQ(1, entry_count);
  TEST_STREQ("foo", entries[0]);

  // Two more insertions; earlier entries must be unchanged.
  string_list_add("bar", &entries, &entry_count);
  string_list_add("baz", &entries, &entry_count);
  TEST_INTEQ(3, entry_count);

  const char* const expected[] = { "foo", "bar", "baz" };
  for (int i = 0; i < 3; ++i) {
    TEST_STREQ(expected[i], entries[i]);
  }

  string_list_free(entries, entry_count);
}
241 | 
242 | TEST_LIST = {
243 |   {"string_starts_with", test_string_starts_with},
244 |   {"string_ends_with", test_string_ends_with},
245 |   {"string_duplicate", test_string_duplicate},
246 |   {"string_alloc_sprintf", test_string_alloc_sprintf},
247 |   {"string_split", test_string_split},
248 |   {"string_append", test_string_append},
249 |   {"string_append_in_place", test_string_append_in_place},
250 |   {"string_join", test_string_join},
251 |   {"string_list_filter", test_string_list_filter},
252 |   {"string_list_add", test_string_list_add},
253 |   {NULL, NULL},
254 | };


--------------------------------------------------------------------------------
/src/utils/file_utils_test.c:
--------------------------------------------------------------------------------
  1 | #include "acutest.h"
  2 | 
  3 | #include "file_utils.c"
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | 
  8 | #include "string_utils.h"
  9 | 
 10 | void test_file_does_exist() {
 11 |   const char* test_filename = "/tmp/file_does_exist_test.txt";
 12 |   FILE* test_file = fopen(test_filename, "w");
 13 |   fprintf(test_file, "Test contents.");
 14 |   fclose(test_file);
 15 | 
 16 |   TEST_CHECK(file_does_exist(test_filename));
 17 |   TEST_MSG("%s", test_filename);
 18 | 
 19 |   const char* nonexistent_filename = "/some/very/unlikely/path/foo.bar";
 20 |   TEST_CHECK(!file_does_exist(nonexistent_filename));
 21 | }
 22 | 
 23 | void test_file_size() {
 24 |   const char* test_filename = "/tmp/file_size_test.txt";
 25 |   FILE* test_file = fopen(test_filename, "w");
 26 |   fprintf(test_file, "Test contents.");
 27 |   fclose(test_file);
 28 | 
 29 |   off_t size = file_size(test_filename);
 30 |   TEST_SIZEQ(14, size);
 31 |   TEST_MSG("%s", test_filename);
 32 | 
 33 |   const char* nonexistent_filename = "/some/very/unlikely/path/foo.bar";
 34 |   size = file_size(nonexistent_filename);
 35 |   TEST_SIZEQ(-1, size);
 36 |   TEST_MSG("%s", test_filename);
 37 | }
 38 | 
 39 | void test_file_find_one_with_prefix() {
 40 |   char* tmp_dirname = "/tmp/tmpdir_test_file_find_one_with_prefix";
 41 |   mkdir(tmp_dirname, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 42 | 
 43 |   char* filename = file_join_paths(tmp_dirname, "afile.txt");
 44 |   file_write(filename, "a", 1);
 45 |   free(filename);
 46 | 
 47 |   filename = file_join_paths(tmp_dirname, "anotherfile.txt");
 48 |   file_write(filename, "a", 1);
 49 |   free(filename);
 50 | 
 51 |   filename = file_join_paths(tmp_dirname, "randomfile");
 52 |   file_write(filename, "a", 1);
 53 |   free(filename);
 54 | 
 55 |   char* found = file_find_one_with_prefix(tmp_dirname, "a");
 56 |   TEST_CHECK(found != NULL);
 57 |   // No order is guaranteed, so just make sure it starts with the prefix.
 58 |   TEST_CHECK(found[0] == 'a');
 59 |   free(found);
 60 | 
 61 |   found = file_find_one_with_prefix(tmp_dirname, "another");
 62 |   TEST_CHECK(found != NULL);
 63 |   TEST_STREQ("anotherfile.txt", found);
 64 |   free(found);
 65 | 
 66 |   found = file_find_one_with_prefix(tmp_dirname, "nosuch");
 67 |   TEST_CHECK(found == NULL);
 68 |   free(found);
 69 | 
 70 |   rmdir(tmp_dirname);
 71 | }
 72 | 
 73 | void test_file_find_one_with_suffix() {
 74 |   char* tmp_dirname = "/tmp/tmpdir_test_file_find_one_with_suffix";
 75 |   mkdir(tmp_dirname, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 76 | 
 77 |   char* filename = file_join_paths(tmp_dirname, "afile.txt");
 78 |   file_write(filename, "a", 1);
 79 |   free(filename);
 80 | 
 81 |   filename = file_join_paths(tmp_dirname, "anotherfile.txt");
 82 |   file_write(filename, "a", 1);
 83 |   free(filename);
 84 | 
 85 |   filename = file_join_paths(tmp_dirname, "randomfile");
 86 |   file_write(filename, "a", 1);
 87 |   free(filename);
 88 | 
 89 |   char* found = file_find_one_with_suffix(tmp_dirname, ".txt");
 90 |   TEST_CHECK(found != NULL);
 91 |   // No order is guaranteed, so just make sure it starts with the prefix.
 92 |   TEST_CHECK(string_ends_with(found, ".txt"));
 93 |   TEST_MSG("%s", found);
 94 |   free(found);
 95 | 
 96 |   found = file_find_one_with_suffix(tmp_dirname, "rfile.txt");
 97 |   TEST_CHECK(found != NULL);
 98 |   TEST_STREQ("anotherfile.txt", found);
 99 |   free(found);
100 | 
101 |   found = file_find_one_with_suffix(tmp_dirname, "nosuch");
102 |   TEST_CHECK(found == NULL);
103 |   free(found);
104 | 
105 |   rmdir(tmp_dirname);
106 | }
107 | 
// Table-driven checks that file_join_paths() joins two path fragments with
// exactly one '/' between them, whether or not the first ends in '/'.
void test_file_join_paths() {
  const struct {
    const char* left;
    const char* right;
    const char* expected;
  } cases[] = {
    { "/some/path", "another/path", "/some/path/another/path" },
    { "/some/path/", "another/path", "/some/path/another/path" },
    { "path", "file.txt", "path/file.txt" },
  };
  const int cases_length = sizeof(cases) / sizeof(cases[0]);

  for (int i = 0; i < cases_length; ++i) {
    char* joined = file_join_paths(cases[i].left, cases[i].right);
    TEST_STREQ(cases[i].expected, joined);
    free(joined);
  }
}
127 | 
// Checks that file_read() returns the full contents and length of an existing
// file, and reports failure for a path that should not exist.
void test_file_read() {
  const char* path = "/tmp/file_read_test.txt";
  FILE* writer = fopen(path, "w");
  // The trailing %c with 0 stores a NUL byte so the file is 15 bytes long and
  // the loaded buffer can be compared as a C string.
  fprintf(writer, "Test contents.%c", 0);
  fclose(writer);

  char* loaded;
  size_t loaded_length;
  bool ok = file_read(path, &loaded, &loaded_length);
  TEST_CHECK(ok);
  TEST_SIZEQ(15, loaded_length);
  TEST_STREQ("Test contents.", loaded);
  free(loaded);
  loaded = NULL;
  loaded_length = 0;

  const char* missing_path = "/some/very/unlikely/path/foo.bar";
  ok = file_read(missing_path, &loaded, &loaded_length);
  TEST_CHECK(!ok);
}
148 | 
// Writes a NUL-terminated string with file_write() and checks that reading
// the file back with plain stdio yields the same bytes.
void test_file_write() {
  // Uses its own scratch file rather than the one test_file_read() uses.
  const char* test_filename = "/tmp/file_write_test.txt";
  const char* contents = "Some test content.";
  // Include the terminator so the read-back buffer is a valid C string.
  const size_t contents_length = strlen(contents) + 1;
  const bool status = file_write(test_filename, contents, contents_length);
  TEST_CHECK(status);
  TEST_MSG("%s", test_filename);

  FILE* file = fopen(test_filename, "rb");
  // Abort rather than pass a NULL stream to fread()/fclose().
  TEST_ASSERT(file != NULL);

  // Zero-filled so a short read still leaves a terminated string to compare.
  char* read_contents = calloc(1, contents_length);
  if (read_contents == NULL) {
    fclose(file);
  }
  TEST_ASSERT(read_contents != NULL);

  const size_t bytes_read = fread(read_contents, 1, contents_length, file);
  fclose(file);
  TEST_CHECK(bytes_read == contents_length);
  TEST_MSG("Read %zu of %zu bytes from %s", bytes_read, contents_length,
    test_filename);
  TEST_STREQ(contents, read_contents);
  free(read_contents);
}
166 | 
// Creates a scratch directory with three files and a sub-directory, then
// checks that file_list_dir() returns exactly those entries plus "." and
// "..". Everything created is removed afterwards so that rmdir() can succeed.
void test_file_list_dir() {
  const char* tmp_dirname = "/tmp/tmpdir_test_file_list_dir";
  mkdir(tmp_dirname, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);

  const char* created_files[] = { "afile.txt", "anotherfile.txt", "randomfile" };
  const int created_files_length =
    sizeof(created_files) / sizeof(created_files[0]);
  for (int i = 0; i < created_files_length; ++i) {
    char* filename = file_join_paths(tmp_dirname, created_files[i]);
    file_write(filename, "a", 1);
    free(filename);
  }

  const char* tmp_subdirname = "/tmp/tmpdir_test_file_list_dir/somedir";
  mkdir(tmp_subdirname, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);

  char** list = NULL;
  int list_length = 0;
  const bool status = file_list_dir(tmp_dirname, &list, &list_length);
  TEST_CHECK(status);
  // Only inspect the outputs when the call reported success.
  if (status) {
    TEST_INTEQ(6, list_length);
    char* list_joined = string_join((const char**)(list), list_length, ", ");

    // Order can be arbitrary, so look for matches anywhere.
    const char* expected_list[] = {
      ".",
      "..",
      "afile.txt",
      "anotherfile.txt",
      "randomfile",
      "somedir",
    };
    const int expected_list_length =
      sizeof(expected_list) / sizeof(expected_list[0]);
    for (int i = 0; i < expected_list_length; ++i) {
      const char* expected = expected_list[i];
      bool expected_found = false;
      for (int j = 0; j < list_length; ++j) {
        char* entry = list[j];
        if (strcmp(expected, entry) == 0) {
          TEST_CHECK(!expected_found);
          TEST_MSG("%s was found multiple times in %s", expected, list_joined);
          expected_found = true;
        }
      }
      TEST_CHECK(expected_found);
      TEST_MSG("%s not found in %s", expected, list_joined);
    }
    free(list_joined);
    string_list_free(list, list_length);
  }

  // rmdir() only removes empty directories, so delete the contents first.
  for (int i = 0; i < created_files_length; ++i) {
    char* filename = file_join_paths(tmp_dirname, created_files[i]);
    remove(filename);
    free(filename);
  }
  rmdir(tmp_subdirname);
  rmdir(tmp_dirname);
}
223 | 
224 | TEST_LIST = {
225 |   {"file_does_exist", test_file_does_exist},
226 |   {"file_size", test_file_size},
227 |   {"file_find_one_with_prefix", test_file_find_one_with_prefix},
228 |   {"file_find_one_with_suffix", test_file_find_one_with_suffix},
229 |   {"file_join_paths", test_file_join_paths},
230 |   {"file_read", test_file_read},
231 |   {"file_write", test_file_write},
232 |   {"file_list_dir", test_file_list_dir},
233 |   {NULL, NULL},
234 | };


--------------------------------------------------------------------------------
/src/audio/pa_list_devices.c:
--------------------------------------------------------------------------------
  1 | #include "pa_list_devices.h"
  2 | 
  3 | #include <pulse/pulseaudio.h>
  4 | #include <stdio.h>
  5 | #include <string.h>
  6 | 
  7 | #include "string_utils.h"
  8 | 
// One slot in a fixed-size table of PulseAudio devices. The callbacks in this
// file fill slots from pa_sink_info / pa_source_info records.
// Field list is here:
// http://0pointer.de/lennart/projects/pulseaudio/doxygen/structpa__sink__info.html
typedef struct pa_devicelist {
  // Non-zero once a callback has filled this slot; zero marks a free slot.
  uint8_t initialized;
  // Copied from the info record's `name` field.
  char name[512];
  // Copied from the info record's `index` field.
  uint32_t index;
  // Copied from the info record's `description` field.
  char description[256];
} pa_devicelist_t;
 17 | 
// Forward declarations for the PulseAudio callbacks and the device-list
// query that are defined further down in this file.
void pa_state_cb(pa_context* c, void* userdata);
void pa_sinklist_cb(pa_context* c, const pa_sink_info* l, int eol,
  void* userdata);
void pa_sourcelist_cb(pa_context* c, const pa_source_info* l, int eol,
  void* userdata);
// `input` and `output` must each point to 16 pa_devicelist_t slots.
int pa_get_devicelist(pa_devicelist_t* input, pa_devicelist_t* output);
 24 | 
 25 | // This callback gets called when our context changes state.  We really only
 26 | // care about when it's ready or if it has failed
 27 | void pa_state_cb(pa_context* c, void* userdata) {
 28 |   pa_context_state_t state;
 29 |   int* pa_ready = (int*)(userdata);
 30 | 
 31 |   state = pa_context_get_state(c);
 32 |   switch (state) {
 33 |     // There are just here for reference
 34 |   case PA_CONTEXT_UNCONNECTED:
 35 |   case PA_CONTEXT_CONNECTING:
 36 |   case PA_CONTEXT_AUTHORIZING:
 37 |   case PA_CONTEXT_SETTING_NAME:
 38 |   default:
 39 |     break;
 40 |   case PA_CONTEXT_FAILED:
 41 |   case PA_CONTEXT_TERMINATED:
 42 |     *pa_ready = 2;
 43 |     break;
 44 |   case PA_CONTEXT_READY:
 45 |     *pa_ready = 1;
 46 |     break;
 47 |   }
 48 | }
 49 | 
 50 | // pa_mainloop will call this function when it's ready to tell us about a sink.
 51 | // Since we're not threading, there's no need for mutexes on the devicelist
 52 | // structure
 53 | void pa_sinklist_cb(pa_context* c, const pa_sink_info* l, int eol,
 54 |   void* userdata) {
 55 |   pa_devicelist_t* pa_devicelist = (pa_devicelist_t*)(userdata);
 56 |   int ctr = 0;
 57 | 
 58 |   // If eol is set to a positive number, you're at the end of the list
 59 |   if (eol > 0) {
 60 |     return;
 61 |   }
 62 | 
 63 |   // We know we've allocated 16 slots to hold devices.  Loop through our
 64 |   // structure and find the first one that's "uninitialized."  Copy the
 65 |   // contents into it and we're done.  If we receive more than 16 devices,
 66 |   // they're going to get dropped.  You could make this dynamically allocate
 67 |   // space for the device list, but this is a simple example.
 68 |   for (ctr = 0; ctr < 16; ctr++) {
 69 |     if (!pa_devicelist[ctr].initialized) {
 70 |       strncpy(pa_devicelist[ctr].name, l->name, 511);
 71 |       strncpy(pa_devicelist[ctr].description, l->description, 255);
 72 |       pa_devicelist[ctr].index = l->index;
 73 |       pa_devicelist[ctr].initialized = 1;
 74 |       break;
 75 |     }
 76 |   }
 77 | }
 78 | 
 79 | // See above.  This callback is pretty much identical to the previous
 80 | void pa_sourcelist_cb(pa_context* c, const pa_source_info* l, int eol,
 81 |   void* userdata) {
 82 |   pa_devicelist_t* pa_devicelist = (pa_devicelist_t*)(userdata);
 83 |   int ctr = 0;
 84 | 
 85 |   if (eol > 0) {
 86 |     return;
 87 |   }
 88 | 
 89 |   for (ctr = 0; ctr < 16; ctr++) {
 90 |     if (!pa_devicelist[ctr].initialized) {
 91 |       strncpy(pa_devicelist[ctr].name, l->name, 511);
 92 |       strncpy(pa_devicelist[ctr].description, l->description, 255);
 93 |       pa_devicelist[ctr].index = l->index;
 94 |       pa_devicelist[ctr].initialized = 1;
 95 |       break;
 96 |     }
 97 |   }
 98 | }
 99 | 
100 | int pa_get_devicelist(pa_devicelist_t* input, pa_devicelist_t* output) {
101 |   // Define our pulse audio loop and connection variables
102 |   pa_mainloop* pa_ml;
103 |   pa_mainloop_api* pa_mlapi;
104 |   pa_operation* pa_op;
105 |   pa_context* pa_ctx;
106 | 
107 |   // We'll need these state variables to keep track of our requests
108 |   int state = 0;
109 |   int pa_ready = 0;
110 | 
111 |   // Initialize our device lists
112 |   memset(input, 0, sizeof(pa_devicelist_t) * 16);
113 |   memset(output, 0, sizeof(pa_devicelist_t) * 16);
114 | 
115 |   // Create a mainloop API and connection to the default server
116 |   pa_ml = pa_mainloop_new();
117 |   pa_mlapi = pa_mainloop_get_api(pa_ml);
118 |   pa_ctx = pa_context_new(pa_mlapi, "test");
119 | 
120 |   // This function connects to the pulse server
121 |   pa_context_connect(pa_ctx, NULL, (pa_context_flags_t)(0), NULL);
122 | 
123 |   // This function defines a callback so the server will tell us it's state.
124 |   // Our callback will wait for the state to be ready.  The callback will
125 |   // modify the variable to 1 so we know when we have a connection and it's
126 |   // ready.
127 |   // If there's an error, the callback will set pa_ready to 2
128 |   pa_context_set_state_callback(pa_ctx, pa_state_cb, &pa_ready);
129 | 
130 |   // Now we'll enter into an infinite loop until we get the data we receive
131 |   // or if there's an error
132 |   for (;;) {
133 |     // We can't do anything until PA is ready, so just iterate the mainloop
134 |     // and continue
135 |     if (pa_ready == 0) {
136 |       pa_mainloop_iterate(pa_ml, 1, NULL);
137 |       continue;
138 |     }
139 |     // We couldn't get a connection to the server, so exit out
140 |     if (pa_ready == 2) {
141 |       pa_context_disconnect(pa_ctx);
142 |       pa_context_unref(pa_ctx);
143 |       pa_mainloop_free(pa_ml);
144 |       return -1;
145 |     }
146 |     // At this point, we're connected to the server and ready to make
147 |     // requests
148 |     switch (state) {
149 |       // State 0: we haven't done anything yet
150 |     case 0:
151 |       // This sends an operation to the server.  pa_sinklist_info is
152 |       // our callback function and a pointer to our devicelist will
153 |       // be passed to the callback The operation ID is stored in the
154 |       // pa_op variable
155 |       pa_op = pa_context_get_sink_info_list(pa_ctx, pa_sinklist_cb, output);
156 | 
157 |       // Update state for next iteration through the loop
158 |       state++;
159 |       break;
160 |     case 1:
161 |       // Now we wait for our operation to complete.  When it's
162 |       // complete our pa_output_devicelist is filled out, and we move
163 |       // along to the next state
164 |       if (pa_operation_get_state(pa_op) == PA_OPERATION_DONE) {
165 |         pa_operation_unref(pa_op);
166 | 
167 |         // Now we perform another operation to get the source
168 |         // (input device) list just like before.  This time we pass
169 |         // a pointer to our input structure
170 |         pa_op =
171 |           pa_context_get_source_info_list(pa_ctx, pa_sourcelist_cb, input);
172 |         // Update the state so we know what to do next
173 |         state++;
174 |       }
175 |       break;
176 |     case 2:
177 |       if (pa_operation_get_state(pa_op) == PA_OPERATION_DONE) {
178 |         // Now we're done, clean up and disconnect and return
179 |         pa_operation_unref(pa_op);
180 |         pa_context_disconnect(pa_ctx);
181 |         pa_context_unref(pa_ctx);
182 |         pa_mainloop_free(pa_ml);
183 |         return 0;
184 |       }
185 |       break;
186 |     default:
187 |       // We should never see this state
188 |       fprintf(stderr, "in state %d\n", state);
189 |       return -1;
190 |     }
191 |     // Iterate the main loop and go again.  The second argument is whether
192 |     // or not the iteration should block until something is ready to be
193 |     // done.  Set it to zero for non-blocking.
194 |     pa_mainloop_iterate(pa_ml, 1, NULL);
195 |   }
196 | }
197 | 
198 | void get_input_devices(char*** devices, int* devices_length) {
199 |   *devices = NULL;
200 |   *devices_length = 0;
201 | 
202 |   pa_devicelist_t pa_input_devicelist[16];
203 |   pa_devicelist_t pa_output_devicelist[16];
204 |   if (pa_get_devicelist(pa_input_devicelist, pa_output_devicelist) < 0) {
205 |     fprintf(stderr, "failed to get device list\n");
206 |     return;
207 |   }
208 |   for (int ctr = 0; ctr < 16; ctr++) {
209 |     if (!pa_input_devicelist[ctr].initialized) {
210 |       break;
211 |     }
212 |     const char* device_name = pa_input_devicelist[ctr].name;
213 |     string_list_add(device_name, devices, devices_length);
214 |   }
215 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Spchcat
  2 | 
  3 | Speech recognition tool to convert audio to text transcripts, for Linux and Raspberry Pi.
  4 | 
  5 | ## Description
  6 | 
  7 | <a href="https://tenor.com/view/cat-smiling-awkward-smile-sonrisa-gif-11170177"><img height="120px" style="float:left; margin-right: 10px;" src="https://c.tenor.com/wX3b9R6sb90AAAAC/cat-smiling.gif"/></a>
  8 | 
  9 | [`spchcat`](https://github.com/petewarden/spchcat) is a command-line tool that reads in audio from .WAV files, a microphone, or system audio inputs and converts any speech found into text. It runs locally on your machine, with no web API calls or network activity, and is open source. It is built on top of [Coqui's speech to text library](https://github.com/coqui-ai/STT), [TensorFlow](https://www.tensorflow.org/), [KenLM](https://kheafield.com/code/kenlm/), and [data from Mozilla's Common Voice project](https://commonvoice.mozilla.org/).
 10 | 
 11 | It supports multiple languages thanks to [Coqui's library of models](https://github.com/coqui-ai/STT-models). The accuracy of the recognized text will vary widely depending on the language, since some have only small amounts of training data. You can help improve future models by [contributing your voice](https://commonvoice.mozilla.org/).
 12 | 
 13 | ## Installation
 14 | 
 15 | ### x86
 16 | 
 17 | On Debian-based x86 Linux systems like Ubuntu you should be able to install [the latest .deb package](https://github.com/petewarden/spchcat/releases/download/v0.0.2-alpha/spchcat_0.0-2_amd64.deb) by downloading and double-clicking it. Other distributions are currently unsupported. The tool requires PulseAudio, which is already present on most desktop systems, but can be [installed manually](https://wiki.debian.org/PulseAudio).
 18 | 
 19 | There's a notebook you can run in Colab at [notebooks/install.ipynb](https://github.com/petewarden/spchcat/blob/main/notebooks/install.ipynb) that shows all installation steps.
 20 | 
 21 | ### Raspberry Pi
 22 | 
 23 | To install on a Raspberry Pi, download the [latest .deb installer package](https://github.com/petewarden/spchcat/releases/download/v0.0.2-rpi-alpha/spchcat_0.0-2_armhf.deb) and either double-click on it from the desktop, or run `dpkg -i ~/Downloads/spchcat_0.0-2_armhf.deb` from the terminal. It will take several minutes to unpack all the language files. This version has only been tested on the latest release of Raspbian, released October 30th 2021, and on a Raspberry Pi 4. It's expected to fail on Raspberry Pi 1's and 0's, due to their CPU architecture.
 24 | 
 25 | ## Usage
 26 | 
 27 | After installation, you should be able to run it with no arguments to start capturing audio from the default microphone source, with the results output to the terminal:
 28 | 
 29 | ```bash
 30 | spchcat
 31 | ```
 32 | 
 33 | After you've run the command, start speaking, and you should see the words you're saying appear. The speech recognition is still a work in progress, and the accuracy will depend a lot on the noise levels, your accent, and the complexity of the words, but hopefully you should see something close enough to be useful for simple note taking or other purposes.
 34 | 
 35 | ### System Audio
 36 | 
 37 | If you don't have a microphone attached, or want to transcribe audio coming from another program, you can set the `--source` argument to 'system'. This will attempt to listen to the audio that your machine is playing, including any videos or songs, and transcribe any speech found.
 38 | 
 39 | ```bash
 40 | spchcat --source=system
 41 | ```
 42 | 
 43 | ### WAV Files
 44 | 
 45 | One of the most common audio file formats is WAV. If you don't have any to test with, you can download [Coqui's test set](https://github.com/coqui-ai/STT/releases/download/v1.1.0/audio-1.1.0.tar.gz) to try this option out. If you need to convert files from another format like '.mp3', I recommend using [FFmpeg](https://www.ffmpeg.org/). As with the other source options, `spchcat` will attempt to find any speech in the files and convert it into a transcript. You don't have to explicitly set the `--source` argument: as long as file names are present on the command line, `file` will be the default.
 46 | 
 47 | ```bash
 48 | spchcat audio/8455-210777-0068.wav 
 49 | ```
 50 | 
 51 | If you're using the audio file from the test set, you should see output like the following:
 52 | 
 53 | ```bash
 54 | TensorFlow: v2.3.0-14-g4bdd3955115
 55 |  Coqui STT: v1.1.0-0-gf3605e23
 56 | your power is sufficient i said 
 57 | ```
 58 | 
 59 | You can also specify a folder instead of a single filename, and all `.wav` files within that directory will be transcribed.
 60 | 
 61 | ### Language Support
 62 | 
 63 | So far this documentation has assumed you're using American English, but the tool will default to looking for the language your system has been configured to use. It first looks for the one specified in the `LANG` environment variable. If no model for that language is found, it will default back to 'en_US'. You can override this by setting the `--language` argument on the command line, for example:
 64 | 
 65 | ```bash
 66 | spchcat --language=de_DE
 67 | ```
 68 | 
 69 | This works independently of `--source` and other options, so you can transcribe microphone, system audio, or files in any of the supported languages. It should be noted that some languages have very small amounts of data and so their quality may suffer. If you don't care about country-specific variants, you can also just specify the language part of the code, for example `--language=en`. This will pick any model that supports the language, regardless of country. The same thing happens if a particular language and country pair isn't found: the tool will log a warning and fall back to any country that supports the language. For example, if 'en_GB' is specified but only 'en_US' is present, 'en_US' will be used.
 70 | 
 71 | | Code          | Language Name |
 72 | | ------------: |:-------------|
 73 | |am_ET|Amharic|
 74 | |bn_IN|Bengali|
 75 | |br_FR|Breton|
 76 | |ca_ES|Catalan|
 77 | |cnh_MM|Hakha-Chin|
 78 | |cs_CZ|Czech|
 79 | |cv_RU|Chuvash|
 80 | |cy_GB|Welsh|
 81 | |de_DE|German|
 82 | |dv_MV|Dhivehi|
 83 | |el_GR|Greek|
 84 | |en_US|English|
 85 | |et_EE|Estonian|
 86 | |eu_ES|Basque|
 87 | |fi_FI|Finnish|
 88 | |fr_FR|French|
 89 | |fy_NL|Frisian|
 90 | |ga_IE|Irish|
 91 | |hu_HU|Hungarian|
 92 | |id_ID|Indonesian|
 93 | |it_IT|Italian|
 94 | |ka_GE|Georgian|
 95 | |ky_KG|Kyrgyz|
 96 | |lg_UG|Luganda|
 97 | |lt_LT|Lithuanian|
 98 | |lv_LV|Latvian|
 99 | |mn_MN|Mongolian|
100 | |mt_MT|Maltese|
101 | |nl_NL|Dutch|
102 | |or_IN|Odia|
103 | |pt_PT|Portuguese|
104 | |rm_CH|Romansh-Sursilvan|
105 | |ro_RO|Romanian|
106 | |ru_RU|Russian|
107 | |rw_RW|Kinyarwanda|
108 | |sah_RU|Sakha|
109 | |sb_DE|Upper-Sorbian|
110 | |sl_SI|Slovenian|
111 | |sw_KE|Swahili-Congo|
112 | |ta_IN|Tamil|
113 | |th_TH|Thai|
114 | |tr_TR|Turkish|
115 | |tt_RU|Tatar|
116 | |uk_UK|Ukrainian|
117 | |wo_SN|Wolof|
118 | |yo_NG|Yoruba|
119 | 
120 | All of these models have been collected by Coqui, and contributed by organizations like [Inclusive Technology for Marginalized Languages](https://itml.cl.indiana.edu/) or individuals. All are using the conventions for Coqui's STT library, so custom models could potentially be used, but training and deployment of those is outside the scope of this document. The models themselves are provided under a variety of open source licenses, which can be inspected in their source folders (typically inside `/etc/spchcat/models/`).
121 | 
122 | ### Saving Output
123 | 
124 | By default `spchcat` writes any recognized text to the terminal, but it's designed to behave like a normal Unix command-line tool, so the output can also be written to a file using redirection like this:
125 | 
126 | ```bash
127 | spchcat audio/8455-210777-0068.wav > /tmp/transcript.txt
128 | ```
129 | 
130 | If you then run `cat /tmp/transcript.txt` (or open it in an editor) you should see `your power is sufficient i said`. You can also pipe the output to another command. Unfortunately you can't pipe audio into the tool from another executable, since pipes aren't designed for non-text data.
131 | 
132 | There is one subtle difference between writing to a file and to the terminal. The transcription itself can take some time to settle into a final form, especially when waiting for long words to finish, so when it's being run live in a terminal you'll often see the last couple of words change. This isn't useful when writing to a file, so instead the output is finalized before it's written. This can introduce a small delay when writing live microphone or system audio input.
133 | 
134 | ## Build from Source
135 | 
136 | ### Tool
137 | 
138 | It's possible to build all dependencies from source, but I recommend downloading binary versions of Coqui's STT, TensorFlow Lite, and KenLM libraries from [github.com/coqui-ai/STT/releases/download/v1.1.0/native_client.tflite.Linux.tar.xz](https://github.com/coqui-ai/STT/releases/download/v1.1.0/native_client.tflite.Linux.tar.xz). Extract this to a folder, and then, from inside the folder containing this repo, run the following to build the `spchcat` tool itself:
139 | 
140 | ```bash
141 | make spchcat LINK_PATH_STT=-L../STT_download
142 | ```
143 | 
144 | You should replace `../STT_download` with the path to the Coqui library folder. After this you should see a `spchcat` executable binary in the repo folder. Because it relies on shared libraries, you'll need to specify a path to these too using `LD_LIBRARY_PATH` unless you have copies in system folders.
145 | 
146 | ```bash
147 | LD_LIBRARY_PATH=../STT_download ./spchcat
148 | ```
149 | 
150 | ### Models
151 | 
152 | The previous step only built the executable binary itself, but for the complete tool you also need data files for each language. If you have the [`gh` GitHub command line tool](https://cli.github.com/) you can run the `download_models.py` script to fetch [Coqui's releases](https://github.com/coqui-ai/STT-models/releases) into the `build/models` folder in your local repo. You can then run your locally-built tool against these models using the `--languages_dir` option:
153 | 
154 | ```bash
155 | LD_LIBRARY_PATH=../STT_download ./spchcat --languages_dir=build/models/
156 | ```
157 | 
158 | ### Installer
159 | 
160 | After you have the tool built and the model data downloaded, `create_deb_package.sh` will attempt to package them into a Debian installer archive. It will take several minutes to run, and the result ends up in `spchcat_0.0-2_amd64.deb`.
161 | 
162 | ### Release Process
163 | 
164 | There's a notebook at [notebooks/build.ipynb](https://github.com/petewarden/spchcat/blob/main/notebooks/build.ipynb) that runs through all the build steps needed to download dependencies and data, build the executable, and create the final package. These steps are run inside an Ubuntu 18.04 Docker image to create [the binaries that are released](https://github.com/petewarden/spchcat/releases).
165 | 
166 | ```bash
167 | sudo docker run -it -v`pwd`:/spchcat ubuntu:bionic bash
168 | ```
169 | 
170 | ## Contributors
171 | 
172 | Tool code written by [Pete Warden](https://twitter.com/petewarden), pete@petewarden.com, heavily based on [Coqui's STT example](https://github.com/coqui-ai/STT/blob/main/native_client/stt.cc). It's a pretty thin wrapper on top of [Coqui's speech to text library](https://github.com/coqui-ai/STT), so the Coqui team should get credit for their amazing work. Also relies on [TensorFlow](https://www.tensorflow.org/), [KenLM](https://kheafield.com/code/kenlm/), [data from Mozilla's Common Voice project](https://commonvoice.mozilla.org/), and all the contributors to [Coqui's model zoo](https://coqui.ai/models).
173 | 
174 | ## License
175 | 
176 | Tool code is licensed under the Mozilla Public License Version 2.0, see LICENSE in this folder.
177 | 
178 | All other libraries and model data are released under their own licenses, see the relevant folders for more details.
179 | 


--------------------------------------------------------------------------------
/src/settings_test.c:
--------------------------------------------------------------------------------
#include "settings.h"

#include "acutest.h"

#include "settings.c"

#include <stdio.h>
#include <stdlib.h>
  8 | 
  9 | // Not sure why this declaration isn't pulled in?
 10 | extern int setenv(const char*, const char*, int);
 11 | 
 12 | static void create_mock_languages_dir(const char* root, const char** dirs,
 13 |   int dirs_length) {
 14 |   mkdir(root, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 15 |   for (int i = 0; i < dirs_length; ++i) {
 16 |     char* dirpath = file_join_paths(root, dirs[i]);
 17 |     mkdir(dirpath, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 18 |     free(dirpath);
 19 |   }
 20 | }
 21 | 
 22 | void test_is_real_entry() {
 23 |   TEST_CHECK(is_real_entry("foo", NULL));
 24 |   TEST_CHECK(!is_real_entry(".", NULL));
 25 |   TEST_CHECK(!is_real_entry("..", NULL));
 26 | }
 27 | 
 28 | void test_available_languages() {
 29 |   const char* languages_dir = "/tmp/test_lang_dir";
 30 |   const char* languages[] = {
 31 |     "en_US", "es_ES", "fr_FR",
 32 |   };
 33 |   const int languages_length = sizeof(languages) / sizeof(languages[0]);
 34 |   create_mock_languages_dir(languages_dir, languages, languages_length);
 35 | 
 36 |   char* result = available_languages(languages_dir);
 37 |   TEST_STR_CONTAINS("en_US", result);
 38 |   TEST_STR_CONTAINS("es_ES", result);
 39 |   TEST_STR_CONTAINS("fr_FR", result);
 40 |   TEST_SIZEQ(19, strlen(result));
 41 | 
 42 |   free(result);
 43 |   rmdir(languages_dir);
 44 | }
 45 | 
 46 | void test_language_description() {
 47 |   const char* languages_dir = "/tmp/test_lang_dir2";
 48 |   const char* languages[] = {
 49 |     "en_US", "de_DE", "fr_FR",
 50 |   };
 51 |   const int languages_length = sizeof(languages) / sizeof(languages[0]);
 52 |   create_mock_languages_dir(languages_dir, languages, languages_length);
 53 | 
 54 |   Settings settings;
 55 |   settings.languages_dir = languages_dir;
 56 |   settings.language = string_duplicate("es_ES");
 57 |   char* result = language_description(&settings);
 58 |   TEST_STR_CONTAINS("en_US", result);
 59 |   TEST_STR_CONTAINS("de_DE", result);
 60 |   TEST_STR_CONTAINS("fr_FR", result);
 61 |   TEST_STR_CONTAINS("es_ES", result);
 62 | 
 63 |   free(result);
 64 |   rmdir(languages_dir);
 65 |   free(settings.language);
 66 | }
 67 | 
 68 | void test_set_defaults() {
 69 |   const char* old_lang = getenv("LANG");
 70 | 
 71 |   Settings settings1 = {};
 72 |   setenv("LANG", "en_UK.UTF-8", 1);
 73 |   set_defaults(&settings1);
 74 |   TEST_STREQ("en_UK", settings1.language);
 75 |   free(settings1.language);
 76 | 
 77 |   Settings settings2 = {};
 78 |   setenv("LANG", "", 1);
 79 |   set_defaults(&settings2);
 80 |   TEST_STREQ("en_US", settings2.language);
 81 |   free(settings2.language);
 82 | 
 83 |   setenv("LANG", old_lang, 1);
 84 | }
 85 | 
 86 | void test_find_model_for_language() {
 87 |   const char* languages_dir = "/tmp/test_lang_dir3";
 88 |   const char* languages[] = {
 89 |     "en_US", "de_DE", "de_AT", "fr_FR",
 90 |   };
 91 |   const int languages_length = sizeof(languages) / sizeof(languages[0]);
 92 |   create_mock_languages_dir(languages_dir, languages, languages_length);
 93 |   char* en_us_dir = file_join_paths(languages_dir, "en_US");
 94 |   char* en_us_model = file_join_paths(en_us_dir, "model.tflite");
 95 |   free(en_us_dir);
 96 |   file_write(en_us_model, "a\0", 2);
 97 | 
 98 |   Settings* settings1 = calloc(sizeof(Settings), 1);
 99 |   settings1->model = "/foo/bar/baz.tflite";
100 |   find_model_for_language(settings1);
101 |   TEST_STREQ("/foo/bar/baz.tflite", settings1->model);
102 |   settings_free(settings1);
103 | 
104 |   Settings* settings2 = calloc(sizeof(Settings), 1);
105 |   settings2->language_from_args = "en_US";
106 |   settings2->languages_dir = languages_dir;
107 |   find_model_for_language(settings2);
108 |   TEST_STREQ(en_us_model, settings2->model);
109 |   settings_free(settings2);
110 | 
111 |   Settings* settings3 = calloc(sizeof(Settings), 1);
112 |   settings3->language_from_args = "en_UK";
113 |   settings3->languages_dir = languages_dir;
114 |   find_model_for_language(settings3);
115 |   TEST_STREQ(en_us_model, settings3->model);
116 |   settings_free(settings3);
117 | 
118 |   Settings* settings4 = calloc(sizeof(Settings), 1);
119 |   settings4->language_from_args = "de_UK";
120 |   settings4->languages_dir = languages_dir;
121 |   find_model_for_language(settings4);
122 |   TEST_CHECK(settings4->model == NULL);
123 |   settings_free(settings4);
124 | 
125 |   free(en_us_model);
126 |   rmdir(languages_dir);
127 | }
128 | 
129 | void test_find_scorer_for_language() {
130 |   const char* languages_dir = "/tmp/test_lang_dir3";
131 |   const char* languages[] = {
132 |     "en_US", "de_DE", "de_AT", "fr_FR",
133 |   };
134 |   const int languages_length = sizeof(languages) / sizeof(languages[0]);
135 |   create_mock_languages_dir(languages_dir, languages, languages_length);
136 |   char* en_us_dir = file_join_paths(languages_dir, "en_US");
137 |   char* en_us_scorer = file_join_paths(en_us_dir, "some.scorer");
138 |   free(en_us_dir);
139 |   file_write(en_us_scorer, "a\0", 2);
140 | 
141 |   Settings* settings1 = calloc(sizeof(Settings), 1);
142 |   settings1->scorer = "/foo/bar/baz.scorer";
143 |   find_scorer_for_language(settings1);
144 |   TEST_STREQ("/foo/bar/baz.scorer", settings1->scorer);
145 |   settings_free(settings1);
146 | 
147 |   Settings* settings2 = calloc(sizeof(Settings), 1);
148 |   settings2->language_from_args = "en_US";
149 |   settings2->languages_dir = languages_dir;
150 |   find_scorer_for_language(settings2);
151 |   TEST_STREQ(en_us_scorer, settings2->model);
152 |   settings_free(settings2);
153 | 
154 |   free(en_us_scorer);
155 |   rmdir(languages_dir);
156 | }
157 | 
158 | void test_set_source() {
159 |   char* argv1[] = { "program" };
160 |   const int argc1 = sizeof(argv1) / sizeof(argv1[0]);
161 |   Settings* settings1 = settings_init_from_argv(argc1, argv1);
162 |   TEST_CHECK(settings1 != NULL);
163 |   TEST_STREQ("mic", settings1->source);
164 |   settings_free(settings1);
165 | 
166 |   char* argv2[] = { "program", "--source", "foo" };
167 |   const int argc2 = sizeof(argv2) / sizeof(argv2[0]);
168 |   Settings* settings2 = settings_init_from_argv(argc2, argv2);
169 |   TEST_CHECK(settings2 != NULL);
170 |   TEST_STREQ("foo", settings2->source);
171 |   settings_free(settings2);
172 | 
173 |   char* argv3[] = { "program", "--source", "file", "foo.wav", "bar.wav" };
174 |   const int argc3 = sizeof(argv3) / sizeof(argv3[0]);
175 |   Settings* settings3 = settings_init_from_argv(argc3, argv3);
176 |   TEST_CHECK(settings3 != NULL);
177 |   TEST_STREQ("file", settings3->source);
178 |   TEST_INTEQ(2, settings3->files_count);
179 |   TEST_STREQ("foo.wav", settings3->files[0]);
180 |   TEST_STREQ("bar.wav", settings3->files[1]);
181 |   settings_free(settings3);
182 | 
183 |   char* argv4[] = { "program", "baz.wav", "fish.wav" };
184 |   const int argc4 = sizeof(argv4) / sizeof(argv4[0]);
185 |   Settings* settings4 = settings_init_from_argv(argc4, argv4);
186 |   TEST_CHECK(settings4 != NULL);
187 |   TEST_STREQ("file", settings4->source);
188 |   TEST_INTEQ(2, settings4->files_count);
189 |   TEST_STREQ("baz.wav", settings4->files[0]);
190 |   TEST_STREQ("fish.wav", settings4->files[1]);
191 |   settings_free(settings4);
192 | 
193 |   char* argv5[] = { "program", "--source=foo", "baz.wav", "fish.wav" };
194 |   const int argc5 = sizeof(argv5) / sizeof(argv5[0]);
195 |   Settings* settings5 = settings_init_from_argv(argc5, argv5);
196 |   TEST_CHECK(settings5 == NULL);
197 |   settings_free(settings5);
198 | 
199 |   char* argv6[] = { "program", "--source=file" };
200 |   const int argc6 = sizeof(argv6) / sizeof(argv6[0]);
201 |   Settings* settings6 = settings_init_from_argv(argc6, argv6);
202 |   TEST_CHECK(settings6 == NULL);
203 |   settings_free(settings6);
204 | }
205 | 
206 | void test_settings_init_from_argv() {
207 |   char* languages_dir = "/tmp/test_lang_dir3";
208 |   const char* languages[] = {
209 |     "en_US", "de_DE", "de_AT", "fr_FR",
210 |   };
211 |   const int languages_length = sizeof(languages) / sizeof(languages[0]);
212 |   create_mock_languages_dir(languages_dir, languages, languages_length);
213 |   char* en_us_dir = file_join_paths(languages_dir, "en_US");
214 |   char* en_us_model = file_join_paths(en_us_dir, "model.tflite");
215 |   free(en_us_dir);
216 |   file_write(en_us_model, "a\0", 2);
217 | 
218 |   char* argv1[] = { "program" };
219 |   const int argc1 = sizeof(argv1) / sizeof(argv1[0]);
220 |   Settings* settings1 = settings_init_from_argv(argc1, argv1);
221 |   TEST_CHECK(settings1 != NULL);
222 |   settings_free(settings1);
223 | 
224 |   char* argv2[] = { "program", "--unknown_flag" };
225 |   const int argc2 = sizeof(argv2) / sizeof(argv2[0]);
226 |   Settings* settings2 = settings_init_from_argv(argc2, argv2);
227 |   TEST_CHECK(settings2 == NULL);
228 | 
229 |   char* argv3[] = { "program", "--model", "foo/bar/baz.tflite" };
230 |   const int argc3 = sizeof(argv3) / sizeof(argv3[0]);
231 |   Settings* settings3 = settings_init_from_argv(argc3, argv3);
232 |   TEST_CHECK(settings3 != NULL);
233 |   TEST_STREQ("foo/bar/baz.tflite", settings3->model);
234 |   settings_free(settings3);
235 | 
236 |   char* argv4[] = { "program",
237 |     "--language", "en",
238 |     "--languages_dir", languages_dir,
239 |   };
240 |   const int argc4 = sizeof(argv4) / sizeof(argv4[0]);
241 |   Settings* settings4 = settings_init_from_argv(argc4, argv4);
242 |   TEST_CHECK(settings4 != NULL);
243 |   TEST_STREQ(en_us_model, settings4->model);
244 |   settings_free(settings4);
245 | 
246 |   char* argv5[] = { "program",
247 |     "--language", "en_US",
248 |     "--source", "file",
249 |     "--languages_dir", languages_dir,
250 |     "--scorer", "foo",
251 |     "--source_buffer_size", "160",
252 |     "--beam_width", "5",
253 |     "--lm_alpha", "3.0",
254 |     "--lm_beta", "4.0",
255 |     "--show_times",
256 |     "--has_versions",
257 |     "--extended_metadata",
258 |     "--json_output",
259 |     "--json_candidate_transcripts", "3",
260 |     "--stream_size", "320",
261 |     "--extended_stream_size", "640",
262 |     "--hot_words", "baz:10.0,fish:20.0",
263 |     "--stream_capture_file", "/foo/bar.wav",
264 |     "--stream_capture_duration", "32000",
265 |     "file1", "file2",
266 |   };
267 |   const int argc5 = sizeof(argv5) / sizeof(argv5[0]);
268 |   Settings* settings5 = settings_init_from_argv(argc5, argv5);
269 |   TEST_CHECK(settings5 != NULL);
270 |   TEST_STREQ("en_US", settings5->language);
271 |   TEST_STREQ("file", settings5->source);
272 |   TEST_STREQ(languages_dir, settings5->languages_dir);
273 |   TEST_STREQ(en_us_model, settings5->model);
274 |   TEST_STREQ("foo", settings5->scorer);
275 |   TEST_INTEQ(160, settings5->source_buffer_size);
276 |   TEST_INTEQ(5, settings5->beam_width);
277 |   TEST_FLTEQ(3.0f, settings5->lm_alpha, 0.0001f);
278 |   TEST_FLTEQ(4.0f, settings5->lm_beta, 0.0001f);
279 |   TEST_CHECK(settings5->show_times);
280 |   TEST_CHECK(settings5->has_versions);
281 |   TEST_CHECK(settings5->extended_metadata);
282 |   TEST_CHECK(settings5->json_output);
283 |   TEST_INTEQ(3, settings5->json_candidate_transcripts);
284 |   TEST_INTEQ(320, settings5->stream_size);
285 |   TEST_INTEQ(640, settings5->extended_stream_size);
286 |   TEST_STREQ("baz:10.0,fish:20.0", settings5->hot_words);
287 |   TEST_STREQ("/foo/bar.wav", settings5->stream_capture_file);
288 |   TEST_INTEQ(32000, settings5->stream_capture_duration);
289 |   settings_free(settings5);
290 | 
291 |   free(en_us_model);
292 | }
293 | 
// Registry of the tests in this file: each entry pairs a display name with
// the function to run, and the {NULL, NULL} entry marks the end of the list.
// NOTE(review): test_find_scorer_for_language is defined in this file but has
// no entry here, so it is presumably never run -- confirm whether that is
// intentional before registering it.
TEST_LIST = {
  {"is_real_entry", test_is_real_entry},
  {"available_languages", test_available_languages},
  {"test_language_description", test_language_description},
  {"test_set_defaults", test_set_defaults},
  {"test_find_model_for_language", test_find_model_for_language},
  {"test_set_source", test_set_source},
  {"test_settings_init_from_argv", test_settings_init_from_argv},
  {NULL, NULL},
};


--------------------------------------------------------------------------------
/definitions.mk:
--------------------------------------------------------------------------------
  1 | NC_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
  2 | 
  3 | TARGET    ?= host
  4 | ROOT_DIR  ?= $(abspath $(NC_DIR)/..)
  5 | TFDIR     ?= $(abspath $(NC_DIR)/../tensorflow)
  6 | PREFIX    ?= /usr/local
  7 | SO_SEARCH ?= $(TFDIR)/bazel-bin/
  8 | 
  9 | TOOL_AS   := as
 10 | TOOL_CC   := gcc
 11 | TOOL_CXX  := c++
 12 | TOOL_LD   := ld
 13 | TOOL_LDD  := ldd
 14 | TOOL_LIBEXE :=
 15 | 
 16 | OS        := $(shell uname -s)
 17 | 
 18 | ifeq ($(findstring _NT,$(OS)),_NT)
 19 | PLATFORM_EXE_SUFFIX := .exe
 20 | endif
 21 | 
 22 | SPCHCAT_BIN   := spchcat$(PLATFORM_EXE_SUFFIX)
 23 | CFLAGS_STT    := -std=c++11 -o $(SPCHCAT_BIN) -Os -Isrc/
 24 | # Allow user-supplied flags (like include paths) from the command line.
 25 | EXTRA_CFLAGS_STT :=
 26 | CFLAGS_STT    += ${EXTRA_CFLAGS_STT}
 27 | LINK_STT      := -lstt -lkenlm -ltflitedelegates -ltensorflowlite -lpulse -l pulse-simple
 28 | LINK_PATH_STT := -L${TFDIR}/bazel-bin/native_client -L${TFDIR}/bazel-bin/tensorflow/lite
 29 | 
 30 | ifeq ($(TARGET),host)
 31 | TOOLCHAIN       :=
 32 | CFLAGS          :=
 33 | CXXFLAGS        :=
 34 | LDFLAGS         :=
 35 | SOX_CFLAGS      := -I$(ROOT_DIR)/sox-build/include
 36 | ifeq ($(OS),Linux)
 37 | MAGIC_LINK_LZMA := $(shell objdump -tTC /usr/lib/`uname -m`-linux-gnu/libmagic.so | grep lzma | grep '*UND*' | wc -l)
 38 | ifneq ($(MAGIC_LINK_LZMA),0)
 39 | MAYBE_LINK_LZMA := -llzma
 40 | endif # MAGIC_LINK_LZMA
 41 | MAGIC_LINK_BZ2  := $(shell objdump -tTC /usr/lib/`uname -m`-linux-gnu/libmagic.so | grep BZ2 | grep '*UND*' | wc -l)
 42 | ifneq ($(MAGIC_LINK_BZ2),0)
 43 | MAYBE_LINK_BZ2  := -lbz2
 44 | endif # MAGIC_LINK_BZ2
 45 | SOX_LDFLAGS     := -L$(ROOT_DIR)/sox-build/lib -lsox
 46 | else ifeq ($(OS),Darwin)
 47 | LIBSOX_PATH             := $(shell echo `pkg-config --libs-only-L sox | sed -e 's/^-L//'`/lib`pkg-config --libs-only-l sox | sed -e 's/^-l//'`.dylib)
 48 | LIBOPUSFILE_PATH        := $(shell echo `pkg-config --libs-only-L opusfile | sed -e 's/^-L//'`/lib`pkg-config --libs-only-l opusfile | sed -e 's/^-l//'`.dylib)
 49 | LIBSOX_STATIC_DEPS      := $(shell echo `otool -L $(LIBSOX_PATH) | tail -n +2 | cut -d' ' -f1 | grep /opt/ | sed -E "s/\.[[:digit:]]+\.dylib/\.a/" | tr '\n' ' '`)
 50 | LIBOPUSFILE_STATIC_DEPS := $(shell echo `otool -L $(LIBOPUSFILE_PATH) | tail -n +2 | cut -d' ' -f1 | grep /opt/ | sed -E "s/\.[[:digit:]]+\.dylib/\.a/" | tr '\n' ' '`)
 51 | SOX_LDFLAGS             := $(LIBSOX_STATIC_DEPS) $(LIBOPUSFILE_STATIC_DEPS) -framework CoreAudio -lz
 52 | else
 53 | SOX_LDFLAGS     := `pkg-config --libs sox`
 54 | endif # OS others
 55 | PYTHON_PACKAGES := numpy${NUMPY_BUILD_VERSION}
 56 | ifeq ($(OS),Linux)
 57 | PYTHON_PLATFORM_NAME ?= --plat-name manylinux_2_24_x86_64
 58 | endif
 59 | endif
 60 | 
 61 | ifeq ($(findstring _NT,$(OS)),_NT)
 62 | TOOLCHAIN := '$(VCToolsInstallDir)\bin\Hostx64\x64\'
 63 | TOOL_CC     := cl.exe
 64 | TOOL_CXX    := cl.exe
 65 | TOOL_LD     := link.exe
 66 | TOOL_LIBEXE := lib.exe
 67 | LINK_STT      := $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libstt.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libkenlm.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libtflitedelegates.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/tensorflow/lite/libtensorflowlite.so.if.lib")
 68 | LINK_PATH_STT :=
 69 | CFLAGS_STT    := -nologo -Fe$(SPCHCAT_BIN)
 70 | SOX_CFLAGS      :=
 71 | SOX_LDFLAGS     :=
 72 | PYTHON_PACKAGES := numpy${NUMPY_BUILD_VERSION}
 73 | endif
 74 | 
 75 | ifeq ($(TARGET),rpi3)
 76 | TOOLCHAIN_DIR ?= ${TFDIR}/bazel-$(shell basename "${TFDIR}")/external/LinaroArmGcc72/bin
 77 | TOOLCHAIN   ?= $(TOOLCHAIN_DIR)/arm-linux-gnueabihf-
 78 | RASPBIAN    ?= $(abspath $(NC_DIR)/../multistrap-raspbian-buster)
 79 | # -D_XOPEN_SOURCE -D_FILE_OFFSET_BITS=64 => to avoid EOVERFLOW on readdir() with 64-bits inode
 80 | CFLAGS      := -march=armv7-a -mtune=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard -D_GLIBCXX_USE_CXX11_ABI=0 -D_XOPEN_SOURCE -D_FILE_OFFSET_BITS=64 --sysroot $(RASPBIAN)
 81 | CXXFLAGS    := $(CFLAGS)
 82 | LDFLAGS     := -Wl,-rpath-link,$(RASPBIAN)/lib/arm-linux-gnueabihf/ -Wl,-rpath-link,$(RASPBIAN)/usr/lib/arm-linux-gnueabihf/
 83 | 
 84 | SOX_CFLAGS  := -I$(RASPBIAN)/usr/include
 85 | SOX_LDFLAGS := $(RASPBIAN)/usr/lib/arm-linux-gnueabihf/libsox.so
 86 | 
 87 | PYVER := $(shell python -c "import platform; maj, min, _ = platform.python_version_tuple(); print(maj+'.'+min);")
 88 | PYTHON_PACKAGES      :=
 89 | PYTHON_PATH          := PYTHONPATH=$(RASPBIAN)/usr/lib/python$(PYVER)/:$(RASPBIAN)/usr/lib/python3/dist-packages/
 90 | NUMPY_INCLUDE        := NUMPY_INCLUDE=$(RASPBIAN)/usr/include/python3.7m/
 91 | PYTHON_SYSCONFIGDATA := _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_m_linux_arm-linux-gnueabihf
 92 | PYTHON_PLATFORM_NAME := --plat-name linux_armv7l
 93 | NODE_PLATFORM_TARGET := --target_arch=arm --target_platform=linux
 94 | TOOLCHAIN_LDD_OPTS   := --root $(RASPBIAN)/
 95 | endif # ($(TARGET),rpi3)
 96 | 
# Cross-compilation settings selected when TARGET is rpi3-armv8: an
# aarch64-linux-gnu toolchain located under the bazel tree in TFDIR, with a
# Raspbian multistrap directory passed to the compiler as the sysroot.
ifeq ($(TARGET),rpi3-armv8)
# `?=` only assigns when the variable is not already defined, so these three
# locations can be overridden from the environment or the make command line.
TOOLCHAIN_DIR ?= ${TFDIR}/bazel-$(shell basename "${TFDIR}")/external/LinaroAarch64Gcc72/bin
TOOLCHAIN   ?= $(TOOLCHAIN_DIR)/aarch64-linux-gnu-
RASPBIAN    ?= $(abspath $(NC_DIR)/../multistrap-raspbian64-buster)
CFLAGS      := -march=armv8-a -mtune=cortex-a53 -D_GLIBCXX_USE_CXX11_ABI=0 --sysroot $(RASPBIAN)
CXXFLAGS    := $(CFLAGS)
# -rpath-link points the linker at the sysroot's library folders when it
# resolves the dependencies of shared libraries.
LDFLAGS     := -Wl,-rpath-link,$(RASPBIAN)/lib/aarch64-linux-gnu/ -Wl,-rpath-link,$(RASPBIAN)/usr/lib/aarch64-linux-gnu/

# SoX headers and shared library are taken from the sysroot.
SOX_CFLAGS  := -I$(RASPBIAN)/usr/include
SOX_LDFLAGS := $(RASPBIAN)/usr/lib/aarch64-linux-gnu/libsox.so

# PYVER is the major.minor version reported by the `python` found on the build
# host's PATH.
PYVER := $(shell python -c "import platform; maj, min, _ = platform.python_version_tuple(); print(maj+'.'+min);")
PYTHON_PACKAGES      :=
PYTHON_PATH          := PYTHONPATH=$(RASPBIAN)/usr/lib/python$(PYVER)/:$(RASPBIAN)/usr/lib/python3/dist-packages/
PYTHON_SYSCONFIGDATA := _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_m_linux_aarch64-linux-gnu
# NOTE(review): this include path hard-codes python3.7 while PYTHON_PATH uses
# $(PYVER) -- confirm the two are meant to differ.
NUMPY_INCLUDE        := NUMPY_INCLUDE=$(RASPBIAN)/usr/include/python3.7/
PYTHON_PLATFORM_NAME := --plat-name linux_aarch64
NODE_PLATFORM_TARGET := --target_arch=arm64 --target_platform=linux
# Extra options appended to the LDD command so it inspects the sysroot.
TOOLCHAIN_LDD_OPTS   := --root $(RASPBIAN)/
endif # ($(TARGET),rpi3-armv8)
117 | 
# Settings for TARGET=ios-simulator: compile against the iphonesimulator13.5
# SDK path reported by xcrun, and clear the SoX and linker flags.
ifeq ($(TARGET),ios-simulator)
CFLAGS          := -isysroot $(shell xcrun -sdk iphonesimulator13.5 -show-sdk-path)
SOX_CFLAGS      :=
SOX_LDFLAGS     :=
LDFLAGS         :=
endif
124 | 
# Settings for TARGET=ios-arm64: target arm64-apple-ios using the iphoneos13.5
# SDK path reported by xcrun, and clear the SoX and linker flags.
ifeq ($(TARGET),ios-arm64)
CFLAGS          := -target arm64-apple-ios -isysroot $(shell xcrun -sdk iphoneos13.5 -show-sdk-path)
SOX_CFLAGS      :=
SOX_LDFLAGS     :=
LDFLAGS         :=
endif
131 | 
# -Wl,--no-as-needed is required to stop the linker from evicting libraries it
# thinks we don't need. It is only set on Linux: the option does not exist on
# OSX and would fail the build there.
ifeq ($(OS),Linux)
LDFLAGS_NEEDED := -Wl,--no-as-needed
# `$$` is make's escape for a literal `$`, so the recipe shell receives
# `\$ORIGIN` and passes the text $ORIGIN through to the linker unexpanded.
LDFLAGS_RPATH  := -Wl,-rpath,\$$ORIGIN
endif
ifeq ($(OS),Darwin)
CXXFLAGS       += -stdlib=libc++
LDFLAGS_NEEDED := -stdlib=libc++
LDFLAGS_RPATH  := -Wl,-rpath,@executable_path
# The minimum macOS version is only pinned for host builds.
ifeq ($(TARGET),host)
CXXFLAGS       += -mmacosx-version-min=10.10
LDFLAGS_NEEDED += -mmacosx-version-min=10.10
endif
endif
147 | 
# Append the caller-supplied EXTRA_* values and assemble the final linker
# command line from the per-OS flags, the library search paths and the
# libraries themselves (LINK_STT / LINK_PATH_STT are set outside this section).
CFLAGS   += $(EXTRA_CFLAGS)
CXXFLAGS += $(EXTRA_CXXFLAGS)
LIBS     := $(LINK_STT) $(EXTRA_LIBS)
LDFLAGS_DIRS := $(LINK_PATH_STT) $(EXTRA_LDFLAGS)
LDFLAGS  += $(LDFLAGS_NEEDED) $(LDFLAGS_RPATH) $(LDFLAGS_DIRS) $(LIBS)
153 | 
# Each tool is the TOOLCHAIN prefix followed by the matching TOOL_* name (the
# TOOL_* variables are set outside this section). LDD additionally receives
# TOOLCHAIN_LDD_OPTS.
AS      := $(TOOLCHAIN)$(TOOL_AS)
CC      := $(TOOLCHAIN)$(TOOL_CC)
CXX     := $(TOOLCHAIN)$(TOOL_CXX)
LD      := $(TOOLCHAIN)$(TOOL_LD)
LDD     := $(TOOLCHAIN)$(TOOL_LDD) $(TOOLCHAIN_LDD_OPTS)
LIBEXE  := $(TOOLCHAIN)$(TOOL_LIBEXE)
160 | 
# rpath flags for the Python and NodeJS bindings, plus the name of the
# environment variable the dynamic loader reads for extra library folders.
# The defaults use $ORIGIN and LD_LIBRARY_PATH; on Darwin they are replaced
# with the @loader_path and DYLD_LIBRARY_PATH equivalents.
RPATH_PYTHON         := '-Wl,-rpath,\$$ORIGIN/lib/' $(LDFLAGS_RPATH)
# NOTE(review): this value is escaped differently from RPATH_PYTHON above
# (`$$\$$` instead of `\$$`) -- presumably to survive one more level of
# expansion in the NodeJS build; confirm.
RPATH_NODEJS         := '-Wl,-rpath,$$\$$ORIGIN/../'
META_LD_LIBRARY_PATH := LD_LIBRARY_PATH
ifeq ($(OS),Darwin)
META_LD_LIBRARY_PATH := DYLD_LIBRARY_PATH
RPATH_PYTHON         := '-Wl,-rpath,@loader_path/lib/' $(LDFLAGS_RPATH)
RPATH_NODEJS         := '-Wl,-rpath,@loader_path/../'
endif
169 | 
# copy_missing_libs: shell snippet meant to be expanded inside a recipe via
# $(call copy_missing_libs,SRC_FILE,TARGET_LIB_DIR,MANIFEST_IN).
#
#   $(1) SRC_FILE        Built binding(s) to inspect; may contain a wildcard.
#   $(2) TARGET_LIB_DIR  Directory that receives the copied libraries. It is
#                        created if needed (a failing mkdir is ignored).
#   $(3) MANIFEST_IN     Optional. When non-empty, one
#                        'include TARGET_LIB_DIR/<lib>' line per missing
#                        library is appended to this file.
#
# Missing dependencies are detected with `otool -L` on Darwin and with $(LDD)
# ('not found' lines) elsewhere; when $(OS) equals CI_MSYS_VERSION a fixed
# list of four libraries is used instead. Each missing library is then looked
# up by name under $(SO_SEARCH) and copied into TARGET_LIB_DIR.
#
# On OSX systems, this will also take care of calling install_name_tool to
# rewrite each unresolved dependency path to @rpath/<basename>.
#
# NOTE(review): `grep -v '$$lib'` reaches the shell inside single quotes, so
# the shell does not expand $lib there -- confirm that filter does what was
# intended.
# NOTE(review): do not add `#` comments inside the body below; every line is
# joined by backslash continuation, so a comment would swallow the rest.
define copy_missing_libs
    SRC_FILE=$(1); \
    TARGET_LIB_DIR=$(2); \
    MANIFEST_IN=$(3); \
    echo "Analyzing $$SRC_FILE copying missing libs to $$TARGET_LIB_DIR"; \
    echo "Maybe outputting to $$MANIFEST_IN"; \
    \
    (mkdir $$TARGET_LIB_DIR || true); \
    missing_libs=""; \
    for lib in $$SRC_FILE; do \
        if [ "$(OS)" = "Darwin" ]; then \
            new_missing="$$( (for f in $$(otool -L $$lib 2>/dev/null | tail -n +2 | awk '{ print $$1 }' | grep -v '$$lib'); do ls -hal $$f; done;) 2>&1 | grep 'No such' | cut -d':' -f2 | xargs basename -a)"; \
            missing_libs="$$missing_libs $$new_missing"; \
        elif [ "$(OS)" = "${CI_MSYS_VERSION}" ]; then \
            missing_libs="libstt.so libkenlm.so libtflitedelegates.so libtensorflowlite.so"; \
        else \
            missing_libs="$$missing_libs $$($(LDD) $$lib | grep 'not found' | awk '{ print $$1 }')"; \
        fi; \
    done; \
    \
    echo "Missing libs = $$missing_libs"; \
    for missing in $$missing_libs; do \
        find $(SO_SEARCH) -type f -name "$$missing" -exec cp {} $$TARGET_LIB_DIR \; ; \
        chmod +w $$TARGET_LIB_DIR/*.so ; \
        if [ ! -z "$$MANIFEST_IN" ]; then \
            echo "include $$TARGET_LIB_DIR/$$missing" >> $$MANIFEST_IN; \
        fi; \
    done; \
    \
    if [ "$(OS)" = "Darwin" ]; then \
        for lib in $$SRC_FILE; do \
            for dep in $$( (for f in $$(otool -L $$lib 2>/dev/null | tail -n +2 | awk '{ print $$1 }' | grep -v '$$lib'); do ls -hal $$f; done;) 2>&1 | grep 'No such' | cut -d':' -f2 ); do \
                dep_basename=$$(basename "$$dep"); \
                install_name_tool -change "$$dep" "@rpath/$$dep_basename" "$$lib"; \
            done; \
        done; \
    fi;
endef
215 | 
# URL of the prebuilt SWIG archive to download. A non-empty value supplied by
# the caller wins; otherwise one is chosen from the substring found in $(OS)
# (Linux, Darwin, or _NT for Windows). Any other platform aborts the build
# with an error asking for SWIG_DIST_URL to be set explicitly.
# Note that the double quotes are part of the variable's value.
SWIG_DIST_URL ?=
ifeq ($(SWIG_DIST_URL),)
ifeq ($(findstring Linux,$(OS)),Linux)
SWIG_DIST_URL := "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/ds-swig.linux.amd64.tar.gz"
else ifeq ($(findstring Darwin,$(OS)),Darwin)
SWIG_DIST_URL := "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/ds-swig.darwin.amd64.tar.gz"
else ifeq ($(findstring _NT,$(OS)),_NT)
SWIG_DIST_URL := "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/ds-swig.win.amd64.tar.gz"
else
$(error There is no prebuilt SWIG available for your platform. Please produce one and set SWIG_DIST_URL.)
endif # findstring()
endif # ($(SWIG_DIST_URL),)
228 | 
# Directory that holds the prebuilt SWIG distribution. Unless the caller
# provides a value, it is the ds-swig/ folder next to the makefile that was
# parsed last (this should point to the native_client/ subdir by default).
SWIG_ROOT ?= $(abspath $(shell dirname "$(lastword $(MAKEFILE_LIST))"))/ds-swig
ifeq ($(findstring _NT,$(OS)),_NT)
# Convert the path to its Unix form on Windows. This has to be `:=`, not `?=`:
# SWIG_ROOT is always defined by the time we get here, so a conditional
# assignment would never run and the conversion would be silently skipped.
SWIG_ROOT := $(shell cygpath -u "$(SWIG_ROOT)")
endif
# Location of the SWIG library files inside the distribution.
SWIG_LIB ?= $(SWIG_ROOT)/share/swig/4.1.0/
235 | 
# Executable names (with the platform's suffix, if any) and the folder inside
# SWIG_ROOT where they live.
SWIG_BIN := swig$(PLATFORM_EXE_SUFFIX)
DS_SWIG_BIN := ds-swig$(PLATFORM_EXE_SUFFIX)
DS_SWIG_BIN_PATH := $(SWIG_ROOT)/bin

# Environment prefix for recipes that invoke swig: it sets SWIG_LIB and puts
# the downloaded binaries first on PATH.
DS_SWIG_ENV := SWIG_LIB="$(SWIG_LIB)" PATH="$(DS_SWIG_BIN_PATH):${PATH}"
241 | 
# Downloads the prebuilt SWIG archive, unpacks it into SWIG_ROOT and exposes
# the ds-swig binary under the plain `swig` name through a symlink.
# `ln -sf` replaces a link left behind by an earlier run, so the rule can be
# re-executed (after a failed download, or when PLATFORM_EXE_SUFFIX makes the
# link name differ from this target's name) without failing on "File exists".
$(DS_SWIG_BIN_PATH)/swig:
	mkdir -p $(SWIG_ROOT)
	curl -sSL "$(SWIG_DIST_URL)" | tar -C $(SWIG_ROOT) -zxf -
	ln -sf $(DS_SWIG_BIN) $(DS_SWIG_BIN_PATH)/$(SWIG_BIN)
246 | 
# Makes sure the SWIG binary is available (via the prerequisite) and then
# prints its version and library location, run with DS_SWIG_ENV so the
# downloaded copy is the one found on PATH.
ds-swig: $(DS_SWIG_BIN_PATH)/swig
	$(DS_SWIG_ENV) swig -version
	$(DS_SWIG_ENV) swig -swiglib
250 | 


--------------------------------------------------------------------------------
/src/settings.c:
--------------------------------------------------------------------------------
  1 | #include "settings.h"
  2 | 
  3 | #include <dirent.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | 
  8 | #include "file_utils.h"
  9 | #include "string_utils.h"
 10 | #include "trace.h"
 11 | #include "yargs.h"
 12 | 
 13 | static bool is_real_entry(const char* name, void* cookie) {
 14 |   return ((strcmp(name, ".") != 0) &&
 15 |     (strcmp(name, "..") != 0));
 16 | }
 17 | 
 18 | static char* available_languages(const char* languages_dir) {
 19 |   char** dirs_list;
 20 |   int dirs_list_length;
 21 |   const bool list_status =
 22 |     file_list_dir(languages_dir, &dirs_list, &dirs_list_length);
 23 |   if (!list_status) {
 24 |     return string_duplicate("");
 25 |   }
 26 |   char** real_dirs_list;
 27 |   int real_dirs_list_length;
 28 |   string_list_filter((const char**)(dirs_list), dirs_list_length, is_real_entry, NULL,
 29 |     &real_dirs_list, &real_dirs_list_length);
 30 |   string_list_free(dirs_list, dirs_list_length);
 31 | 
 32 |   char* result = string_join((const char**)(real_dirs_list), real_dirs_list_length, ", ");
 33 |   string_list_free(real_dirs_list, real_dirs_list_length);
 34 |   return result;
 35 | }
 36 | 
 37 | static char* language_description(Settings* settings) {
 38 |   char* available_languages_string =
 39 |     available_languages(settings->languages_dir);
 40 |   char* result = string_alloc_sprintf("Which language to look for (default '"
 41 |     "%s', can be %s)", settings->language, available_languages_string);
 42 |   free(available_languages_string);
 43 |   return result;
 44 | }
 45 | 
 46 | static void set_defaults(Settings* settings) {
 47 |   const char* env_language = getenv("LANG");
 48 |   if ((env_language == NULL) || (strlen(env_language) == 0)) {
 49 |     settings->language = string_duplicate("en_US");
 50 |   }
 51 |   else {
 52 |     char** parts;
 53 |     int parts_length;
 54 |     string_split(env_language, '.', 2, &parts, &parts_length);
 55 |     settings->language = string_duplicate(parts[0]);
 56 |     string_list_free(parts, parts_length);
 57 |   }
 58 |   settings->language_from_args = NULL;
 59 |   settings->source = NULL;
 60 |   settings->languages_dir = "/etc/spchcat/models/";
 61 |   settings->model = NULL;
 62 |   settings->scorer = NULL;
 63 |   settings->source_buffer_size = 160 * 4;
 64 |   settings->beam_width = 0;
 65 |   settings->lm_alpha = 0.0f;
 66 |   settings->lm_beta = 0.0f;
 67 |   settings->show_times = false;
 68 |   settings->has_versions = false;
 69 |   settings->extended_metadata = false;
 70 |   settings->json_output = false;
 71 |   settings->json_candidate_transcripts = 3;
 72 |   settings->stream_size = 0;
 73 |   settings->extended_stream_size = 0;
 74 |   settings->hot_words = NULL;
 75 |   settings->stream_capture_file = NULL;
 76 |   settings->stream_capture_duration = 16000;
 77 | }
 78 | 
 79 | static void find_model_for_language(Settings* settings) {
 80 |   // If the model filename was explicitly set on the command line, don't worry
 81 |   // about searching for it.
 82 |   if (settings->model != NULL) {
 83 |     // Make a copy of the string we got from the arg parsing, so that we can
 84 |     // free it ourselves, like the other paths below.
 85 |     settings->model = string_duplicate(settings->model);
 86 |     return;
 87 |   }
 88 | 
 89 |   // Override any language we've guessed from env variables with any args
 90 |   // specified on the command line.
 91 |   if (settings->language_from_args != NULL) {
 92 |     free(settings->language);
 93 |     settings->language = string_duplicate(settings->language_from_args);
 94 |   }
 95 | 
 96 |   // Look for the exact match to the language and country combination, and if
 97 |   // a model file exists at that path, use it.
 98 |   char* language_folder = file_join_paths(settings->languages_dir, settings->language);
 99 |   char* model_filename = file_find_one_with_suffix(language_folder, ".tflite");
100 |   if (model_filename != NULL) {
101 |     settings->model = file_join_paths(language_folder, model_filename);
102 |     free(model_filename);
103 |     free(language_folder);
104 |     return;
105 |   }
106 |   free(language_folder);
107 | 
108 |   // If the right country wasn't found, try falling back to any folder
109 |   // with the right language.
110 |   char** lang_parts;
111 |   int lang_parts_length;
112 |   string_split(settings->language, '_', 2, &lang_parts, &lang_parts_length);
113 |   char* lang_only = string_append(lang_parts[0], "_");
114 |   string_list_free(lang_parts, lang_parts_length);
115 |   char* lang_only_folder =
116 |     file_find_one_with_prefix(settings->languages_dir, lang_only);
117 |   free(lang_only);
118 |   if (lang_only_folder == NULL) {
119 |     fprintf(stderr, "Unable to find a language model for '%s' in '%s'",
120 |       settings->language, settings->languages_dir);
121 |     return;
122 |   }
123 |   char** path_parts;
124 |   int path_parts_length;
125 |   string_split(lang_only_folder, '/', -1, &path_parts,
126 |     &path_parts_length);
127 |   free(lang_only_folder);
128 |   char* found_language =
129 |     string_duplicate(path_parts[path_parts_length - 1]);
130 |   string_list_free(path_parts, path_parts_length);
131 |   char* found_language_folder =
132 |     file_join_paths(settings->languages_dir, found_language);
133 |   char* found_model_filename = file_find_one_with_suffix(found_language_folder,
134 |     ".tflite");
135 |   if (found_model_filename == NULL) {
136 |     fprintf(stderr, "Unable to find a language model for '%s' in '%s'\n",
137 |       settings->language, found_language_folder);
138 |     free(found_language_folder);
139 |     free(found_language);
140 |     return;
141 |   }
142 |   fprintf(stderr, "Warning: Language '%s' not found, falling back to '%s'\n",
143 |     settings->language, found_language);
144 |   free(settings->language);
145 |   settings->language = found_language;
146 |   settings->model = file_join_paths(found_language_folder, found_model_filename);
147 |   free(found_language_folder);
148 |   free(found_model_filename);
149 | }
150 | 
151 | static void find_scorer_for_language(Settings* settings) {
152 |   // If the scorer filename was explicitly set on the command line, don't worry
153 |   // about searching for it.
154 |   if (settings->scorer != NULL) {
155 |     if (strcmp(settings->scorer, "none") == 0) {
156 |       settings->scorer = NULL;
157 |     }
158 |     else {
159 |       // Make a copy of the string we got from the arg parsing, so that we can
160 |       // free it ourselves, like the other paths below.
161 |       settings->scorer = string_duplicate(settings->scorer);
162 |     }
163 |     return;
164 |   }
165 | 
166 |   char* language_folder =
167 |     file_join_paths(settings->languages_dir, settings->language);
168 | 
169 |   char* scorer = file_find_one_with_suffix(language_folder, ".scorer");
170 |   if ((scorer == NULL) ||
171 |     (strstr(scorer, "command") != NULL) ||
172 |     (strstr(scorer, "digit") != NULL) ||
173 |     (strstr(scorer, "yesno") != NULL)) {
174 |     // These are too small to be useful, so skip them.
175 |     free(scorer);
176 |     free(language_folder);
177 |     return;
178 |   }
179 | 
180 |   settings->scorer = file_join_paths(language_folder, scorer);
181 |   free(scorer);
182 |   free(language_folder);
183 | }
184 | 
185 | static bool set_source(Settings* settings) {
186 |   const int files_length = yargs_get_unnamed_length();
187 |   if (settings->source != NULL) {
188 |     if ((files_length != 0) &&
189 |       (strcmp(settings->source, "file") != 0)) {
190 |       fprintf(stderr,
191 |         "Source '%s' was specified, but files were also passed as arguments.\n",
192 |         settings->source);
193 |       return false;
194 |     }
195 |     else if ((files_length == 0) &&
196 |       (strcmp(settings->source, "file") == 0)) {
197 |       fprintf(stderr,
198 |         "File source was specified, but no files were passed as arguments.\n");
199 |       return false;
200 |     }
201 |   }
202 |   else if (files_length == 0) {
203 |     settings->source = "mic";
204 |   }
205 |   else {
206 |     settings->source = "file";
207 |   }
208 | 
209 |   if (strcmp(settings->source, "file") == 0) {
210 |     settings->files_count = files_length;
211 |     settings->files = calloc(files_length, sizeof(char*));
212 |     for (int i = 0; i < files_length; ++i) {
213 |       settings->files[i] = string_duplicate(yargs_get_unnamed(i));
214 |     }
215 |   }
216 |   else {
217 |     settings->files_count = 0;
218 |     settings->files = NULL;
219 |   }
220 | 
221 |   return true;
222 | }
223 | 
224 | Settings* settings_init_from_argv(int argc, char** argv) {
225 | 
226 |   Settings* settings = (Settings*)(calloc(1, sizeof(Settings)));
227 |   set_defaults(settings);
228 | 
229 |   char* language_description_string = language_description(settings);
230 |   YargsFlag language_flag = YARGS_STRING("language", "l",
231 |     &settings->language_from_args, language_description_string);
232 | 
233 |   bool show_help = false;
234 |   const YargsFlag flags[] = {
235 |     YARGS_BOOL("help", "?", &show_help, "Displays usage information"),
236 |     language_flag,
237 |     YARGS_STRING("source", "s", &settings->source, ""),
238 |     YARGS_STRING("hot_words", "h", &settings->hot_words, ""),
239 |     YARGS_STRING("languages_dir", "d", &settings->languages_dir, ""),
240 |     YARGS_STRING("model", "m", (const char**)(&settings->model), ""),
241 |     YARGS_STRING("scorer", "c", (const char**)(&settings->scorer), ""),
242 |     YARGS_INT32("source_buffer_size", "o", &settings->source_buffer_size, ""),
243 |     YARGS_INT32("beam_width", "b", &settings->beam_width, ""),
244 |     YARGS_FLOAT("lm_alpha", "a", &settings->lm_alpha, ""),
245 |     YARGS_FLOAT("lm_beta", "e", &settings->lm_beta, ""),
246 |     YARGS_BOOL("show_times", "t", &settings->show_times, ""),
247 |     YARGS_BOOL("has_versions", "q", &settings->has_versions, ""),
248 |     YARGS_BOOL("extended_metadata", "x", &settings->extended_metadata, ""),
249 |     YARGS_BOOL("json_output", "j", &settings->json_output, ""),
250 |     YARGS_INT32("json_candidate_transcripts", "n", &settings->json_candidate_transcripts, ""),
251 |     YARGS_INT32("stream_size", "z", &settings->stream_size, ""),
252 |     YARGS_INT32("extended_stream_size", "r", &settings->extended_stream_size, ""),
253 |     YARGS_STRING("stream_capture_file", "f", &settings->stream_capture_file, ""),
254 |     YARGS_INT32("stream_capture_duration", "g", &settings->stream_capture_duration, ""),
255 |   };
256 |   const int flags_length = sizeof(flags) / sizeof(flags[0]);
257 | 
258 |   const char* app_description =
259 |     "Speech recognition tool to convert audio to text transcripts.";
260 |   const bool init_status = yargs_init(flags, flags_length,
261 |     app_description, argv, argc);
262 |   if (!init_status) {
263 |     free(language_description_string);
264 |     settings_free(settings);
265 |     return NULL;
266 |   }
267 | 
268 |   if (show_help) {
269 |     // Make sure we pick up any languages from a possibly-changed model path.
270 |     free(language_flag.description);
271 |     language_flag.description = language_description(settings);
272 |     yargs_print_usage(flags, flags_length, app_description);
273 |     yargs_free();
274 |     free(language_flag.description);
275 |     free(settings);
276 |     return NULL;
277 |   }
278 | 
279 |   find_model_for_language(settings);
280 |   if (settings->model == NULL) {
281 |     yargs_free();
282 |     free(language_flag.description);
283 |     free(settings);
284 |     return NULL;
285 |   }
286 | 
287 |   find_scorer_for_language(settings);
288 | 
289 |   if (!set_source(settings)) {
290 |     yargs_free();
291 |     free(language_flag.description);
292 |     free(settings->language);
293 |     free(settings->model);
294 |     free(settings->scorer);
295 |     free(settings);
296 |     return NULL;
297 |   }
298 | 
299 |   free(language_description_string);
300 | 
301 |   return settings;
302 | }
303 | 
304 | void settings_free(Settings* settings) {
305 |   yargs_free();
306 |   if (settings == NULL) {
307 |     return;
308 |   }
309 |   free(settings->language);
310 |   free(settings->model);
311 |   free(settings->scorer);
312 |   string_list_free(settings->files, settings->files_count);
313 |   free(settings);
314 | }
315 | 


--------------------------------------------------------------------------------
/src/args.h:
--------------------------------------------------------------------------------
  1 | #ifndef __ARGS_H__
  2 | #define __ARGS_H__
  3 | 
  4 | #if defined(_MSC_VER)
  5 | #include "getopt_win.h"
  6 | #else
  7 | #include <getopt.h>
  8 | #endif
  9 | #include <iostream>
 10 | #include <sys/stat.h>
 11 | 
 12 | #include "coqui-stt.h"
 13 | 
// Global option state filled in by ProcessArgs(). The initial values below
// are the defaults used when the matching flag isn't given.
// NOTE(review): these are definitions in a header, so including this file
// from more than one translation unit would produce duplicate symbols --
// confirm it is only included once.

// argv[0], recorded by ProcessArgs().
const char* app_name = NULL;

// Language code; derived from LANG or --language by ProcessArgs().
const char* language = NULL;

// Audio source name from --source; chosen automatically when left NULL.
const char* source = NULL;

// File names given after the options on the command line.
std::vector<std::string> filename_args;

// Folder that holds one sub-folder of models per language.
const char* default_languages_dir = "/etc/spchcat/models/";
std::string languages_dir = default_languages_dir;

// Path to the model file (--model).
const char* model = NULL;

// Path to the scorer file (--scorer).
const char* scorer = NULL;

// Value of --source_buffer_size.
int source_buffer_size = 160 * 4;

// True once --beam_width has been seen.
bool set_beamwidth = false;

int beam_width = 0;

// True once --lm_alpha or --lm_beta has been seen.
bool set_alphabeta = false;

float lm_alpha = 0.f;

float lm_beta = 0.f;

// Set by -t.
bool show_times = false;

// Set by --version.
bool has_versions = false;

// Set by --extended.
bool extended_metadata = false;

// Set by --json.
bool json_output = false;

// Value of --candidate_transcripts.
int json_candidate_transcripts = 3;

// Value of --stream; ProcessArgs() requires a non-negative multiple of 160.
int stream_size = 0;

// Value of --extended_stream; same constraint as stream_size.
int extended_stream_size = 0;

// Value of --hot_words.
char* hot_words = NULL;
 56 | 
 57 | const std::string ListAvailableLanguages() {
 58 |   std::string result;
 59 |   DIR* dp = opendir(languages_dir.c_str());
 60 |   if (dp != NULL)
 61 |   {
 62 |     struct dirent* ep;
 63 |     while (ep = readdir(dp))
 64 |     {
 65 |       if (ep->d_name[0] == '.') {
 66 |         continue;
 67 |       }
 68 |       if (result.length() != 0) {
 69 |         result += ", ";
 70 |       }
 71 |       result += "'" + std::string(ep->d_name) + "'";
 72 |     }
 73 |     closedir(dp);
 74 |   }
 75 |   return result;
 76 | }
 77 | 
 78 | void PrintHelp(const char* bin)
 79 | {
 80 |   std::cout <<
 81 |     "Usage: " << bin << " [--source mic|system|file] [--language <language code>] <WAV files>\n"
 82 |     "\n"
 83 |     "Speech recognition tool to convert audio to text transcripts.\n"
 84 |     "\n"
 85 |     "\t--language\tWhich language to look for (default '" << language << "', can be " << ListAvailableLanguages() << ")\n"
 86 |     "\t--source NAME\tName of the audio source (default 'mic', can also be 'system', 'file')\n"
 87 |     "\t--help\t\tShow help\n"
 88 |     "\nAdvanced settings:\n\n"
 89 |     "\t--languages_dir\t\t\tPath to folder containing models (default '" << default_languages_dir << "')\n"
 90 |     "\t--model MODEL\t\t\tPath to the model (protocol buffer binary file)\n"
 91 |     "\t--scorer SCORER\t\t\tPath to the external scorer file\n"
 92 |     "\t--source_buffer_size SIZE\tNumber of samples to fetch from source\n"
 93 |     "\t--audio AUDIO\t\t\tPath to the audio file to run (WAV format)\n"
 94 |     "\t--beam_width BEAM_WIDTH\t\tValue for decoder beam width (int)\n"
 95 |     "\t--lm_alpha LM_ALPHA\t\tValue for language model alpha param (float)\n"
 96 |     "\t--lm_beta LM_BETA\t\tValue for language model beta param (float)\n"
 97 |     "\t-t\t\t\t\tRun in benchmark mode, output mfcc & inference time\n"
 98 |     "\t--extended\t\t\tOutput string from extended metadata\n"
 99 |     "\t--json\t\t\t\tExtended output, shows word timings as JSON\n"
100 |     "\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n"
101 |     "\t--stream size\t\t\tRun in stream mode, output intermediate results\n"
102 |     "\t--extended_stream size\t\tRun in stream mode using metadata output, output intermediate results\n"
103 |     "\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n"
104 |     "\t--version\t\t\tPrint version and exits\n";
105 |   char* version = STT_Version();
106 |   std::cerr << "Coqui STT " << version << "\n";
107 |   STT_FreeString(version);
108 |   exit(1);
109 | }
110 | 
// Returns true when stat() succeeds on `path`, i.e. something exists there
// (a file, a directory, or anything else) and can be examined.
bool DoesFileExist(const std::string& path) {
  struct stat info;
  const int status = stat(path.c_str(), &info);
  return status == 0;
}
115 | 
// Returns true when `fullString` ends with `ending`. An empty ending always
// matches; an ending longer than the string never does.
bool HasEnding(std::string const& fullString, std::string const& ending) {
  const size_t ending_length = ending.length();
  if (fullString.length() < ending_length) {
    return false;
  }
  const size_t tail_start = fullString.length() - ending_length;
  return fullString.compare(tail_start, ending_length, ending) == 0;
}
124 | 
// Returns true when `fullString` starts with `prefix`, comparing the two as
// C strings over the first prefix.size() characters.
bool HasPrefix(std::string const& fullString, std::string const& prefix) {
  const char* candidate = fullString.c_str();
  const char* wanted = prefix.c_str();
  const int difference = strncmp(candidate, wanted, prefix.size());
  return difference == 0;
}
128 | 
129 | std::string FindFileWithExtension(const std::string& folder, const std::string& extension,
130 |   const std::vector<std::string>& excludes = {}) {
131 |   std::string result;
132 |   DIR* dp = opendir(folder.c_str());
133 |   if (dp != NULL)
134 |   {
135 |     struct dirent* ep;
136 |     while (ep = readdir(dp))
137 |     {
138 |       std::string filename = ep->d_name;
139 |       if (HasEnding(filename, extension)) {
140 |         bool exclusion_found = false;
141 |         for (const std::string& exclude : excludes) {
142 |           if (filename.find(exclude) != std::string::npos) {
143 |             exclusion_found = true;
144 |           }
145 |         }
146 |         if (!exclusion_found) {
147 |           result = folder + filename;
148 |           break;
149 |         }
150 |       }
151 |     }
152 |     closedir(dp);
153 |   }
154 |   return result;
155 | }
156 | 
157 | std::string FindFileWithPrefix(const std::string& folder, const std::string& prefix) {
158 |   std::string result;
159 |   DIR* dp = opendir(folder.c_str());
160 |   if (dp != NULL)
161 |   {
162 |     struct dirent* ep;
163 |     while (ep = readdir(dp))
164 |     {
165 |       std::string filename = ep->d_name;
166 |       if (HasPrefix(filename, prefix)) {
167 |         result = folder + filename;
168 |         break;
169 |       }
170 |     }
171 |     closedir(dp);
172 |   }
173 |   return result;
174 | }
175 | 
// Splits `str` on `delim` and appends the pieces to `out`. Runs of
// delimiters are treated as one and leading/trailing delimiters are ignored,
// so no empty pieces are produced. `out` is appended to, not cleared.
void SplitString(std::string const& str, const char delim,
  std::vector<std::string>& out)
{
  size_t token_start = str.find_first_not_of(delim, 0);
  while (token_start != std::string::npos)
  {
    // token_end is npos for the final piece; substr() then runs to the end.
    const size_t token_end = str.find(delim, token_start);
    out.push_back(str.substr(token_start, token_end - token_start));
    token_start = str.find_first_not_of(delim, token_end);
  }
}
188 | 
189 | bool ProcessArgs(int argc, char** argv)
190 | {
191 |   app_name = argv[0];
192 | 
193 |   language = getenv("LANG");
194 |   if (language == NULL) {
195 |     language = "en_US";
196 |   }
197 |   else {
198 |     static std::vector<std::string> parts;
199 |     SplitString(language, '.', parts);
200 |     language = parts[0].c_str();
201 |   }
202 | 
203 |   const char* const short_opts = "s:l:m:y:o:z:b:c:d:tejs:r:R:w:vh";
204 |   const option long_opts[] = {
205 |           {"source", required_argument, nullptr, 's'},
206 |           {"language", required_argument, nullptr, 'l'},
207 |           {"model", required_argument, nullptr, 'm'},
208 |           {"languages_dir", required_argument, nullptr, 'y'},
209 |           {"scorer", required_argument, nullptr, 'o'},
210 |           {"source_buffer_size", required_argument, nullptr, 'z'},
211 |           {"audio", required_argument, nullptr, 'a'},
212 |           {"beam_width", required_argument, nullptr, 'b'},
213 |           {"lm_alpha", required_argument, nullptr, 'c'},
214 |           {"lm_beta", required_argument, nullptr, 'd'},
215 |           {"t", no_argument, nullptr, 't'},
216 |           {"extended", no_argument, nullptr, 'e'},
217 |           {"json", no_argument, nullptr, 'j'},
218 |           {"candidate_transcripts", required_argument, nullptr, 150},
219 |           {"stream", required_argument, nullptr, 'r'},
220 |           {"extended_stream", required_argument, nullptr, 'R'},
221 |           {"hot_words", required_argument, nullptr, 'w'},
222 |           {"version", no_argument, nullptr, 'v'},
223 |           {"help", no_argument, nullptr, 'h'},
224 |           {nullptr, no_argument, nullptr, 0}
225 |   };
226 | 
227 |   bool should_print_help = false;
228 | 
229 |   while (true)
230 |   {
231 |     const auto opt = getopt_long(argc, argv, short_opts, long_opts, nullptr);
232 | 
233 |     if (-1 == opt)
234 |       break;
235 | 
236 |     switch (opt)
237 |     {
238 |     case 'l':
239 |       language = optarg;
240 |       break;
241 | 
242 |     case 's':
243 |       source = optarg;
244 |       break;
245 | 
246 |     case 'y':
247 |       languages_dir = optarg;
248 |       break;
249 | 
250 |     case 'm':
251 |       model = optarg;
252 |       break;
253 | 
254 |     case 'o':
255 |       scorer = optarg;
256 |       break;
257 | 
258 |     case 'z':
259 |       source_buffer_size = atoi(optarg);
260 |       break;
261 | 
262 |     case 'b':
263 |       set_beamwidth = true;
264 |       beam_width = atoi(optarg);
265 |       break;
266 | 
267 |     case 'c':
268 |       set_alphabeta = true;
269 |       lm_alpha = atof(optarg);
270 |       break;
271 | 
272 |     case 'd':
273 |       set_alphabeta = true;
274 |       lm_beta = atof(optarg);
275 |       break;
276 | 
277 |     case 't':
278 |       show_times = true;
279 |       break;
280 | 
281 |     case 'e':
282 |       extended_metadata = true;
283 |       break;
284 | 
285 |     case 'j':
286 |       json_output = true;
287 |       break;
288 | 
289 |     case 150:
290 |       json_candidate_transcripts = atoi(optarg);
291 |       break;
292 | 
293 |     case 'r':
294 |       stream_size = atoi(optarg);
295 |       break;
296 | 
297 |     case 'R':
298 |       extended_stream_size = atoi(optarg);
299 |       break;
300 | 
301 |     case 'v':
302 |       has_versions = true;
303 |       break;
304 | 
305 |     case 'w':
306 |       hot_words = optarg;
307 |       break;
308 | 
309 |     case 'h': // -h or --help
310 |     case '?': // Unrecognized option
311 |     default:
312 |       should_print_help = true;
313 |       break;
314 |     }
315 |   }
316 | 
317 |   // Capture any non '-' prefixed file names at the end of the command line.
318 |   if (optind < argc) {
319 |     do {
320 |       char* file = argv[optind];
321 |       filename_args.push_back(file);
322 |     } while (++optind < argc);
323 |   }
324 | 
325 |   if (has_versions) {
326 |     char* version = STT_Version();
327 |     std::cout << "Coqui " << version << "\n";
328 |     STT_FreeString(version);
329 |     return false;
330 |   }
331 | 
332 |   if (!model) {
333 |     // Look for the exact match to the language and country combination.
334 |     const std::string language_folder = languages_dir + language + "/";
335 |     static std::string model_string = FindFileWithExtension(language_folder, ".tflite");
336 |     if (model_string.length() == 0) {
337 |       // If the right country wasn't found, try falling back to any folder
338 |       // with the right language.
339 |       std::vector<std::string> lang_parts;
340 |       SplitString(language, '_', lang_parts);
341 |       const std::string& lang_only = lang_parts[0] + "_";
342 |       const std::string lang_only_folder = FindFileWithPrefix(languages_dir, lang_only);
343 |       if (lang_only_folder.length() > 0) {
344 |         std::vector<std::string> path_parts;
345 |         SplitString(lang_only_folder, '/', path_parts);
346 |         static std::string found_language = path_parts[path_parts.size() - 1];
347 |         const std::string found_language_folder = languages_dir + found_language + "/";
348 |         model_string = FindFileWithExtension(found_language_folder, ".tflite");
349 |         if (model_string.length() > 0) {
350 |           fprintf(stderr, "Warning: Language '%s' not found, falling back to '%s'\n",
351 |             language, found_language.c_str());
352 |           language = found_language.c_str();
353 |         }
354 |       }
355 |     }
356 | 
357 |     if (model_string.length() == 0) {
358 |       fprintf(stderr, "Warning: Model not found in %s\n", language_folder.c_str());
359 |     }
360 |     else {
361 |       model = model_string.c_str();
362 |     }
363 |   }
364 | 
365 |   if (!scorer) {
366 |     const std::string language_folder = languages_dir + language + "/";
367 |     static std::string scorer_string = FindFileWithExtension(language_folder, ".scorer",
368 |       { "command", "digits", "yesno" });
369 |     if (scorer_string.length() == 0) {
370 |       fprintf(stderr, "Warning: Scorer not found in %s\n", language_folder.c_str());
371 |     }
372 |     else {
373 |       scorer = scorer_string.c_str();
374 |     }
375 |   }
376 | 
377 |   if ((stream_size < 0 || stream_size % 160 != 0) || (extended_stream_size < 0 || extended_stream_size % 160 != 0)) {
378 |     std::cout <<
379 |       "Stream buffer size must be multiples of 160\n";
380 |     return false;
381 |   }
382 | 
383 |   if (source == NULL) {
384 |     if (filename_args.size() == 0) {
385 |       source = "mic";
386 |     }
387 |     else {
388 |       source = "file";
389 |     }
390 |   }
391 |   else {
392 |     if ((filename_args.size() > 0) && (strcmp(source, "file") != 0)) {
393 |       std::cout <<
394 |         "Files were specified on command line, but --source was not set to file\n";
395 |       return false;
396 |     }
397 |   }
398 | 
399 |   if (should_print_help) {
400 |     PrintHelp(argv[0]);
401 |     return false;
402 |   }
403 | 
404 |   return true;
405 | }
406 | 
407 | #endif // __ARGS_H__
408 | 


--------------------------------------------------------------------------------
/src/app_main.c:
--------------------------------------------------------------------------------
  1 | #include "app_main.h"
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | #include <pulse/simple.h>
  8 | #include <pulse/error.h>
  9 | 
 10 | #include "coqui-stt.h"
 11 | 
 12 | #include "audio_buffer.h"
 13 | #include "pa_list_devices.h"
 14 | #include "settings.h"
 15 | #include "string_utils.h"
 16 | #include "trace.h"
 17 | #include "wav_io.h"
 18 | 
 19 | static bool load_model(const Settings* settings, ModelState** model_state) {
 20 |   const int create_status = STT_CreateModel(settings->model, model_state);
 21 |   if (create_status != 0) {
 22 |     char* error_message = STT_ErrorCodeToErrorMessage(create_status);
 23 |     fprintf(stderr, "STT_CreateModel failed with '%s' (%d)\n", error_message,
 24 |       create_status);
 25 |     free(error_message);
 26 |     return false;
 27 |   }
 28 | 
 29 |   if (settings->beam_width > 0) {
 30 |     const int beam_width_status = STT_SetModelBeamWidth(*model_state,
 31 |       settings->beam_width);
 32 |     if (beam_width_status != 0) {
 33 |       char* error_message = STT_ErrorCodeToErrorMessage(beam_width_status);
 34 |       fprintf(stderr, "STT_SetModelBeamWidth failed with '%s' (%d)\n", error_message,
 35 |         beam_width_status);
 36 |       free(error_message);
 37 |     }
 38 |   }
 39 | 
 40 |   return true;
 41 | }
 42 | 
 43 | static bool load_scorer(const Settings* settings, ModelState* model_state) {
 44 |   if (settings->scorer == NULL) {
 45 |     return true;
 46 |   }
 47 |   const int scorer_status = STT_EnableExternalScorer(model_state,
 48 |     settings->scorer);
 49 |   if (scorer_status != 0) {
 50 |     char* error_message = STT_ErrorCodeToErrorMessage(scorer_status);
 51 |     fprintf(stderr, "STT_EnableExternalScorer failed with '%s' (%d)\n", error_message,
 52 |       scorer_status);
 53 |     free(error_message);
 54 |     return false;
 55 |   }
 56 |   if (settings->lm_alpha > 0.0f) {
 57 |     const int alpha_status = STT_SetScorerAlphaBeta(model_state,
 58 |       settings->lm_alpha, settings->lm_beta);
 59 |     if (alpha_status != 0) {
 60 |       char* error_message = STT_ErrorCodeToErrorMessage(alpha_status);
 61 |       fprintf(stderr, "STT_SetScorerAlphaBeta failed with '%s' (%d)\n", error_message,
 62 |         alpha_status);
 63 |       free(error_message);
 64 |       return false;
 65 |     }
 66 |   }
 67 |   if (settings->hot_words) {
 68 |     char** parts = NULL;
 69 |     int parts_length = 0;
 70 |     string_split(settings->hot_words, ',', -1, &parts, &parts_length);
 71 |     for (int i = 0; i < parts_length; ++i) {
 72 |       char* part = parts[i];
 73 |       char** entry_parts = NULL;
 74 |       int entry_parts_length = 0;
 75 |       string_split(part, ':', 2, &entry_parts, &entry_parts_length);
 76 |       if (entry_parts_length != 2) {
 77 |         fprintf(stderr,
 78 |           "Expected format 'word:number' in --hotwords but found '%s'.\n",
 79 |           part);
 80 |         string_list_free(entry_parts, entry_parts_length);
 81 |         string_list_free(parts, parts_length);
 82 |         return false;
 83 |       }
 84 |       char* hot_word = entry_parts[0];
 85 |       char* boost_string = entry_parts[1];
 86 |       char* conversion_end = NULL;
 87 |       const float boost = strtof(boost_string, &conversion_end);
 88 |       const int converted_length = (conversion_end - boost_string);
 89 |       if (converted_length != strlen(boost_string)) {
 90 |         fprintf(stderr,
 91 |           "Expected format 'word:number' in --hotwords but found '%s'.\n",
 92 |           part);
 93 |         string_list_free(entry_parts, entry_parts_length);
 94 |         string_list_free(parts, parts_length);
 95 |         return false;
 96 |       }
 97 |       const int hot_word_status = STT_AddHotWord(model_state, hot_word, boost);
 98 |       if (hot_word_status != 0) {
 99 |         char* error_message = STT_ErrorCodeToErrorMessage(hot_word_status);
100 |         fprintf(stderr, "STT_AddHotWord failed with '%s' (%d)\n", error_message,
101 |           hot_word_status);
102 |         string_list_free(entry_parts, entry_parts_length);
103 |         string_list_free(parts, parts_length);
104 |         free(error_message);
105 |         return false;
106 |       }
107 |       string_list_free(entry_parts, entry_parts_length);
108 |     }
109 |     string_list_free(parts, parts_length);
110 |   }
111 | 
112 |   return true;
113 | }
114 | 
// Maps the --source setting onto the device name handed to PulseAudio.
//
//   "mic"    -> NULL.
//   "system" -> a copy of the first input device whose name ends with
//               ".monitor", or NULL (with a message on stderr) if there is
//               no such device.
//   other    -> a copy of the source string itself.
//
// Any non-NULL result is owned by the caller and must be released with
// free().
static char* get_device_name(const char* source) {
  if (strcmp(source, "mic") == 0) {
    return NULL;
  }
  else if (strcmp(source, "system") == 0) {
    char** input_devices = NULL;
    int input_devices_length = 0;
    get_input_devices(&input_devices, &input_devices_length);
    char* result = NULL;
    for (int i = 0; i < input_devices_length; ++i) {
      char* input_device = input_devices[i];
      if (string_ends_with(input_device, ".monitor")) {
        result = string_duplicate(input_device);
        break;
      }
    }
    if (result == NULL) {
      fprintf(stderr, "System source was specified, but none was found.\n");
    }
    // The match was duplicated above, so the enumerated list itself is no
    // longer needed and would otherwise be leaked.
    // NOTE(review): assumes get_input_devices() hands back a list that is
    // released with string_list_free() - confirm against pa_list_devices.h.
    string_list_free(input_devices, input_devices_length);
    return result;
  }
  else {
    return string_duplicate(source);
  }
}
140 | 
141 | static char* plain_text_from_transcript(const CandidateTranscript* transcript) {
142 |   char* result = string_duplicate("");
143 |   float previous_time = 0.0f;
144 |   for (int i = 0; i < transcript->num_tokens; ++i) {
145 |     const TokenMetadata* token = &transcript->tokens[i];
146 |     const float current_time = token->start_time;
147 |     const float time_since_previous = current_time - previous_time;
148 |     if (time_since_previous > 1.0f) {
149 |       const int result_length = strlen(result);
150 |       if (result[result_length - 1] == ' ') {
151 |         result[result_length - 1] = '\n';
152 |       }
153 |       else {
154 |         result = string_append_in_place(result, "\n");
155 |       }
156 |       if (strcmp(token->text, " ") != 0) {
157 |         result = string_append_in_place(result, token->text);
158 |       }
159 |     }
160 |     else {
161 |       result = string_append_in_place(result, token->text);
162 |     }
163 |     previous_time = current_time;
164 |   }
165 |   return result;
166 | }
167 | 
// Writes the parts of current_text that are new relative to previous_text.
//
// Both texts are split on '\n'. When the current text has more lines than
// the previous one, every line from the previous text's last line up to
// (but excluding) the current last line is written as "\r<line>\n". The
// current last line is then written as "\r<line>" followed by eight spaces
// and no newline, and the stream is flushed.
//
// Nothing is written when previous_text is NULL or when the two texts are
// identical. A NULL file means stdout.
static void print_changed_lines(const char* current_text,
  const char* previous_text, FILE* file) {
  // Has anything changed since last time?
  if ((previous_text == NULL) ||
    (strcmp(current_text, previous_text) == 0)) {
    return;
  }

  if (file == NULL) {
    file = stdout;
  }

  char** current_lines = NULL;
  int current_lines_length = 0;
  string_split(current_text, '\n', -1, &current_lines, &current_lines_length);

  char** previous_lines = NULL;
  int previous_lines_length = 0;
  string_split(previous_text, '\n', -1, &previous_lines,
    &previous_lines_length);

  if (current_lines_length > previous_lines_length) {
    // Start from the previous text's last line, since that line may have
    // been extended before the break was added.
    int start_index = (previous_lines_length - 1);
    if (start_index < 0) {
      start_index = 0;
    }
    for (int i = start_index; i < (current_lines_length - 1); ++i) {
      fprintf(file, "\r%s\n", current_lines[i]);
    }
  }

  // Guard the last-line lookup: if the split produced no entries at all
  // (for example for an empty current_text), indexing [length - 1] would
  // read before the start of the list.
  if (current_lines_length > 0) {
    fprintf(file, "\r%s        ", current_lines[current_lines_length - 1]);
  }
  fflush(file);

  string_list_free(current_lines, current_lines_length);
  string_list_free(previous_lines, previous_lines_length);
}
205 | 
206 | static void output_streaming_transcript(const Metadata* current_metadata,
207 |   const Metadata* previous_metadata) {
208 |   const CandidateTranscript* current_transcript =
209 |     &current_metadata->transcripts[0];
210 |   char* current_text = plain_text_from_transcript(current_transcript);
211 |   char* previous_text;
212 |   if (previous_metadata == NULL) {
213 |     previous_text = string_duplicate("");
214 |   }
215 |   else {
216 |     const CandidateTranscript* previous_transcript =
217 |       &previous_metadata->transcripts[0];
218 |     previous_text = plain_text_from_transcript(previous_transcript);
219 |   }
220 | 
221 |   print_changed_lines(current_text, previous_text, stdout);
222 | 
223 |   free(current_text);
224 |   free(previous_text);
225 | }
226 | 
227 | static bool process_file(const Settings* settings, ModelState* model_state,
228 |   const char* filename) {
229 |   AudioBuffer* buffer = NULL;
230 |   if (!wav_io_load(filename, &buffer)) {
231 |     return false;
232 |   }
233 |   Metadata* metadata = STT_SpeechToTextWithMetadata(model_state, buffer->data,
234 |     buffer->samples_per_channel, 1);
235 |   output_streaming_transcript(metadata, NULL);
236 |   audio_buffer_free(buffer);
237 |   return true;
238 | }
239 | 
240 | static bool process_files(const Settings* settings, ModelState* model_state) {
241 |   for (int i = 0; i < settings->files_count; ++i) {
242 |     if (!process_file(settings, model_state, settings->files[i])) {
243 |       return false;
244 |     }
245 |   }
246 |   return true;
247 | }
248 | 
249 | static bool process_live_input(const Settings* settings, ModelState* model_state) {
250 |   char* device_name = get_device_name(settings->source);
251 | 
252 |   const uint32_t model_rate = STT_GetModelSampleRate(model_state);
253 |   const pa_sample_spec sample_spec = { PA_SAMPLE_S16LE, model_rate, 1 };
254 |   int pa_error;
255 |   pa_simple* source_stream = pa_simple_new(
256 |     NULL, yargs_app_name(), PA_STREAM_RECORD, device_name, yargs_app_name(),
257 |     &sample_spec, NULL, NULL, &pa_error);
258 |   if (source_stream == NULL) {
259 |     if (device_name == NULL) {
260 |       fprintf(stderr, "Unable to open default audio input device.\n");
261 |     }
262 |     else {
263 |       fprintf(stderr,
264 |         "Unable to open audio input device named '%s', from source '%s'.\n",
265 |         device_name, settings->source);
266 |     }
267 |     fprintf(stderr, "The command 'pactl list sources' will show available devices.\n");
268 |     fprintf(stderr, "You can use the contents of the 'Name:' field as the '--source' argument to specify one.\n");
269 |     free(device_name);
270 |     return false;
271 |   }
272 | 
273 |   StreamingState* streaming_state = NULL;
274 |   const int stream_error = STT_CreateStream(model_state, &streaming_state);
275 |   if (stream_error != STT_ERR_OK) {
276 |     const char* error_message = STT_ErrorCodeToErrorMessage(stream_error);
277 |     fprintf(stderr, "STT_CreateStream() failed with '%s'\n", error_message);
278 |     pa_simple_free(source_stream);
279 |     free(device_name);
280 |     return false;
281 |   }
282 | 
283 |   const size_t source_buffer_byte_count = settings->source_buffer_size * 2;
284 |   int16_t* source_buffer = malloc(source_buffer_byte_count);
285 | 
286 |   AudioBuffer* capture_buffer = NULL;
287 |   if (settings->stream_capture_file != NULL) {
288 |     capture_buffer =
289 |       audio_buffer_alloc(model_rate, settings->stream_capture_duration, 1);
290 |   }
291 |   int stream_capture_offset = 0;
292 | 
293 |   Metadata* previous_metadata = NULL;
294 |   while (true) {
295 |     int read_error;
296 |     const int read_result = pa_simple_read(source_stream, source_buffer,
297 |       source_buffer_byte_count, &read_error);
298 |     if (read_result < 0) {
299 |       fprintf(stderr, "pa_simple_read() failed with '%s'.\n",
300 |         pa_strerror(read_error));
301 |       break;
302 |     }
303 |     if (capture_buffer != NULL) {
304 |       if ((stream_capture_offset + settings->source_buffer_size) > settings->stream_capture_duration) {
305 |         break;
306 |       }
307 |       int16_t* current_capture = capture_buffer->data + stream_capture_offset;
308 |       memcpy(current_capture, source_buffer, source_buffer_byte_count);
309 |       stream_capture_offset += settings->source_buffer_size;
310 |     }
311 | 
312 |     STT_FeedAudioContent(streaming_state, source_buffer,
313 |       settings->source_buffer_size);
314 |     Metadata* current_metadata = STT_IntermediateDecodeWithMetadata(streaming_state, 1);
315 | 
316 |     output_streaming_transcript(current_metadata, previous_metadata);
317 | 
318 |     if (previous_metadata != NULL) {
319 |       STT_FreeMetadata(previous_metadata);
320 |     }
321 |     previous_metadata = current_metadata;
322 |   }
323 | 
324 |   if (capture_buffer != NULL) {
325 |     wav_io_save(settings->stream_capture_file, capture_buffer);
326 |     audio_buffer_free(capture_buffer);
327 |   }
328 | 
329 |   if (previous_metadata != NULL) {
330 |     STT_FreeMetadata(previous_metadata);
331 |   }
332 |   pa_simple_free(source_stream);
333 |   free(device_name);
334 |   return true;
335 | }
336 | 
337 | static bool process_audio(const Settings* settings, ModelState* model_state) {
338 |   if (strcmp(settings->source, "file") == 0) {
339 |     return process_files(settings, model_state);
340 |   }
341 |   else {
342 |     return process_live_input(settings, model_state);
343 |   }
344 | }
345 | 
346 | int app_main(int argc, char** argv) {
347 |   Settings* settings = settings_init_from_argv(argc, argv);
348 |   if (settings == NULL) {
349 |     return 1;
350 |   }
351 | 
352 |   ModelState* model_state = NULL;
353 |   if (!load_model(settings, &model_state)) {
354 |     return 1;
355 |   }
356 | 
357 |   if (!load_scorer(settings, model_state)) {
358 |     return 1;
359 |   }
360 | 
361 |   if (!process_audio(settings, model_state)) {
362 |     return 1;
363 |   }
364 | 
365 |   STT_FreeModel(model_state);
366 | 
367 |   return 0;
368 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Mozilla Public License Version 2.0
  2 | ==================================
  3 | 
  4 | 1. Definitions
  5 | --------------
  6 | 
  7 | 1.1. "Contributor"
  8 |     means each individual or legal entity that creates, contributes to
  9 |     the creation of, or owns Covered Software.
 10 | 
 11 | 1.2. "Contributor Version"
 12 |     means the combination of the Contributions of others (if any) used
 13 |     by a Contributor and that particular Contributor's Contribution.
 14 | 
 15 | 1.3. "Contribution"
 16 |     means Covered Software of a particular Contributor.
 17 | 
 18 | 1.4. "Covered Software"
 19 |     means Source Code Form to which the initial Contributor has attached
 20 |     the notice in Exhibit A, the Executable Form of such Source Code
 21 |     Form, and Modifications of such Source Code Form, in each case
 22 |     including portions thereof.
 23 | 
 24 | 1.5. "Incompatible With Secondary Licenses"
 25 |     means
 26 | 
 27 |     (a) that the initial Contributor has attached the notice described
 28 |         in Exhibit B to the Covered Software; or
 29 | 
 30 |     (b) that the Covered Software was made available under the terms of
 31 |         version 1.1 or earlier of the License, but not also under the
 32 |         terms of a Secondary License.
 33 | 
 34 | 1.6. "Executable Form"
 35 |     means any form of the work other than Source Code Form.
 36 | 
 37 | 1.7. "Larger Work"
 38 |     means a work that combines Covered Software with other material, in
 39 |     a separate file or files, that is not Covered Software.
 40 | 
 41 | 1.8. "License"
 42 |     means this document.
 43 | 
 44 | 1.9. "Licensable"
 45 |     means having the right to grant, to the maximum extent possible,
 46 |     whether at the time of the initial grant or subsequently, any and
 47 |     all of the rights conveyed by this License.
 48 | 
 49 | 1.10. "Modifications"
 50 |     means any of the following:
 51 | 
 52 |     (a) any file in Source Code Form that results from an addition to,
 53 |         deletion from, or modification of the contents of Covered
 54 |         Software; or
 55 | 
 56 |     (b) any new file in Source Code Form that contains any Covered
 57 |         Software.
 58 | 
 59 | 1.11. "Patent Claims" of a Contributor
 60 |     means any patent claim(s), including without limitation, method,
 61 |     process, and apparatus claims, in any patent Licensable by such
 62 |     Contributor that would be infringed, but for the grant of the
 63 |     License, by the making, using, selling, offering for sale, having
 64 |     made, import, or transfer of either its Contributions or its
 65 |     Contributor Version.
 66 | 
 67 | 1.12. "Secondary License"
 68 |     means either the GNU General Public License, Version 2.0, the GNU
 69 |     Lesser General Public License, Version 2.1, the GNU Affero General
 70 |     Public License, Version 3.0, or any later versions of those
 71 |     licenses.
 72 | 
 73 | 1.13. "Source Code Form"
 74 |     means the form of the work preferred for making modifications.
 75 | 
 76 | 1.14. "You" (or "Your")
 77 |     means an individual or a legal entity exercising rights under this
 78 |     License. For legal entities, "You" includes any entity that
 79 |     controls, is controlled by, or is under common control with You. For
 80 |     purposes of this definition, "control" means (a) the power, direct
 81 |     or indirect, to cause the direction or management of such entity,
 82 |     whether by contract or otherwise, or (b) ownership of more than
 83 |     fifty percent (50%) of the outstanding shares or beneficial
 84 |     ownership of such entity.
 85 | 
 86 | 2. License Grants and Conditions
 87 | --------------------------------
 88 | 
 89 | 2.1. Grants
 90 | 
 91 | Each Contributor hereby grants You a world-wide, royalty-free,
 92 | non-exclusive license:
 93 | 
 94 | (a) under intellectual property rights (other than patent or trademark)
 95 |     Licensable by such Contributor to use, reproduce, make available,
 96 |     modify, display, perform, distribute, and otherwise exploit its
 97 |     Contributions, either on an unmodified basis, with Modifications, or
 98 |     as part of a Larger Work; and
 99 | 
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 |     for sale, have made, import, and otherwise transfer either its
102 |     Contributions or its Contributor Version.
103 | 
104 | 2.2. Effective Date
105 | 
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 | 
110 | 2.3. Limitations on Grant Scope
111 | 
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 | 
118 | (a) for any code that a Contributor has removed from Covered Software;
119 |     or
120 | 
121 | (b) for infringements caused by: (i) Your and any other third party's
122 |     modifications of Covered Software, or (ii) the combination of its
123 |     Contributions with other software (except as part of its Contributor
124 |     Version); or
125 | 
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 |     its Contributions.
128 | 
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 | 
133 | 2.4. Subsequent Licenses
134 | 
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 | 
140 | 2.5. Representation
141 | 
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 | 
146 | 2.6. Fair Use
147 | 
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 | 
152 | 2.7. Conditions
153 | 
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 | 
157 | 3. Responsibilities
158 | -------------------
159 | 
160 | 3.1. Distribution of Source Form
161 | 
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 | 
170 | 3.2. Distribution of Executable Form
171 | 
172 | If You distribute Covered Software in Executable Form then:
173 | 
174 | (a) such Covered Software must also be made available in Source Code
175 |     Form, as described in Section 3.1, and You must inform recipients of
176 |     the Executable Form how they can obtain a copy of such Source Code
177 |     Form by reasonable means in a timely manner, at a charge no more
178 |     than the cost of distribution to the recipient; and
179 | 
180 | (b) You may distribute such Executable Form under the terms of this
181 |     License, or sublicense it under different terms, provided that the
182 |     license for the Executable Form does not attempt to limit or alter
183 |     the recipients' rights in the Source Code Form under this License.
184 | 
185 | 3.3. Distribution of a Larger Work
186 | 
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 | 
198 | 3.4. Notices
199 | 
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 | 
206 | 3.5. Application of Additional Terms
207 | 
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 | 
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 | 
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 | 
232 | 5. Termination
233 | --------------
234 | 
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 | 
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 | 
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 | 
261 | ************************************************************************
262 | *                                                                      *
263 | *  6. Disclaimer of Warranty                                           *
264 | *  -------------------------                                           *
265 | *                                                                      *
266 | *  Covered Software is provided under this License on an "as is"       *
267 | *  basis, without warranty of any kind, either expressed, implied, or  *
268 | *  statutory, including, without limitation, warranties that the       *
269 | *  Covered Software is free of defects, merchantable, fit for a        *
270 | *  particular purpose or non-infringing. The entire risk as to the     *
271 | *  quality and performance of the Covered Software is with You.        *
272 | *  Should any Covered Software prove defective in any respect, You     *
273 | *  (not any Contributor) assume the cost of any necessary servicing,   *
274 | *  repair, or correction. This disclaimer of warranty constitutes an   *
275 | *  essential part of this License. No use of any Covered Software is   *
276 | *  authorized under this License except under this disclaimer.         *
277 | *                                                                      *
278 | ************************************************************************
279 | 
280 | ************************************************************************
281 | *                                                                      *
282 | *  7. Limitation of Liability                                          *
283 | *  --------------------------                                          *
284 | *                                                                      *
285 | *  Under no circumstances and under no legal theory, whether tort      *
286 | *  (including negligence), contract, or otherwise, shall any           *
287 | *  Contributor, or anyone who distributes Covered Software as          *
288 | *  permitted above, be liable to You for any direct, indirect,         *
289 | *  special, incidental, or consequential damages of any character      *
290 | *  including, without limitation, damages for lost profits, loss of    *
291 | *  goodwill, work stoppage, computer failure or malfunction, or any    *
292 | *  and all other commercial damages or losses, even if such party      *
293 | *  shall have been informed of the possibility of such damages. This   *
294 | *  limitation of liability shall not apply to liability for death or   *
295 | *  personal injury resulting from such party's negligence to the       *
296 | *  extent applicable law prohibits such limitation. Some               *
297 | *  jurisdictions do not allow the exclusion or limitation of           *
298 | *  incidental or consequential damages, so this exclusion and          *
299 | *  limitation may not apply to You.                                    *
300 | *                                                                      *
301 | ************************************************************************
302 | 
303 | 8. Litigation
304 | -------------
305 | 
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 | 
313 | 9. Miscellaneous
314 | ----------------
315 | 
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 | 
323 | 10. Versions of the License
324 | ---------------------------
325 | 
326 | 10.1. New Versions
327 | 
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 | 
333 | 10.2. Effect of New Versions
334 | 
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 | 
340 | 10.3. Modified Versions
341 | 
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 | 
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 | 
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 | 
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 | 
358 |   This Source Code Form is subject to the terms of the Mozilla Public
359 |   License, v. 2.0. If a copy of the MPL was not distributed with this
360 |   file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 | 
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 | 
367 | You may add additional accurate notices of copyright ownership.
368 | 
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 | 
372 |   This Source Code Form is "Incompatible With Secondary Licenses", as
373 |   defined by the Mozilla Public License, v. 2.0.
374 | 


--------------------------------------------------------------------------------
/src/utils/yargs.c:
--------------------------------------------------------------------------------
#include "yargs.h"

#include <assert.h>
#include <errno.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "termcolor-c.h"
#include "string_utils.h"
 14 | 
 15 | // Application name from argv[0], used for usage printout.
 16 | static const char* app_name = NULL;
 17 | 
 18 | // Any arguments that weren't associated with named flags. Typically used
 19 | // for file names.
 20 | static char** unnamed_args = NULL;
 21 | static int unnamed_args_length = 0;
 22 | 
 23 | // We have to allocate some memory for string copies, and to keep track of
 24 | // them for freeing we use this array.
 25 | static void** free_at_end = NULL;
 26 | static int free_at_end_length = 0;
 27 | 
 28 | // Find a flag definition for this long name, or return NULL.
 29 | static const YargsFlag* GetFlagWithName(const YargsFlag* flags,
 30 |   int flags_length, const char* name) {
 31 |   for (int i = 0; i < flags_length; ++i) {
 32 |     const YargsFlag* flag = &flags[i];
 33 |     if (strcmp(flag->name, name) == 0) {
 34 |       return flag;
 35 |     }
 36 |   }
 37 | 
 38 |   return NULL;
 39 | }
 40 | 
 41 | // Find a flag definition matching a single-character short name, or
 42 | // return NULL.
 43 | static const YargsFlag* GetFlagWithShortName(const YargsFlag* flags,
 44 |   int flags_length, const char* short_name) {
 45 |   for (int i = 0; i < flags_length; ++i) {
 46 |     const YargsFlag* flag = &flags[i];
 47 |     if ((flag->short_name != NULL) &&
 48 |       (strcmp(flag->short_name, short_name) == 0)) {
 49 |       return flag;
 50 |     }
 51 |   }
 52 |   return NULL;
 53 | }
 54 | 
 55 | // Splits "--foo=bar" into "--foo", "bar", "-xyz" into "-x", "-y", "-z". This
 56 | // makes it easier to parse the arguments. Also skips over the first argument,
 57 | // since that just contains the application name.
 58 | static void NormalizeArgs(char** argv, int argc, char*** norm_argv, int* norm_argc) {
 59 |   *norm_argv = NULL;
 60 |   *norm_argc = 0;
 61 | 
 62 |   for (int i = 1; i < argc; ++i) {
 63 |     const char* arg = argv[i];
 64 |     if ((arg[0] == '-') && (arg[1] == '-')) {
 65 |       char** opt_parts = NULL;
 66 |       int opt_parts_length = 0;
 67 |       string_split(arg, '=', 2, &opt_parts, &opt_parts_length);
 68 |       for (int j = 0; j < opt_parts_length; ++j) {
 69 |         *norm_argc += 1;
 70 |         *norm_argv = realloc(*norm_argv, sizeof(const char*) * (*norm_argc));
 71 |         (*norm_argv)[(*norm_argc) - 1] = opt_parts[j];
 72 |       }
 73 |       free(opt_parts);
 74 |     }
 75 |     else if ((arg[0] == '-') &&
 76 |       (strlen(arg) > 1) && // Ignore single dashes.
 77 |       ((arg[1] < '0') || (arg[1] > '9'))) { // Ignore negative numbers.
 78 | 
 79 |       char** opt_parts = NULL;
 80 |       int opt_parts_length = 0;
 81 |       string_split(arg, '=', 2, &opt_parts, &opt_parts_length);
 82 |       if (opt_parts_length > 1) {
 83 |         // If there's an '=', assume there's only a single short name specified
 84 |         // and break the arg into the name and the value.
 85 |         for (int j = 0; j < opt_parts_length; ++j) {
 86 |           *norm_argc += 1;
 87 |           *norm_argv = realloc(*norm_argv, sizeof(const char*) * (*norm_argc));
 88 |           (*norm_argv)[(*norm_argc) - 1] = opt_parts[j];
 89 |         }
 90 |         free(opt_parts);
 91 |       }
 92 |       else {
 93 |         string_list_free(opt_parts, opt_parts_length);
 94 |         const char* short_opts = &(arg[1]);
 95 |         const int short_opts_length = strlen(short_opts);
 96 |         for (int j = 0; j < short_opts_length; ++j) {
 97 |           char* short_opt = malloc(3);
 98 |           short_opt[0] = '-';
 99 |           short_opt[1] = short_opts[j];
100 |           short_opt[2] = 0;
101 |           *norm_argc += 1;
102 |           *norm_argv = realloc(*norm_argv, sizeof(const char*) * (*norm_argc));
103 |           (*norm_argv)[(*norm_argc) - 1] = short_opt;
104 |         }
105 |       }
106 |     }
107 |     else {
108 |       int arg_length = strlen(arg);
109 |       char* arg_copy = malloc(arg_length + 1);
110 |       strcpy(arg_copy, arg);
111 |       *norm_argc += 1;
112 |       *norm_argv = realloc(*norm_argv, sizeof(char*) * (*norm_argc));
113 |       (*norm_argv)[(*norm_argc) - 1] = arg_copy;
114 |     }
115 |   }
116 | }
117 | 
// Releases a list produced by NormalizeArgs(): each of the `norm_argc`
// strings first, then the array that held them.
static void FreeNormalizedArgs(char** norm_argv, int norm_argc) {
  int remaining = norm_argc;
  while (remaining > 0) {
    remaining -= 1;
    free(norm_argv[remaining]);
  }
  free(norm_argv);
}
125 | 
// A friendly wrapper around the raw strtof() numerical string parsing.
//
// Returns true, and stores the parsed value in `output`, only if the whole
// of `string` could be understood as a number. A NULL or empty string, or
// one with trailing characters after the number, returns false and leaves
// `output` untouched.
static bool InterpretValueAsFloat(const char* string, float* output) {
  if ((string == NULL) || (string[0] == '\0')) {
    return false;
  }

  char* conversion_end = NULL;
  const float value = strtof(string, &conversion_end);
  // strtof() leaves the end pointer at the start when nothing was converted,
  // and short of the terminator when trailing characters were left over.
  if ((conversion_end == string) || (*conversion_end != '\0')) {
    return false;
  }

  *output = value;
  return true;
}
139 | 
// A friendly wrapper around the raw strtol() numerical string parsing.
//
// Returns true, and stores the parsed value in `output`, only if the whole
// of `string` is a base-10 integer that fits in an int32_t. A NULL or empty
// string, trailing characters, or an out-of-range value returns false and
// leaves `output` untouched.
static bool InterpretValueAsInt32(const char* string, int32_t* output) {
  if ((string == NULL) || (string[0] == '\0')) {
    return false;
  }

  char* conversion_end = NULL;
  errno = 0;
  const long int result = strtol(string, &conversion_end, 10);
  if ((conversion_end == string) || (*conversion_end != '\0')) {
    return false;
  }
  // ERANGE covers values too large for a long; the explicit bounds cover
  // platforms where long is wider than 32 bits.
  if ((errno == ERANGE) || (result < INT32_MIN) || (result > INT32_MAX)) {
    return false;
  }

  *output = (int32_t)(result);
  return true;
}
154 | 
155 | // Perform checks on the input data supplied by the caller, to ensure there are
156 | // no obvious logical errors.
157 | static bool ValidateYargsFlags(const YargsFlag* flags, int flags_length) {
158 |   for (int i = 0; i < flags_length; ++i) {
159 |     const YargsFlag* flag = &flags[i];
160 |     if ((flag->name == NULL)) {
161 |       fprintf(stderr, "Missing name in flag definition #%d.\n", i);
162 |       return false;
163 |     }
164 |     if (strlen(flag->name) < 2) {
165 |       fprintf(stderr, "Name '%s' is too short in flag definition #%d.\n", flag->name, i);
166 |       return false;
167 |     }
168 |     if ((flag->short_name != NULL) && strlen(flag->short_name) > 1) {
169 |       fprintf(stderr, "Short name '%s' should be one character long in flag definition '%s' (#%d).\n",
170 |         flag->short_name, flag->name, i);
171 |       return false;
172 |     }
173 |     for (int j = i + 1; j < flags_length; ++j) {
174 |       const YargsFlag* other_flag = &flags[j];
175 |       if (strcmp(flag->name, other_flag->name) == 0) {
176 |         fprintf(stderr,
177 |           "Name '%s' is repeated in flag definitions #%d and %d.\n", flag->name,
178 |           i, j);
179 |         return false;
180 |       }
181 |       if ((flag->short_name != NULL) && (other_flag->short_name != NULL) &&
182 |         (strcmp(flag->short_name, other_flag->short_name) == 0)) {
183 |         fprintf(stderr,
184 |           "Short name '%s' is repeated in flag definitions #%d and %d.\n",
185 |           flag->short_name, i, j);
186 |         return false;
187 |       }
188 |     }
189 | 
190 |     switch (flag->type) {
191 |     case FT_BOOL: {
192 |       if (flag->bool_value == NULL) {
193 |         fprintf(stderr, "Missing bool value for flag definition '%s' (#%d).\n",
194 |           flag->name, i);
195 |         return false;
196 |       }
197 |     } break;
198 |     case FT_FLOAT: {
199 |       if (flag->float_value == NULL) {
200 |         fprintf(stderr, "Missing float value for flag definition '%s' (#%d).\n",
201 |           flag->name, i);
202 |         return false;
203 |       }
204 |     } break;
205 |     case FT_INT32: {
206 |       if (flag->int32_value == NULL) {
207 |         fprintf(stderr, "Missing integer value for flag definition '%s' (#%d).\n",
208 |           flag->name, i);
209 |         return false;
210 |       }
211 |     } break;
212 |     case FT_STRING: {
213 |       if (flag->string_value == NULL) {
214 |         fprintf(stderr, "Missing string value for flag definition '%s' (#%d).\n",
215 |           flag->name, i);
216 |         return false;
217 |       }
218 |     } break;
219 |     default: {
220 |       fprintf(stderr, "Bad type %d in flag definition '%s' (#%d).\n",
221 |         flag->type, flag->name, i);
222 |       return false;
223 |     } break;
224 |     }
225 |   }
226 |   return true;
227 | }
228 | 
229 | static void AddToFreeAtEndList(void* ptr) {
230 |   free_at_end_length += 1;
231 |   free_at_end = realloc(free_at_end, sizeof(void*) * free_at_end_length);
232 |   free_at_end[free_at_end_length - 1] = ptr;
233 | }
234 | 
// Returns the size in bytes that stat() reports for `filename`, or -1 if
// stat() fails. stat() is used rather than fseek/ftell because the latter
// pair isn't guaranteed to work on binary files.
static off_t FileSize(const char* filename) {
  struct stat info;
  const int status = stat(filename, &info);
  if (status != 0) {
    return -1;
  }
  return info.st_size;
}
244 | 
245 | static bool LoadFromFileContents(const YargsFlag* flags, size_t flags_length,
246 |   const char* contents) {
247 | 
248 |   char** argv = NULL;
249 |   int argc = 0;
250 |   string_split(contents, ' ', -1, &argv, &argc);
251 | 
252 |   const bool result = yargs_init(flags, flags_length, NULL, argv, argc);
253 | 
254 |   string_list_free(argv, argc);
255 |   return result;
256 | }
257 | 
258 | bool yargs_init(const YargsFlag* flags, size_t flags_length,
259 |   const char* app_description, char** argv, int argc) {
260 |   assert(flags != NULL);
261 | 
262 |   if (!ValidateYargsFlags(flags, flags_length)) {
263 |     return false;
264 |   }
265 | 
266 |   app_name = argv[0];
267 | 
268 |   // If we're called multiple times, make sure we don't keep appending to the
269 |   // list of unnamed arguments.
270 |   if (unnamed_args != NULL) {
271 |     free(unnamed_args);
272 |     unnamed_args = NULL;
273 |   }
274 |   unnamed_args_length = 0;
275 | 
276 |   char** norm_argv;
277 |   int norm_argc;
278 |   NormalizeArgs(argv, argc, &norm_argv, &norm_argc);
279 | 
280 |   for (int i = 0; i < norm_argc; ++i) {
281 |     const char* arg = norm_argv[i];
282 |     const char* next_value = NULL;
283 |     // Whether we should skip over the next arg because it's the value of
284 |     // the current option, and not an option name.
285 |     bool consume_next_value = true;
286 |     if ((i + 1) < norm_argc) {
287 |       next_value = norm_argv[i + 1];
288 |     }
289 |     if ((arg[0] == '-') &&
290 |       (strlen(arg) > 1) &&  // Skip single dashes.
291 |       ((arg[1] < '0') || (arg[1] > '9'))) { // Skip negative numbers.
292 |       const YargsFlag* flag = NULL;
293 |       if (arg[1] == '-') {
294 |         flag = GetFlagWithName(flags, flags_length, &arg[2]);
295 |       }
296 |       else {
297 |         flag = GetFlagWithShortName(flags, flags_length, &arg[1]);
298 |       }
299 |       if (flag == NULL) {
300 |         fprintf(stderr, "Flag '%s' not recognized.\n", arg);
301 |         yargs_print_usage(flags, flags_length, app_description);
302 |         FreeNormalizedArgs(norm_argv, norm_argc);
303 |         return false;
304 |       }
305 |       switch (flag->type) {
306 |       case FT_BOOL: {
307 |         if (next_value == NULL) {
308 |           *(flag->bool_value) = true;
309 |         }
310 |         else if (next_value[0] == '-') {
311 |           // If the next arg is an option name, don't try to intepret it
312 |           // as a value.
313 |           consume_next_value = false;
314 |           *(flag->bool_value) = true;
315 |         }
316 |         else if (
317 |           (strcmp(next_value, "true") == 0) ||
318 |           (strcmp(next_value, "TRUE") == 0) ||
319 |           (strcmp(next_value, "yes") == 0) ||
320 |           (strcmp(next_value, "YES") == 0) ||
321 |           (strcmp(next_value, "1") == 0)) {
322 |           *(flag->bool_value) = true;
323 |         }
324 |         else if (
325 |           (strcmp(next_value, "false") == 0) ||
326 |           (strcmp(next_value, "FALSE") == 0) ||
327 |           (strcmp(next_value, "no") == 0) ||
328 |           (strcmp(next_value, "NO") == 0) ||
329 |           (strcmp(next_value, "0") == 0)) {
330 |           *(flag->bool_value) = true;
331 |         }
332 |         else {
333 |           fprintf(stderr, "Boolean value for argument '%s' is '%s', but needs to be one of "
334 |             "true, false, yes, no, 1, or 0\n", flag->name, next_value);
335 |           FreeNormalizedArgs(norm_argv, norm_argc);
336 |           return false;
337 |         }
338 |       } break;
339 | 
340 |       case FT_FLOAT: {
341 |         if (next_value == NULL) {
342 |           fprintf(stderr, "No value found for argument '%s'\n", flag->name);
343 |           FreeNormalizedArgs(norm_argv, norm_argc);
344 |           return false;
345 |         }
346 | 
347 |         if (!InterpretValueAsFloat(next_value, flag->float_value)) {
348 |           fprintf(stderr, "Couldn't interpret '%s' as a floating point number for argument '%s'\n",
349 |             next_value, flag->name);
350 |           FreeNormalizedArgs(norm_argv, norm_argc);
351 |           return false;
352 |         }
353 |       } break;
354 | 
355 |       case FT_INT32: {
356 |         if (next_value == NULL) {
357 |           fprintf(stderr, "No value found for argument '%s'\n", flag->name);
358 |           FreeNormalizedArgs(norm_argv, norm_argc);
359 |           return false;
360 |         }
361 |         if (!InterpretValueAsInt32(next_value, flag->int32_value)) {
362 |           fprintf(stderr, "Couldn't interpret '%s' as an integer for argument '%s'\n",
363 |             next_value, flag->name);
364 |           FreeNormalizedArgs(norm_argv, norm_argc);
365 |           return false;
366 |         }
367 |       } break;
368 | 
369 |       case FT_STRING: {
370 |         if (next_value == NULL) {
371 |           fprintf(stderr, "No value found for argument '%s'\n", flag->name);
372 |           FreeNormalizedArgs(norm_argv, norm_argc);
373 |           return false;
374 |         }
375 |         const int next_value_length = strlen(next_value);
376 |         char* next_string = malloc(next_value_length + 1);
377 |         AddToFreeAtEndList(next_string);
378 |         strcpy(next_string, next_value);
379 |         *(flag->string_value) = next_string;
380 |       } break;
381 | 
382 |       default: {
383 |         fprintf(stderr, "Unknown type %d in definition for flag '%s'\n", flag->type, flag->name);
384 |         FreeNormalizedArgs(norm_argv, norm_argc);
385 |         return false;
386 |       }
387 |       }
388 |     }
389 |     else {
390 |       unnamed_args_length += 1;
391 |       unnamed_args = realloc(unnamed_args, sizeof(const char*) * unnamed_args_length);
392 |       const int arg_length = strlen(arg);
393 |       char* arg_copy = malloc(arg_length + 1);
394 |       AddToFreeAtEndList(arg_copy);
395 |       strcpy(arg_copy, arg);
396 |       unnamed_args[unnamed_args_length - 1] = arg_copy;
397 |       consume_next_value = false;
398 |     }
399 | 
400 |     // If the following argument in the list was used as a value by this option,
401 |     // skip over it.
402 |     if (consume_next_value) {
403 |       i += 1;
404 |     }
405 |   }
406 | 
407 |   FreeNormalizedArgs(norm_argv, norm_argc);
408 |   return true;
409 | }
410 | 
411 | void yargs_free() {
412 |   for (int i = 0; i < free_at_end_length; ++i) {
413 |     free(free_at_end[i]);
414 |   }
415 |   free(free_at_end);
416 |   free_at_end = NULL;
417 |   free_at_end_length = 0;
418 | 
419 |   free(unnamed_args);
420 |   unnamed_args = NULL;
421 |   unnamed_args_length = 0;
422 | }
423 | 
424 | void yargs_print_usage(const YargsFlag* flags, int flags_length,
425 |   const char* app_description) {
426 |   text_bold(stderr);
427 |   fprintf(stderr, "Usage");
428 |   reset_colors(stderr);
429 |   text_red(stderr);
430 |   fprintf(stderr, ": %s ", app_name);
431 |   reset_colors(stderr);
432 |   for (int i = 0; i < flags_length; ++i) {
433 |     const YargsFlag* flag = &flags[i];
434 |     text_green(stderr);
435 |     fprintf(stderr, "--%s", flag->name);
436 |     reset_colors(stderr);
437 |     if (flag->short_name != NULL) {
438 |       fprintf(stderr, "/");
439 |       text_green(stderr);
440 |       fprintf(stderr, "-%s", flag->short_name);
441 |       reset_colors(stderr);
442 |     }
443 |     fprintf(stderr, " ");
444 |     switch (flag->type) {
445 |     case FT_BOOL: {
446 |       // Do nothing.
447 |     } break;
448 |     case FT_FLOAT: {
449 |       fprintf(stderr, "<float> ");
450 |     } break;
451 |     case FT_INT32: {
452 |       fprintf(stderr, "<integer> ");
453 |     } break;
454 |     case FT_STRING: {
455 |       fprintf(stderr, "<string> ");
456 |     } break;
457 |     default: {
458 |       assert(false);
459 |     } break;
460 |     }
461 |   }
462 |   fprintf(stderr, "\n");
463 |   if (app_description != NULL) {
464 |     text_bold(stderr);
465 |     fprintf(stderr, "%s\n", app_description);
466 |     reset_colors(stderr);
467 |   }
468 |   for (int i = 0; i < flags_length; ++i) {
469 |     const YargsFlag* flag = &flags[i];
470 |     text_green(stderr);
471 |     fprintf(stderr, "--%s", flag->name);
472 |     reset_colors(stderr);
473 |     if (flag->short_name != NULL) {
474 |       fprintf(stderr, "/");
475 |       text_green(stderr);
476 |       fprintf(stderr, "-%s", flag->short_name);
477 |       reset_colors(stderr);
478 |     }
479 |     fprintf(stderr, "\t");
480 |     if (flag->description != NULL) {
481 |       text_cyan(stderr);
482 |       fprintf(stderr, "%s\n", flag->description);
483 |       reset_colors(stderr);
484 |     }
485 |   }
486 | }
487 | 
488 | bool yargs_load_from_file(const YargsFlag* flags, int flags_length,
489 |   const char* filename) {
490 |   FILE* file = fopen(filename, "r");
491 |   if (file == NULL) {
492 |     fprintf(stderr, "Flags_LoadFromIniFile(): Couldn't load file '%s'\n",
493 |       filename);
494 |     return false;
495 |   }
496 | 
497 |   const off_t file_size = FileSize(filename);
498 |   if (file_size <= 0) {
499 |     fprintf(stderr, "Flags_LoadFromIniFile(): File size error loading '%s'\n",
500 |       filename);
501 |     return false;
502 |   }
503 | 
504 |   char* file_contents = malloc(file_size + 1);
505 |   fread(file_contents, 1, file_size, file);
506 |   file_contents[file_size] = 0;
507 |   const bool result = LoadFromFileContents(flags, flags_length,
508 |     file_contents);
509 |   free(file_contents);
510 | 
511 |   return result;
512 | }
513 | 
514 | bool yargs_save_to_file(const YargsFlag* flags, int flags_length,
515 |   const char* filename) {
516 | 
517 |   FILE* file = fopen(filename, "w");
518 |   if (file == NULL) {
519 |     fprintf(stderr, "yargs_save_to_file: Couldn't write to file '%s'\n",
520 |       filename);
521 |     return false;
522 |   }
523 | 
524 |   fprintf(file, "%s ", app_name);
525 |   for (int i = 0; i < flags_length; ++i) {
526 |     const YargsFlag* flag = &flags[i];
527 |     fprintf(file, "--%s ", flag->name);
528 |     switch (flag->type) {
529 |     case FT_BOOL: {
530 |       if (*(flag->bool_value)) {
531 |         fprintf(file, "true");
532 |       }
533 |       else {
534 |         fprintf(file, "false");
535 |       }
536 |     } break;
537 |     case FT_FLOAT: {
538 |       fprintf(file, "%f", *(flag->float_value));
539 |     } break;
540 |     case FT_INT32: {
541 |       fprintf(file, "%d", *(flag->int32_value));
542 |     } break;
543 |     case FT_STRING: {
544 |       fprintf(file, "%s", *(flag->string_value));
545 |     } break;
546 |     default: {
547 |       assert(false);
548 |       return false;
549 |     } break;
550 |     }
551 |     if (i < (flags_length - 1)) {
552 |       fprintf(file, " ");
553 |     }
554 |   }
555 | 
556 |   for (int i = 0; i < unnamed_args_length; ++i) {
557 |     fprintf(file, " %s", unnamed_args[i]);
558 |   }
559 | 
560 |   fclose(file);
561 |   return true;
562 | }
563 | 
564 | int yargs_get_unnamed_length() {
565 |   return unnamed_args_length;
566 | }
567 | 
568 | const char* yargs_get_unnamed(int index) {
569 |   if ((index < 0) || (index >= unnamed_args_length)) {
570 |     return NULL;
571 |   }
572 |   else {
573 |     return unnamed_args[index];
574 |   }
575 | }
576 | 
577 | const char* yargs_app_name() {
578 |   return app_name;
579 | }
580 | 


--------------------------------------------------------------------------------
/src/utils/yargs_test.c:
--------------------------------------------------------------------------------
  1 | #include "yargs.h"
  2 | 
  3 | #include "acutest.h"
  4 | 
  5 | // Include the original source file so we can test static functions.
  6 | #include "yargs.c"
  7 | 
  8 | void test_GetFlagWithName() {
  9 |   const char* some_name = "some_value";
 10 |   int32_t some_other_name = 10;
 11 |   const YargsFlag test_flags[] = {
 12 |     YARGS_STRING("some_name", NULL, &some_name, "Test value."),
 13 |     YARGS_INT32("some_other_name", NULL, &some_other_name, "Another test."),
 14 |   };
 15 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
 16 | 
 17 |   const YargsFlag* result = GetFlagWithName(test_flags, test_flags_length,
 18 |     "some_name");
 19 |   TEST_ASSERT(result != NULL);
 20 |   TEST_STREQ("some_name", result->name);
 21 |   TEST_STREQ("some_value", *(result->string_value));
 22 | 
 23 |   result = GetFlagWithName(test_flags, test_flags_length, "some_other_name");
 24 |   TEST_ASSERT(result != NULL);
 25 |   TEST_STREQ("some_other_name", result->name);
 26 |   TEST_INTEQ(10, *(result->int32_value));
 27 | 
 28 |   result = GetFlagWithName(test_flags, test_flags_length, "nonexistent");
 29 |   TEST_CHECK(result == NULL);
 30 | }
 31 | 
 32 | void test_GetFlagWithShortName() {
 33 |   const char* some_name = "some_value";
 34 |   int32_t no_short_name = 10;
 35 |   int32_t some_other_name = 10;
 36 |   const YargsFlag test_flags[] = {
 37 |     YARGS_STRING("some_name", "a", &some_name, "Test value."),
 38 |     YARGS_INT32("no_short_name", NULL, &no_short_name, "No short name."),
 39 |     YARGS_INT32("some_other_name", "b", &some_other_name, "Another test."),
 40 |   };
 41 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
 42 | 
 43 |   const YargsFlag* result = GetFlagWithShortName(test_flags, test_flags_length,
 44 |     "a");
 45 |   TEST_ASSERT(result != NULL);
 46 |   TEST_STREQ("some_name", result->name);
 47 |   TEST_STREQ("a", result->short_name);
 48 |   TEST_STREQ("some_value", *(result->string_value));
 49 | 
 50 |   result = GetFlagWithShortName(test_flags, test_flags_length, "b");
 51 |   TEST_ASSERT(result != NULL);
 52 |   TEST_STREQ("some_other_name", result->name);
 53 |   TEST_STREQ("b", result->short_name);
 54 |   TEST_INTEQ(10, *(result->int32_value));
 55 | 
 56 |   result = GetFlagWithShortName(test_flags, test_flags_length, "z");
 57 |   TEST_CHECK(result == NULL);
 58 | }
 59 | 
 60 | void test_NormalizeArgs() {
 61 |   char* argv[] = {
 62 |     "progname",
 63 |     "unnamed",
 64 |     "--flag1",
 65 |     "value1",
 66 |     "--flag2=value2",
 67 |     "-s",
 68 |     "value3",
 69 |     "-xyz",
 70 |     "-",
 71 |     "-99.9",
 72 |     "-f=-99.9",
 73 |     "anotherunnamed"
 74 |   };
 75 |   const int argc = sizeof(argv) / sizeof(argv[0]);
 76 | 
 77 |   char** norm_argv = NULL;
 78 |   int norm_argc = 0;
 79 |   NormalizeArgs(argv, argc, &norm_argv, &norm_argc);
 80 |   TEST_INTEQ(15, norm_argc);
 81 |   TEST_STREQ("unnamed", norm_argv[0]);
 82 |   TEST_STREQ("--flag1", norm_argv[1]);
 83 |   TEST_STREQ("value1", norm_argv[2]);
 84 |   TEST_STREQ("--flag2", norm_argv[3]);
 85 |   TEST_STREQ("value2", norm_argv[4]);
 86 |   TEST_STREQ("-s", norm_argv[5]);
 87 |   TEST_STREQ("value3", norm_argv[6]);
 88 |   TEST_STREQ("-x", norm_argv[7]);
 89 |   TEST_STREQ("-y", norm_argv[8]);
 90 |   TEST_STREQ("-z", norm_argv[9]);
 91 |   TEST_STREQ("-", norm_argv[10]);
 92 |   TEST_STREQ("-99.9", norm_argv[11]);
 93 |   TEST_STREQ("-f", norm_argv[12]);
 94 |   TEST_STREQ("-99.9", norm_argv[13]);
 95 |   TEST_STREQ("anotherunnamed", norm_argv[14]);
 96 | 
 97 |   FreeNormalizedArgs(norm_argv, norm_argc);
 98 | }
 99 | 
// Checks that well-formed floats parse to the right value and that text
// which isn't entirely a number is rejected.
void test_InterpretValueAsFloat() {
  // A positive number.
  float parsed = 0.0f;
  bool ok = InterpretValueAsFloat("10.0", &parsed);
  TEST_CHECK(ok);
  TEST_FLTEQ(10.0f, parsed, 0.0001f);

  // A negative number.
  parsed = 0.0f;
  ok = InterpretValueAsFloat("-33.3333", &parsed);
  TEST_CHECK(ok);
  TEST_FLTEQ(-33.3333f, parsed, 0.0001f);

  // No number at all.
  parsed = 0.0f;
  ok = InterpretValueAsFloat("string", &parsed);
  TEST_CHECK(!ok);

  // A number followed by stray characters.
  parsed = 0.0f;
  ok = InterpretValueAsFloat("10.0x", &parsed);
  TEST_CHECK(!ok);
}
119 | 
// Checks that well-formed integers parse to the right value and that text
// which isn't entirely an integer is rejected.
void test_InterpretValueAsInt32() {
  // A positive number.
  int32_t parsed = 0;
  bool ok = InterpretValueAsInt32("10", &parsed);
  TEST_CHECK(ok);
  TEST_INTEQ(10, parsed);

  // A negative number.
  parsed = 0;
  ok = InterpretValueAsInt32("-33", &parsed);
  TEST_CHECK(ok);
  TEST_INTEQ(-33, parsed);

  // No number at all.
  parsed = 0;
  ok = InterpretValueAsInt32("string", &parsed);
  TEST_CHECK(!ok);

  // A number followed by stray characters.
  parsed = 0;
  ok = InterpretValueAsInt32("10x", &parsed);
  TEST_CHECK(!ok);

  // A fractional value is not an integer.
  parsed = 0;
  ok = InterpretValueAsInt32("9999.9", &parsed);
  TEST_CHECK(!ok);
}
143 | 
144 | void test_ValidateYargsFlags() {
145 |   const char* some_name = "some_value";
146 |   int32_t some_other_name = 0;
147 |   const YargsFlag good_flags[] = {
148 |     YARGS_STRING("some_name", NULL, &some_name, "Test value."),
149 |     YARGS_INT32("some_other_name", NULL, &some_other_name, "Another test."),
150 |   };
151 |   bool result = ValidateYargsFlags(&good_flags[0],
152 |     sizeof(good_flags) / sizeof(good_flags[0]));
153 |   TEST_CHECK(result);
154 | 
155 |   const YargsFlag bad_type_flags[] = {
156 |     {"some_name", NULL, -1, NULL, NULL, NULL, NULL, "Bad type."},
157 |   };
158 |   result = ValidateYargsFlags(&bad_type_flags[0],
159 |     sizeof(bad_type_flags) / sizeof(bad_type_flags[0]));
160 |   TEST_CHECK(!result);
161 | 
162 |   const char* bad_name_string = "Bad name string";
163 |   const YargsFlag bad_name_flags[] = {
164 |     {NULL, NULL, FT_STRING, NULL, NULL, NULL, &bad_name_string, "Bad name."},
165 |   };
166 |   result = ValidateYargsFlags(&bad_name_flags[0],
167 |     sizeof(bad_name_flags) / sizeof(bad_name_flags[0]));
168 |   TEST_CHECK(!result);
169 | 
170 |   const YargsFlag bad_ptr_flags[] = {
171 |     {NULL, NULL, FT_STRING, NULL, NULL, NULL, NULL, "Bad pointer."},
172 |   };
173 |   result = ValidateYargsFlags(&bad_ptr_flags[0],
174 |     sizeof(bad_ptr_flags) / sizeof(bad_ptr_flags[0]));
175 |   TEST_CHECK(!result);
176 | 
177 |   const char* repeated_name_string1 = "Repeated name string1";
178 |   const char* repeated_name_string2 = "Repeated name string2";
179 |   const YargsFlag repeated_name_flags[] = {
180 |     YARGS_STRING("repeated_name", NULL, &repeated_name_string1, "Test value."),
181 |     YARGS_STRING("repeated_name", NULL, &repeated_name_string2, "Test value."),
182 |   };
183 |   result = ValidateYargsFlags(&repeated_name_flags[0],
184 |     sizeof(repeated_name_flags) / sizeof(repeated_name_flags[0]));
185 |   TEST_CHECK(!result);
186 | 
187 |   const YargsFlag repeated_short_name_flags[] = {
188 |     YARGS_STRING("some_name", "a", &repeated_name_string1, "Test value."),
189 |     YARGS_STRING("some_other_name", "a", &repeated_name_string2, "Test value."),
190 |   };
191 |   result = ValidateYargsFlags(&repeated_short_name_flags[0],
192 |     sizeof(repeated_short_name_flags) / sizeof(repeated_short_name_flags[0]));
193 |   TEST_CHECK(!result);
194 | }
195 | 
196 | void test_yargs_init() {
197 |   const char* some_name = "some_value";
198 |   int32_t no_short_name = 10;
199 |   bool some_other_name = false;
200 |   float float_arg = 23.0f;
201 |   YargsFlag test_flags[] = {
202 |     YARGS_STRING("some_name", "a", &some_name, "Some name."),
203 |     YARGS_INT32("no_short_name", NULL, &no_short_name, "No short name."),
204 |     YARGS_BOOL("some_other_name", "b", &some_other_name, "Some other name."),
205 |     YARGS_FLOAT("float_arg", "f", &float_arg, "Float argument"),
206 |   };
207 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
208 | 
209 |   char* argv1[] = {
210 |     "program", "--some_name", "new_value", "unnamed1", "--no_short_name=32",
211 |     "--some_other_name", "--float_arg", "-99.9", "unnamed2",
212 |   };
213 |   const int argc1 = sizeof(argv1) / sizeof(argv1[0]);
214 |   bool status = yargs_init(test_flags, test_flags_length, NULL, argv1, argc1);
215 |   TEST_CHECK(status);
216 |   TEST_STREQ("new_value", some_name);
217 |   TEST_INTEQ(32, no_short_name);
218 |   TEST_CHECK(some_other_name);
219 |   TEST_FLTEQ(-99.9, float_arg, 0.0001f);
220 |   TEST_INTEQ(2, yargs_get_unnamed_length());
221 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
222 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
223 |   yargs_free();
224 | 
225 |   some_name = "some_value";
226 |   no_short_name = 10;
227 |   some_other_name = false;
228 |   float_arg = 23.0f;
229 |   char* argv_no_args[] = {
230 |     "program",
231 |   };
232 |   const int argc_no_args = sizeof(argv_no_args) / sizeof(argv_no_args[0]);
233 |   status = yargs_init(test_flags, test_flags_length, NULL, argv_no_args,
234 |     argc_no_args);
235 |   TEST_CHECK(status);
236 |   TEST_STREQ("some_value", some_name);
237 |   TEST_INTEQ(10, no_short_name);
238 |   TEST_CHECK(!some_other_name);
239 |   TEST_FLTEQ(23.0f, float_arg, 0.0001f);
240 |   TEST_INTEQ(0, yargs_get_unnamed_length());
241 |   yargs_free();
242 | 
243 |   some_name = "some_value";
244 |   no_short_name = 10;
245 |   some_other_name = false;
246 |   float_arg = 23.0f;
247 |   char* argv_short_names[] = {
248 |     "program", "-a", "new_value", "unnamed1", "-b", "-f=-99.9", "unnamed2",
249 |   };
250 |   const int argc_short_names =
251 |     sizeof(argv_short_names) / sizeof(argv_short_names[0]);
252 |   status = yargs_init(test_flags, test_flags_length, NULL, argv_short_names,
253 |     argc_short_names);
254 |   TEST_CHECK(status);
255 |   TEST_STREQ("new_value", some_name);
256 |   TEST_INTEQ(10, no_short_name);
257 |   TEST_CHECK(some_other_name);
258 |   TEST_FLTEQ(-99.9, float_arg, 0.0001f);
259 |   TEST_INTEQ(2, yargs_get_unnamed_length());
260 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
261 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
262 |   yargs_free();
263 | 
264 |   char* argv_bad_short[] = {
265 |     "program", "-ab=value",
266 |   };
267 |   const int argc_bad_short = sizeof(argv_bad_short) / sizeof(argv_bad_short[0]);
268 |   status = yargs_init(test_flags, test_flags_length, NULL, argv_bad_short,
269 |     argc_bad_short);
270 |   TEST_CHECK(!status);
271 |   yargs_free();
272 | 
273 |   some_name = "some_value";
274 |   no_short_name = 10;
275 |   some_other_name = false;
276 |   float_arg = 23.0f;
277 |   char* argv_unnamed_only[] = {
278 |     "program", "unnamed1", "unnamed2",
279 |   };
280 |   const int argc_unnamed_only =
281 |     sizeof(argv_unnamed_only) / sizeof(argv_unnamed_only[0]);
282 |   status = yargs_init(test_flags, test_flags_length, NULL, argv_unnamed_only,
283 |     argc_unnamed_only);
284 |   TEST_CHECK(status);
285 |   TEST_STREQ("some_value", some_name);
286 |   TEST_INTEQ(10, no_short_name);
287 |   TEST_CHECK(!some_other_name);
288 |   TEST_FLTEQ(23.0f, float_arg, 0.0001f);
289 |   TEST_INTEQ(2, yargs_get_unnamed_length());
290 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
291 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
292 |   yargs_free();
293 | 
294 |   char* argv_no_such[] = {
295 |     "program", "--no_such",
296 |   };
297 |   const int argc_no_such = sizeof(argv_no_such) / sizeof(argv_no_such[0]);
298 |   status = yargs_init(test_flags, test_flags_length, NULL, argv_no_such,
299 |     argc_no_such);
300 |   TEST_CHECK(!status);
301 |   yargs_free();
302 | 
303 |   char* argv_no_such_short[] = {
304 |     "program", "-x",
305 |   };
306 |   const int argc_no_such_short =
307 |     sizeof(argv_no_such_short) / sizeof(argv_no_such_short[0]);
308 |   status = yargs_init(test_flags, test_flags_length, NULL, argv_no_such_short,
309 |     argc_no_such_short);
310 |   TEST_CHECK(!status);
311 |   yargs_free();
312 | 
313 |   const YargsFlag empty_flags[] = {};
314 |   const int empty_flags_length = 0;
315 | 
316 |   status = yargs_init(empty_flags, empty_flags_length, NULL, argv_no_args,
317 |     argc_no_args);
318 |   TEST_CHECK(status);
319 |   yargs_free();
320 | 
321 |   status = yargs_init(empty_flags, empty_flags_length, NULL, argv_unnamed_only,
322 |     argc_unnamed_only);
323 |   TEST_CHECK(status);
324 |   TEST_INTEQ(2, yargs_get_unnamed_length());
325 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
326 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
327 |   yargs_free();
328 | }
329 | 
330 | void test_LoadFileFromContents() {
331 |   const char* some_name = "some_value";
332 |   int32_t no_short_name = 10;
333 |   bool some_other_name = false;
334 |   float float_arg = 23.0f;
335 |   YargsFlag test_flags[] = {
336 |     YARGS_STRING("some_name", "a", &some_name, "Some name."),
337 |     YARGS_INT32("no_short_name", NULL, &no_short_name, "No short name."),
338 |     YARGS_BOOL("some_other_name", "b", &some_other_name, "Some other name."),
339 |     YARGS_FLOAT("float_arg", "f", &float_arg, "Float argument"),
340 |   };
341 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
342 | 
343 |   const char* contents1 =
344 |     "program --some_name new_value --no_short_name=32 --some_other_name "
345 |     "--float_arg -99.9";
346 |   bool status = LoadFromFileContents(test_flags, test_flags_length,
347 |     contents1);
348 |   TEST_CHECK(status);
349 |   TEST_STREQ("new_value", some_name);
350 |   TEST_INTEQ(32, no_short_name);
351 |   TEST_CHECK(some_other_name);
352 |   TEST_FLTEQ(-99.9, float_arg, 0.0001f);
353 |   TEST_INTEQ(0, yargs_get_unnamed_length());
354 |   yargs_free();
355 | 
356 |   some_name = "some_value";
357 |   no_short_name = 10;
358 |   some_other_name = false;
359 |   float_arg = 23.0f;
360 |   const char* content_no_args = "program";
361 |   status = LoadFromFileContents(test_flags, test_flags_length, content_no_args);
362 |   TEST_CHECK(status);
363 |   TEST_STREQ("some_value", some_name);
364 |   TEST_INTEQ(10, no_short_name);
365 |   TEST_CHECK(!some_other_name);
366 |   TEST_FLTEQ(23.0f, float_arg, 0.0001f);
367 |   TEST_INTEQ(0, yargs_get_unnamed_length());
368 |   yargs_free();
369 | 
370 |   some_name = "some_value";
371 |   no_short_name = 10;
372 |   some_other_name = false;
373 |   float_arg = 23.0f;
374 |   const char* content_short_names =
375 |     "program -a new_value unnamed1 -b -f=-99.9 unnamed2";
376 |   status = LoadFromFileContents(test_flags, test_flags_length,
377 |     content_short_names);
378 |   TEST_CHECK(status);
379 |   TEST_STREQ("new_value", some_name);
380 |   TEST_INTEQ(10, no_short_name);
381 |   TEST_CHECK(some_other_name);
382 |   TEST_FLTEQ(-99.9, float_arg, 0.0001f);
383 |   TEST_INTEQ(2, yargs_get_unnamed_length());
384 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
385 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
386 |   yargs_free();
387 | 
388 |   const char* content_bad_short = "program -ab=value";
389 |   status = LoadFromFileContents(test_flags, test_flags_length,
390 |     content_bad_short);
391 |   TEST_CHECK(!status);
392 |   yargs_free();
393 | 
394 |   some_name = "some_value";
395 |   no_short_name = 10;
396 |   some_other_name = false;
397 |   float_arg = 23.0f;
398 |   const char* content_unnamed_only = "program unnamed1 unnamed2";
399 |   status = LoadFromFileContents(test_flags, test_flags_length,
400 |     content_unnamed_only);
401 |   TEST_CHECK(status);
402 |   TEST_STREQ("some_value", some_name);
403 |   TEST_INTEQ(10, no_short_name);
404 |   TEST_CHECK(!some_other_name);
405 |   TEST_FLTEQ(23.0f, float_arg, 0.0001f);
406 |   TEST_INTEQ(2, yargs_get_unnamed_length());
407 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
408 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
409 |   yargs_free();
410 | 
411 |   const char* contents_no_such = "program --no_such";
412 |   status = LoadFromFileContents(test_flags, test_flags_length,
413 |     contents_no_such);
414 |   TEST_CHECK(!status);
415 |   yargs_free();
416 | 
417 |   const char* contents_no_such_short = "program -x";
418 |   status = LoadFromFileContents(test_flags, test_flags_length,
419 |     contents_no_such_short);
420 |   TEST_CHECK(!status);
421 |   yargs_free();
422 | 
423 |   const YargsFlag empty_flags[] = {};
424 |   const int empty_flags_length = 0;
425 | 
426 |   status = LoadFromFileContents(empty_flags, empty_flags_length,
427 |     content_no_args);
428 |   TEST_CHECK(status);
429 |   yargs_free();
430 | 
431 |   status = LoadFromFileContents(empty_flags, empty_flags_length,
432 |     content_unnamed_only);
433 |   TEST_CHECK(status);
434 |   TEST_INTEQ(2, yargs_get_unnamed_length());
435 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
436 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
437 |   yargs_free();
438 | }
439 | 
440 | void test_yargs_load_from_file() {
441 |   const char* some_name = "some_value";
442 |   int32_t no_short_name = 10;
443 |   bool some_other_name = false;
444 |   float float_arg = 23.0f;
445 |   YargsFlag test_flags[] = {
446 |     YARGS_STRING("some_name", "a", &some_name, "Some name."),
447 |     YARGS_INT32("no_short_name", NULL, &no_short_name, "No short name."),
448 |     YARGS_BOOL("some_other_name", "b", &some_other_name, "Some other name."),
449 |     YARGS_FLOAT("float_arg", "f", &float_arg, "Float argument"),
450 |   };
451 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
452 | 
453 |   const char* contents1 =
454 |     "program --some_name new_value --no_short_name=32 --some_other_name "
455 |     "--float_arg -99.9";
456 |   const char* test_filename1 = "/tmp/yargs_test_file1.txt";
457 |   FILE* test_file1 = fopen(test_filename1, "w");
458 |   fprintf(test_file1, "%s", contents1);
459 |   fclose(test_file1);
460 |   bool status = yargs_load_from_file(test_flags, test_flags_length,
461 |     test_filename1);
462 |   TEST_CHECK(status);
463 |   TEST_STREQ("new_value", some_name);
464 |   TEST_INTEQ(32, no_short_name);
465 |   TEST_CHECK(some_other_name);
466 |   TEST_FLTEQ(-99.9, float_arg, 0.0001f);
467 |   TEST_INTEQ(0, yargs_get_unnamed_length());
468 |   yargs_free();
469 | }
470 | 
471 | void test_yargs_save_to_file() {
472 |   const char* some_name = "some_value";
473 |   int32_t no_short_name = 10;
474 |   bool some_other_name = false;
475 |   float float_arg = 23.0f;
476 |   YargsFlag test_flags[] = {
477 |     YARGS_STRING("some_name", "a", &some_name, "Some name."),
478 |     YARGS_INT32("no_short_name", NULL, &no_short_name, "No short name."),
479 |     YARGS_BOOL("some_other_name", "b", &some_other_name, "Some other name."),
480 |     YARGS_FLOAT("float_arg", "f", &float_arg, "Float argument"),
481 |   };
482 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
483 | 
484 |   char* argv1[] = {
485 |     "program", "--some_name", "new_value", "unnamed1", "--no_short_name=32",
486 |     "--some_other_name", "--float_arg", "-99.9", "unnamed2",
487 |   };
488 |   const int argc1 = sizeof(argv1) / sizeof(argv1[0]);
489 |   bool status = yargs_init(test_flags, test_flags_length, NULL, argv1, argc1);
490 |   TEST_CHECK(status);
491 |   const char* test_filename1 = "/tmp/yargs_test_file1.txt";
492 |   yargs_save_to_file(test_flags, test_flags_length, test_filename1);
493 |   yargs_free();
494 | 
495 |   some_name = "some_value";
496 |   no_short_name = 10;
497 |   some_other_name = false;
498 |   float_arg = 23.0f;
499 | 
500 |   status = yargs_load_from_file(test_flags, test_flags_length,
501 |     test_filename1);
502 |   TEST_CHECK(status);
503 |   TEST_STREQ("new_value", some_name);
504 |   TEST_INTEQ(32, no_short_name);
505 |   TEST_CHECK(some_other_name);
506 |   TEST_FLTEQ(-99.9, float_arg, 0.0001f);
507 |   TEST_INTEQ(2, yargs_get_unnamed_length());
508 |   TEST_STREQ("unnamed1", yargs_get_unnamed(0));
509 |   TEST_STREQ("unnamed2", yargs_get_unnamed(1));
510 |   yargs_free();
511 | }
512 | 
513 | static void test_yargs_app_name() {
514 |   YargsFlag test_flags[] = {
515 |   };
516 |   const int test_flags_length = sizeof(test_flags) / sizeof(test_flags[0]);
517 |   char* argv[] = { "program" };
518 |   const int argc = sizeof(argv) / sizeof(argv[0]);
519 |   bool status = yargs_init(test_flags, test_flags_length, NULL, argv, argc);
520 |   TEST_CHECK(status);
521 |   TEST_STREQ("program", yargs_app_name());
522 | }
523 | 
// Test registration table: each entry pairs a display name with the test
// function defined above. The trailing {NULL, NULL} entry presumably marks the
// end of the table for the test runner - confirm against the test framework.
TEST_LIST = {
  {"GetFlagWithName", test_GetFlagWithName},
  {"GetFlagWithShortName", test_GetFlagWithShortName},
  {"NormalizeArgs", test_NormalizeArgs},
  {"InterpretValueAsFloat", test_InterpretValueAsFloat},
  {"InterpretValueAsInt32", test_InterpretValueAsInt32},
  {"ValidateYargsFlags", test_ValidateYargsFlags},
  {"yargs_init", test_yargs_init},
  {"LoadFileFromContents", test_LoadFileFromContents},
  {"yargs_load_from_file", test_yargs_load_from_file},
  {"yargs_save_to_file", test_yargs_save_to_file},
  {"yargs_app_name", test_yargs_app_name},
  {NULL, NULL},
};


--------------------------------------------------------------------------------