├── LICENSE
├── README.md
├── architecture.svg
├── config.yaml.sample
├── data_server
    ├── append_low_frequency_chars.py
    ├── character_frequency.py
    ├── clone.py
    ├── create_dataset.py
    ├── create_filehashes.py
    ├── dataset_filters.py
    ├── db_pool_proxy.py
    ├── exclusion_chars
    │   ├── de.txt
    │   ├── en.txt
    │   └── es.txt
    ├── import_dataset.py
    ├── sanity_check.py
    ├── schema.psql
    ├── server.py
    ├── start_wsgi.sh
    ├── test_batch_whisper.py
    ├── test_batch_workflow.py
    ├── training_session_pg.py
    ├── update_durations.py
    ├── update_lang.py
    ├── utils.py
    ├── validate_media_entries.py
    ├── whisper_benchmark.py
    ├── whisper_multiple_files.py
    ├── whisper_single_file.py
    └── worker.py
├── podcasts
    ├── generate_list_from_podcastindex.py
    ├── html_stats.py
    ├── podcast_lists
    │   ├── rss_feeds_de
    │   ├── rss_feeds_de_at
    │   ├── rss_feeds_de_ch
    │   └── rss_feeds_fr
    ├── search_ddg.py
    ├── simple_podcast_downloader.py
    └── utils.py
├── requirements.txt
├── requirements_worker.txt
├── stats_screenshot.png
└── tedx
    ├── filter_by_language.py
    ├── get_tedx_titles.sh
    └── tedx_yt_videolist


/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/README.md


--------------------------------------------------------------------------------
/architecture.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/architecture.svg


--------------------------------------------------------------------------------
/config.yaml.sample:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/config.yaml.sample


--------------------------------------------------------------------------------
/data_server/append_low_frequency_chars.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/append_low_frequency_chars.py


--------------------------------------------------------------------------------
/data_server/character_frequency.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/character_frequency.py


--------------------------------------------------------------------------------
/data_server/clone.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/clone.py


--------------------------------------------------------------------------------
/data_server/create_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/create_dataset.py


--------------------------------------------------------------------------------
/data_server/create_filehashes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/create_filehashes.py


--------------------------------------------------------------------------------
/data_server/dataset_filters.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/dataset_filters.py


--------------------------------------------------------------------------------
/data_server/db_pool_proxy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/db_pool_proxy.py


--------------------------------------------------------------------------------
/data_server/exclusion_chars/de.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/exclusion_chars/de.txt


--------------------------------------------------------------------------------
/data_server/exclusion_chars/en.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/exclusion_chars/en.txt


--------------------------------------------------------------------------------
/data_server/exclusion_chars/es.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/exclusion_chars/es.txt


--------------------------------------------------------------------------------
/data_server/import_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/import_dataset.py


--------------------------------------------------------------------------------
/data_server/sanity_check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/sanity_check.py


--------------------------------------------------------------------------------
/data_server/schema.psql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/schema.psql


--------------------------------------------------------------------------------
/data_server/server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/server.py


--------------------------------------------------------------------------------
/data_server/start_wsgi.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/start_wsgi.sh


--------------------------------------------------------------------------------
/data_server/test_batch_whisper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/test_batch_whisper.py


--------------------------------------------------------------------------------
/data_server/test_batch_workflow.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/test_batch_workflow.py


--------------------------------------------------------------------------------
/data_server/training_session_pg.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/training_session_pg.py


--------------------------------------------------------------------------------
/data_server/update_durations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/update_durations.py


--------------------------------------------------------------------------------
/data_server/update_lang.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/update_lang.py


--------------------------------------------------------------------------------
/data_server/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/utils.py


--------------------------------------------------------------------------------
/data_server/validate_media_entries.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/validate_media_entries.py


--------------------------------------------------------------------------------
/data_server/whisper_benchmark.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/whisper_benchmark.py


--------------------------------------------------------------------------------
/data_server/whisper_multiple_files.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/whisper_multiple_files.py


--------------------------------------------------------------------------------
/data_server/whisper_single_file.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/whisper_single_file.py


--------------------------------------------------------------------------------
/data_server/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/data_server/worker.py


--------------------------------------------------------------------------------
/podcasts/generate_list_from_podcastindex.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/generate_list_from_podcastindex.py


--------------------------------------------------------------------------------
/podcasts/html_stats.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/html_stats.py


--------------------------------------------------------------------------------
/podcasts/podcast_lists/rss_feeds_de:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/podcast_lists/rss_feeds_de


--------------------------------------------------------------------------------
/podcasts/podcast_lists/rss_feeds_de_at:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/podcast_lists/rss_feeds_de_at


--------------------------------------------------------------------------------
/podcasts/podcast_lists/rss_feeds_de_ch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/podcast_lists/rss_feeds_de_ch


--------------------------------------------------------------------------------
/podcasts/podcast_lists/rss_feeds_fr:
--------------------------------------------------------------------------------
1 | https://cgwhy.net/feed/mp3/
2 | 


--------------------------------------------------------------------------------
/podcasts/search_ddg.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/search_ddg.py


--------------------------------------------------------------------------------
/podcasts/simple_podcast_downloader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/podcasts/simple_podcast_downloader.py


--------------------------------------------------------------------------------
/podcasts/utils.py:
--------------------------------------------------------------------------------
1 | ../data_server/utils.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/requirements.txt


--------------------------------------------------------------------------------
/requirements_worker.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/requirements_worker.txt


--------------------------------------------------------------------------------
/stats_screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/stats_screenshot.png


--------------------------------------------------------------------------------
/tedx/filter_by_language.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/tedx/filter_by_language.py


--------------------------------------------------------------------------------
/tedx/get_tedx_titles.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/tedx/get_tedx_titles.sh


--------------------------------------------------------------------------------
/tedx/tedx_yt_videolist:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechcatcher-asr/speechcatcher-data/HEAD/tedx/tedx_yt_videolist


--------------------------------------------------------------------------------