├── .gitignore
├── README.md
├── app.py
├── requirements.txt
└── voices
    ├── Alex_Danivero.wav
    ├── Denzel_Wash.wav
    ├── Mia.wav
    ├── Nimbus.wav
    ├── Roland.wav
    └── Tony_King.wav


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | # vscode
163 | .vscode/
164 | 
165 | podcasts/
166 | dialogs/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Podcast Generator
 2 | 
 3 | This application is a podcast generator that works 100% locally on your computer. It uses the Phidata assistant client to generate dialogues for a podcast based on user inputs. It is built with Python and uses the Streamlit library for the web interface, the llama3-8b-instruct model for AI dialogue generation, and the TTS (Text-to-Speech) library for generating audio from the generated dialogues.
 4 | 
 5 | ![Podcast Generator](https://i.ibb.co/gZJJYXT/Ekran-g-r-nt-s-2024-05-21-201959.png)
 6 | 
 7 | ## Features
 8 | 
 9 | - User can select a host character and guest characters for the podcast.
10 | - User can define the persona for each character.
11 | - User can specify the topic of the podcast.
12 | - User can specify the number of dialogues in the podcast.
13 | - The application generates a transcript of the podcast.
14 | - The application generates an audio file of the podcast.
15 | 
16 | ## Prerequisites
17 | 
18 | Before running the application, ensure that you meet the following prerequisites:
19 | 
20 | - Download and install Ollama from https://ollama.com/, then pull the model used by the application: `ollama pull llama3:8b-instruct-q4_K_M`.
21 | - You need to have `ffmpeg` installed on your system. `ffmpeg` is a free and open-source software project consisting of a large suite of libraries and programs for handling video, audio, and other multimedia files and streams.
22 | - Your system should have at least 4GB of VRAM as the application uses a local TTS model which is resource-intensive.
23 | 
24 | ### Installing ffmpeg
25 | 
26 | #### On Windows
27 | 
28 | 1.  Download the latest version of ffmpeg from the official website (https://ffmpeg.org/download.html).
29 | 2.  Extract the downloaded file.
30 | 3.  Add the bin folder from the extracted file to your system's PATH.
31 |    
32 | Or install it with Chocolatey:
33 | 
34 | ```bash
35 | choco install ffmpeg
36 | ```
37 | 
38 | #### On macOS
39 | 
40 | You can install ffmpeg using Homebrew:
41 | ```bash
42 | brew install ffmpeg
43 | ```
44 | #### On Linux
45 | 
46 | You can install ffmpeg using apt:
47 | 
48 | ```bash
49 | sudo apt update
50 | sudo apt install ffmpeg
51 | ```
52 | 
53 | After installing ffmpeg, you can run the application as described in the "How to Run" section.
54 | ## How to Run
55 | 
56 | 1. Clone the repository.
57 | 2. Install the required Python libraries using pip:
58 | 
59 | ```bash
60 | pip install -r requirements.txt
61 | ```
62 | 
63 | 3. Run the Streamlit application:
64 | 
65 | ```bash
66 | streamlit run app.py
67 | ```
68 | 
69 | 4. Open the application in your web browser at `http://localhost:8501`.
70 | 
71 | ## Usage
72 | 
73 | 1. Select your host character and the guests you would like to have on the show.
74 | 2. Enter your podcast topic.
75 | 3. Set the number of dialogues you want in the podcast.
76 | 4. Click the "Submit" button, then define the persona for each guest in the form that appears.
77 | 5. Click the "Submit" button again to generate the podcast.
78 | 6. The application will generate a transcript and an audio file of the podcast.
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import time
  3 | import os
  4 | from TTS.api import TTS
  5 | import torch
  6 | import subprocess
  7 | from typing import List
  8 | from pydantic import BaseModel, Field
  9 | from phi.assistant import Assistant
 10 | from phi.llm.ollama import Ollama
 11 | 
 12 | 
 13 | st.title("🎙️Podcast Generator🎙️")
 14 | 
 15 | if "characters_and_topics_submitted" not in st.session_state or st.sidebar.button("Restart the podcast"):
 16 |    st.session_state["messages"] = []
 17 |    st.session_state["characters_and_topics_submitted"] = False
 18 |    st.session_state["character_persona_submitted"] = False
 19 |    st.rerun()
 20 | 
 21 | def generate_dialog(number_of_dialogs, timestamp, debug=True):
 22 | 
 23 |     character_personas = ""
 24 |     for guest in st.session_state["guests"]:
 25 |         character_personas += f"- {guest} Persona: {st.session_state[f'{guest}_persona']}\n"
 26 |     
 27 |     guest_introductions = ""
 28 |     for guest in st.session_state["guests"]:
 29 |         guest_introductions += f", {guest}"
 30 | 
 31 |     podcast_template = f"""## Podcast Outline\nThis is a podcast between {st.session_state["host_character"]}{guest_introductions}.\n 
 32 | {st.session_state["host_character"]} is the host of the show.\n
 33 | {st.session_state['podcast_topic']}\n
 34 | Character Personas:\n {character_personas}\n"""
 35 |     instructions = f"""Instructions:\n
 36 | - The podcast should more or less have {number_of_dialogs} dialogs. Always include a closure dialog to the podcast.\n
 37 | - Don't use non-verbal cues like *laughs* or *ahem* or parentheticals in the podcast. Use Hehe or Haha instead of *laughs*.\n
 38 | - Also don't use a speaker label on the content parameter but you should always set speaker key for the correct speaker."""
 39 | 
 40 |     st.write(podcast_template)
 41 |     if not os.path.exists('podcasts'):
 42 |         os.makedirs('podcasts')
 43 |     transcript_file_name = f"podcasts/podcast{timestamp}.txt"
 44 |     transcript_file = open(transcript_file_name, "w")
 45 | 
 46 |     
 47 | 
 48 |     class PodcastScript(BaseModel):
 49 |         dialogs: List[dict] = Field(..., description="Contains dictionaries with these key values: speaker, content and the dialog_counter. speaker: name of the speaker, content: content of the speech, dialog_counter: The number of the dialog. Should be incremented by 1 for each dialog")
 50 |     
 51 | 
 52 |     podcast_assistant = Assistant(
 53 |         llm=Ollama(model="llama3:8b-instruct-q4_K_M"),
 54 |         description="You are a podcast transcript writer",
 55 |         output_model=PodcastScript
 56 |     )
 57 |     
 58 |     result = podcast_assistant.run(f"Generate a podcast transcript for this Podcast Outline: {podcast_template} {instructions}")
 59 |         
 60 |     print(result.dialogs)
 61 | 
 62 |     dialogs = result.dialogs
 63 | 
 64 |     for dialog in dialogs:
 65 |         transcript_line = dialog['speaker'] + " says: " + dialog['content']  + "\n"
 66 |         transcript_file.write(transcript_line)
 67 | 
 68 |     transcript_file.close()
 69 |     return dialogs
 70 | 
 71 | def generate_audio(dialogs, timestamp):
 72 |     device = "cuda" if torch.cuda.is_available() else "cpu"
 73 |     voice_names = {
 74 |         "Mia"    : r"voices\Mia.wav",
 75 |         "Denzel" : r"voices\Denzel_Wash.wav",
 76 |         "Alex"   : r"voices\Alex_Danivero.wav",
 77 |         "Nimbus" : r"voices\Nimbus.wav",
 78 |         "Tony"   : r"voices\Tony_King.wav",
 79 |         "Roland" : r"voices\Roland.wav",
 80 |     }
 81 |     dialog_files = []
 82 | 
 83 |     if not os.path.exists('dialogs'):
 84 |         os.makedirs('dialogs')
 85 |         
 86 |     concat_file = open("concat.txt", "w")
 87 |     tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 88 |     try:
 89 |         for i, dialog in enumerate(dialogs):
 90 |             filename = f"dialogs/dialog{i}.wav"
 91 | 
 92 |             if len(dialog["content"]) > 250:
 93 |                 split_sentences = True
 94 |             else:
 95 |                 split_sentences = False
 96 | 
 97 |             tts.tts_to_file(text=dialog["content"], speaker_wav=voice_names[dialog["speaker"]], language="en", split_sentences=split_sentences ,file_path=filename)
 98 |             concat_file.write("file " + filename + "\n")
 99 |             dialog_files.append(filename)
100 |     except Exception as e:
101 |         print(f"ERROR: {e}")
102 | 
103 |     concat_file.close()
104 | 
105 |     podcast_file = f"podcasts/podcast{timestamp}.wav"
106 | 
107 |     print("Concatenating audio")
108 |     subprocess.run(f"ffmpeg -f concat -safe 0 -i concat.txt -c copy {podcast_file}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
109 | 
110 |     os.unlink("concat.txt")
111 | 
112 |     for file in dialog_files:
113 |         os.unlink(file)
114 |     
115 |     st.audio(podcast_file, format='audio/wav')
116 | 
def generate_podcast():
    """Run the full pipeline: generate the transcript, then the audio.

    Reads ``dialog_count`` from ``st.session_state`` and uses the current time
    as the shared suffix of the transcript and audio file names.
    """
    current_time = time.time()
    with st.spinner("📜 Generating the transcript..."):
        # The podcast topic is read from session state inside generate_dialog;
        # it must not be passed here, where the third positional parameter is
        # the `debug` flag.
        dialogs = generate_dialog(st.session_state["dialog_count"], current_time)
    st.write("Transcript generated successfully")
    with st.spinner("🎤 Generating the audio..."):
        generate_audio(dialogs, current_time)
124 |     
# Step 1: choose host, guests, topic and length. Shown until it is submitted.
if not st.session_state["characters_and_topics_submitted"]:
    with st.form("characters_and_topics"):
        st.selectbox(
            label="Select your host character",
            options=("Tony", "Mia"),
            key="host_character",
        )
        st.multiselect(
            label="Select the guests you would like to have on the show",
            options=["Denzel", "Alex", "Nimbus", "Tony", "Roland"],
            key="guests",
        )
        st.text_area(label="Enter your podcast topic here", key="podcast_topic")
        st.slider(label="Number of dialogs", min_value=7, max_value=15, value=12, key="dialog_count")
        setup_submitted = st.form_submit_button("Submit")
        st.session_state["characters_and_topics_submitted"] = setup_submitted

# Step 2: one persona text area per selected guest.
if st.session_state["characters_and_topics_submitted"]:
    with st.form("character_persona"):
        for guest_name in st.session_state["guests"]:
            st.text_area(label=f"Enter persona for {guest_name}", key=f"{guest_name}_persona")
        personas_submitted = st.form_submit_button("Submit")
        st.session_state["character_persona_submitted"] = personas_submitted

# Step 3: generate the podcast once the personas were submitted in this run.
if st.session_state["character_persona_submitted"]:
    generate_podcast()
    st.write("Podcast generated successfully")


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | phidata==2.3.90
2 | pydantic==2.7.1
3 | streamlit==1.29.0
4 | torch==2.2.0+cu121
5 | TTS==0.22.0
6 | 


--------------------------------------------------------------------------------
/voices/Alex_Danivero.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kturung/streamlit_podcast_generator/2be90da9652e04d625e2fd00d042ede40c8a6011/voices/Alex_Danivero.wav


--------------------------------------------------------------------------------
/voices/Denzel_Wash.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kturung/streamlit_podcast_generator/2be90da9652e04d625e2fd00d042ede40c8a6011/voices/Denzel_Wash.wav


--------------------------------------------------------------------------------
/voices/Mia.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kturung/streamlit_podcast_generator/2be90da9652e04d625e2fd00d042ede40c8a6011/voices/Mia.wav


--------------------------------------------------------------------------------
/voices/Nimbus.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kturung/streamlit_podcast_generator/2be90da9652e04d625e2fd00d042ede40c8a6011/voices/Nimbus.wav


--------------------------------------------------------------------------------
/voices/Roland.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kturung/streamlit_podcast_generator/2be90da9652e04d625e2fd00d042ede40c8a6011/voices/Roland.wav


--------------------------------------------------------------------------------
/voices/Tony_King.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kturung/streamlit_podcast_generator/2be90da9652e04d625e2fd00d042ede40c8a6011/voices/Tony_King.wav


--------------------------------------------------------------------------------