├── utils
    ├── __init__.py
    ├── common.py
    └── regex_utils.py
├── images_utils
    ├── __init__.py
    ├── google_crawl.py
    └── image_grabber.py
├── .gitignore
├── requirements.txt
├── main.py
├── LICENSE
├── audio_utils
    ├── audio_deprecated.py
    └── audio.py
├── TextToVideo.py
├── test_script.txt
├── README.md
├── video_utils
    └── video_segment.py
└── text_utils
    └── text_processor.py


/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/images_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | tts_output/
3 | downloads/
4 | .vscode/
5 | .idea/
6 | output/


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iJohnMaged/Text-To-Video-Py/HEAD/requirements.txt


--------------------------------------------------------------------------------
/utils/common.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
def mkdir(directory):
    """Create ``directory`` (with any missing parents) unless the path exists.

    Args:
        directory (str): Path of the folder to create.
    """
    if os.path.exists(directory):
        # Something already lives at this path; nothing to create.
        return
    os.makedirs(directory)
7 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | from TextToVideo import TextToVideo
 2 | 
 3 | 
 4 | def main():
 5 |     with open("test_script.txt", "r") as f:
 6 |         text = f.read()
 7 |     text = text.replace("\n", " ")
 8 |     ttv = TextToVideo(text, "anime.mp4")
 9 |     ttv.generate_video()
10 |     ttv.save_video()
11 | 
12 | 
13 | if __name__ == "__main__":
14 |     main()


--------------------------------------------------------------------------------
/utils/regex_utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def group_and_split(text, group_re, split_re):
 5 | 
 6 |     matches = re.finditer(group_re, text, re.MULTILINE)
 7 |     groups = []
 8 |     for _, match in enumerate(matches):
 9 |         for i in range(len(match.groups())):
10 |             groups.append(match.group(i + 1))
11 | 
12 |     splits = []
13 |     i = 0
14 |     for split in re.split(split_re, text):
15 |         if len(split.strip()) > 0:
16 |             splits.append((split.strip(), groups[i]))
17 |             i += 1
18 |     return groups, splits


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 John Maged
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/audio_utils/audio_deprecated.py:
--------------------------------------------------------------------------------
 1 | """A class to help with audio processing,
 2 | A wrapper for gTTS
 3 | """
 4 | import os
 5 | from typing import Tuple
 6 | 
 7 | 
 8 | from gtts import gTTS
 9 | from mutagen.mp3 import MP3
10 | from utils.common import mkdir
11 | 
12 | 
class TTS:
    """Caching wrapper around gTTS.

    Each phrase is synthesized once, saved as
    ``<download_location>/<text>.mp3`` and remembered in ``_memory`` so that a
    repeated request returns the stored ``(path, duration)`` pair.
    """

    def __init__(self, download_location: str = "audio"):
        """
        Args:
            download_location (str, optional): folder to download audio to. Defaults to "audio".
        """
        self._memory = {}
        self.download_location = download_location
        mkdir(download_location)
        self._load_audio()

    def _load_audio(self):
        """Rebuild ``_memory`` from the mp3 files already in ``download_location``.

        The cache key is the file name without its ``.mp3`` extension, which
        is the text the file was generated from (see ``getTTS``).
        """
        local_files = {}
        for file in os.listdir(self.download_location):
            audio_file = os.path.join(self.download_location, file)
            # splitext removes only the extension; str.strip(".mp3") would
            # also eat leading/trailing '.', 'm', 'p' and '3' characters of
            # the text itself.
            stem, extension = os.path.splitext(file)
            if extension.lower() != ".mp3" or not os.path.isfile(audio_file):
                # Skip sub-folders and unrelated files instead of handing
                # them to the MP3 parser.
                continue
            mp3 = MP3(audio_file)
            local_files[stem] = (audio_file, mp3.info.length)
        self._memory = local_files

    def getTTS(self, text: str) -> Tuple[str, float]:
        """
        Gets TTS for a given string and downloads it to download_location.

        Args:
            text(str): text to turn to speech
        Returns:
            Tuple[str, float]: path to the saved file and its duration in
            seconds
        """
        if text in self._memory:
            return self._memory[text]

        tts = gTTS(text)
        audio_file = f"{self.download_location}/{text}.mp3"
        tts.save(audio_file)

        # Get audio length for video duration.
        mp3 = MP3(audio_file)
        self._memory[text] = (audio_file, mp3.info.length)
        return self._memory[text]
53 | 


--------------------------------------------------------------------------------
/TextToVideo.py:
--------------------------------------------------------------------------------
 1 | """The main class for the project.
 2 | This module turns given text to a video with images grabbed from google search
 3 | as well as audio from google tts.
 4 | """
 5 | 
 6 | import os
 7 | 
 8 | from moviepy.editor import (
 9 |     concatenate_videoclips,
10 | )
11 | 
12 | from images_utils.image_grabber import ImageGrabber
13 | from text_utils.text_processor import TextProcessor
14 | from audio_utils.audio import WaveNetTTS
15 | 
16 | from utils.common import mkdir
17 | 
18 | 
class VideoElementsNotProcessed(Exception):
    """Raised when a video is saved before any clip has been generated."""


class TextToVideo:
    """Builds a video from a tagged script.

    The script is parsed into segments by ``TextProcessor``; every segment is
    rendered to a clip with images from ``ImageGrabber`` and speech from
    ``WaveNetTTS``, and the clips are concatenated when the video is saved.
    """

    def __init__(self, text: str, output: str):
        """This class processes the images and audio then generates the required video

        Args:
            text (str): Text to turn into images/audio
            output (str): Output file name
        """
        self.text = text
        self.output = output
        self._gid = ImageGrabber(
            search_options="ift:jpg",
            resize=True,
        )
        self._text_processor = TextProcessor(self.text)
        self._wnTTS = WaveNetTTS()
        self._output_folder = "output"
        self._video_clips = []
        mkdir(os.path.join(os.getcwd(), self._output_folder))

    def generate_video(self) -> None:
        """Generates the video clips/segments to be concatenated on save"""

        for segment in self._text_processor.video_segments:
            final_clip = segment.generate_segment(self._wnTTS, self._gid)
            self._video_clips.append(final_clip)

    def save_video(self, fps: int = 24) -> None:
        """Saves the processed video into the output folder

        Args:
            fps (int, optional): Desired video FPS. Defaults to 24.

        Raises:
            VideoElementsNotProcessed: If no clip has been generated yet.
        """

        if not self._video_clips:
            raise VideoElementsNotProcessed(
                "no video clips to save; call generate_video() first"
            )

        final_video = concatenate_videoclips(self._video_clips, method="compose")
        # Honor the caller's frame rate instead of a hard-coded 24.
        final_video.fps = fps
        final_video.write_videofile(f"{self._output_folder}/{self.output}")


--------------------------------------------------------------------------------
/test_script.txt:
--------------------------------------------------------------------------------
1 | [IMAGE: Shonen anime] In anime, shonen in particular, characters will often have incredible powers that will set them aside from the rest of the characters.
2 | [IMAGE: Overpowered anime characters] you will find that the entire anime's reputation is particularly based on the main character's abilities whether it is magic or other
3 | super natural forms of power.
4 | [IMAGE: strength anime characters]
5 | But some characters, however, manage to be heroes in their own way, meaning that they don't actually posses powers.
6 | they go by sheer strength and will power instead. In this video you will see 10 anime characters who are overpowered without super powers.
7 | 
8 | [IMAGE: assassination classroom anime 7] [VOICE: I] Coming at number 10, nagisa Shiota from assassination classroom. Nagisa's physical abilities are mediocre, and it worthy to note that he is one of the weakest amongst the boys as his constitution and strength are more easily comparable to a female of his age. However, Nagisa has an extraordinary amount of natural talent as an assassin due to his impressive observant nature, his remarkable stealthy movements, and his incredible bloodlust, to the point that he was described by Korosensei to be a 'pure assassin'. This inclination has also been made apparent numerous times during the span of the story, such as his duels against Tataoka, his fight against Karma, and finally Korosensei. Several other characters such as Karma and Maehara have noted that he is full of surprises - one can never know when or where Nagisa will aim to strike, and his talent in the art of assassination has garnered respect from even those who are not his peers to the point that others have commented on the potential Nagisa possessed that could be fulfilled in the role of a professional hitman. In the end, though, Nagisa does not pursue this path and decides to use his abilities to help others in the same way Korosensei did: teaching to a "problematic class".[/VOICE]
9 | [IMAGE: no game no life anime 4] [VOICE: I] And at number 9, we have sora from no game no life. Despite being a shut-in, Sora has incredible superhuman charisma and eloquence. He is an excellent and natural speaker, always knowing exactly what to say to get others on his side. He has no problems dealing with people or groups, after overcoming the initial shock. While not as intelligent as Shiro in calculations, he is shown to have a brilliant super-genius IQ and an eidetic memory which enables him to quickly learn, understand and easily recall limitless amounts of information perfectly and rapidly. This is demonstrated during his card game with Stephanie. Rather than computing numbers like Shiro, Sora can quickly determine what his opponents are going to do based on observation of their behavior and motivations, a technique called "cold reading". His amazing observational and deductive skills help him come up with complex plans quickly and efficiently. This is shown during his game against Jibril and when he figured out the previous King's plan to beat the Werebeast. He was even able to learn to read Immanity-go (language) in just about one hour slower than his sister (self-proclaimed).[/VOICE]


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Text-To-Video
 2 | 
 3 | The idea of this project is to be able to _generate_ videos just from text input that includes:
 4 | 
 5 | * Images.
 6 | * Videos.
 7 | * Voice over.
 8 | * Music.
 9 | 
10 | This is a little fun side project that I plan to work on in my free time over the next few weeks.
11 | # Example
12 | [![Video example](https://img.youtube.com/vi/xBDiMOTY1UI/0.jpg)](https://www.youtube.com/watch?v=xBDiMOTY1UI)
13 | 
14 | Script template used for this example can be found [**here**](./test_script.txt)
15 | 
16 | # Current status
17 | 
18 | Given a script (text with a special template), the program will scrape Google Images for the requested images and use WaveNet TTS to voice over the script.
19 | 
20 | # Script template
21 | 
22 | ## Image tag
23 | ```
24 | [IMAGE: Shonen anime] In anime, shonen in particular, characters will often have incredible powers that will set them aside from the rest of the characters.
25 | ```
26 | 
27 | Your script must always start with an `[IMAGE: keyword count]` tag, the `IMAGE` tells the program what images to search and display till the next `[IMAGE]` tag, the program will then turn the following text into voice over, and display each image for `speech duration/count` seconds.
28 | 
29 | `keyword` : The keyword that the program will use to search google images
30 | 
31 | `count` (Optional) : Number of images to be shown in this segment of video. Defaults to 5.
32 | 
33 | ## Voice Tag
34 | ```
35 | [IMAGE: strength anime characters]
36 | But some characters, however, [VOICE: J]manage to be heroes in their own way, meaning that they don't actually posses powers.[/VOICE]
37 | they go by sheer strength and will power instead. In this video you will see 10 anime characters who are overpowered without super powers.
38 | ```
39 | You can wrap text in `[VOICE: voice_name]` tag, notice there's a closing tag for this one `[/VOICE]`, you can find more info about voice names [here](https://cloud.google.com/text-to-speech/docs/voices).
40 | 
41 | For simplicity, this program uses `en-US-Wavenet-` voices, and you need to pass the voice name letter to the tag, this will be changed later as more language support is added.
42 | 
43 | # How to run
44 | 
45 | Create a virtual environment and run
46 | 
47 | `pip install -r requirements.txt`
48 | 
49 | Then you can create a script in `test_script.txt` and run 
50 | 
51 | `python main.py`
52 | 
53 | # Important Note
54 | This program uses Google's `Cloud Text-to-Speech`, so you need to enable the API and set up authentication before you can try this program. More information on how to do this is available [here](https://cloud.google.com/text-to-speech/docs/libraries).
55 | 
56 | 
57 | 
58 | # TODOs
59 | - [ ] Create a pipeline that handles the text processing for the template with many tags.
60 | - [ ] Add new video tag (from file) to the script template.
61 | - [ ] Add new music tag (from file) to the script template.
62 | - [ ] Add tags for special video effects.
63 | - [ ] Add multiple keyword for image search, comma separated.
64 | - [ ] Add ArgParser to the program instead of using `main.py`.
65 | - [ ] Variable display time for images.
66 | - [ ] Validation and testing.
67 | - [ ] Add other languages support for TTS.
68 | - [ ] Explore more TTS options, and machine learning tools that can help with this project.
69 | 
70 | # License
71 | [MIT](./LICENSE)


--------------------------------------------------------------------------------
/video_utils/video_segment.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from typing import List, Dict
 3 | from moviepy.editor import (
 4 |     ImageClip,
 5 |     AudioFileClip,
 6 |     VideoClip,
 7 |     concatenate_videoclips,
 8 |     concatenate_audioclips,
 9 | )
10 | from audio_utils.audio import WaveNetTTS
11 | from images_utils.image_grabber import ImageGrabber
12 | 
13 | 
class VideoSegment:
    """This class represents and handles a single video segment, which
    includes audio, images. Input text is split into different segments
    every [IMAGE] tag.

    Attributes:
        text (str): Raw input text for this segment
        voiceover_text (List[Dict]): List of Dict with the keys ``"voice"``
        (voice name) and ``"text"`` (text to be voiced over).
        image_keyword (str): Keyword for images to be scraped for this segment.
        segment_number (int): number of segment in the entire video.
        images_number (int): number of images to be displayed in this segment
    """

    def __init__(
        self,
        text: str,
        voiceover_text: List[Dict],
        image_keyword: str,
        segment_number: int,
        images_number: int = 5,
    ):
        self.segment_number = segment_number
        self.text = text
        self.voiceover_text = voiceover_text
        self.image_keyword = image_keyword
        self.images_number = images_number

    def generate_segment(self, tts: WaveNetTTS, gid: ImageGrabber) -> VideoClip:
        """Generates a video segment by searching the images, combining them
        and adding TTS voice over.

        Args:
            tts (WaveNetTTS): TTS object
            gid (ImageGrabber): Image search/grabber object

        Returns:
            VideoClip: complete video clip combined from images/TTS.

        Raises:
            ValueError: If there is no image to show, either because the
                search returned nothing or ``images_number`` is not positive.
        """

        print(f"[INFO] Generating video segment #{self.segment_number}")
        audio_clips = []

        # Total duration of segment in seconds
        segment_duration = 0

        # Start by first generating TTS audio file
        for idx, voiceover in enumerate(self.voiceover_text):
            audio_file, duration = tts.generate_tts(
                voiceover["text"],
                f"video-segment{self.segment_number}-{idx+1}.mp3",
                voiceover["voice"],
            )
            # Add audio duration to the segment duration
            segment_duration += duration
            audio_clips.append(AudioFileClip(audio_file))

        images = gid.search_image(self.image_keyword)
        # random.sample() raises when asked for more items than exist, so
        # never request more images than the search actually produced.
        images_to_show = min(self.images_number, len(images))
        if images_to_show <= 0:
            raise ValueError(
                f"no images available for segment #{self.segment_number} "
                f"(keyword: {self.image_keyword!r})"
            )
        if images_to_show < self.images_number:
            print(
                f"[WARN] Only {images_to_show} of {self.images_number} requested "
                f"images available for segment #{self.segment_number}"
            )

        # Image duration is total duration / number of images, this could be
        # changed to be random period of times between 0 and segment_duration
        image_duration = segment_duration / images_to_show
        random_images = random.sample(images, images_to_show)

        # Create the image clips and produce final video
        image_clips = [
            ImageClip(video_image, duration=image_duration)
            for video_image in random_images
        ]

        audio_clip = concatenate_audioclips(audio_clips)
        final_clip = concatenate_videoclips(image_clips, method="compose")
        final_clip.fps = 24
        final_clip = final_clip.set_audio(audio_clip)
        return final_clip
89 | 


--------------------------------------------------------------------------------
/text_utils/text_processor.py:
--------------------------------------------------------------------------------
 1 | """This module processes custom text input used to generate videos.
 2 | So far the only tag supported is: [IMAGE: <IMAGE KEYWORD>]
 3 | """
 4 | 
 5 | import re
 6 | from typing import List, Dict
 7 | from video_utils.video_segment import VideoSegment
 8 | 
 9 | 
class TextProcessor:
    """Parses a tagged script into ``VideoSegment`` objects.

    ``[IMAGE: keyword count]`` tags start a new segment; ``[VOICE: name]`` ...
    ``[/VOICE]`` tags select the voice for the wrapped text.

    Attributes:
        text (str): The raw script.
        video_segments (list): One ``VideoSegment`` per non-empty section.
        sentences (list): ``(sentence, image_keyword)`` tuples, one per
            segment.
    """

    TextTemplateRe = {
        "image": r"\[IMAGE: (.+?)(\d*?)]",
        "split_image": r"\[IMAGE: .+?\d*?]",
        "search_voice": r"\[VOICE: (.+?)](.+?)\[\/VOICE]",
        "split_voice": r"\[VOICE: .+?](.+?)\[\/VOICE]",
    }

    # Number of images used when an [IMAGE] tag does not specify a count.
    _DEFAULT_IMAGES_NUMBER = 5

    def __init__(self, text: str):
        """
        Args:
            text (str): Text to be processed
        """
        self.text = text
        self.video_segments = []
        self.sentences = []
        print("[INFO] Processing text...")
        self._process_text_for_images()
        print("[INFO] Processed text..")

    @classmethod
    def _split_image_sections(cls, text: str) -> List[tuple]:
        """Splits ``text`` into the sections introduced by [IMAGE] tags.

        Each tag is paired with the text that follows it (up to the next tag)
        by position, so a tag without text can never shift the keywords of
        the sections after it.

        Args:
            text (str): Script to split.

        Returns:
            List[tuple]: ``(sentence, image_keyword, images_number)`` for
            every tag that is followed by non-blank text.

        Raises:
            ValueError: If non-blank text appears before the first tag (or
                there is no tag at all), since no keyword applies to it.
        """
        matches = list(re.finditer(cls.TextTemplateRe["image"], text, re.DOTALL))

        preamble = text[: matches[0].start()] if matches else text
        if preamble.strip():
            raise ValueError("script text must start with an [IMAGE: ...] tag")

        sections = []
        for idx, match in enumerate(matches):
            if idx + 1 < len(matches):
                section_end = matches[idx + 1].start()
            else:
                section_end = len(text)
            sentence = text[match.end():section_end].strip()
            if not sentence:
                # A tag with nothing to narrate produces no segment.
                continue
            try:
                images_number = int(match.group(2))
            except ValueError:
                # The count is optional; an empty group means "use default".
                images_number = cls._DEFAULT_IMAGES_NUMBER
            # The lazy keyword group keeps the blank before the count.
            sections.append((sentence, match.group(1).strip(), images_number))
        return sections

    def _process_text_for_images(self) -> None:
        """processes and formats text
        Fills ``self.video_segments`` with one ``VideoSegment`` per section
        and ``self.sentences`` with the matching
        Tuple[sentence, images_keyword]. The sentence is used for voice over
        and images_keyword is used for image search over this segment.
        """
        for sentence, keyword, images_number in self._split_image_sections(
            self.text
        ):
            self.video_segments.append(
                VideoSegment(
                    sentence,
                    self._process_voices(sentence),
                    keyword,
                    len(self.video_segments) + 1,
                    images_number,
                )
            )
            self.sentences.append((sentence, keyword))

    def _process_voices(self, text) -> List[Dict]:
        """Extracts [VOICE] tags from text and sets every group of text to
        the correct voice.

        Args:
            text (str): Text to be processed

        Returns:
            List[Dict]: List of Dict of this format {"voice": str, "text": str}
            Where voice is the voice name and text is the text to be voiced
            over. Text outside any [VOICE] tag gets the voice "DEFAULT".
        """
        voiceover_segment = []

        def add(voice, chunk):
            chunk = chunk.strip()
            if chunk:
                voiceover_segment.append({"voice": voice, "text": chunk})

        # Walk the tags once, emitting the untagged text before each tag and
        # then the tagged text, so a voice is assigned by position rather
        # than by comparing text.
        cursor = 0
        for match in re.finditer(
            TextProcessor.TextTemplateRe["search_voice"],
            text,
            flags=re.DOTALL | re.MULTILINE,
        ):
            add("DEFAULT", text[cursor:match.start()])
            add(match.group(1), match.group(2))
            cursor = match.end()
        add("DEFAULT", text[cursor:])

        return voiceover_segment
99 | 


--------------------------------------------------------------------------------
/audio_utils/audio.py:
--------------------------------------------------------------------------------
  1 | """A wrapper for google cloud TextToSpeech service which utilizes WaveNet to generate speech.
  2 | """
  3 | 
  4 | import os
  5 | from typing import Tuple
  6 | from google.cloud import texttospeech
  7 | from mutagen.mp3 import MP3
  8 | from utils.common import mkdir
  9 | 
 10 | 
 11 | class WaveNetTTS:
 12 | 
 13 |     VOICES = {
 14 |         "A": ("en-US-Wavenet-A", 1),
 15 |         "B": ("en-US-Wavenet-B", 1),
 16 |         "C": ("en-US-Wavenet-C", 2),
 17 |         "D": ("en-US-Wavenet-D", 1),
 18 |         "E": ("en-US-Wavenet-E", 2),
 19 |         "F": ("en-US-Wavenet-F", 2),
 20 |         "G": ("en-US-Wavenet-G", 2),
 21 |         "H": ("en-US-Wavenet-H", 2),
 22 |         "I": ("en-US-Wavenet-I", 1),
 23 |         "J": ("en-US-Wavenet-J", 1),
 24 |         "DEFAULT": ("en-US-Wavenet-J", 1),
 25 |     }
 26 | 
 27 |     @classmethod
 28 |     def get_voices(cls, gender):
 29 |         """Class method to return all voices by given gender
 30 | 
 31 |         Args:
 32 |             gender (str): gender to filter by
 33 | 
 34 |         Returns:
 35 |             List[Tuple[str, int]]: List of Tuples of (voice_name, gender)
 36 |         """
 37 |         if gender.lower() == "male":
 38 |             gender = 1
 39 |         elif gender.lower() == "female":
 40 |             gender = 2
 41 |         else:
 42 |             return None
 43 |         return [v for _, v in WaveNetTTS.VOICE_NAMES.items() if v[1] == gender]
 44 | 
 45 |     def __init__(
 46 |         self,
 47 |         audio_config: texttospeech.AudioConfig = None,
 48 |     ):
 49 |         """Initializes client to google's tts
 50 | 
 51 |         Args:
 52 |             audio_config (texttospeech.AudioConfig, optional): Audio configs like pitch, speed, more info on google tts
 53 |             documentation. Defaults to None.
 54 |         """
 55 |         self.client = texttospeech.TextToSpeechClient()
 56 |         self.audio_config = audio_config
 57 |         if self.audio_config is None:
 58 |             self.audio_config = texttospeech.AudioConfig(
 59 |                 audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=1
 60 |             )
 61 |         self.output = os.path.join(os.getcwd(), "tts_output")
 62 |         mkdir(self.output)
 63 | 
 64 |     def generate_tts(
 65 |         self, text: str, filename: str, voice_name: str = None
 66 |     ) -> Tuple[str, float]:
 67 |         """Synthesizes speech and generates the audio file for a given text
 68 | 
 69 |         Args:
 70 |             text (str): text to turn into speech
 71 |             filename (str): filename to save output
 72 |             voice (texttospeech.VoiceSelectionParams, optional): Voice selection for WaveNet. Defaults to None.
 73 |         Returns:
 74 |             Tuple[str, float]: output audio file path, audio file duration in seconds
 75 |         """
 76 |         if voice_name is None:
 77 |             voice = texttospeech.VoiceSelectionParams(
 78 |                 language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
 79 |             )
 80 |         else:
 81 |             voice_params = WaveNetTTS.VOICES[voice_name]
 82 |             voice = texttospeech.VoiceSelectionParams(
 83 |                 language_code="en-US",
 84 |                 name=voice_params[0],
 85 |                 ssml_gender=voice_params[1],
 86 |             )
 87 |         synthesis_input = texttospeech.SynthesisInput(text=text)
 88 |         response = self.client.synthesize_speech(
 89 |             input=synthesis_input, voice=voice, audio_config=self.audio_config
 90 |         )
 91 | 
 92 |         audio_file = os.path.join(self.output, filename)
 93 |         with open(audio_file, "wb") as out:
 94 |             # Write the response to the output file.
 95 |             out.write(response.audio_content)
 96 |             print(f'[INFO] Audio content written to file "{self.output}/{filename}"')
 97 | 
 98 |         mp3 = MP3(audio_file)
 99 |         return audio_file, mp3.info.length
100 | 


--------------------------------------------------------------------------------
/images_utils/google_crawl.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # requires: selenium, chromium-driver, retry
  3 | # Thanks to `Sam Watkins` on stackoverflow for this file. :)
  4 | # https://stackoverflow.com/users/218294/sam-watkins
  5 | # https://stackoverflow.com/questions/20716842/python-download-images-from-google-image-search/61982397#61982397?newreg=1c27b159a6a44bd08ae95ddbcf083ae1
  6 | 
  7 | from selenium import webdriver
  8 | from webdriver_manager.chrome import ChromeDriverManager
  9 | from selenium.webdriver.chrome.options import Options
 10 | import selenium.common.exceptions as sel_ex
 11 | import sys
 12 | import time
 13 | import urllib.parse
 14 | from retry import retry
 15 | import argparse
 16 | import logging
 17 | 
 18 | logging.basicConfig(stream=sys.stderr, level=logging.INFO)
 19 | logger = logging.getLogger()
 20 | retry_logger = None
 21 | 
 22 | css_thumbnail = "img.Q4LuWd"
 23 | css_large = "img.n3VNCb"
 24 | css_load_more = ".mye4qd"
 25 | selenium_exceptions = (
 26 |     sel_ex.ElementClickInterceptedException,
 27 |     sel_ex.ElementNotInteractableException,
 28 |     sel_ex.StaleElementReferenceException,
 29 | )
 30 | 
 31 | 
 32 | def scroll_to_end(wd):
 33 |     wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
 34 | 
 35 | 
 36 | @retry(exceptions=KeyError, tries=6, delay=0.1, backoff=2, logger=retry_logger)
 37 | def get_thumbnails(wd, want_more_than=0):
 38 |     wd.execute_script("document.querySelector('{}').click();".format(css_load_more))
 39 |     thumbnails = wd.find_elements_by_css_selector(css_thumbnail)
 40 |     n_results = len(thumbnails)
 41 |     if n_results <= want_more_than:
 42 |         raise KeyError("no new thumbnails")
 43 |     return thumbnails
 44 | 
 45 | 
 46 | @retry(exceptions=KeyError, tries=6, delay=0.1, backoff=2, logger=retry_logger)
 47 | def get_image_src(wd):
 48 |     actual_images = wd.find_elements_by_css_selector(css_large)
 49 |     sources = []
 50 |     for img in actual_images:
 51 |         src = img.get_attribute("src")
 52 |         if src.startswith("http") and not src.startswith(
 53 |             "https://encrypted-tbn0.gstatic.com/"
 54 |         ):
 55 |             sources.append(src)
 56 |     if not len(sources):
 57 |         raise KeyError("no large image")
 58 |     return sources
 59 | 
 60 | 
 61 | @retry(
 62 |     exceptions=selenium_exceptions, tries=6, delay=0.1, backoff=2, logger=retry_logger
 63 | )
 64 | def retry_click(el):
 65 |     el.click()
 66 | 
 67 | 
 68 | def get_images(wd, start=0, n=20, out=None):
 69 |     thumbnails = []
 70 |     count = len(thumbnails)
 71 |     while count < n:
 72 |         scroll_to_end(wd)
 73 |         try:
 74 |             thumbnails = get_thumbnails(wd, want_more_than=count)
 75 |         except KeyError as e:
 76 |             logger.warning("cannot load enough thumbnails")
 77 |             break
 78 |         count = len(thumbnails)
 79 |     sources = []
 80 |     for tn in thumbnails:
 81 |         try:
 82 |             retry_click(tn)
 83 |         except selenium_exceptions as e:
 84 |             logger.warning("main image click failed")
 85 |             continue
 86 |         sources1 = []
 87 |         try:
 88 |             sources1 = get_image_src(wd)
 89 |         except KeyError as e:
 90 |             pass
 91 |             # logger.warning("main image not found")
 92 |         if not sources1:
 93 |             tn_src = tn.get_attribute("src")
 94 |             if not tn_src.startswith("data"):
 95 |                 logger.warning("no src found for main image, using thumbnail")
 96 |                 sources1 = [tn_src]
 97 |             else:
 98 |                 logger.warning("no src found for main image, thumbnail is a data URL")
 99 |         for src in sources1:
100 |             if not src in sources:
101 |                 sources.append(src)
102 |                 if out:
103 |                     print(src, file=out)
104 |                     out.flush()
105 |         if len(sources) >= n:
106 |             break
107 |     return sources
108 | 
109 | 
def google_image_search(wd, query, safe="off", n=20, opts="", out=None):
    """Open a Google Images search in ``wd`` and return the image URLs found.

    Args:
        wd: Selenium web driver used to load the search page.
        query: Search text; URL-quoted before being placed in the URL.
        safe: Value for the ``safe`` URL parameter, inserted as given.
        n: Number of image URLs requested from ``get_images``.
        opts: Value for the ``tbs`` URL parameter; URL-quoted.
        out: Passed through to ``get_images``.

    Returns:
        Whatever ``get_images`` returns for the loaded page.
    """
    quoted_query = urllib.parse.quote(query)
    quoted_opts = urllib.parse.quote(opts)
    url_template = "https://www.google.com/search?safe={safe}&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img&tbs={opts}"
    wd.get(url_template.format(safe=safe, q=quoted_query, opts=quoted_opts))
    return get_images(wd, n=n, out=out)
118 | 
119 | 
def run_search(query, safe, n, otions, out=None):
    """Run a headless-Chrome Google Images search and return image URLs.

    Args:
        query: Search text.
        safe: Value for Google's ``safe`` URL parameter (e.g. ``"off"``).
        n: Number of image URLs to request.
        otions: Search options string forwarded as ``opts`` to
            ``google_image_search``. The misspelled name is kept because
            renaming it would break keyword callers.
        out: Optional writable text stream forwarded to the search so each
            URL is printed as it is found. It used to be dropped here
            because ``None`` was always passed on.

    Returns:
        The list of image URLs produced by ``google_image_search``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # The context manager shuts the browser down even if the search raises.
    with webdriver.Chrome(
        ChromeDriverManager().install(), options=chrome_options
    ) as wd:
        sources = google_image_search(
            wd, query, safe=safe, n=n, opts=otions, out=out
        )
    return sources


--------------------------------------------------------------------------------
/images_utils/image_grabber.py:
--------------------------------------------------------------------------------
import os
from typing import List, Optional, Tuple

import requests
from google_images_download import google_images_download
from PIL import Image

from utils.common import mkdir

from .google_crawl import run_search
  8 | 
  9 | 
 10 | class ImageGrabber:
 11 |     """Responsible to grab and process images from the internet giving a
 12 |         keyword.
 13 |     Attributes:
 14 |         _search_options (str): Options to be passed to the search engine.
 15 |         _resize (bool): Whether to resize images after download or not.
 16 |         _size (Tuple[int, int]): Size to resize images to.
 17 |         images_count (int): number of images downloaded.
 18 |         _memory (Dict[str, str]): Mapping between keyword to files paths.
 19 |             this is checked before searching for a keyword to avoid multiple
 20 |             searches for the same keyword.
 21 |     """
 22 | 
 23 |     def __init__(
 24 |         self,
 25 |         search_options: str = "",
 26 |         resize: bool = False,
 27 |         size: Tuple[int, int] = (1920, 1080),
 28 |         to_download: int = 20,
 29 |     ):
 30 |         """Initialize class variables and gid instance
 31 |         Args:
 32 |             search_options (str, optional): Search options to pass to the
 33 |                 downloader, e.g. isz:lt,islt:svga,itp:photo,ic:color,ift:jpg.
 34 |             resize (bool, optional): Resizes images after downloading if True.
 35 |                 Defaults to False.
 36 |             size (Tuple[int, int], optional): Resizes images to this size if
 37 |                 resize is set to True. Defaults to (1920, 1080).
 38 |             to_download (int): number of images to download
 39 |         """
 40 |         self._search_options = search_options
 41 |         self._resize = resize
 42 |         self._size = size
 43 |         self._downloader = google_images_download.googleimagesdownload()
 44 |         self.download_folder = os.path.join(os.getcwd(), "downloads")
 45 |         self.images_count = 0
 46 |         self.to_download = to_download
 47 |         self._memory = {}
 48 | 
 49 |         # Create downloads folder if it doesn't exist and load local images
 50 |         mkdir("downloads")
 51 |         self._load_images()
 52 | 
 53 |     def _load_images(self) -> None:
 54 |         """Tries to load local files first"""
 55 |         local_files = {}
 56 |         directory = os.path.join(os.getcwd(), "downloads")
 57 |         for root, _, files in os.walk(directory):
 58 |             # Skip the main folder
 59 |             if root == directory:
 60 |                 continue
 61 |             local_files[os.path.basename(root).lower()] = []
 62 |             for file in files:
 63 |                 local_files[os.path.basename(root).lower()].append(
 64 |                     os.path.abspath(os.path.join(root, file))
 65 |                 )
 66 |         self._memory = local_files
 67 | 
 68 |     def _download_from_url(self, url: str, keyword: str) -> str:
 69 |         """Downloads a single image from a url
 70 | 
 71 |         Args:
 72 |             url (str): url to download
 73 |             keyword(str): keyword searched, to create directory.
 74 |         Returns:
 75 |             str: path to downloaded file
 76 |         """
 77 |         self.images_count += 1
 78 | 
 79 |         # Make keyword directory in downloads if it doesn't exist
 80 |         mkdir(f"{self.download_folder}/{keyword}")
 81 | 
 82 |         print(f"[INFO] Downloading from URL: {url}")
 83 |         print(
 84 |             "[INFO] Downloading to: "
 85 |             + f"{self.download_folder}/"
 86 |             + f"{keyword}/image_{self.images_count}.jpg"
 87 |         )
 88 | 
 89 |         # Load the image via requests
 90 |         res = requests.get(url)
 91 |         if res.status_code != 200:
 92 |             print(f"[INFO] Skipping downloading image, got status {res.status_code}")
 93 |             self.images_count -= 1
 94 |             return None
 95 | 
 96 |         img_data = res.content
 97 | 
 98 |         # Save image to desk
 99 |         with open(
100 |             f"{self.download_folder}/{keyword}/image_{self.images_count}.jpg",
101 |             "wb",
102 |         ) as handler:
103 |             handler.write(img_data)
104 | 
105 |         return f"{self.download_folder}/{keyword}/image_{self.images_count}.jpg"
106 | 
107 |     def search_image(self, keyword: str) -> List[str]:
108 |         """Searches google images with the keyword given and arguments supplied to instance.
109 |         Does not start a new search if keyword is already searched.
110 | 
111 |         Args:
112 |             keyword (str): single keyword to search
113 | 
114 |         Returns:
115 |             List[str]: List of downloaded files paths
116 |         """
117 | 
118 |         word = keyword.strip()
119 | 
120 |         # Return images paths if it already exists
121 |         if word.lower() in self._memory:
122 |             return self._memory[word.lower()]
123 | 
124 |         print(f"[INFO] Downloading images for keyword: {word}")
125 |         # Scrape google images search to get urls of images
126 |         urls = run_search(word, "off", self.to_download, self._search_options)
127 | 
128 |         # Download the images and add the path to list
129 |         paths = []
130 |         for url in urls:
131 |             path = self._download_from_url(url, word)
132 |             if path is not None:
133 |                 paths.append(path)
134 | 
135 |         # Save keyword and paths to memory
136 |         self._memory[word] = paths
137 | 
138 |         # Process images
139 |         if self._resize and len(paths) > 0:
140 |             directory = os.path.dirname(os.path.abspath(paths[0]))
141 |             self._resize_images(self._size, directory)
142 | 
143 |         return paths
144 | 
145 |     def _resize_images(self, size: Tuple[int, int], directory: str) -> None:
146 |         """resizes all images inside a directory to the given size
147 | 
148 |         Args:
149 |             size (Tuple[int, int]): a 2-tuple for the desired size
150 |             directory (str): path to directory, relative or absolute
151 |         """
152 | 
153 |         # Get only files from that directory
154 |         files = [
155 |             os.path.join(directory, f)
156 |             for f in os.listdir(directory)
157 |             if os.path.isfile(os.path.join(directory, f))
158 |         ]
159 | 
160 |         for file in files:
161 |             # Create a new black image with specified size as background
162 |             background = Image.new("RGB", size)
163 |             im = Image.open(file)
164 | 
165 |             # convert image mode to RGB
166 |             if im.mode != "RGB":
167 |                 im = im.convert("RGB")
168 | 
169 |             # Resize image
170 |             # TODO fix this
171 |             wr = size[0] / im.width
172 |             hr = size[1] / im.height
173 | 
174 |             if wr > hr:
175 |                 nw = (im.width) * hr
176 |                 im = im.resize((int(nw), size[1]), Image.ANTIALIAS)
177 |             else:
178 |                 nh = (im.height) * wr
179 |                 im = im.resize((size[0], int(nh)), Image.ANTIALIAS)
180 | 
181 |             # Add the image to the background centered
182 |             x = (size[0] - im.width) // 2
183 |             y = (size[1] - im.height) // 2
184 |             background.paste(im, (x, y))
185 | 
186 |             # To avoid saved WEBP images, which don't work with MoviePy.
187 |             if im.format == "WEBP":
188 |                 background.save(file + ".jpg", "JPEG")
189 |             else:
190 |                 background.save(file, "JPEG")
191 | 
192 | 
def main():
    """Manual smoke test: search for "test" with resizing enabled."""
    grabber = ImageGrabber(resize=True)
    grabber.search_image("test")
196 | 
197 | 
# Run the smoke test only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------