├── utils ├── __init__.py ├── common.py └── regex_utils.py ├── images_utils ├── __init__.py ├── google_crawl.py └── image_grabber.py ├── .gitignore ├── requirements.txt ├── main.py ├── LICENSE ├── audio_utils ├── audio_deprecated.py └── audio.py ├── TextToVideo.py ├── test_script.txt ├── README.md ├── video_utils └── video_segment.py └── text_utils └── text_processor.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | tts_output/ 3 | downloads/ 4 | .vscode/ 5 | .idea/ 6 | output/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iJohnMaged/Text-To-Video-Py/HEAD/requirements.txt -------------------------------------------------------------------------------- /utils/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def mkdir(directory): 5 | if not os.path.exists(directory): 6 | os.makedirs(directory) 7 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from TextToVideo import TextToVideo 2 | 3 | 4 | def main(): 5 | with open("test_script.txt", "r") as f: 6 | text = f.read() 7 | text = text.replace("\n", " ") 8 | ttv = TextToVideo(text, "anime.mp4") 9 | ttv.generate_video() 10 | ttv.save_video() 11 | 12 | 13 | if __name__ == "__main__": 14 | main() -------------------------------------------------------------------------------- /utils/regex_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def group_and_split(text, group_re, split_re): 5 | 6 | matches = re.finditer(group_re, text, re.MULTILINE) 7 | groups = [] 8 | for _, match in enumerate(matches): 9 | for i in range(len(match.groups())): 10 | groups.append(match.group(i + 1)) 11 | 12 | splits = [] 13 | i = 0 14 | for split in re.split(split_re, text): 15 | if len(split.strip()) > 0: 16 | splits.append((split.strip(), groups[i])) 17 | i += 1 18 | return groups, splits -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 John Maged 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /audio_utils/audio_deprecated.py: -------------------------------------------------------------------------------- 1 | """A class to help with audio processing, 2 | A wrapper for gTTS 3 | """ 4 | import os 5 | from typing import Tuple 6 | 7 | 8 | from gtts import gTTS 9 | from mutagen.mp3 import MP3 10 | from utils.common import mkdir 11 | 12 | 13 | class TTS: 14 | def __init__(self, download_location: str = "audio"): 15 | """ 16 | Args: 17 | download_location (str, optional): folder to download audio to. Defaults to "audio". 18 | """ 19 | self._memory = {} 20 | self.download_location = download_location 21 | mkdir(download_location) 22 | self._load_audio() 23 | 24 | def _load_audio(self): 25 | local_files = {} 26 | for file in os.listdir("audio"): 27 | audio_file = os.path.join("audio", file) 28 | if os.path.isfile(audio_file): 29 | mp3 = MP3(audio_file) 30 | local_files[file.strip(".mp3")] = (audio_file, mp3.info.length) 31 | self._memory = local_files 32 | 33 | def getTTS(self, text: str) -> Tuple[str, float]: 34 | """ 35 | Gets TTS for a given string and downloads it to download_location. 36 | 37 | Args: 38 | text(str): text to turn to speech 39 | Returns: 40 | Tuple[str, float]: path to saved file 41 | """ 42 | if text in self._memory: 43 | return self._memory[text] 44 | 45 | tts = gTTS(text) 46 | audio_file = f"{self.download_location}/{text}.mp3" 47 | tts.save(audio_file) 48 | 49 | # Get audio length for video duration. 50 | mp3 = MP3(audio_file) 51 | self._memory[text] = (audio_file, mp3.info.length) 52 | return self._memory[text] 53 | -------------------------------------------------------------------------------- /TextToVideo.py: -------------------------------------------------------------------------------- 1 | """The main class for the project. 2 | This module turns given text to a video with images grabbed from google search 3 | as well as audio from google tts. 4 | """ 5 | 6 | import os 7 | 8 | from moviepy.editor import ( 9 | concatenate_videoclips, 10 | ) 11 | 12 | from images_utils.image_grabber import ImageGrabber 13 | from text_utils.text_processor import TextProcessor 14 | from audio_utils.audio import WaveNetTTS 15 | 16 | from utils.common import mkdir 17 | 18 | 19 | class TextToVideo: 20 | def __init__(self, text: str, output: str): 21 | """This class processes the images and audio then generates the required vidoe 22 | 23 | Args: 24 | text (str): Text to turn into images/audio 25 | output (str): Output file name 26 | """ 27 | self.text = text 28 | self.output = output 29 | self._gid = ImageGrabber( 30 | search_options="ift:jpg", 31 | resize=True, 32 | ) 33 | self._text_processor = TextProcessor(self.text) 34 | self._wnTTS = WaveNetTTS() 35 | self._output_folder = "output" 36 | self._video_clips = [] 37 | mkdir(os.path.join(os.getcwd(), self._output_folder)) 38 | 39 | def generate_video(self) -> None: 40 | """Generates the video clips/segments to be concatenated on save""" 41 | 42 | video_segments = self._text_processor.video_segments 43 | for segment in video_segments: 44 | final_clip = segment.generate_segment(self._wnTTS, self._gid) 45 | self._video_clips.append(final_clip) 46 | 47 | def save_video(self, fps: int = 24) -> None: 48 | """Saves the processed video 49 | 50 | Args: 51 | fps (int, optional): Desired video FPS. Defaults to 24. 52 | """ 53 | 54 | if len(self._video_clips) == 0: 55 | raise VideoElementsNotProcessed 56 | 57 | final_video = concatenate_videoclips(self._video_clips, method="compose") 58 | final_video.fps = 24 59 | final_video.write_videofile(f"{self._output_folder}/{self.output}") 60 | 61 | 62 | class VideoElementsNotProcessed(Exception): 63 | pass -------------------------------------------------------------------------------- /test_script.txt: -------------------------------------------------------------------------------- 1 | [IMAGE: Shonen anime] In anime, shonen in particular, characters will often have incredible powers that will set them aside from the rest of the characters. 2 | [IMAGE: Overpowered anime characters] you will find that the entire anime's reputation is particularly based on the main character's abilities whether it is magic or other 3 | super natural forms of power. 4 | [IMAGE: strength anime characters] 5 | But some characters, however, manage to be heroes in their own way, meaning that they don't actually posses powers. 6 | they go by sheer strength and will power instead. In this video you will see 10 anime characters who are overpowered without super powers. 7 | 8 | [IMAGE: assassination classroom anime 7] [VOICE: I] Coming at number 10, nagisa Shiota from assassination classroom. Nagisa's physical abilities are mediocre, and it worthy to note that he is one of the weakest amongst the boys as his constitution and strength are more easily comparable to a female of his age. However, Nagisa has an extraordinary amount of natural talent as an assassin due to his impressive observant nature, his remarkable stealthy movements, and his incredible bloodlust, to the point that he was described by Korosensei to be a 'pure assassin'. This inclination has also been made apparent numerous times during the span of the story, such as his duels against Tataoka, his fight against Karma, and finally Korosensei. Several other characters such as Karma and Maehara have noted that he is full of surprises - one can never know when or where Nagisa will aim to strike, and his talent in the art of assassination has garnered respect from even those who are not his peers to the point that others have commented on the potential Nagisa possessed that could be fulfilled in the role of a professional hitman. In the end, though, Nagisa does not pursue this path and decides to use his abilities to help others in the same way Korosensei did: teaching to a "problematic class".[/VOICE] 9 | [IMAGE: no game no life anime 4] [VOICE: I] And at number 9, we have sora from no game no life. Despite being a shut-in, Sora has incredible superhuman charisma and eloquence. He is an excellent and natural speaker, always knowing exactly what to say to get others on his side. He has no problems dealing with people or groups, after overcoming the initial shock. While not as intelligent as Shiro in calculations, he is shown to have a brilliant super-genius IQ and an eidetic memory which enables him to quickly learn, understand and easily recall limitless amounts of information perfectly and rapidly. This is demonstrated during his card game with Stephanie. Rather than computing numbers like Shiro, Sora can quickly determine what his opponents are going to do based on observation of their behavior and motivations, a technique called "cold reading". His amazing observational and deductive skills help him come up with complex plans quickly and efficiently. This is shown during his game against Jibril and when he figured out the previous King's plan to beat the Werebeast. He was even able to learn to read Immanity-go (language) in just about one hour slower than his sister (self-proclaimed).[/VOICE] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Text-To-Video 2 | 3 | The idea of this project is to be able to _generate_ videos just from text input that includes: 4 | 5 | * Images. 6 | * Videos. 7 | * Voice over. 8 | * Music. 9 | 10 | This is a little fun side project that I plan to work on in my free time over the next few weeks. 11 | # Example 12 | [![Video example](https://img.youtube.com/vi/xBDiMOTY1UI/0.jpg)](https://www.youtube.com/watch?v=xBDiMOTY1UI) 13 | 14 | Script template used for this example can be found [**here**](./test_script.txt) 15 | 16 | # Current status 17 | 18 | Given a script (text with a special template), the program will scrap google images for requested images, and use WaveNet TTS to voice over the script. 19 | 20 | # Script template 21 | 22 | ## Image tag 23 | ``` 24 | [IMAGE: Shonen anime] In anime, shonen in particular, characters will often have incredible powers that will set them aside from the rest of the characters. 25 | ``` 26 | 27 | Your script must always start with an `[IMAGE: keyword count]` tag, the `IMAGE` tells the program what images to search and display till the next `[IMAGE]` tag, the program will then turn the following text into voice over, and display each image for `speech duration/count` seconds. 28 | 29 | `keyword` : The keyword that the program will use to search google images 30 | 31 | `count` (Optional) : Number of images to be shown in this segment of video. Defaults to 5. 32 | 33 | ## Voice Tag 34 | ``` 35 | [IMAGE: strength anime characters] 36 | But some characters, however, [VOICE: J]manage to be heroes in their own way, meaning that they don't actually posses powers.[/VOICE] 37 | they go by sheer strength and will power instead. In this video you will see 10 anime characters who are overpowered without super powers. 38 | ``` 39 | You can wrap text in `[VOICE: voice_name]` tag, notice there's a closing tag for this one `[/VOICE]`, you can find more info about voice names [here](https://cloud.google.com/text-to-speech/docs/voices). 40 | 41 | For simplicity, this program uses `en-US-Wavenet-` voices, and you need to pass the voice name letter to the tag, this will be changed later as more language support is added. 42 | 43 | # How to run 44 | 45 | Create a virtual environment and run 46 | 47 | `pip install -r requirement.txt` 48 | 49 | Then you can create a script in `test_script.txt` and run 50 | 51 | `python main.py` 52 | 53 | # Important Note 54 | This program uses Google's `Cloud text-to-speech`, so sadly you need to enable their API set up authentication to work and try this program. Check more inforamtion on how to do this [here](https://cloud.google.com/text-to-speech/docs/libraries). 55 | 56 | 57 | 58 | # TODOs 59 | - [ ] Create a pipeline that handles the text processing for the template with many tags. 60 | - [ ] Add new video tag (from file) to the script template. 61 | - [ ] Add new music tag (from file) to the script template. 62 | - [ ] Add tags for special video effects. 63 | - [ ] Add multiple keyword for image search, comma separated. 64 | - [ ] Add ArgParser to the program instead of using `main.py`. 65 | - [ ] Variable display time for images. 66 | - [ ] Validation and testing. 67 | - [ ] Add other languages support for TTS. 68 | - [ ] Explore more TTS options, and machine learning tools that can help with this project. 69 | 70 | # License 71 | [MIT](./LICENSE) -------------------------------------------------------------------------------- /video_utils/video_segment.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List, Dict 3 | from moviepy.editor import ( 4 | ImageClip, 5 | AudioFileClip, 6 | VideoClip, 7 | concatenate_videoclips, 8 | concatenate_audioclips, 9 | ) 10 | from audio_utils.audio import WaveNetTTS 11 | from images_utils.image_grabber import ImageGrabber 12 | 13 | 14 | class VideoSegment: 15 | """This class represents and handles a single video segment, which 16 | includes audio, images. Input text is split into different segments 17 | every [IMAGE] tag. 18 | 19 | Attributes: 20 | text (str): Raw input text for this segment 21 | voiceover_text (List[Dict]): List of Dict of this format (voice name, 22 | text to be voiced over). 23 | image_keyword (str): Keyword for images to be scraped for this segment. 24 | segment_number (int): number of segment in the entire video. 25 | images_number (int): number of images to be displayed in this segment 26 | """ 27 | 28 | def __init__( 29 | self, 30 | text: str, 31 | voiceover_text: List[Dict], 32 | image_keyword: str, 33 | segment_number: int, 34 | images_number: int = 5, 35 | ): 36 | self.segment_number = segment_number 37 | self.text = text 38 | self.voiceover_text = voiceover_text 39 | self.image_keyword = image_keyword 40 | self.images_number = images_number 41 | 42 | def generate_segment(self, tts: WaveNetTTS, gid: ImageGrabber) -> VideoClip: 43 | """Generates a video segment by searching the images, combining them 44 | and adding TTS voice over. 45 | 46 | Args: 47 | tts (WaveNetTTS): TTS object 48 | gid (ImageGrabber): Image search/grabber object 49 | 50 | Returns: 51 | VideoClip: complete video clip combined from images/TTS. 52 | """ 53 | 54 | print(f"[INFO] Generating video segment #{self.segment_number}") 55 | image_clips = [] 56 | audio_clips = [] 57 | 58 | # Total duration of segment in seconds 59 | segment_duration = 0 60 | 61 | # Start by first generating TTS audio file 62 | for idx, voiceover in enumerate(self.voiceover_text): 63 | audio_file, duration = tts.generate_tts( 64 | voiceover["text"], 65 | f"video-segment{self.segment_number}-{idx+1}.mp3", 66 | voiceover["voice"], 67 | ) 68 | # Add audio duration to the segment duration 69 | segment_duration += duration 70 | audio_clips.append(AudioFileClip(audio_file)) 71 | 72 | # Image duration is total duration / number of images, this could be 73 | # changed to be random period of times between 0 and segment_duration 74 | image_duration = segment_duration / self.images_number 75 | images = gid.search_image(self.image_keyword) 76 | # Randomly select the images, this needs to be checked for number of 77 | # images downloaded. 78 | random_images = random.sample(images, self.images_number) 79 | 80 | # Create the image clips and produce final video 81 | for video_image in random_images: 82 | image_clips.append(ImageClip(video_image, duration=image_duration)) 83 | 84 | audio_clip = concatenate_audioclips(audio_clips) 85 | final_clip = concatenate_videoclips(image_clips, method="compose") 86 | final_clip.fps = 24 87 | final_clip = final_clip.set_audio(audio_clip) 88 | return final_clip 89 | -------------------------------------------------------------------------------- /text_utils/text_processor.py: -------------------------------------------------------------------------------- 1 | """This module processes custom text input used to generate videos. 2 | So far the only tag supported is: [IMAGE: ] 3 | """ 4 | 5 | import re 6 | from typing import List, Dict 7 | from video_utils.video_segment import VideoSegment 8 | 9 | 10 | class TextProcessor: 11 | 12 | TextTemplateRe = { 13 | "image": r"\[IMAGE: (.+?)(\d*?)]", 14 | "split_image": r"\[IMAGE: .+?\d*?]", 15 | "search_voice": r"\[VOICE: (.+?)](.+?)\[\/VOICE]", 16 | "split_voice": r"\[VOICE: .+?](.+?)\[\/VOICE]", 17 | } 18 | 19 | def __init__(self, text: str): 20 | """ 21 | Args: 22 | text (str): Text to be processed 23 | """ 24 | self.text = text 25 | self.video_segments = [] 26 | self.sentences = [] 27 | print("[INFO] Processing text...") 28 | self._process_text_for_images() 29 | print("[INFO] Processed text..") 30 | 31 | def _process_text_for_images(self) -> None: 32 | """processes and formats text 33 | Eventually, this function will set `self.sentences` to be an array of 34 | 2-tuples: Tuple[sentence, images_keyword]. Then each pair, sentence 35 | will be used for voice over and images_keyword will be used for image 36 | search over this segment. 37 | """ 38 | matches = re.finditer( 39 | TextProcessor.TextTemplateRe["image"], self.text, re.DOTALL 40 | ) 41 | 42 | groups = [] 43 | for _, match in enumerate(matches): 44 | try: 45 | images_number = int(match.group(2)) 46 | except: 47 | images_number = 5 48 | groups.append((match.group(1), images_number)) 49 | 50 | i = 0 51 | for sentence in re.split( 52 | TextProcessor.TextTemplateRe["split_image"], 53 | self.text, 54 | re.DOTALL, 55 | ): 56 | if len(sentence) > 0: 57 | self.video_segments.append( 58 | VideoSegment( 59 | sentence.strip(), 60 | self._process_voices(sentence.strip()), 61 | groups[i][0], 62 | i + 1, 63 | groups[i][1], 64 | ) 65 | ) 66 | self.sentences.append((sentence.strip(), groups[i][0])) 67 | i += 1 68 | 69 | def _process_voices(self, text) -> List[Dict]: 70 | """Extracts [VOICE] tags from video and sets every group of text to the 71 | the correct voice. 72 | 73 | Args: 74 | text (str): Text to be processed 75 | 76 | Returns: 77 | List[Dict]: List of Dict of this format {"voice": str, "text": str} 78 | Where voice is the voice name and text is the text to be voiced 79 | over. 80 | """ 81 | # TODO do this, but better. 82 | # Split all text on voice tags and sets everything to "default" 83 | voiceover_segment = [] 84 | for sentence in re.split( 85 | TextProcessor.TextTemplateRe["split_voice"], text, re.DOTALL | re.MULTILINE 86 | ): 87 | if len(sentence.strip()) > 0: 88 | voiceover_segment.append({"voice": "DEFAULT", "text": sentence.strip()}) 89 | 90 | # Iterate again but this time set the text to the correct voice in tag. 91 | for sentence in re.finditer( 92 | TextProcessor.TextTemplateRe["search_voice"], text, re.DOTALL | re.MULTILINE 93 | ): 94 | for idx, t in enumerate(voiceover_segment): 95 | if t["text"] == sentence.group(2).strip(): 96 | voiceover_segment[idx]["voice"] = sentence.group(1) 97 | 98 | return voiceover_segment 99 | -------------------------------------------------------------------------------- /audio_utils/audio.py: -------------------------------------------------------------------------------- 1 | """A wrapper for google cloud TextToSpeech service which utilizes WaveNet to generate speech. 2 | """ 3 | 4 | import os 5 | from typing import Tuple 6 | from google.cloud import texttospeech 7 | from mutagen.mp3 import MP3 8 | from utils.common import mkdir 9 | 10 | 11 | class WaveNetTTS: 12 | 13 | VOICES = { 14 | "A": ("en-US-Wavenet-A", 1), 15 | "B": ("en-US-Wavenet-B", 1), 16 | "C": ("en-US-Wavenet-C", 2), 17 | "D": ("en-US-Wavenet-D", 1), 18 | "E": ("en-US-Wavenet-E", 2), 19 | "F": ("en-US-Wavenet-F", 2), 20 | "G": ("en-US-Wavenet-G", 2), 21 | "H": ("en-US-Wavenet-H", 2), 22 | "I": ("en-US-Wavenet-I", 1), 23 | "J": ("en-US-Wavenet-J", 1), 24 | "DEFAULT": ("en-US-Wavenet-J", 1), 25 | } 26 | 27 | @classmethod 28 | def get_voices(cls, gender): 29 | """Class method to return all voices by given gender 30 | 31 | Args: 32 | gender (str): gender to filter by 33 | 34 | Returns: 35 | List[Tuple[str, int]]: List of Tuples of (voice_name, gender) 36 | """ 37 | if gender.lower() == "male": 38 | gender = 1 39 | elif gender.lower() == "female": 40 | gender = 2 41 | else: 42 | return None 43 | return [v for _, v in WaveNetTTS.VOICE_NAMES.items() if v[1] == gender] 44 | 45 | def __init__( 46 | self, 47 | audio_config: texttospeech.AudioConfig = None, 48 | ): 49 | """Initializes client to google's tts 50 | 51 | Args: 52 | audio_config (texttospeech.AudioConfig, optional): Audio configs like pitch, speed, more info on google tts 53 | documentation. Defaults to None. 54 | """ 55 | self.client = texttospeech.TextToSpeechClient() 56 | self.audio_config = audio_config 57 | if self.audio_config is None: 58 | self.audio_config = texttospeech.AudioConfig( 59 | audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=1 60 | ) 61 | self.output = os.path.join(os.getcwd(), "tts_output") 62 | mkdir(self.output) 63 | 64 | def generate_tts( 65 | self, text: str, filename: str, voice_name: str = None 66 | ) -> Tuple[str, float]: 67 | """Synthesizes speech and generates the audio file for a given text 68 | 69 | Args: 70 | text (str): text to turn into speech 71 | filename (str): filename to save output 72 | voice (texttospeech.VoiceSelectionParams, optional): Voice selection for WaveNet. Defaults to None. 73 | Returns: 74 | Tuple[str, float]: output audio file path, audio file duration in seconds 75 | """ 76 | if voice_name is None: 77 | voice = texttospeech.VoiceSelectionParams( 78 | language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL 79 | ) 80 | else: 81 | voice_params = WaveNetTTS.VOICES[voice_name] 82 | voice = texttospeech.VoiceSelectionParams( 83 | language_code="en-US", 84 | name=voice_params[0], 85 | ssml_gender=voice_params[1], 86 | ) 87 | synthesis_input = texttospeech.SynthesisInput(text=text) 88 | response = self.client.synthesize_speech( 89 | input=synthesis_input, voice=voice, audio_config=self.audio_config 90 | ) 91 | 92 | audio_file = os.path.join(self.output, filename) 93 | with open(audio_file, "wb") as out: 94 | # Write the response to the output file. 95 | out.write(response.audio_content) 96 | print(f'[INFO] Audio content written to file "{self.output}/{filename}"') 97 | 98 | mp3 = MP3(audio_file) 99 | return audio_file, mp3.info.length 100 | -------------------------------------------------------------------------------- /images_utils/google_crawl.py: -------------------------------------------------------------------------------- 1 | 2 | # requires: selenium, chromium-driver, retry 3 | # Thanks to `Sam Watkins` on stackoverflow for this file. :) 4 | # https://stackoverflow.com/users/218294/sam-watkins 5 | # https://stackoverflow.com/questions/20716842/python-download-images-from-google-image-search/61982397#61982397?newreg=1c27b159a6a44bd08ae95ddbcf083ae1 6 | 7 | from selenium import webdriver 8 | from webdriver_manager.chrome import ChromeDriverManager 9 | from selenium.webdriver.chrome.options import Options 10 | import selenium.common.exceptions as sel_ex 11 | import sys 12 | import time 13 | import urllib.parse 14 | from retry import retry 15 | import argparse 16 | import logging 17 | 18 | logging.basicConfig(stream=sys.stderr, level=logging.INFO) 19 | logger = logging.getLogger() 20 | retry_logger = None 21 | 22 | css_thumbnail = "img.Q4LuWd" 23 | css_large = "img.n3VNCb" 24 | css_load_more = ".mye4qd" 25 | selenium_exceptions = ( 26 | sel_ex.ElementClickInterceptedException, 27 | sel_ex.ElementNotInteractableException, 28 | sel_ex.StaleElementReferenceException, 29 | ) 30 | 31 | 32 | def scroll_to_end(wd): 33 | wd.execute_script("window.scrollTo(0, document.body.scrollHeight);") 34 | 35 | 36 | @retry(exceptions=KeyError, tries=6, delay=0.1, backoff=2, logger=retry_logger) 37 | def get_thumbnails(wd, want_more_than=0): 38 | wd.execute_script("document.querySelector('{}').click();".format(css_load_more)) 39 | thumbnails = wd.find_elements_by_css_selector(css_thumbnail) 40 | n_results = len(thumbnails) 41 | if n_results <= want_more_than: 42 | raise KeyError("no new thumbnails") 43 | return thumbnails 44 | 45 | 46 | @retry(exceptions=KeyError, tries=6, delay=0.1, backoff=2, logger=retry_logger) 47 | def get_image_src(wd): 48 | actual_images = wd.find_elements_by_css_selector(css_large) 49 | sources = [] 50 | for img in actual_images: 51 | src = img.get_attribute("src") 52 | if src.startswith("http") and not src.startswith( 53 | "https://encrypted-tbn0.gstatic.com/" 54 | ): 55 | sources.append(src) 56 | if not len(sources): 57 | raise KeyError("no large image") 58 | return sources 59 | 60 | 61 | @retry( 62 | exceptions=selenium_exceptions, tries=6, delay=0.1, backoff=2, logger=retry_logger 63 | ) 64 | def retry_click(el): 65 | el.click() 66 | 67 | 68 | def get_images(wd, start=0, n=20, out=None): 69 | thumbnails = [] 70 | count = len(thumbnails) 71 | while count < n: 72 | scroll_to_end(wd) 73 | try: 74 | thumbnails = get_thumbnails(wd, want_more_than=count) 75 | except KeyError as e: 76 | logger.warning("cannot load enough thumbnails") 77 | break 78 | count = len(thumbnails) 79 | sources = [] 80 | for tn in thumbnails: 81 | try: 82 | retry_click(tn) 83 | except selenium_exceptions as e: 84 | logger.warning("main image click failed") 85 | continue 86 | sources1 = [] 87 | try: 88 | sources1 = get_image_src(wd) 89 | except KeyError as e: 90 | pass 91 | # logger.warning("main image not found") 92 | if not sources1: 93 | tn_src = tn.get_attribute("src") 94 | if not tn_src.startswith("data"): 95 | logger.warning("no src found for main image, using thumbnail") 96 | sources1 = [tn_src] 97 | else: 98 | logger.warning("no src found for main image, thumbnail is a data URL") 99 | for src in sources1: 100 | if not src in sources: 101 | sources.append(src) 102 | if out: 103 | print(src, file=out) 104 | out.flush() 105 | if len(sources) >= n: 106 | break 107 | return sources 108 | 109 | 110 | def google_image_search(wd, query, safe="off", n=20, opts="", out=None): 111 | search_url_t = "https://www.google.com/search?safe={safe}&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img&tbs={opts}" 112 | search_url = search_url_t.format( 113 | q=urllib.parse.quote(query), opts=urllib.parse.quote(opts), safe=safe 114 | ) 115 | wd.get(search_url) 116 | sources = get_images(wd, n=n, out=out) 117 | return sources 118 | 119 | 120 | def run_search(query, safe, n, otions, out=None): 121 | opts = Options() 122 | opts.add_argument("--headless") 123 | 124 | with webdriver.Chrome(ChromeDriverManager().install(), options=opts) as wd: 125 | sources = google_image_search( 126 | wd, query, safe=safe, n=n, opts=otions, out=None 127 | ) 128 | return sources -------------------------------------------------------------------------------- /images_utils/image_grabber.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple, List 3 | from PIL import Image 4 | from google_images_download import google_images_download 5 | from .google_crawl import run_search 6 | import requests 7 | from utils.common import mkdir 8 | 9 | 10 | class ImageGrabber: 11 | """Responsible to grab and process images from the internet giving a 12 | keyword. 13 | Attributes: 14 | _search_options (str): Options to be passed to the search engine. 15 | _resize (bool): Whether to resize images after download or not. 16 | _size (Tuple[int, int]): Size to resize images to. 17 | images_count (int): number of images downloaded. 18 | _memory (Dict[str, str]): Mapping between keyword to files paths. 19 | this is checked before searching for a keyword to avoid multiple 20 | searches for the same keyword. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | search_options: str = "", 26 | resize: bool = False, 27 | size: Tuple[int, int] = (1920, 1080), 28 | to_download: int = 20, 29 | ): 30 | """Initialize class variables and gid instance 31 | Args: 32 | search_options (str, optional): Search options to pass to the 33 | downloader, e.g. isz:lt,islt:svga,itp:photo,ic:color,ift:jpg. 34 | resize (bool, optional): Resizes images after downloading if True. 35 | Defaults to False. 36 | size (Tuple[int, int], optional): Resizes images to this size if 37 | resize is set to True. Defaults to (1920, 1080). 38 | to_download (int): number of images to download 39 | """ 40 | self._search_options = search_options 41 | self._resize = resize 42 | self._size = size 43 | self._downloader = google_images_download.googleimagesdownload() 44 | self.download_folder = os.path.join(os.getcwd(), "downloads") 45 | self.images_count = 0 46 | self.to_download = to_download 47 | self._memory = {} 48 | 49 | # Create downloads folder if it doesn't exist and load local images 50 | mkdir("downloads") 51 | self._load_images() 52 | 53 | def _load_images(self) -> None: 54 | """Tries to load local files first""" 55 | local_files = {} 56 | directory = os.path.join(os.getcwd(), "downloads") 57 | for root, _, files in os.walk(directory): 58 | # Skip the main folder 59 | if root == directory: 60 | continue 61 | local_files[os.path.basename(root).lower()] = [] 62 | for file in files: 63 | local_files[os.path.basename(root).lower()].append( 64 | os.path.abspath(os.path.join(root, file)) 65 | ) 66 | self._memory = local_files 67 | 68 | def _download_from_url(self, url: str, keyword: str) -> str: 69 | """Downloads a single image from a url 70 | 71 | Args: 72 | url (str): url to download 73 | keyword(str): keyword searched, to create directory. 74 | Returns: 75 | str: path to downloaded file 76 | """ 77 | self.images_count += 1 78 | 79 | # Make keyword directory in downloads if it doesn't exist 80 | mkdir(f"{self.download_folder}/{keyword}") 81 | 82 | print(f"[INFO] Downloading from URL: {url}") 83 | print( 84 | "[INFO] Downloading to: " 85 | + f"{self.download_folder}/" 86 | + f"{keyword}/image_{self.images_count}.jpg" 87 | ) 88 | 89 | # Load the image via requests 90 | res = requests.get(url) 91 | if res.status_code != 200: 92 | print(f"[INFO] Skipping downloading image, got status {res.status_code}") 93 | self.images_count -= 1 94 | return None 95 | 96 | img_data = res.content 97 | 98 | # Save image to desk 99 | with open( 100 | f"{self.download_folder}/{keyword}/image_{self.images_count}.jpg", 101 | "wb", 102 | ) as handler: 103 | handler.write(img_data) 104 | 105 | return f"{self.download_folder}/{keyword}/image_{self.images_count}.jpg" 106 | 107 | def search_image(self, keyword: str) -> List[str]: 108 | """Searches google images with the keyword given and arguments supplied to instance. 109 | Does not start a new search if keyword is already searched. 110 | 111 | Args: 112 | keyword (str): single keyword to search 113 | 114 | Returns: 115 | List[str]: List of downloaded files paths 116 | """ 117 | 118 | word = keyword.strip() 119 | 120 | # Return images paths if it already exists 121 | if word.lower() in self._memory: 122 | return self._memory[word.lower()] 123 | 124 | print(f"[INFO] Downloading images for keyword: {word}") 125 | # Scrape google images search to get urls of images 126 | urls = run_search(word, "off", self.to_download, self._search_options) 127 | 128 | # Download the images and add the path to list 129 | paths = [] 130 | for url in urls: 131 | path = self._download_from_url(url, word) 132 | if path is not None: 133 | paths.append(path) 134 | 135 | # Save keyword and paths to memory 136 | self._memory[word] = paths 137 | 138 | # Process images 139 | if self._resize and len(paths) > 0: 140 | directory = os.path.dirname(os.path.abspath(paths[0])) 141 | self._resize_images(self._size, directory) 142 | 143 | return paths 144 | 145 | def _resize_images(self, size: Tuple[int, int], directory: str) -> None: 146 | """resizes all images inside a directory to the given size 147 | 148 | Args: 149 | size (Tuple[int, int]): a 2-tuple for the desired size 150 | directory (str): path to directory, relative or absolute 151 | """ 152 | 153 | # Get only files from that directory 154 | files = [ 155 | os.path.join(directory, f) 156 | for f in os.listdir(directory) 157 | if os.path.isfile(os.path.join(directory, f)) 158 | ] 159 | 160 | for file in files: 161 | # Create a new black image with specified size as background 162 | background = Image.new("RGB", size) 163 | im = Image.open(file) 164 | 165 | # convert image mode to RGB 166 | if im.mode != "RGB": 167 | im = im.convert("RGB") 168 | 169 | # Resize image 170 | # TODO fix this 171 | wr = size[0] / im.width 172 | hr = size[1] / im.height 173 | 174 | if wr > hr: 175 | nw = (im.width) * hr 176 | im = im.resize((int(nw), size[1]), Image.ANTIALIAS) 177 | else: 178 | nh = (im.height) * wr 179 | im = im.resize((size[0], int(nh)), Image.ANTIALIAS) 180 | 181 | # Add the image to the background centered 182 | x = (size[0] - im.width) // 2 183 | y = (size[1] - im.height) // 2 184 | background.paste(im, (x, y)) 185 | 186 | # To avoid saved WEBP images, which don't work with MoviePy. 187 | if im.format == "WEBP": 188 | background.save(file + ".jpg", "JPEG") 189 | else: 190 | background.save(file, "JPEG") 191 | 192 | 193 | def main(): 194 | ig = ImageGrabber(resize=True) 195 | ig.search_image("test") 196 | 197 | 198 | if __name__ == "__main__": 199 | main() --------------------------------------------------------------------------------