├── images
    └── .gitkeep
├── README.md
└── main.py


/images/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # How to use
 2 | * [Install python](https://www.python.org/)
 3 | * Create a new project and download the credentials.json file from [Python Quickstart](https://developers.google.com/drive/api/v3/quickstart/python)
 4 | * Put the credentials.json file next to main.py
 5 | * Install the Google Client Library using
 6 | ```
 7 | pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib oauth2client
 8 | ```
 9 | * Export images using [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) and put them in the images folder (only `.jpeg` files are processed)
10 | * Run main.py and log in with your Google account (only needed the first time)
11 | * Wait until it finishes processing every image.


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import httplib2
  3 | import os
  4 | import io
  5 | 
  6 | from apiclient import discovery
  7 | from oauth2client import client
  8 | from oauth2client import tools
  9 | from oauth2client.file import Storage
 10 | from apiclient.http import MediaFileUpload, MediaIoBaseDownload
 11 | from pathlib import Path
 12 | 
# Parse the command-line flags understood by oauth2client's OAuth flow.
# This runs at import time, so the process arguments are consumed here.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    # No parsed flags available; get_credentials() then uses its fallback branch.
    flags = None
# Code is based on https://tanaikech.github.io/2017/05/02/ocr-using-google-drive-api/
# If modifying these scopes, delete your previously saved credentials
# (the token.json file that get_credentials() writes to "./").
# OAuth scope requested when the authorization flow is created.
SCOPES = 'https://www.googleapis.com/auth/drive'
# Client-secrets file handed to the OAuth flow.
CLIENT_SECRET_FILE = 'credentials.json'
# Sent as the OAuth flow's user agent.
APPLICATION_NAME = 'Drive API Python Quickstart'
 24 | 
 25 | 
 26 | def get_credentials():
 27 |     """Gets valid user credentials from storage.
 28 | 
 29 |     If nothing has been stored, or if the stored credentials are invalid,
 30 |     the OAuth2 flow is completed to obtain the new credentials.
 31 | 
 32 |     Returns:
 33 |         Credentials, the obtained credential.
 34 |     """
 35 |     credential_path = os.path.join("./", 'token.json')
 36 |     store = Storage(credential_path)
 37 |     credentials = store.get()
 38 |     if not credentials or credentials.invalid:
 39 |         flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
 40 |         flow.user_agent = APPLICATION_NAME
 41 |         if flags:
 42 |             credentials = tools.run_flow(flow, store, flags)
 43 |         else:  # Needed only for compatibility with Python 2.6
 44 |             credentials = tools.run(flow, store)
 45 |         print('Storing credentials to ' + credential_path)
 46 |     return credentials
 47 | 
 48 | 
 49 | def main():
 50 |     credentials = get_credentials()
 51 |     http = credentials.authorize(httplib2.Http())
 52 |     service = discovery.build('drive', 'v3', http=http)
 53 | 
 54 |     # imgfile = 'image.jpeg'  # Image with texts (png, jpg, bmp, gif, pdf)
 55 |     # txtfile = 'text.txt'  # Text file outputted by OCR
 56 | 
 57 |     current_directory = Path(Path.cwd())
 58 |     images_dir = Path(f'{current_directory}/images')
 59 |     raw_texts_dir = Path(f'{current_directory}/raw_texts')
 60 |     texts_dir = Path(f'{current_directory}/texts')
 61 |     srt_file = open(Path(f'{current_directory}/subtitle_output.srt'), 'a', encoding='utf-8')
 62 |     line = 1
 63 | 
 64 |     # check directory if exists
 65 |     if not images_dir.exists():
 66 |         images_dir.mkdir()
 67 |         print('Images folder is empty.')
 68 |         exit()
 69 | 
 70 |     if not raw_texts_dir.exists():
 71 |         raw_texts_dir.mkdir()
 72 |     if not texts_dir.exists():
 73 |         texts_dir.mkdir()
 74 | 
 75 |     images = Path(f'{current_directory}/images').rglob('*.jpeg')
 76 |     for image in images:
 77 | 
 78 |         # Get data
 79 |         imgfile = str(image.absolute())
 80 |         imgname = str(image.name)
 81 |         raw_txtfile = f'{current_directory}/raw_texts/{imgname[:-5]}.txt'
 82 |         txtfile = f'{current_directory}/texts/{imgname[:-5]}.txt'
 83 | 
 84 |         mime = 'application/vnd.google-apps.document'
 85 |         res = service.files().create(
 86 |             body={
 87 |                 'name': imgname,
 88 |                 'mimeType': mime
 89 |             },
 90 |             media_body=MediaFileUpload(imgfile, mimetype=mime, resumable=True)
 91 |         ).execute()
 92 | 
 93 |         downloader = MediaIoBaseDownload(
 94 |             io.FileIO(raw_txtfile, 'wb'),
 95 |             service.files().export_media(fileId=res['id'], mimeType="text/plain")
 96 |         )
 97 |         done = False
 98 |         while done is False:
 99 |             status, done = downloader.next_chunk()
100 | 
101 |         service.files().delete(fileId=res['id']).execute()
102 | 
103 |         # Create clean text file
104 |         raw_text_file = open(raw_txtfile, 'r', encoding='utf-8')
105 |         text_content = raw_text_file.read()
106 |         raw_text_file.close()
107 |         text_content = text_content.split('\n')
108 |         text_content = ''.join(text_content[2:])
109 |         text_file = open(txtfile, 'w', encoding='utf-8')
110 |         text_file.write(text_content)
111 |         text_file.close()
112 | 
113 |         start_hour = imgname.split('_')[0][:2]
114 |         start_min = imgname.split('_')[1][:2]
115 |         start_sec = imgname.split('_')[2][:2]
116 |         start_micro = imgname.split('_')[3][:3]
117 | 
118 |         end_hour = imgname.split('__')[1].split('_')[0][:2]
119 |         end_min = imgname.split('__')[1].split('_')[1][:2]
120 |         end_sec = imgname.split('__')[1].split('_')[2][:2]
121 |         end_micro = imgname.split('__')[1].split('_')[3][:3]
122 | 
123 |         # Format start time
124 |         start_time = f'{start_hour}:{start_min}:{start_sec},{start_micro}'
125 | 
126 |         # Format end time
127 |         end_time = f'{end_hour}:{end_min}:{end_sec},{end_micro}'
128 |         # Append the line to srt file
129 |         srt_file.writelines([
130 |             f'{line}\n',
131 |             f'{start_time} --> {end_time}\n',
132 |             f'{text_content}\n\n',
133 |             ''
134 |         ])
135 | 
136 |         line += 1
137 | 
138 |         print(f"{imgname} Done.")
139 | 
140 |     srt_file.close()
141 | 
142 | 
# Run the OCR-to-subtitle conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
145 | 


--------------------------------------------------------------------------------