├── .DS_Store ├── .gitignore ├── README.md ├── main.py └── requirements.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotvignesh/CmdF/765dbf12d7e346e070a7fa94d0fa1bb15ccb0bdf/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CmdF 2 | 3 | CmdF is a terminal app that allows you to search and seek through YouTube videos using the power of whisper.cpp and fuzzy string matching. 4 | 5 | ## Demo 6 | 7 | 8 | https://github.com/dotvignesh/CmdF/assets/19832025/edee34ed-9e24-4bcf-8e4e-7630efd85667 9 | 10 | 11 | 12 | ## Installation 13 | 14 | To install and run the application, follow the instructions below: 15 | 16 | 1. 
Clone the repository using Git: 17 | 18 | ```bash 19 | git clone https://github.com/dotvignesh/CmdF.git 20 | ``` 21 | 22 | 2. Change into the repository directory: 23 | 24 | ```bash 25 | cd CmdF 26 | ``` 27 | 28 | 3. Create a conda environment: 29 | 30 | ```bash 31 | conda create --name cmdf 32 | ``` 33 | 34 | 4. Activate the new conda environment: 35 | 36 | ```bash 37 | conda activate cmdf 38 | ``` 39 | 40 | 5. Install the required packages: 41 | 42 | ```bash 43 | pip install -r requirements.txt 44 | ``` 45 | 46 | 6. Install whisper.cpp (if not already installed): 47 | - Go to [whisper.cpp repo](https://github.com/ggerganov/whisper.cpp) 48 | - Follow the instructions to set up and install whisper.cpp 49 | 50 | 7. Set `whisper_path` in `main.py` to your whisper.cpp installation location 51 | 52 | 8. Run the application from your terminal: 53 | 54 | ```bash 55 | python main.py 56 | ``` 57 | 58 | The application should now be running in the background. 59 | 60 | 61 | ## Usage 62 | 63 | Once the app is running, go to the video you want to search through, and hit `F9` 64 | (NOTE: the video will be downloaded and transcribed in the background - time varies depending on video length) 65 | 66 | After the video has been processed, hit `Cmd + F`, type in your query, and press `F10` 67 | Voila, the exact location in the video will be opened in a new tab!! 68 | 69 | Enjoy searching and skimming through your videos! 
"""CmdF: jump to the spot in a YouTube video where a phrase is spoken.

Two global hotkeys drive the script:

* the key bound to ``f9`` copies the URL from the focused browser tab,
  downloads the audio, converts it to 16 kHz mono WAV and transcribes it by
  running the ``./main`` binary found in ``whisper_path``;
* the key bound to ``f10`` copies the text of the focused field, fuzzy-matches
  it against the transcript and opens the video at the matched timestamp.
"""
import os
import re
import subprocess
import sys
import time
import webbrowser

import pyperclip
import youtube_dl
from fuzzywuzzy import fuzz
from pynput import keyboard
from pynput.keyboard import Controller, Key
from thefuzz import process

whisper_path = ""  # TODO set this to your whisper path

controller = Controller()
url = None          # URL of the most recently processed video
cwd = None          # directory the script was started from (set on first F9)
transcript = []     # transcriber output, one entry per output line

# Download the best audio stream and let youtube_dl turn it into temp/out.mp3.
ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': 'temp/out.%(ext)s',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
}

# Leading "[HH:MM:SS.mmm" stamp at the start of a transcript line.
_TIMESTAMP_RE = re.compile(r'\[(\d+):(\d+):(\d+)\.\d+')


def getVideo():
    """Grab the current tab's URL, download its audio and transcribe it.

    Updates the module globals ``url``, ``cwd`` and ``transcript``. Returns
    ``None`` in every case; when a step cannot proceed a message is printed
    and ``transcript`` is left untouched.
    """
    global cwd, url, transcript

    if not whisper_path:
        # os.chdir("") would fail with an opaque error further down.
        print("whisper_path is not set; edit main.py before pressing F9.")
        return

    # Cmd+L focuses the address bar, Cmd+A / Cmd+C select and copy its text.
    with controller.pressed(Key.cmd):
        controller.tap("l")
        controller.tap("a")
        controller.tap("c")

    # Give the clipboard a moment to receive the copied text.
    time.sleep(0.1)

    url = pyperclip.paste()

    if not url:
        return
    print("Video:", url)

    # Remember the launch directory only once: on later calls the process is
    # already inside whisper_path, and recording that would make exit_handler
    # "restore" the wrong directory.
    if cwd is None:
        cwd = os.getcwd()
    os.chdir(whisper_path)

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.extract_info(url, download=True)

    ffmpeg_command = [
        'ffmpeg',
        '-y',                  # Overwrite a wav left over from a previous run
        '-i', 'temp/out.mp3',  # Input file
        '-ar', '16000',        # Set sample rate to 16kHz
        '-ac', '1',            # Set number of audio channels to 1 (mono)
        '-c:a', 'pcm_s16le',   # Set audio codec to PCM 16-bit little-endian
        'temp/out.wav'         # Output file
    ]

    try:
        subprocess.run(ffmpeg_command, check=True)
    except subprocess.CalledProcessError as e:
        print("Conversion failed:", e)
        # Without a fresh wav there is nothing valid to transcribe.
        return
    print("Conversion completed successfully.")

    # Argument list instead of a shell string: no shell is needed here.
    output = subprocess.check_output(
        ["./main", "-f", "temp/out.wav"]
    ).decode("utf-8")

    transcript = output.splitlines()


def time_to_seconds(time_str):
    """Convert a leading ``[HH:MM:SS.mmm`` stamp to whole seconds.

    Args:
        time_str: Text expected to start with a bracketed timestamp.

    Returns:
        The total number of seconds as a string (fractional part dropped),
        or ``None`` when ``time_str`` does not start with such a stamp.

    >>> time_to_seconds("[00:01:05.000 --> 00:01:10.000]  hello")
    '65'
    """
    match = _TIMESTAMP_RE.match(time_str)
    if match is None:
        return None

    hours, minutes, seconds = (int(part) for part in match.groups())
    return str(hours * 3600 + minutes * 60 + seconds)


def getQuery():
    """Copy the focused field's text and open the video where it is spoken.

    Reads and updates the module global ``url`` (everything from the first
    ``&`` onwards is dropped) and reads ``transcript``. Prints a message and
    returns without opening anything when no video has been processed yet or
    when no timestamped transcript line matches.
    """
    global url, transcript

    if not url or not transcript:
        # F10 was pressed before F9 finished (or was ever pressed).
        print("No transcribed video available; press F9 on a video first.")
        return

    # Keep only the part before the first "&" so the time parameter appended
    # below is the only extra parameter.
    url = url.split("&", 1)[0]

    # Cmd+A / Cmd+C select and copy the text of the focused field.
    with controller.pressed(Key.cmd):
        controller.tap("a")
        controller.tap("c")

    time.sleep(0.1)
    query = pyperclip.paste()

    if not query:
        return
    print("Query:", query)

    matches = process.extract(query, transcript, scorer=fuzz.ratio)

    # The best-scoring line may carry no timestamp (blank or log output), so
    # take the first candidate whose timestamp actually parses.
    for match in matches:
        seconds = time_to_seconds(match[0])
        if seconds is not None:
            break
    else:
        print("No timestamped transcript line matched the query.")
        return

    # URLs without a query string need "?" rather than "&" before "t=".
    separator = "&" if "?" in url else "?"
    target = url + separator + "t=" + seconds

    print("Top Matched URL: ", target)

    webbrowser.open(target, new=2, autoraise=True)


def f9():
    """Hotkey callback: run getVideo and print any error instead of raising."""
    try:
        getVideo()
    except Exception as e:
        print(e)


def f10():
    """Hotkey callback: run getQuery and print any error instead of raising."""
    try:
        getQuery()
    except Exception as e:
        print(e)


def exit_handler():
    """Delete the temporary audio files, restore the start directory, exit.

    Always terminates the process with status 0 via ``sys.exit``.
    """
    try:
        for leftover in ("temp/out.wav", "temp/out.mp3"):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                # Nothing was downloaded, or it was already cleaned up.
                pass
        # cwd stays None when F9 was never pressed; nothing to restore then.
        if cwd is not None:
            os.chdir(cwd)
    except OSError:
        print("---Error---")
    finally:
        print("---Exit---")
        sys.exit(0)


if __name__ == "__main__":
    try:
        # <109> and <101> are the raw key codes bound to f10 and f9
        # (presumably the macOS virtual key codes for F10/F9 - confirm on
        # other platforms).
        with keyboard.GlobalHotKeys({"<109>": f10, "<101>": f9}) as h:
            h.join()
    except KeyboardInterrupt:
        exit_handler()
numpy==1.24.2 7 | pynput==1.7.6 8 | pyperclip==1.8.2 9 | thefuzz==0.22.1 10 | torch==2.2.1 11 | transformers==4.38.1 12 | youtube_dl==2021.12.17 13 | --------------------------------------------------------------------------------