├── requirements.txt ├── assets └── WLEDAudioSync30.ico ├── whl ├── aubio-0.4.5-cp39-cp39-win_amd64.whl ├── aubio-0.5.0a0-cp310-cp310-win_amd64.whl └── aubio-0.5.0a0-cp39-cp39-win_amd64.whl ├── LICENSE ├── .github └── workflows │ └── manual.yml ├── README.md └── WLEDAudioSyncRTBeat.py /requirements.txt: -------------------------------------------------------------------------------- 1 | pyaudio 2 | aubio-ledfx 3 | python-osc 4 | numpy 5 | imageio 6 | keyboard 7 | -------------------------------------------------------------------------------- /assets/WLEDAudioSync30.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zak-45/WLEDAudioSyncRTBeat/HEAD/assets/WLEDAudioSync30.ico -------------------------------------------------------------------------------- /whl/aubio-0.4.5-cp39-cp39-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zak-45/WLEDAudioSyncRTBeat/HEAD/whl/aubio-0.4.5-cp39-cp39-win_amd64.whl -------------------------------------------------------------------------------- /whl/aubio-0.5.0a0-cp310-cp310-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zak-45/WLEDAudioSyncRTBeat/HEAD/whl/aubio-0.5.0a0-cp310-cp310-win_amd64.whl -------------------------------------------------------------------------------- /whl/aubio-0.5.0a0-cp39-cp39-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zak-45/WLEDAudioSyncRTBeat/HEAD/whl/aubio-0.5.0a0-cp39-cp39-win_amd64.whl -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 zak-45 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # Cross-compile workflow that is manually triggered 2 | 3 | name: Cross Compile Manual workflow 4 | 5 | # Controls when the action will run. Workflow runs when manually triggered using the UI 6 | # or API. 7 | on: 8 | workflow_dispatch: 9 | # Inputs the workflow accepts. 
10 | inputs: 11 | mytag: 12 | # Friendly description to be shown in the UI instead of 'name' 13 | description: 'Input Tag' 14 | # Default value if no value is explicitly provided 15 | default: '0.0.0.0' 16 | # Input has to be provided for the workflow to run 17 | required: true 18 | # The data type of the input 19 | type: string 20 | 21 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 22 | 23 | jobs: 24 | build: 25 | permissions: 26 | contents: write 27 | 28 | env: 29 | DEBUG_COMPILATION: yes 30 | COMP_VERSION: ${{github.event.inputs.mytag}} 31 | 32 | strategy: 33 | matrix: 34 | os: [macos-latest, ubuntu-latest, windows-latest] 35 | 36 | runs-on: ${{matrix.os}} 37 | 38 | steps: 39 | - name: Check-out repository 40 | uses: actions/checkout@v3 41 | 42 | - name: Windows specific 43 | if: matrix.os == 'windows-latest' 44 | run: | 45 | echo "ARCHITECTURE=amd64" >> $GITHUB_ENV 46 | 47 | - name: Ubuntu specific 48 | if: matrix.os == 'ubuntu-latest' 49 | run: | 50 | ARCHITECTURE=$(uname -m) 51 | echo "ARCHITECTURE=$ARCHITECTURE" >> $GITHUB_ENV 52 | sudo apt-get update 53 | sudo apt-get install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 54 | sudo apt-get install ffmpeg 55 | 56 | - name: MacOS specific 57 | if: matrix.os == 'macos-latest' 58 | # export CFLAGS="-Wno-incompatible-function-pointer-types" 59 | # brew install aubio 60 | run: | 61 | echo "CFLAGS=-Wno-incompatible-function-pointer-types" >> $GITHUB_ENV 62 | ARCHITECTURE=$(uname -m) 63 | echo "ARCHITECTURE=$ARCHITECTURE" >> $GITHUB_ENV 64 | brew update 65 | brew install portaudio 66 | brew reinstall libsndfile 67 | echo "PYTHONPATH=/opt/homebrew/Cellar/libvorbis/1.3.7/lib:/opt/homebrew/opt/opus/lib:/opt/homebrew/opt/libsndfile/lib:/opt/homebrew/opt/portaudio/lib:/opt/homebrew/opt/libogg/lib:/opt/homebrew/opt/libvorbis/lib:/opt/homebrew/opt/lame/lib:/opt/homebrew/opt/flac/lib:/opt/homebrew/opt/mpg123/lib:$PYTHONPATH" >> $GITHUB_ENV 68 | 69 | - name: Verify 
PYTHONPATH 70 | run: echo $PYTHONPATH 71 | 72 | - name: Setup Python 73 | uses: actions/setup-python@v4 74 | with: 75 | python-version: '3.13' # Version range or exact version of a Python version to use, using SemVer's version range syntax 76 | architecture: 'x64' # optional x64 or x86. Defaults to x64 if not specified 77 | cache: 'pip' 78 | cache-dependency-path: | 79 | **/requirements*.txt 80 | 81 | - name: Install Dependencies for All 82 | run: | 83 | pip install -r requirements.txt 84 | 85 | - name: Build Executable for ${{runner.os}} 86 | uses: Nuitka/Nuitka-Action@main 87 | with: 88 | nuitka-version: main 89 | script-name: WLEDAudioSyncRTBeat.py 90 | company-name: zak-45 91 | file-version: ${{github.event.inputs.mytag}} 92 | windows-icon-from-ico: assets/WLEDAudioSync30.ico 93 | macos-app-icon: assets/WLEDAudioSync30.ico 94 | mode: onefile 95 | onefile-tempdir-spec: ./WLEDAudioSyncRTBeat 96 | output-file: WLEDAudioSyncRTBeat-${{runner.os}}_${{ env.ARCHITECTURE }} 97 | 98 | - name: Upload Artifacts 99 | uses: actions/upload-artifact@v4 100 | with: 101 | name: ${{runner.os}} Build 102 | if-no-files-found: warn 103 | path: 'build/WLEDAudioSyncRTBeat-${{runner.os}}_${{ env.ARCHITECTURE }}**' 104 | 105 | - name: Create Release ${{github.event.inputs.mytag}} with Builds from Nuitka 106 | uses: ncipollo/release-action@v1 107 | with: 108 | allowUpdates: true 109 | commit: main 110 | tag: ${{github.event.inputs.mytag}} 111 | artifacts: 'build/WLEDAudioSyncRTBeat-${{runner.os}}_${{ env.ARCHITECTURE }}**' 112 | 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Onset Beat Detection to OSC 2 | [![Cross Compile Manual workflow](https://github.com/zak-45/WLEDAudioSyncRTBeat/actions/workflows/manual.yml/badge.svg)](https://github.com/zak-45/WLEDAudioSyncRTBeat/actions/workflows/manual.yml) 3 | 4 | This is a real-time beat and tempo detector 
built with Python and the [aubio](https://github.com/aubio/aubio) library. It captures audio from a selected input device, analyzes it to find the rhythm, and sends the calculated Beats Per Minute (BPM) to one or more OSC (Open Sound Control) servers. 5 | 6 | It is designed to be a stable, low-latency bridge between live audio and lighting software, VJ applications, or any other OSC-compatible system. 7 | 8 | This is a feature of [WLEDAudioSync Chataigne Module](https://github.com/zak-45/WLEDAudioSync-Chataigne-Module). 9 | 10 | You can see a demo here : [WLEDAudioSyncRTBeat demo](https://youtu.be/VXM_zEzKo6M) 11 | 12 | Chataigne view: 13 | 14 | ![image](https://github.com/zak-45/WLEDAudioSyncRTBeat/assets/121941293/89b89dbf-49bb-410e-8d7b-2c43357c5100) 15 | 16 | 17 | ## Installation 18 | 19 | ### Packaged Release (Recommended for Win / Mac / Linux) 20 | 21 | Grab the latest pre-compiled, portable release from here: https://github.com/zak-45/WLEDAudioSyncRTBeat/releases 22 | 23 | ``` 24 | No Python installation is needed. 25 | This is a portable version. 26 | Just place it in a folder and run the executable for your OS. 27 | ``` 28 | 29 | **INFO** 30 | --- 31 | Some antivirus software may flag the executable as a potential threat. This is a false positive due to the way the script is packaged into a single file. If you do not trust the executable, you can always run the script from source using the Python method below. 32 | --- 33 | 34 | ### From Source (All OS with Python) 35 | 36 | 1. Install the required Python modules: 37 | ``` 38 | pip install -r requirements.txt 39 | ``` 40 | *(Note: This will install `pyaudio`, `aubio-ledfx`, `python-osc`, `numpy`, `imageio`, and `keyboard`.)* 41 | 42 | 2. Download the `WLEDAudioSyncRTBeat.py` file and run it: 43 | ``` 44 | python WLEDAudioSyncRTBeat.py 45 | ``` 46 | 47 | ## Usage 48 | 49 | The script can be launched with the `run` command (which is the default) or the `list` command to see audio devices.
50 | 51 | ``` 52 | usage: WLEDAudioSyncRTBeat.py [-h] [-d DEVICE] [-st SILENCE_THRESHOLD] [-c CONFIDENCE] [--double-confidence DOUBLE_CONFIDENCE] [-b BUFSIZE] [--relearn-interval RELEARN_INTERVAL] [--raw-bpm] [-s IP PORT ADDRESS [MODE]] [{run,list}] ... 53 | 54 | Realtime Audio Beat Detector with OSC output. 55 | ``` 56 | 57 | ### Commands 58 | 59 | `run` 60 | : (Default) Starts the beat detector. If no command is specified, `run` is executed automatically. 61 | 62 | `list` 63 | : Lists all available audio input devices and their corresponding index numbers, which can be used with the `-d` flag. 64 | 65 | ### Options 66 | 67 | `-h, --help` 68 | : Shows the help message and exits. 69 | 70 | `-d DEVICE, --device DEVICE` 71 | : Specifies the index of the audio input device to use. If not provided, the system's default input device is used automatically. 72 | 73 | `-st SILENCE_THRESHOLD, --silence_threshold SILENCE_THRESHOLD` 74 | : Sets the volume threshold (in negative dB) to be considered silence. Default: `-60.0`. 75 | 76 | `-c CONFIDENCE, --confidence CONFIDENCE` 77 | : The general confidence threshold (0.0 to 1.0) that `aubio` must have in a beat for it to be processed. Lower values are more sensitive but can be less stable. Default: `0.2`. 78 | 79 | `--double-confidence DOUBLE_CONFIDENCE` 80 | : The higher confidence threshold (0.0 to 1.0) required to trigger the more aggressive half-time doubling heuristic. Default: `0.5`. 81 | 82 | `-b BUFSIZE, --bufsize BUFSIZE` 83 | : The size of the audio buffer for analysis. Larger values (e.g., 2048) can improve accuracy on complex music at the cost of slightly higher latency. Powers of 2 are optimal (512, 1024, 2048). Default: `1024`. 84 | 85 | `--relearn-interval RELEARN_INTERVAL` 86 | : A powerful adaptive feature. The script will periodically re-enter the "learning phase" every X seconds to adapt to tempo changes. This is ideal for long DJ sets or songs with tempo shifts. Set to `0` to disable. Default: `0`. 
87 | 88 | `--raw-bpm` 89 | : A debug mode that disables all the intelligent BPM correction heuristics. This shows you the raw, unfiltered tempo directly from the `aubio` detector. 90 | 91 | `-s IP PORT ADDRESS [MODE], --server IP PORT ADDRESS [MODE]` 92 | : The destination for the OSC messages. This argument can be used multiple times to send data to multiple servers simultaneously. 93 | * `IP`: The IP address of the OSC server. 94 | * `PORT`: The port of the OSC server. 95 | * `ADDRESS`: The OSC path (e.g., `/wled/bpm`). 96 | * `MODE` (Optional): Can be `PLAIN` (the final BPM), `HALF` (BPM / 2), or `GMA3` (a specific curve for GrandMA3 lighting software). Defaults to `PLAIN`. 97 | 98 | ### Interactive Controls 99 | 100 | `u` key 101 | : Press the `u` key at any time to manually trigger a BPM re-learning phase. This is useful if you feel the tempo is incorrect or during a song transition. 102 | 103 | ## Example 104 | 105 | ```sh 106 | # Run the detector, sending BPM to two different servers with different modes, 107 | # and have it re-evaluate the tempo every 90 seconds. 108 | python WLEDAudioSyncRTBeat.py --relearn-interval 90 -s 127.0.0.1 12000 /wled/bpm -s 192.168.1.50 8000 /gma3/speed GMA3 109 | ``` 110 | 111 | This will: 112 | 1. Start the beat detector using the default audio device. 113 | 2. Send the calculated BPM to `/wled/bpm` on the local machine at port `12000`. 114 | 3. Send a specially formatted BPM value to `/gma3/speed` on a device at `192.168.1.50:8000`. 115 | 4. Every 90 seconds, it will non-disruptively re-analyze the audio to confirm or update the tempo. 116 | 117 | ## Credits 118 | 119 | Thanks to : https://github.com/DrLuke/aubio-beat-osc for the original inspiration. 
-------------------------------------------------------------------------------- /WLEDAudioSyncRTBeat.py: -------------------------------------------------------------------------------- 1 | import pyaudio 2 | import numpy as np 3 | import aubio # Ledfx fork for better accuracy 4 | import time 5 | import sys 6 | import math 7 | import ipaddress 8 | import argparse 9 | import signal 10 | import os 11 | from typing import List, NamedTuple, Tuple 12 | from collections import deque 13 | 14 | from pythonosc.udp_client import SimpleUDPClient 15 | if sys.platform != 'darwin': 16 | import keyboard 17 | 18 | CHANNELS = 1 # Mono audio 19 | FORMAT = pyaudio.paFloat32 # 32-bit float format, ideal for aubio 20 | 21 | 22 | class BeatPrinter: 23 | """A simple class to manage the state of a spinning character for printing.""" 24 | 25 | def __init__(self): 26 | self.state: int = 0 27 | self.spinner_chars = "¼▚▞▚" 28 | 29 | def get_char(self) -> str: 30 | char = self.spinner_chars[self.state] 31 | self.state = (self.state + 1) % len(self.spinner_chars) 32 | return char 33 | 34 | 35 | class ServerInfo(NamedTuple): 36 | ip: str 37 | port: int 38 | address: str 39 | mode: str = None 40 | 41 | 42 | def list_devices(p: pyaudio.PyAudio): 43 | """Lists all available audio input devices.""" 44 | print("Available audio input devices:") 45 | for i in range(p.get_device_count()): 46 | info = p.get_device_info_by_index(i) 47 | if info.get('maxInputChannels') > 0: 48 | print(f" [{info['index']}] {info['name']}") 49 | print("\nUse the index with the -d flag to select a device.") 50 | 51 | 52 | class BeatDetector: 53 | def __init__(self, device_index: int = None, silence_threshold: float = -60.0, server_info: List[ServerInfo] = None, 54 | confidence_threshold: float = 0.2, doubling_confidence_threshold: float = 0.5, buf_size: int = 1024, 55 | raw_bpm_mode: bool = False, relearn_interval: int = 0): 56 | self.device_index = device_index 57 | self.silence_threshold = silence_threshold 58 | 
self.server_info = server_info 59 | self.buf_size = buf_size 60 | self.samplerate = 44100 # Default, will be updated 61 | self.doubling_confidence_threshold = doubling_confidence_threshold 62 | self.raw_bpm_mode = raw_bpm_mode 63 | self.relearn_interval = relearn_interval 64 | 65 | # --- State Management --- 66 | self.is_playing = False 67 | self.is_learning = False # Flag for the initial BPM learning phase 68 | self.avg_db_level = -120.0 69 | self.last_bpm = 0.0 70 | self.last_raw_bpm = 0.0 # Store the last raw BPM for consistent printing 71 | self.bpm_history = deque(maxlen=5) # History of recent BPMs to make smarter decisions 72 | self.bpm_history_raw = deque(maxlen=5) # History of recent raw BPMs to make smarter decisions 73 | self.sound_counter = 0 74 | self.silence_counter = 0 75 | self.last_update_time = 0.0 76 | self.listening_start_time = 0.0 77 | self.learning_phase_beats = [] # Store initial beats to make a better first guess 78 | self.last_relearn_time = 0.0 # Track time for periodic re-learning 79 | # --- Sanity Check State --- 80 | self.sanity_check_beats = 0 81 | self.sanity_check_start_time = 0.0 82 | 83 | self.last_callback_time = time.time() # For watchdog timer 84 | 85 | # --- Constants --- 86 | self.confidence_threshold = confidence_threshold 87 | self.bpm_smoothing_factor = 0.1 88 | self.sound_frames_needed = 3 89 | self.silence_frames_needed = 10 90 | self.update_interval = 0.5 91 | self.listening_timeout = 8.0 # Seconds to wait in "Listening" before resetting 92 | self.learning_beats_needed = 5 # Number of beats to collect before making a decision 93 | self.sanity_check_beats_needed = 10 # Number of beats to count for the sanity check 94 | self.sanity_check_window = 10.0 # Max seconds for the sanity check window 95 | self.watchdog_timeout = 2.0 # Seconds of no callbacks before forcing silence 96 | 97 | # --- Printing --- 98 | self.spinner = BeatPrinter() 99 | 100 | # --- OSC Client Setup --- 101 | self.osc_servers: List[Tuple[SimpleUDPClient, 
str]] = [] 102 | if self.server_info: 103 | self.osc_servers = [(SimpleUDPClient(x.ip, x.port), x.address) for x in self.server_info] 104 | 105 | # --- PyAudio and Aubio Setup --- 106 | self.p = pyaudio.PyAudio() 107 | 108 | # Query the device for its default sample rate 109 | device_info = self.p.get_device_info_by_index(self.device_index) 110 | self.samplerate = int(device_info['defaultSampleRate']) 111 | print(f"Using device sample rate: {self.samplerate} Hz") 112 | 113 | fft_size = self.buf_size * 2 114 | # Use the 'specflux' method from the ledfx fork for potentially better accuracy 115 | self.tempo = aubio.tempo("specflux", fft_size, self.buf_size, self.samplerate) 116 | self.stream = self.p.open( 117 | format=FORMAT, 118 | channels=CHANNELS, 119 | rate=self.samplerate, 120 | input=True, 121 | input_device_index=self.device_index, 122 | frames_per_buffer=self.buf_size, 123 | stream_callback=self._pyaudio_callback 124 | ) 125 | 126 | def trigger_relearn(self): 127 | """Manually triggers the BPM learning phase.""" 128 | # Only trigger if music is playing and we are not already learning 129 | if self.is_playing and not self.is_learning: 130 | self.is_learning = True 131 | self.listening_start_time = time.time() 132 | self.learning_phase_beats.clear() 133 | print(f"\n[Manual Trigger] Re-learning BPM... (current: {self.last_bpm:.1f})") 134 | 135 | def _pyaudio_callback(self, in_data, frame_count, time_info, status): 136 | try: 137 | is_watchdog_timeout = (time.time() - self.last_callback_time) > self.watchdog_timeout 138 | # Update watchdog timer 139 | self.last_callback_time = time.time() 140 | 141 | audio_samples = np.frombuffer(in_data, dtype=np.float32) 142 | db_level = aubio.db_spl(audio_samples) 143 | 144 | if db_level == -np.inf: 145 | db_level = -120.0 146 | 147 | self.avg_db_level = (0.2 * self.avg_db_level) + (0.8 * db_level) 148 | 149 | # --- CRITICAL: Sanitize the average level to recover from NaN --- 150 | # This can happen with unusual audio signals. 
151 | if np.isnan(self.avg_db_level): 152 | self.avg_db_level = -120.0 153 | 154 | # --- State Machine Logic --- 155 | # Decide if we are playing or silent based on volume and watchdog 156 | if self.avg_db_level > self.silence_threshold and not is_watchdog_timeout: 157 | # We have sound 158 | self.silence_counter = 0 159 | if not self.is_playing: 160 | self.sound_counter += 1 161 | if self.sound_counter >= self.sound_frames_needed: 162 | self.is_playing = True 163 | self.is_learning = True # Start in the learning phase 164 | self.last_relearn_time = time.time() # Start the re-learning timer 165 | if self.last_bpm > 0: 166 | self.send_bpm_osc(self.last_bpm) 167 | self.last_update_time = time.time() 168 | self.last_bpm = 0.0 169 | else: 170 | # We are silent (or the watchdog timed out) 171 | self.sound_counter = 0 172 | if self.is_playing: 173 | self.silence_counter += 1 174 | if self.silence_counter >= self.silence_frames_needed: 175 | self.is_playing = False 176 | self.is_learning = False # Reset learning phase on silence 177 | self.learning_phase_beats.clear() # CRITICAL: Reset the learning beats 178 | self.bpm_history.clear() 179 | self.sanity_check_beats = 0 # Reset sanity check on silence 180 | self.sanity_check_start_time = 0.0 181 | self.bpm_history_raw.clear() 182 | self.listening_start_time = 0.0 # Reset listening timer 183 | self.send_bpm_osc(0.0) 184 | 185 | # --- Processing and Printing Logic --- 186 | if self.is_playing: 187 | # Check if it's time to trigger a periodic re-learn 188 | if self.relearn_interval > 0 and not self.is_learning and ( 189 | time.time() - self.last_relearn_time) > self.relearn_interval: 190 | self.is_learning = True 191 | self.listening_start_time = time.time() # Start the re-learning timeout clock 192 | self.learning_phase_beats.clear() 193 | print(f"Re-learning BPM... (current: {self.last_bpm:.1f}) \r") 194 | # sys.stdout.write(f"Re-learning BPM... 
(current: {self.last_bpm:.1f})\r") 195 | 196 | # If we don't have a stable BPM yet, we are in a "Listening" state 197 | if self.is_learning: 198 | # If this is the first time we are listening, start the timer 199 | if self.listening_start_time == 0.0: 200 | self.listening_start_time = time.time() 201 | # Check for listening timeout 202 | if (time.time() - self.listening_start_time) > self.listening_timeout: 203 | # We've been listening for too long without finding a beat, assume silence 204 | self.is_learning = False # Exit learning phase 205 | self.listening_start_time = 0.0 206 | # If we were re-learning, we keep the last BPM. If we were starting from scratch, we go silent. 207 | if self.last_bpm < 40.0: 208 | self.is_playing = False 209 | self.bpm_history.clear() 210 | self.sanity_check_beats = 0 211 | self.sanity_check_start_time = 0.0 212 | self.bpm_history_raw.clear() 213 | self.send_bpm_osc(0.0) 214 | print(f"Listening timed out... Reverting to silent. \r") 215 | else: 216 | print(f"Re-learning timed out. Reverting to last BPM: {self.last_bpm:.1f} \r") 217 | # sys.stdout.write(f"Listening timed out... Reverting to silent.\r") 218 | else: 219 | # During the initial learning phase, we print "Listening..." 220 | if self.last_bpm < 40.0: 221 | sys.stdout.write(f"Listening... 
| Level: {self.avg_db_level:.1f} dB \r") 222 | 223 | beat = self.tempo(audio_samples) 224 | if beat[0]: 225 | current_confidence = self.tempo.get_confidence() 226 | 227 | # Only process beats that meet the general confidence threshold 228 | if current_confidence > self.confidence_threshold: 229 | detected_bpm = self.tempo.get_bpm() 230 | 231 | # Start the sanity check timer on the first confident beat of a new window 232 | if self.sanity_check_start_time == 0.0: 233 | self.sanity_check_start_time = time.time() 234 | self.sanity_check_beats += 1 235 | 236 | self.last_raw_bpm = detected_bpm # Store the raw value at the moment of detection 237 | new_bpm = detected_bpm 238 | 239 | # If not in raw mode, apply the intelligent heuristics 240 | if not self.raw_bpm_mode and self.is_learning: 241 | # --- Learning Phase --- 242 | self.learning_phase_beats.append(detected_bpm) 243 | if len(self.learning_phase_beats) >= self.learning_beats_needed: 244 | # We have enough beats, calculate the hypothetic BPM 245 | median_bpm = np.median(self.learning_phase_beats) 246 | 247 | # Anti demi-tempo robuste 248 | if median_bpm < 110: 249 | new_bpm = median_bpm * 2 250 | else: 251 | new_bpm = median_bpm 252 | 253 | # Exit learning phase and seed the history 254 | self.is_learning = False 255 | self.bpm_history.clear() 256 | self.bpm_history_raw.clear() 257 | self.bpm_history.append(new_bpm) 258 | self.bpm_history_raw.append(detected_bpm) 259 | self.last_relearn_time = time.time() # Reset timer after learning is complete 260 | self.last_bpm = new_bpm # Immediately set the BPM to the corrected median 261 | 262 | elif not self.raw_bpm_mode and len(self.bpm_history) > 1: 263 | # --- Stable Phase: Use history for correction --- 264 | print('stable phase') 265 | recent_avg = np.mean(list(self.bpm_history)) 266 | candidates = [detected_bpm, detected_bpm * 2, detected_bpm / 2] 267 | new_bpm = min(candidates, key=lambda c: abs(c - recent_avg) * ( 268 | 1.0 if abs(c - detected_bpm) < 1 else 1.5)) 
269 | 270 | recent_avg_raw = np.mean(list(self.bpm_history_raw)) 271 | 272 | # Anti demi-tempo robuste 273 | if recent_avg_raw < 110: 274 | new_bpm = recent_avg_raw * 2 275 | 276 | # --- Sanity Check Override --- 277 | # This runs after the main heuristics to catch persistent half-time errors. 278 | time_since_check_start = time.time() - self.sanity_check_start_time 279 | if self.sanity_check_beats >= self.sanity_check_beats_needed and time_since_check_start > 0: 280 | # Calculate real-world BPM based on beat frequency 281 | real_world_bpm = (self.sanity_check_beats / time_since_check_start) * 60 282 | # print(real_world_bpm) 283 | 284 | # If the real-world BPM is roughly double our locked BPM, we have a half-time error 285 | if self.last_bpm > 40 and abs((real_world_bpm / 2) - self.last_bpm) < 15: # Use a generous threshold 286 | print(f"\n[Sanity Check] Half-time error detected! Correcting {self.last_bpm:.1f} -> {self.last_bpm * 2:.1f}") 287 | # Force a correction and reset history to re-stabilize at the new tempo 288 | new_bpm = self.last_bpm * 2 289 | self.bpm_history.clear() 290 | self.bpm_history_raw.clear() 291 | 292 | # Reset the sanity check for the next window 293 | self.sanity_check_beats = 0 294 | self.sanity_check_start_time = 0.0 295 | 296 | # Also reset if the window has been open for too long (e.g., on very slow music) 297 | elif self.sanity_check_start_time > 0 and time_since_check_start > self.sanity_check_window: 298 | self.sanity_check_beats = 0 299 | self.sanity_check_start_time = 0.0 300 | 301 | if new_bpm > 0: 302 | self.bpm_history.append(new_bpm) 303 | self.bpm_history_raw.append(detected_bpm) 304 | 305 | if self.raw_bpm_mode: 306 | self.last_bpm = new_bpm 307 | else: 308 | self.last_bpm = (self.last_bpm * (1 - self.bpm_smoothing_factor)) + ( 309 | new_bpm * self.bpm_smoothing_factor) 310 | 311 | # CRITICAL: Send OSC message and update printout on every confident beat. 
312 | self.send_bpm_osc(self.last_bpm) 313 | self.last_update_time = time.time() 314 | 315 | if self.last_bpm > 0: # This check is now just for printing 316 | spinner_char = self.spinner.get_char() 317 | sys.stdout.write( 318 | f"{spinner_char} BPM: {self.last_bpm:.1f} | Level: {self.avg_db_level:.1f} dB | {self.last_raw_bpm:.1f} | {self.tempo.get_bpm():.1f} | {current_confidence:.1f} \r") 319 | 320 | # Send periodic "keep-alive" updates. 321 | # CRITICAL: This now runs even during the re-learning phase to ensure a continuous BPM stream. 322 | elif self.last_bpm > 0 and (time.time() - self.last_update_time) > self.update_interval: 323 | self.send_bpm_osc(self.last_bpm) 324 | self.last_update_time = time.time() 325 | 326 | else: 327 | if self.last_bpm != 0.0: 328 | self.last_bpm = 0.0 329 | sys.stdout.write(f"Silent... \r") 330 | 331 | # No need for flush, stdout is line-buffered by default 332 | except Exception as e: 333 | # Catch any exception from aubio or other processing to prevent the callback thread from crashing 334 | print(f"\nError in audio callback: {e}", file=sys.stderr) 335 | 336 | return None, pyaudio.paContinue 337 | 338 | def send_bpm_osc(self, bpm: float): 339 | if not self.osc_servers: 340 | return 341 | 342 | bpmh = bpm / 2 343 | bpmg = math.sqrt(bpm / 240) * 100 if bpm > 0 else 0.0 344 | 345 | for server, s_info in zip(self.osc_servers, self.server_info): 346 | mode = s_info.mode or 'plain' 347 | value_to_send = {'plain': bpm, 'half': bpmh, 'gma3': bpmg}.get(mode.lower(), bpm) 348 | server[0].send_message(server[1], value_to_send) 349 | 350 | def stop(self): 351 | print("\nStopping stream and cleaning up...") 352 | self.stream.stop_stream() 353 | self.stream.close() 354 | self.p.terminate() 355 | 356 | 357 | def send_bpm_osc(bpm: float, osc_servers: List[Tuple[SimpleUDPClient, str]], server_info: List[ServerInfo]): 358 | """Calculates different BPM modes and sends them to the configured OSC servers.""" 359 | if not osc_servers: 360 | return 361 
| 362 | # recalculate half BPM 363 | bpmh = bpm / 2 364 | # recalculate BPM for GrandMA3 365 | bpmg = math.sqrt(bpm / 240) * 100 if bpm > 0 else 0.0 366 | 367 | for server, s_info in zip(osc_servers, server_info): 368 | mode = s_info.mode or 'plain' 369 | value_to_send = {'plain': bpm, 'half': bpmh, 'gma3': bpmg}.get(mode.lower(), bpm) 370 | server[0].send_message(server[1], value_to_send) 371 | 372 | 373 | if __name__ == "__main__": 374 | # --- Argument Parsing --- 375 | parser = argparse.ArgumentParser(description="Realtime Audio Beat Detector with OSC output.") 376 | parser.add_argument("-d", "--device", type=int, default=None, 377 | help="Index of the audio input device to use (optional).") 378 | parser.add_argument("-st", "--silence_threshold", type=float, default=-60.0, 379 | help="The volume threshold in negative dB to consider as silence (default: -60.0).") 380 | parser.add_argument("-c", "--confidence", type=float, default=0.2, 381 | help="The confidence threshold for beat detection (0.0 to 1.0, default: 0.2).") 382 | parser.add_argument("--double-confidence", type=float, default=0.5, 383 | help="The confidence threshold required to trigger the half-time doubling heuristic (default: 0.5).") 384 | # Increased default buffer size for better accuracy on complex audio 385 | parser.add_argument("-b", "--bufsize", type=int, default=1024, 386 | help="Size of the audio buffer for analysis (powers of 2 are best, e.g., 1024, 2048). Default: 1024.") 387 | parser.add_argument("--relearn-interval", type=int, default=0, 388 | help="Periodically re-enter the learning phase every X seconds to adapt to tempo changes. 
0 to disable (default: 0).") 389 | parser.add_argument("--raw-bpm", action="store_true", 390 | help="Use raw BPM value from detector, bypassing intelligent heuristics.") 391 | parser.add_argument("-s", "--server", 392 | help="OSC Server address (multiple can be provided) in format 'IP' 'PORT' 'PATH' 'MODE', " 393 | "Mode PLAIN for plain BPM-Value,Mode HALF for half of BPM-Value, " 394 | "Mode GMA3 for GrandMA3 Speed masters where 100 percent is for 240BPM. " 395 | " MODE is optional and default to PLAIN" 396 | " e.g. 127.0.0.1 8080 /test GMA3", 397 | nargs='*', 398 | action="append" 399 | ) 400 | subparsers = parser.add_subparsers(dest="command", help="Available commands") 401 | 402 | # Command to run the meter 403 | run_parser = subparsers.add_parser("run", help="Run the beat detector.") 404 | 405 | # Command to list devices 406 | list_parser = subparsers.add_parser("list", help="List available audio input devices.") 407 | 408 | args, unknown = parser.parse_known_args() 409 | 410 | if args.command == "list": 411 | list_devices(pyaudio.PyAudio()) 412 | else: 413 | # --- Device Selection --- 414 | p_temp = pyaudio.PyAudio() 415 | try: 416 | if args.device is not None: 417 | device_info = p_temp.get_device_info_by_index(args.device) 418 | print(f"Attempting to use specified device: [{device_info['index']}] {device_info['name']}") 419 | else: 420 | device_info = p_temp.get_default_input_device_info() 421 | args.device = device_info['index'] 422 | print(f"No device specified, using default input: [{device_info['index']}] {device_info['name']}") 423 | except (IOError, IndexError): 424 | print(f"Error: Device index {args.device} is invalid. 
Use 'list' command to see available devices.") 425 | sys.exit(1) 426 | finally: 427 | p_temp.terminate() 428 | 429 | # --- Validate Arguments --- 430 | # Check if bufsize is a power of 2, which is optimal for FFT 431 | if not (args.bufsize > 0 and (args.bufsize & (args.bufsize - 1) == 0)): 432 | print(f"Warning: Buffer size {args.bufsize} is not a power of 2. This may affect performance/accuracy.", 433 | file=sys.stderr) 434 | 435 | # --- Server Info Parsing --- 436 | server_info = [] 437 | if args.server: 438 | server_info_4: List[ServerInfo] = [ServerInfo(x[0], int(x[1]), x[2], x[3]) for x in args.server if 439 | len(x) == 4] 440 | server_info_3: List[ServerInfo] = [ServerInfo(x[0], int(x[1]), x[2]) for x in args.server if len(x) == 3] 441 | server_info = server_info_3 + server_info_4 442 | for x in args.server: 443 | if len(x) < 3: 444 | parser.error('At least 3 server arguments are required ("IP","PORT","PATH")') 445 | elif len(x) > 4: 446 | parser.error('More than 4 arguments provided for server') 447 | try: 448 | ipaddress.ip_address(x[0]) 449 | except ValueError: 450 | parser.error(f'Not a valid IP address: {x[0]}') 451 | if not x[2].startswith('/'): parser.error(f'PATH {x[2]} not valid, need to start with "/"') 452 | 453 | # --- Main Execution --- 454 | print("Starting beat detector... 
Press Ctrl+C to stop.") 455 | detector = BeatDetector(device_index=args.device, silence_threshold=args.silence_threshold, 456 | server_info=server_info, confidence_threshold=args.confidence, 457 | doubling_confidence_threshold=args.double_confidence, buf_size=args.bufsize, 458 | raw_bpm_mode=args.raw_bpm, relearn_interval=args.relearn_interval) 459 | 460 | # Set up the hotkey for manual re-learning 461 | if sys.platform != 'darwin': 462 | keyboard.add_hotkey('u', detector.trigger_relearn) 463 | print("Press 'u' at any time to manually trigger BPM re-learning.") 464 | 465 | # Keep the main thread alive until Ctrl+C 466 | def signal_handler(signum, frame): 467 | detector.stop() 468 | sys.exit(0) 469 | 470 | 471 | signal.signal(signal.SIGINT, signal_handler) 472 | 473 | # Use a sleep loop on Windows, as signal.pause() is not available 474 | if os.name == 'nt': 475 | while True: 476 | time.sleep(1) # Main thread just sleeps, all work is in the callback 477 | else: 478 | signal.pause() --------------------------------------------------------------------------------