├── LICENSE ├── README.md ├── docs ├── api_reference.md └── getting_started.md ├── examples ├── advanced │ └── robustness_test.py └── basic │ └── watermark_audio.py ├── perth ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── dummy_watermarker.cpython-311.pyc │ ├── utils.cpython-311.pyc │ ├── watermarker.cpython-310.pyc │ └── watermarker.cpython-311.pyc ├── cli │ ├── __init__.py │ └── watermark_cli.py ├── config.py ├── dummy_watermarker.py ├── perth_net │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-311.pyc │ ├── perth_net_implicit │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── audio_processor.cpython-311.pyc │ │ │ ├── checkpoint_manager.cpython-311.pyc │ │ │ ├── config.cpython-311.pyc │ │ │ ├── perth_watermarker.cpython-311.pyc │ │ │ └── utils.cpython-311.pyc │ │ ├── audio_processor.py │ │ ├── checkpoint_manager.py │ │ ├── config.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ ├── decoder.cpython-311.pyc │ │ │ │ ├── encoder.cpython-311.pyc │ │ │ │ └── perth_net.cpython-311.pyc │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ └── perth_net.py │ │ ├── perth_watermarker.py │ │ └── utils.py │ └── pretrained │ │ └── implicit │ │ ├── hparams.yaml │ │ ├── id.txt │ │ └── perth_net_250000.pth.tar ├── utils.py ├── watermarker.py └── waveform.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── pytest.ini └── test_basic.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Resemble AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Perth 2 | 3 | **Perth** is a comprehensive Python library for audio watermarking and detection. 4 | 5 | ## Overview 6 | 7 | Perth enables you to embed imperceptible watermarks in audio files and later detect them, even after the audio has undergone various transformations or manipulations. The library implements multiple watermarking techniques including neural network-based approaches. 
8 | 9 | ## Features 10 | 11 | - **Multiple Watermarking Techniques**: Including the Perth-Net Implicit neural network approach 12 | - **Robust Watermarks**: Watermarks can survive common audio transformations like compression, resampling, and more 13 | - **Command-Line Interface**: Easy to use CLI for quick watermarking tasks 14 | - **Python API**: Comprehensive API for integration into your applications 15 | - **Quality Metrics**: Tools to evaluate the perceptual quality of watermarked audio 16 | 17 | ## Installation 18 | 19 | ### From PyPI (Recommended) 20 | 21 | ```bash 22 | pip install resemble-perth 23 | ``` 24 | 25 | ### From Source 26 | 27 | ```bash 28 | git clone https://github.com/resemble-ai/Perth 29 | cd Perth 30 | pip install -e . 31 | ``` 32 | 33 | ## Quick Start 34 | 35 | ### Command Line Usage 36 | 37 | ```bash 38 | # Apply a watermark to an audio file 39 | perth input.wav -o output.wav 40 | 41 | # Extract a watermark from an audio file 42 | perth input.wav --extract 43 | ``` 44 | 45 | ### Python API Usage 46 | 47 | #### Applying a Watermark 48 | 49 | ```python 50 | import perth 51 | import librosa 52 | import soundfile as sf 53 | 54 | # Load audio file 55 | wav, sr = librosa.load("input.wav", sr=None) 56 | 57 | # Initialize watermarker 58 | watermarker = perth.PerthImplicitWatermarker() 59 | 60 | # Apply watermark 61 | watermarked_audio = watermarker.apply_watermark(wav, watermark=None, sample_rate=sr) 62 | 63 | # Save watermarked audio 64 | sf.write("output.wav", watermarked_audio, sr) 65 | ``` 66 | 67 | #### Extracting a Watermark 68 | 69 | ```python 70 | import perth 71 | import librosa 72 | 73 | # Load the watermarked audio 74 | watermarked_audio, sr = librosa.load("output.wav", sr=None) 75 | 76 | # Initialize watermarker (same as used for embedding) 77 | watermarker = perth.PerthImplicitWatermarker() 78 | 79 | # Extract watermark 80 | watermark = watermarker.get_watermark(watermarked_audio, sample_rate=sr) 81 | print(f"Extracted watermark: 
{watermark}") 82 | ``` 83 | 84 | ### Perth Implicit Watermarker 85 | 86 | The Perth-Net Implicit watermarker uses a neural network-based approach for embedding and extracting watermarks. It's designed to be robust against various audio manipulations while maintaining high audio quality. 87 | 88 | ```python 89 | from perth.perth_net.perth_net_implicit.perth_watermarker import PerthImplicitWatermarker 90 | 91 | watermarker = PerthImplicitWatermarker(device="cuda") # Use GPU for faster processing 92 | ``` 93 | 94 | ### Dummy Watermarker 95 | 96 | A simple placeholder watermarker for testing and demonstration purposes. 97 | 98 | ```python 99 | from perth import DummyWatermarker 100 | 101 | watermarker = DummyWatermarker() 102 | ``` 103 | 104 | ## Evaluating Watermarked Audio 105 | 106 | The library includes utilities for evaluating the quality and robustness of watermarked audio: 107 | 108 | ```python 109 | import librosa 110 | from perth.utils import calculate_audio_metrics, plot_audio_comparison 111 | 112 | # Load original and watermarked audio 113 | original, sr = librosa.load("input.wav", sr=None) 114 | watermarked, _ = librosa.load("output.wav", sr=None) 115 | 116 | # Calculate quality metrics 117 | metrics = calculate_audio_metrics(original, watermarked) 118 | print(f"SNR: {metrics['snr']:.2f} dB") 119 | print(f"PSNR: {metrics['psnr']:.2f} dB") 120 | 121 | # Visualize differences 122 | plot_audio_comparison(original, watermarked, sr, output_path="comparison.png") 123 | ``` 124 | 125 | ## Contributing 126 | 127 | Contributions are welcome! Please feel free to submit a Pull Request. 128 | 129 | ## License 130 | 131 | This project is licensed under the MIT License - see the LICENSE file for details. 
132 | -------------------------------------------------------------------------------- /docs/api_reference.md: -------------------------------------------------------------------------------- 1 | # Perth API Reference 2 | 3 | This document provides detailed information about the classes and functions available in the Perth library. 4 | 5 | ## Core Classes 6 | 7 | ### WatermarkerBase 8 | 9 | `WatermarkerBase` is the abstract base class that all watermarking implementations in Perth extend. 10 | 11 | ```python 12 | from perth import WatermarkerBase 13 | ``` 14 | 15 | #### Methods 16 | 17 | - **apply_watermark**(wav, watermark=None, sample_rate=44100, **kwargs) 18 | 19 | Apply a watermark to an audio signal. 20 | 21 | - **Parameters**: 22 | - `wav` (np.ndarray): Input audio signal as numpy array 23 | - `watermark` (np.ndarray, optional): Watermark data to embed. If None, a default watermark is generated. 24 | - `sample_rate` (int): Sample rate of the audio signal in Hz 25 | - `**kwargs`: Additional algorithm-specific parameters 26 | 27 | - **Returns**: 28 | - `np.ndarray`: Watermarked audio signal 29 | 30 | - **get_watermark**(watermarked_wav, sample_rate=44100, watermark_length=None, **kwargs) 31 | 32 | Extract a watermark from a watermarked audio signal. 33 | 34 | - **Parameters**: 35 | - `watermarked_wav` (np.ndarray): Watermarked audio signal 36 | - `sample_rate` (int): Sample rate of the audio signal in Hz 37 | - `watermark_length` (int, optional): Expected length of the watermark 38 | - `**kwargs`: Additional algorithm-specific parameters 39 | 40 | - **Returns**: 41 | - `np.ndarray`: Extracted watermark data 42 | 43 | ### PerthImplicitWatermarker 44 | 45 | `PerthImplicitWatermarker` is a neural network-based watermarking implementation that uses the Perth-Net model for embedding and extracting watermarks. 
46 | 47 | ```python 48 | from perth import PerthImplicitWatermarker 49 | ``` 50 | 51 | #### Constructor 52 | 53 | - **\_\_init\_\_**(run_name="implicit", models_dir=None, device="cpu", perth_net=None) 54 | 55 | - **Parameters**: 56 | - `run_name` (str): Name of the model configuration to load 57 | - `models_dir` (str, optional): Directory containing the model files 58 | - `device` (str): Device to run the model on ("cpu" or "cuda") 59 | - `perth_net` (PerthNet, optional): Pre-initialized PerthNet model instance 60 | 61 | #### Methods 62 | 63 | Inherits all methods from `WatermarkerBase` with the following implementations: 64 | 65 | - **apply_watermark**(signal, watermark, sample_rate, **_) 66 | 67 | Apply a neural network-based watermark to an audio signal. 68 | 69 | - **Parameters**: 70 | - `signal` (np.ndarray): Input audio signal 71 | - `watermark` (np.ndarray, optional): Ignored (Perth-Net generates its own watermark) 72 | - `sample_rate` (int): Sample rate of the audio signal in Hz 73 | 74 | - **Returns**: 75 | - `np.ndarray`: Watermarked audio signal 76 | 77 | - **get_watermark**(wm_signal, sample_rate, round=True, **_) 78 | 79 | Extract a watermark from a watermarked audio signal. 80 | 81 | - **Parameters**: 82 | - `wm_signal` (np.ndarray): Watermarked audio signal 83 | - `sample_rate` (int): Sample rate of the audio signal in Hz 84 | - `round` (bool): Whether to round the watermark values to binary (0 or 1) 85 | 86 | - **Returns**: 87 | - `np.ndarray`: Extracted watermark data 88 | 89 | ## Utility Functions 90 | 91 | ### Audio Processing 92 | 93 | ```python 94 | from perth.utils import load_audio, save_audio 95 | ``` 96 | 97 | - **load_audio**(audio_path, sr=None) 98 | 99 | Load an audio file using librosa. 100 | 101 | - **Parameters**: 102 | - `audio_path` (str): Path to the audio file 103 | - `sr` (int, optional): Target sample rate. If None, the native sample rate is used. 
104 | 105 | - **Returns**: 106 | - `tuple`: (audio_data, sample_rate) 107 | 108 | - **save_audio**(audio_data, file_path, sample_rate) 109 | 110 | Save audio data to a file. 111 | 112 | - **Parameters**: 113 | - `audio_data` (np.ndarray): Audio data as a numpy array 114 | - `file_path` (str): Output file path 115 | - `sample_rate` (int): Sample rate for the audio file 116 | 117 | ### Analysis and Visualization 118 | 119 | ```python 120 | from perth.utils import calculate_audio_metrics, plot_audio_comparison 121 | ``` 122 | 123 | - **calculate_audio_metrics**(original, watermarked) 124 | 125 | Calculate audio quality metrics between original and watermarked audio. 126 | 127 | - **Parameters**: 128 | - `original` (np.ndarray): Original audio data 129 | - `watermarked` (np.ndarray): Watermarked audio data 130 | 131 | - **Returns**: 132 | - `dict`: Dictionary with quality metrics: 133 | - `snr`: Signal-to-Noise Ratio (dB) 134 | - `mse`: Mean Squared Error 135 | - `psnr`: Peak Signal-to-Noise Ratio (dB) 136 | 137 | - **plot_audio_comparison**(original, watermarked, sample_rate, output_path=None) 138 | 139 | Plot a comparison between original and watermarked audio. 140 | 141 | - **Parameters**: 142 | - `original` (np.ndarray): Original audio data 143 | - `watermarked` (np.ndarray): Watermarked audio data 144 | - `sample_rate` (int): Sample rate of the audio 145 | - `output_path` (str, optional): Path to save the plot. If None, plot is shown interactively. 
146 | 147 | ## Command Line Interface 148 | 149 | Perth provides a command-line interface through the `perth` command: 150 | 151 | ``` 152 | perth [OPTIONS] INPUT_FILE 153 | ``` 154 | 155 | ### Options 156 | 157 | - `--output`, `-o`: Path to save the output watermarked audio file. If not provided, `_watermarked` is appended to the input filename 158 | - `--method`, `-m`: Watermarking method to use (choices: perth, dummy) 159 | - `--extract`, `-e`: Extract watermark from the input file instead of applying a watermark 160 | - `--device`, `-d`: Device to use for neural network processing (choices: cpu, cuda) 161 | - `--config`, `-c`: Path to a configuration file 162 | - `--visualize`, `-v`: Generate visualization of watermark effect (only when not extracting) 163 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Perth 2 | 3 | This guide will help you get started with the Perth audio watermarking library. 4 | 5 | ## Installation 6 | 7 | ### Prerequisites 8 | 9 | Before installing Perth, make sure you have the following prerequisites: 10 | 11 | - Python 3.8 or higher 12 | - pip package manager 13 | 14 | For GPU acceleration (optional): 15 | - CUDA-compatible GPU 16 | - PyTorch with CUDA support 17 | 18 | ### Install from PyPI 19 | 20 | ```bash 21 | pip install resemble-perth 22 | ``` 23 | 24 | ### Install from Source 25 | 26 | ```bash 27 | git clone https://github.com/resemble-ai/Perth 28 | cd Perth 29 | pip install -e . 
30 | ``` 31 | 32 | ## Basic Usage 33 | 34 | Here's a simple example of how to use Perth to watermark an audio file: 35 | 36 | ```python 37 | import librosa 38 | import soundfile as sf 39 | from perth import PerthImplicitWatermarker 40 | 41 | # Load audio file 42 | audio, sample_rate = librosa.load('input.wav', sr=None) 43 | 44 | # Initialize watermarker 45 | watermarker = PerthImplicitWatermarker() 46 | 47 | # Apply watermark 48 | watermarked_audio = watermarker.apply_watermark(audio, sample_rate=sample_rate) 49 | 50 | # Save watermarked audio 51 | sf.write('output.wav', watermarked_audio, sample_rate) 52 | ``` 53 | 54 | To extract a watermark from an audio file: 55 | 56 | ```python 57 | import librosa 58 | from perth import PerthImplicitWatermarker 59 | 60 | # Load audio file 61 | audio, sample_rate = librosa.load('output.wav', sr=None) 62 | 63 | # Initialize watermarker 64 | watermarker = PerthImplicitWatermarker() 65 | 66 | # Extract watermark 67 | watermark = watermarker.get_watermark(audio, sample_rate=sample_rate) 68 | print(f"Extracted watermark confidence: {watermark.mean():.4f}") 69 | ``` 70 | 71 | ## Command Line Usage 72 | 73 | Perth also provides a command-line interface for easy usage: 74 | 75 | ```bash 76 | # Watermark an audio file 77 | perth input.wav -o output.wav 78 | 79 | # Extract a watermark from a file 80 | perth input.wav --extract 81 | ``` 82 | 83 | Run `perth --help` for more options and information. 
# Confidence above which a transformed signal counts as "watermark survived".
SUCCESS_THRESHOLD = 0.75  # Arbitrary threshold for demonstration


def apply_mp3_compression(audio, sr, output_path, bitrate='128k'):
    """Round-trip audio through MP3 encoding using the ``ffmpeg`` executable.

    The samples are written to a temporary WAV file, encoded to MP3 at
    ``bitrate``, decoded back to WAV and reloaded at the same sample rate.

    Args:
        audio: Audio samples, passed as-is to ``soundfile.write``.
        sr: Sample rate used for writing and for reloading the result.
        output_path: Unused; kept so existing callers keep working.
        bitrate: Value handed to ffmpeg's ``-b:a`` option.

    Returns:
        The decoded audio as returned by ``librosa.load``.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If either ffmpeg invocation exits
            with a non-zero status.
    """
    import subprocess
    import tempfile

    temp_paths = []

    def _reserve(suffix):
        # delete=False + close(): we only need a unique path that ffmpeg and
        # soundfile can open themselves (an open handle blocks that on Windows).
        handle = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        handle.close()
        temp_paths.append(handle.name)
        return handle.name

    try:
        # Save as WAV
        wav_path = _reserve('.wav')
        sf.write(wav_path, audio, sr)

        # Compress to MP3; check=True surfaces encoder failures right here
        # instead of as a confusing load error on an empty file later on.
        mp3_path = _reserve('.mp3')
        subprocess.run(['ffmpeg', '-y', '-i', wav_path, '-b:a', bitrate, mp3_path],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                       check=True)

        # Decompress back to WAV
        decoded_path = _reserve('.wav')
        subprocess.run(['ffmpeg', '-y', '-i', mp3_path, decoded_path],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                       check=True)

        # Load the processed audio
        audio_processed, _ = librosa.load(decoded_path, sr=sr)
        return audio_processed
    finally:
        # Clean up temporary files even when a step above raised.
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError:
                pass


def apply_transform(audio, sr, transform_type, **kwargs):
    """Apply one named transformation to ``audio`` and return the result.

    Supported ``transform_type`` values and their keyword options:

    * ``'mp3'``: MP3 round trip via ``apply_mp3_compression``
      (``bitrate``, default ``'128k'``).
    * ``'resample'``: resample to ``target_sr`` (default ``16000``) and back
      to ``sr``.
    * ``'noise'``: add zero-mean Gaussian noise with standard deviation
      ``noise_level`` (default ``0.005``).
    * ``'clip'``: clip samples to ``[-clip_level, clip_level]``
      (default ``0.8``).
    * ``'reverse'``: reverse the middle third of the signal (on a copy).

    Args:
        audio: Input samples.
        sr: Sample rate of ``audio``.
        transform_type: Name of the transformation (see above).
        **kwargs: Options for the chosen transformation.

    Returns:
        The transformed audio.

    Raises:
        ValueError: If ``transform_type`` is not one of the names above.
    """
    if transform_type == 'mp3':
        bitrate = kwargs.get('bitrate', '128k')
        return apply_mp3_compression(audio, sr, None, bitrate)

    if transform_type == 'resample':
        target_sr = kwargs.get('target_sr', 16000)
        # Resample to target SR, then back to the original SR
        audio_resampled = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
        return librosa.resample(audio_resampled, orig_sr=target_sr, target_sr=sr)

    if transform_type == 'noise':
        noise_level = kwargs.get('noise_level', 0.005)
        # Size the noise by the full shape so multi-dimensional input works;
        # for 1-D input this is identical to using len(audio).
        noise = np.random.normal(0, noise_level, np.shape(audio))
        return audio + noise

    if transform_type == 'clip':
        clip_level = kwargs.get('clip_level', 0.8)
        return np.clip(audio, -clip_level, clip_level)

    if transform_type == 'reverse':
        # Cut a segment and reverse it
        segment_start = len(audio) // 3
        segment_end = segment_start + len(audio) // 3
        audio_mod = audio.copy()
        audio_mod[segment_start:segment_end] = audio_mod[segment_start:segment_end][::-1]
        return audio_mod

    raise ValueError(f"Unknown transform type: {transform_type}")


def _transformed_filename(transform_type, params):
    """Build a per-run file name so runs of the same transform do not collide."""
    parts = [transform_type, *(str(value) for value in params.values())]
    return "_".join(parts) + "_transformed.wav"


def main():
    """Watermark a file, apply each transformation and report detection results.

    Writes the watermarked audio, one WAV per transformation and a bar chart
    (``robustness_results.png``) into the chosen output directory.
    """
    parser = argparse.ArgumentParser(description="Test watermark robustness against various transformations")
    parser.add_argument("input_file", help="Path to the input audio file to be watermarked")
    parser.add_argument("--output_dir", "-o", default="robustness_results",
                        help="Directory to save results")
    parser.add_argument("--device", "-d", default="cpu", choices=["cpu", "cuda"],
                        help="Device to use for neural network processing")
    args = parser.parse_args()

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Load audio
    print(f"Loading audio: {args.input_file}")
    audio, sr = librosa.load(args.input_file, sr=None)

    # Initialize watermarker
    print("Initializing watermarker...")
    watermarker = PerthImplicitWatermarker(device=args.device)

    # Apply watermark
    print("Applying watermark...")
    watermarked_audio = watermarker.apply_watermark(audio, sample_rate=sr)

    # Save watermarked audio
    watermarked_path = os.path.join(args.output_dir, "watermarked.wav")
    sf.write(watermarked_path, watermarked_audio, sr)
    print(f"Saved watermarked audio to {watermarked_path}")

    # Extract watermark from original watermarked audio (baseline)
    baseline_watermark = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
    baseline_confidence = np.mean(baseline_watermark)
    print(f"Baseline watermark confidence: {baseline_confidence:.4f}")

    # Define transformations to test
    transformations = [
        ('mp3', {'bitrate': '128k'}, 'MP3 Compression (128k)'),
        ('mp3', {'bitrate': '64k'}, 'MP3 Compression (64k)'),
        ('resample', {'target_sr': 16000}, 'Resample to 16kHz and back'),
        ('resample', {'target_sr': 8000}, 'Resample to 8kHz and back'),
        ('noise', {'noise_level': 0.001}, 'Low Noise Addition'),
        ('noise', {'noise_level': 0.01}, 'High Noise Addition'),
        ('clip', {'clip_level': 0.8}, 'Amplitude Clipping (0.8)'),
        ('reverse', {}, 'Segment Reversal'),
    ]

    # Test each transformation
    results = []

    print("\nTesting watermark robustness against transformations:")
    for transform_type, params, label in tqdm(transformations):
        # Apply transformation
        transformed_audio = apply_transform(watermarked_audio, sr, transform_type, **params)

        # Save transformed audio. The name includes the parameters: several
        # entries share a transform_type and would otherwise overwrite each
        # other's output file.
        transformed_path = os.path.join(
            args.output_dir, _transformed_filename(transform_type, params)
        )
        sf.write(transformed_path, transformed_audio, sr)

        # Extract watermark
        extracted_watermark = watermarker.get_watermark(transformed_audio, sample_rate=sr)
        confidence = np.mean(extracted_watermark)
        passed = confidence > SUCCESS_THRESHOLD

        # Calculate audio quality metrics
        metrics = calculate_audio_metrics(watermarked_audio, transformed_audio)

        # Store results
        results.append({
            'transform': label,
            'confidence': confidence,
            'snr': metrics['snr'],
            'success': passed,
        })

        print(f" {label}: Confidence = {confidence:.4f}, SNR = {metrics['snr']:.2f} dB, "
              f"{'PASS' if passed else 'FAIL'}")

    # Plot results
    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(results))
    bar_width = 0.35

    # Plot confidence scores
    ax.bar(x, [r['confidence'] for r in results], bar_width,
           label='Watermark Confidence', color='skyblue')

    # Add threshold line
    ax.axhline(y=SUCCESS_THRESHOLD, linestyle='--', color='red', alpha=0.7,
               label=f'Success Threshold ({SUCCESS_THRESHOLD})')

    # Add labels and title
    ax.set_xlabel('Transformation')
    ax.set_ylabel('Watermark Confidence')
    ax.set_title('Watermark Robustness to Various Transformations')
    ax.set_xticks(x)
    ax.set_xticklabels([r['transform'] for r in results], rotation=45, ha='right')
    ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(args.output_dir, 'robustness_results.png'))
    plt.close(fig)  # release the figure once it is on disk
    print(f"Results saved to {args.output_dir}")


if __name__ == "__main__":
    main()
def main():
    """Watermark one audio file, verify the result and print quality metrics.

    Reads the input path, optional output path and device from the command
    line. When no output path is given, ``_watermarked`` is inserted before
    the input file's extension.
    """
    # Parse command line arguments
    cli = argparse.ArgumentParser(description="Watermark an audio file with Perth")
    cli.add_argument("input_file", help="Path to the input audio file")
    cli.add_argument(
        "--output", "-o",
        default=None,
        help="Path to save the output watermarked audio file",
    )
    cli.add_argument(
        "--device", "-d",
        default="cpu",
        choices=["cpu", "cuda"],
        help="Device to use for neural network processing",
    )
    args = cli.parse_args()

    # Derive output filename if not specified
    if args.output is None:
        stem, suffix = os.path.splitext(args.input_file)
        args.output = f"{stem}_watermarked{suffix}"

    # Create output directory if it doesn't exist
    target_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(target_dir, exist_ok=True)

    # Load audio file
    print(f"Loading audio file: {args.input_file}")
    wav, sr = librosa.load(args.input_file, sr=None)

    # Initialize watermarker
    print(f"Initializing Perth watermarker (device: {args.device})...")
    watermarker = PerthImplicitWatermarker(device=args.device)

    # Apply watermark
    print("Applying watermark...")
    watermarked_audio = watermarker.apply_watermark(wav, watermark=None, sample_rate=sr)

    # Save watermarked audio
    sf.write(args.output, watermarked_audio, sr)
    print(f"Watermarked audio saved to: {args.output}")

    # Check watermark in watermarked audio
    print("Verifying watermark...")
    extracted_watermark = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
    verification_confidence = np.mean(extracted_watermark)
    print(f"Watermark verification confidence: {verification_confidence:.4f}")

    # Calculate quality metrics
    metrics = calculate_audio_metrics(wav, watermarked_audio)
    snr, mse, psnr = metrics['snr'], metrics['mse'], metrics['psnr']
    print("\nAudio Quality Metrics:")
    print(f" Signal-to-Noise Ratio (SNR): {snr:.2f} dB")
    print(f" Mean Squared Error (MSE): {mse:.6f}")
    print(f" Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB")


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /perth/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /perth/__pycache__/dummy_watermarker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/dummy_watermarker.cpython-311.pyc -------------------------------------------------------------------------------- /perth/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /perth/__pycache__/watermarker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/watermarker.cpython-310.pyc -------------------------------------------------------------------------------- /perth/__pycache__/watermarker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/watermarker.cpython-311.pyc -------------------------------------------------------------------------------- /perth/cli/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command-line interface for the Perth library. 
def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        args: Argument strings to parse. When None, argparse reads them from
            ``sys.argv``.

    Returns:
        Namespace with ``input_file``, ``output``, ``method``, ``extract``,
        ``device``, ``config`` and ``visualize``.
    """
    parser = argparse.ArgumentParser(
        description="Perth - Audio Watermarking Tool",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument("input_file", help="Path to the input audio file")
    parser.add_argument("--output", "-o",
                        help="Path to save the output watermarked audio file. "
                             "If not provided, appends '_watermarked' to the input filename")
    # --method and --device deliberately have no default: main() only
    # overrides the loaded configuration when they were given explicitly.
    parser.add_argument("--method", "-m", choices=["perth", "dummy"],
                        help="Watermarking method to use")
    parser.add_argument("--extract", "-e", action="store_true",
                        help="Extract watermark from the input file instead of applying a watermark")
    parser.add_argument("--device", "-d", choices=["cpu", "cuda"],
                        help="Device to use for neural network processing")
    parser.add_argument("--config", "-c",
                        help="Path to a configuration file")
    parser.add_argument("--visualize", "-v", action="store_true",
                        help="Generate visualization of watermark effect (only when not extracting)")

    return parser.parse_args(args)


def _create_watermarker(method, config, device):
    """Instantiate the watermarker selected by ``method`` ("perth" or "dummy")."""
    if method == "perth":
        print(f"Initializing Perth watermarker (device: {device})...")
        models_dir = config.get('perth', 'models_dir')
        run_name = config.get('perth', 'run_name')
        return PerthImplicitWatermarker(
            run_name=run_name,
            models_dir=models_dir,
            device=device
        )
    if method == "dummy":
        print("Initializing dummy watermarker...")
        return DummyWatermarker()
    # The method can also come from a config file, where argparse's `choices`
    # check does not apply. Falling back to the dummy watermarker here would
    # silently produce output without a real watermark.
    raise ValueError(
        f"Unknown watermarking method: {method!r} (expected 'perth' or 'dummy')"
    )


def main(args: Optional[List[str]] = None) -> int:
    """Main function for the watermarking CLI.

    Loads the configuration, lets explicit command line options override it,
    then either extracts a watermark from the input file or applies one,
    saves the result and prints verification and quality metrics.

    Args:
        args: Argument strings to parse. When None, argparse reads them from
            ``sys.argv``.

    Returns:
        0 on success, 1 if any step raised (the error is printed to stderr).
    """
    parsed_args = parse_args(args)

    # Load configuration
    config = get_config(parsed_args.config)

    # Override config with command line arguments if provided
    if parsed_args.method:
        config.set('general', 'default_watermarker', parsed_args.method)
    if parsed_args.device:
        config.set('perth', 'device', parsed_args.device)

    method = config.get('general', 'default_watermarker')
    device = config.get('perth', 'device')

    try:
        # Load audio file
        print(f"Loading audio file: {parsed_args.input_file}")
        wav, sr = load_audio(parsed_args.input_file)

        # Initialize watermarker
        watermarker = _create_watermarker(method, config, device)

        if parsed_args.extract:
            # Extract watermark
            print("Extracting watermark...")
            watermark = watermarker.get_watermark(wav, sample_rate=sr)
            print(f"Extracted watermark: {watermark}")
            print(f"Watermark confidence: {np.mean(watermark):.4f}")
            return 0

        # Apply watermark
        print("Applying watermark...")
        original_audio = wav.copy()  # Save original for comparison
        watermarked_audio = watermarker.apply_watermark(wav, watermark=None, sample_rate=sr)

        # Save watermarked audio
        if parsed_args.output:
            output_path = parsed_args.output
        else:
            base, ext = os.path.splitext(parsed_args.input_file)
            output_path = f"{base}_watermarked{ext}"

        save_audio(watermarked_audio, output_path, sr)
        print(f"Watermarked audio saved to: {output_path}")

        # Verify watermark
        print("Verifying watermark...")
        extracted = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
        print(f"Watermark verification confidence: {np.mean(extracted):.4f}")

        # Calculate and display quality metrics
        metrics = calculate_audio_metrics(original_audio, watermarked_audio)
        print("\nAudio Quality Metrics:")
        print(f" Signal-to-Noise Ratio (SNR): {metrics['snr']:.2f} dB")
        print(f" Mean Squared Error (MSE): {metrics['mse']:.8f}")
        print(f" Peak Signal-to-Noise Ratio (PSNR): {metrics['psnr']:.2f} dB")

        # Generate visualization if requested
        if parsed_args.visualize:
            viz_path = os.path.splitext(output_path)[0] + "_comparison.png"
            print(f"\nGenerating visualization to: {viz_path}")
            plot_audio_comparison(original_audio, watermarked_audio, sr, viz_path)

        return 0

    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it through
        # the exit code instead of a traceback.
        print(f"Error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())
class Config:
    """
    Configuration management for Perth.

    Holds a two-level mapping ``section -> key -> value`` that starts from the
    class defaults and can be overlaid with a user-supplied YAML file.
    """

    # Default configuration values (shared by all instances; copied per instance)
    _defaults = {
        # General settings
        'general': {
            'default_watermarker': 'perth',
            'verbose': True,
        },

        # Perth-Net settings
        'perth': {
            'device': 'cpu',
            'run_name': 'implicit',
            'models_dir': None,  # Will be set to default location in __init__
        },

        # Audio processing settings
        'audio': {
            'default_sample_rate': 44100,
            'normalize': True,
        },
    }

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize configuration with default values and optional user config.

        Args:
            config_path: Path to a YAML configuration file to load. A path that
                does not exist is reported with a warning and the defaults are kept.
        """
        # Copy every section so instance edits never leak into the class defaults
        # (one level is enough: all default values are scalars).
        self._config = {section: dict(values) for section, values in self._defaults.items()}

        # Set default models directory
        self._config['perth']['models_dir'] = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'perth', 'perth_net', 'pretrained'
        )

        # Load user config if provided
        if config_path:
            if os.path.exists(config_path):
                self.load(config_path)
            else:
                print(f"Warning: Config file not found: {config_path}; using defaults")

    def load(self, config_path: str) -> None:
        """
        Load configuration from a YAML file and merge it into the current one.

        Loading is best-effort: read or parse failures print a warning and
        leave the configuration unchanged. Sections whose value is not a
        mapping are skipped with a warning so that one malformed section can
        neither abort the merge half-way nor break later ``get`` calls.

        Args:
            config_path: Path to a YAML configuration file
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                user_config = yaml.safe_load(f)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            print(f"Warning: Could not load config from {config_path}: {e}")
            return

        if not user_config:
            return  # empty file: nothing to merge
        if not isinstance(user_config, dict):
            print(f"Warning: Could not load config from {config_path}: "
                  "top-level YAML value must be a mapping")
            return

        for section, values in user_config.items():
            if not isinstance(values, dict):
                print(f"Warning: Ignoring config section '{section}' in {config_path}: "
                      "expected a mapping")
                continue
            if section in self._config:
                self._config[section].update(values)
            else:
                self._config[section] = values

    def save(self, config_path: str) -> None:
        """
        Save current configuration to a YAML file.

        Parent directories are created if needed.

        Args:
            config_path: Path to save the configuration to
        """
        os.makedirs(os.path.dirname(os.path.abspath(config_path)), exist_ok=True)
        with open(config_path, 'w') as f:
            yaml.dump(self._config, f, default_flow_style=False)

    def get(self, section: str, key: str, default: Any = None) -> Any:
        """
        Get a configuration value.

        Args:
            section: Configuration section
            key: Configuration key
            default: Default value to return if key is not found

        Returns:
            Configuration value or default
        """
        if section in self._config and key in self._config[section]:
            return self._config[section][key]
        return default

    def set(self, section: str, key: str, value: Any) -> None:
        """
        Set a configuration value, creating the section if it does not exist.

        Args:
            section: Configuration section
            key: Configuration key
            value: Value to set
        """
        self._config.setdefault(section, {})[key] = value

    def get_section(self, section: str) -> Dict[str, Any]:
        """
        Get an entire configuration section.

        Args:
            section: Configuration section name

        Returns:
            A shallow copy of the section's values (empty dict if unknown)
        """
        return self._config.get(section, {}).copy()

    def __str__(self) -> str:
        """Return the configuration rendered as YAML."""
        return yaml.dump(self._config, default_flow_style=False)


# Singleton instance for global access
_config_instance = None

def get_config(config_path: Optional[str] = None) -> Config:
    """
    Get the global configuration instance.

    The first call creates the instance; later calls return the same object
    and, when *config_path* is given, merge that file into it.

    Args:
        config_path: Optional path to a configuration file to load

    Returns:
        Config instance
    """
    global _config_instance
    if _config_instance is None:
        _config_instance = Config(config_path)
    elif config_path:
        _config_instance.load(config_path)
    return _config_instance
19 | 20 | Args: 21 | wav: Input audio signal as numpy array 22 | watermark: Ignored in this implementation 23 | sample_rate: Ignored in this implementation 24 | **kwargs: Additional ignored parameters 25 | 26 | Returns: 27 | The input audio with minimal modification (rounded to 5 decimal places) 28 | """ 29 | return wav.round(5) 30 | 31 | def get_watermark(self, watermarked_wav: np.ndarray, sample_rate: int = 44100, 32 | watermark_length: Optional[int] = None, **kwargs) -> np.ndarray: 33 | """ 34 | Simulates extracting a watermark by returning random data. 35 | 36 | Args: 37 | watermarked_wav: Watermarked audio signal as numpy array 38 | sample_rate: Ignored in this implementation 39 | watermark_length: Length of the dummy watermark to generate 40 | **kwargs: Additional ignored parameters 41 | 42 | Returns: 43 | A random binary watermark of specified length or default 32 bits 44 | """ 45 | length = watermark_length if watermark_length is not None else 32 46 | return np.random.randint(0, 2, size=length).astype(np.float32) 47 | -------------------------------------------------------------------------------- /perth/perth_net/__init__.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import resource_filename 2 | PREPACKAGED_MODELS_DIR = resource_filename(__name__, "pretrained") 3 | 4 | from .perth_net_implicit.perth_watermarker import PerthImplicitWatermarker 5 | -------------------------------------------------------------------------------- /perth/perth_net/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__init__.py -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/__pycache__/audio_processor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/audio_processor.cpython-311.pyc -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/__pycache__/checkpoint_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/checkpoint_manager.cpython-311.pyc -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/__pycache__/config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/config.cpython-311.pyc -------------------------------------------------------------------------------- 
/perth/perth_net/perth_net_implicit/__pycache__/perth_watermarker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/perth_watermarker.cpython-311.pyc -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /perth/perth_net/perth_net_implicit/audio_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torchaudio.transforms import Spectrogram, InverseSpectrogram, TimeStretch 4 | 5 | import numpy as np 6 | 7 | from .config import PerthConfig 8 | from .utils import normalize, magphase_to_cx, cx_to_magphase 9 | 10 | 11 | class AudioProcessor(nn.Module): 12 | "Module wrapper for audio processing, for easy device management" 13 | 14 | def __init__(self, hp: PerthConfig): 15 | super().__init__() 16 | self.hp = hp 17 | self.window_fn = { 18 | "hamm": torch.hamming_window, 19 | "hann": torch.hann_window, 20 | "kaiser": torch.kaiser_window 21 | }[hp.window_fn] 22 | self.spectrogram = Spectrogram( 23 | n_fft=hp.n_fft, 24 | win_length=hp.window_size, 25 | power=None, 26 | hop_length=hp.hop_size, 27 | window_fn=self.window_fn, 28 | normalized=False, 29 | ) 30 | self.inv_spectrogram = InverseSpectrogram( 31 | n_fft=hp.n_fft, 32 | win_length=hp.window_size, 33 | hop_length=hp.hop_size, 34 | window_fn=self.window_fn, 35 | normalized=False, 36 | ) 37 | self.stretch = TimeStretch( 38 | 
class CheckpointManager:
    """Manage one training run's directory: hyper-parameters, run id, checkpoints.

    The run directory is ``<models_dir>/<run_name>`` and holds ``hparams.yaml``,
    ``id.txt`` and checkpoint files named ``perth_net_<step>.pth.tar``.
    """

    def __init__(self, models_dir, run_name, dataset_hp: "PerthConfig" = None):
        """Open (or create) the run directory.

        Args:
            models_dir: Directory that contains all runs.
            run_name: Sub-directory name of this run.
            dataset_hp: Hyper-parameters of the caller. Required when the run
                has no ``hparams.yaml`` yet (they are then written to disk);
                when the file exists they must equal the stored ones.

        Raises:
            AssertionError: If stored and supplied hyper-parameters differ, or
                if neither a stored file nor ``dataset_hp`` is available.
        """
        self.save_path = Path(models_dir) / run_name
        self.save_path.mkdir(exist_ok=True, parents=True)

        self.hparams_file = self.save_path.joinpath("hparams.yaml")
        if self.hparams_file.exists():
            self.hp = self.load_hparams()
            if dataset_hp is not None:
                assert self.hp == dataset_hp, \
                    f"hparams in {self.hparams_file} differ from the supplied dataset_hp"
        else:
            assert dataset_hp is not None, \
                f"no {self.hparams_file} found and no dataset_hp supplied"
            self.hp = dataset_hp
            self.save_hparams()

        # Persistent random identifier of the run, created on first use
        self.id_file = self.save_path.joinpath("id.txt")
        if self.id_file.exists():
            self.id = self.id_file.read_text()
        else:
            self.id = secrets.token_urlsafe(16)
            self.id_file.write_text(self.id)

    def load_latest(self, ext=".pth.tar"):
        """Load the checkpoint with the highest step number.

        The step is the integer after the last ``_`` of the file name (with
        *ext* removed). Files ending in *ext* whose name carries no integer
        step are ignored instead of aborting the lookup.

        Returns:
            The object stored in the newest checkpoint (loaded onto the CPU),
            or ``None`` when the run directory holds no usable checkpoint.
        """
        def step_of(path):
            try:
                return int(path.name.replace(ext, "").split("_")[-1])
            except ValueError:
                return None

        stepped = [(step_of(p), p) for p in self.save_path.iterdir() if p.name.endswith(ext)]
        ckpts = sorted((sp for sp in stepped if sp[0] is not None), key=lambda sp: sp[0])
        if not ckpts:
            return None
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        return torch.load(ckpts[-1][1], map_location="cpu")

    def load_hparams(self):
        """Read ``hparams.yaml`` and return it as a ``PerthConfig``."""
        with self.hparams_file.open("r") as hp_file:
            # NOTE(security): FullLoader can construct more than plain data; keep
            # hparams files trusted. Left unchanged because existing files may
            # rely on it — confirm before switching to yaml.safe_load.
            return PerthConfig(**yaml.load(hp_file, Loader=yaml.FullLoader))

    def save_hparams(self):
        """Write the current hyper-parameters to ``hparams.yaml``."""
        with self.hparams_file.open("w") as hparams_file:
            hparams_file.write(yaml.dump(self.hp._asdict()))

    def save_model(self, model, step):
        """Save a checkpoint as ``perth_net_<step, zero-padded to 6>.pth.tar``.

        Args:
            model: An ``nn.Module`` (its ``state_dict()`` is stored) or any
                other object, which is stored as-is.
            step: Training step, stored alongside and used in the file name.

        Raises:
            KeyboardInterrupt: Re-raised after removing the partially written
                file, so that Ctrl-C still stops the program.
        """
        state = {
            "model": model.state_dict() if isinstance(model, torch.nn.Module) else model,
            "step": step,
        }
        basename = f"perth_net_{step:06d}"
        checkpoint_fpath = Path(self.save_path, f"{basename}.pth.tar")
        try:
            torch.save(state, checkpoint_fpath)
        except KeyboardInterrupt:
            # Do not leave a truncated checkpoint behind
            if checkpoint_fpath.exists():
                checkpoint_fpath.unlink()
            raise
def compute_subband_freq(config: "PerthConfig"):
    """Return how many of the lowest STFT bins lie below ``config.max_wmark_freq``.

    The spectrum has ``n_fft // 2 + 1`` bins reaching up to ``sample_rate / 2``;
    the result is that bin count scaled by ``max_wmark_freq`` over the top
    frequency, rounded to the nearest integer.
    """
    n_bins = config.n_fft // 2 + 1
    nyquist = config.sample_rate / 2
    return int(round(n_bins * config.max_wmark_freq / nyquist))


def magmask(magspec, p=0.05):
    """Flag the frames whose summed magnitude exceeds a fraction of the peak.

    ``magspec`` is summed over dim 1; a frame is kept when its sum is strictly
    greater than ``p`` times the largest frame sum of the same batch item.
    Returns a float tensor of 0/1 values with dim 1 removed.
    """
    frame_sum = magspec.sum(dim=1)  # (B, T)
    peak = frame_sum.max(dim=1).values  # (B,)
    cutoff = (peak * p).unsqueeze(1)  # (B, 1)
    return (frame_sum > cutoff).float()  # (B, T)
def _layers(subband, hidden):
    """Build one decoder branch: 1x1 input conv, five k=7 convs, 1x1 conv to 2 channels."""
    return nn.Sequential(
        Conv(subband, hidden, 1),
        *[Conv(hidden, hidden, k=7) for _ in range(5)],
        Conv(hidden, 2, k=1, act=False),
    )


def _masked_mean(x, m, eps=1e-8):
    """Mean of ``x`` over dim 2, weighted by mask ``m``.

    The denominator is clamped to ``eps`` so that an all-zero mask (e.g. an
    all-zero spectrogram, where no frame passes the magnitude threshold) gives
    0 instead of NaN. Masks with at least one active frame are unaffected.
    """
    return (x * m).sum(dim=2) / m.sum(dim=2).clamp(min=eps)  # (B, C)


def _lerp(x, s):
    """Resample ``x`` to length ``s`` with linear interpolation."""
    return F.interpolate(x, size=s, mode='linear', align_corners=True)


def _nerp(x, s):
    """Resample ``x`` to length ``s`` with nearest-neighbour interpolation."""
    return F.interpolate(x, size=s, mode='nearest')


class Decoder(nn.Module):
    """
    Decodes a watermark from a magnitude spectrogram.

    Three identical convolutional branches look at the sub-band at three time
    scales (stretched to 1.25x, unchanged, compressed to 0.75x). Each branch
    emits a watermark score and an attention logit; the scores are combined
    with a softmax over the three logits.
    """

    def __init__(self, hidden, subband):
        """
        Args:
            hidden: Channel width of the hidden convolutions.
            subband: Number of low-frequency bins fed to the decoder.
        """
        super().__init__()
        self.subband = subband
        # multi-scale decoder
        self.slow_layers = _layers(subband, hidden)
        self.normal_layers = _layers(subband, hidden)
        self.fast_layers = _layers(subband, hidden)

    @staticmethod
    def _decode_branch(layers, subband, mask):
        """Run one branch and pool its two output channels over valid frames.

        Returns ``(attn, wmarks)``, each of shape (B, 1).
        """
        out = layers(subband)  # (B, 2, T_branch)
        attn = out[:, :1]  # (B, 1, T_branch)
        wmarks = out[:, 1:]  # (B, 1, T_branch)
        branch_mask = _nerp(mask, out.size(2))  # (B, 1, T_branch)
        return _masked_mean(attn, branch_mask), _masked_mean(wmarks, branch_mask)

    def forward(self, magspec):
        """Return one watermark score per batch item, shape (B,)."""
        mask = magmask(magspec.detach())[:, None]  # (B, 1, T)
        subband = magspec[:, :self.subband]
        T = subband.size(2)

        # max(1, ...) keeps the resampled length valid for very short inputs
        slow_attn, slow_wmarks = self._decode_branch(
            self.slow_layers, _lerp(subband, max(1, int(T * 1.25))), mask)
        normal_attn, normal_wmarks = self._decode_branch(
            self.normal_layers, subband, mask)
        fast_attn, fast_wmarks = self._decode_branch(
            self.fast_layers, _lerp(subband, max(1, int(T * 0.75))), mask)

        # combine branches with attention
        attn = torch.cat([slow_attn, normal_attn, fast_attn], dim=1)  # (B, 3)
        attn = F.softmax(attn, dim=1)  # (B, 3)
        wmarks = torch.cat([slow_wmarks, normal_wmarks, fast_wmarks], dim=1)  # (B, 3)
        wmarks = (wmarks * attn).sum(dim=1)  # (B,)

        # single float for each batch item indicating confidence of watermark
        return wmarks
def lerp(x, size=None, scale=None):
    """Resample ``x`` with ``F.interpolate`` in linear mode (``align_corners=True``).

    Pass either a target ``size`` or a ``scale`` factor.
    """
    return F.interpolate(x, size=size, scale_factor=scale, mode='linear', align_corners=True, recompute_scale_factor=False)


def random_stretch(x):
    """Stretch ``x`` by a random factor drawn uniformly from [0.9, 1.1)."""
    assert x.ndim >= 3
    r = 0.9 + 0.2 * torch.rand(1).item()
    return lerp(x, scale=r)


def _attack(mag, phase, audio_proc):
    """Randomly distort a magnitude spectrogram before it is decoded.

    Each distortion below fires independently with the probability written in
    its condition. The phase-dependent STFT/iSTFT cycle is skipped when
    ``phase`` is None. Only the (possibly modified) magnitude is returned.
    """
    # gaussian magspec noise, scaled to 1% of (mean + 3 * std)
    if torch.rand(1).item() < 1/8:
        peak = mag.mean() + 3 * mag.std()
        r = torch.randn_like(mag) * 0.01 * peak
        mag = mag + r

    # TODO: volume?

    # TODO: time-domain signal noise?

    # # stretch TODO: numerical instability!
    # if torch.rand(1).item() < 1/8 and phase is not None:
    #     scale = 0.9 + 0.2 * torch.rand(1).item()
    #     spec = magphase_to_cx(self.hp, mag, phase)
    #     spec = audio_proc.stretch(spec, scale)
    #     mag, phase_ = cx_to_magphase(self.hp, spec)
    #     if torch.isnan(mag).any():
    #         print("WARNING: stretch failed")
    #         mag = wmarked.clone()
    #     else:
    #         phase = phase_

    # STFT-iSTFT cycle: go to the time domain and back
    if torch.rand(1).item() < 1/4 and phase is not None:
        # # phase noise
        # if torch.rand(1).item() < 1/3:
        #     phase = phase + torch.randn_like(phase) * 0.01

        # iSTFT
        signal = audio_proc.magphase_to_signal(mag, phase)

        # # random stretch directly on signal as well
        # if torch.rand(1).item() < 1/3:
        #     signal = random_stretch(signal[None])[0]

        # STFT
        mag, phase = audio_proc.signal_to_magphase(signal)

    # random offset (NOTE: do this after phase-dependent attacks)
    # circular shift by 1..12 positions along dim 2
    if torch.rand(1).item() < 1/8:
        i = torch.randint(1, 13, (1,)).item()
        mag = torch.roll(mag, i, dims=2)

    # random magspec stretch (NOTE: should be near the end of attacks)
    if torch.rand(1).item() < 1/8:
        mag = random_stretch(mag)

    # random time masking
    # torchaudio.functional.mask_along_axis(mag, mask_param=, mask_value=mag.min().detach(), axis=2, p=0.05)

    return mag

class PerthNet(nn.Module):
    """
    PerthNet (PERceptual THreshold) watermarking model.
    Inserts and detects watermarks from a magnitude spectrogram.

    Bundles an ``Encoder``, a ``Decoder`` and an ``AudioProcessor`` that are all
    configured from one ``PerthConfig``.
    """

    def __init__(self, hp: PerthConfig):
        """Build encoder, decoder and audio processor from hyper-parameters ``hp``."""
        super().__init__()
        self.hp = hp
        # number of low-frequency bins the watermark may occupy
        self.subband = compute_subband_freq(hp)
        self.encoder = Encoder(hp.hidden_size, self.subband)
        self.decoder = Decoder(hp.hidden_size, self.subband)
        self.ap = AudioProcessor(hp)

    @property
    def device(self):
        """Device of the model's first parameter."""
        return next(self.parameters()).device

    def forward(self, magspec, attack=False, phase=None):
        """Run watermarker and decoder (training).

        Args:
            magspec: Magnitude spectrogram to watermark.
            attack: If true, both decoder inputs are passed through ``_attack``
                first (two separate calls, so each draws its own randomness).
            phase: Phase handed to ``_attack`` for both inputs; None disables
                the phase-dependent attack.

        Returns:
            ``(wmarked, no_wmark_pred, wmark_pred, mask)``: the watermarked
            spectrogram, the decoder output for the un-watermarked input, the
            decoder output for the watermarked input, and the encoder's mask.
        """

        # encode watermark
        wmarked, mask = self.encoder(magspec)

        # decode from un-watermarked mag
        dec_input = magspec
        if attack:
            dec_input = _attack(dec_input, phase, self.ap)
        no_wmark_pred = self.decoder(dec_input)

        # decode from watermarked mag
        dec_input = wmarked
        if attack:
            dec_input = _attack(dec_input, phase, self.ap)
        wmark_pred = self.decoder(dec_input)

        return wmarked, no_wmark_pred, wmark_pred, mask

    @staticmethod
    def from_cm(cm):
        """Create a PerthNet from checkpoint manager ``cm`` and load its latest checkpoint.

        Raises AssertionError when ``cm`` has no checkpoint.
        """
        perth_net = PerthNet(cm.hp)
        ckpt = cm.load_latest()
        assert ckpt is not None, "No checkpoint found"
        perth_net.load_state_dict(ckpt["model"])
        print(f"loaded PerthNet (Implicit) at step {ckpt['step']:,}")
        return perth_net

    @staticmethod
    def load(run_name, models_dir="saved_models"):
        """Load the latest checkpoint of run ``run_name`` stored under ``models_dir``."""
        cm = CheckpointManager(models_dir, run_name)
        return PerthNet.from_cm(cm)
def _to_tensor(x, device):
    """Convert ``x`` (numpy array or tensor) to a float32 tensor on ``device``.

    Numpy input is copied first, so the returned tensor never shares memory
    with the caller's array.
    """
    if isinstance(x, np.ndarray):
        x = torch.from_numpy(x.copy())
    return x.to(dtype=torch.float, device=device)


class PerthImplicitWatermarker(WatermarkerBase):
    """Watermarker backed by a ``PerthNet`` encoder/decoder pair."""

    def __init__(self, run_name:str="implicit", models_dir=PREPACKAGED_MODELS_DIR,
                 device="cpu", perth_net=None):
        """
        Args:
            run_name: Name of the run to load from ``models_dir``. Must be
                ``None`` when ``perth_net`` is given (note the default is
                "implicit", so it has to be passed explicitly in that case).
            models_dir: Directory containing the run; defaults to the models
                shipped with the package.
            device: Device the model is moved to.
            perth_net: An already constructed model to use instead of loading one.

        Raises:
            AssertionError: If both ``run_name`` and ``perth_net`` are set.
        """
        assert (run_name is None) or (perth_net is None)
        if perth_net is None:
            self.perth_net = PerthNet.load(run_name, models_dir).to(device)
        else:
            self.perth_net = perth_net.to(device)

    def apply_watermark(self, signal, sample_rate, **_):
        """Embed the watermark into ``signal``.

        The signal is resampled to the model's sample rate if necessary,
        watermarked in the magnitude-spectrogram domain (the phase is reused
        unchanged) and resampled back to ``sample_rate``.

        Args:
            signal: Audio samples (numpy array).
            sample_rate: Sample rate of ``signal``.
            **_: Ignored; accepts e.g. ``watermark=`` from generic callers.

        Returns:
            The watermarked audio as a numpy array at ``sample_rate``.
        """
        model_sr = self.perth_net.hp.sample_rate
        change_rate = sample_rate != model_sr
        if change_rate:
            signal = resample(signal, orig_sr=sample_rate, target_sr=model_sr)

        # Inference only: skip building an autograd graph that would be
        # discarded by the detach below anyway.
        with torch.no_grad():
            # split signal into magnitude and phase
            signal = _to_tensor(signal, self.perth_net.device)
            magspec, phase = self.perth_net.ap.signal_to_magphase(signal)

            # encode the watermark
            magspec = magspec[None].to(self.perth_net.device)
            wm_magspec, _mask = self.perth_net.encoder(magspec)
            wm_magspec = wm_magspec[0]

            # assemble back into watermarked signal
            wm_signal = self.perth_net.ap.magphase_to_signal(wm_magspec, phase)

        wm_signal = wm_signal.detach().cpu().numpy()
        if change_rate:
            return resample(wm_signal, orig_sr=model_sr, target_sr=sample_rate)
        return wm_signal

    def get_watermark(self, wm_signal, sample_rate, round=True, **_):
        """Estimate whether ``wm_signal`` carries the watermark.

        Args:
            wm_signal: Audio samples (numpy array).
            sample_rate: Sample rate of ``wm_signal``; resampled to the model's
                rate (polyphase) when different.
            round: If true, the clipped score is rounded to 0 or 1.
            **_: Ignored.

        Returns:
            Numpy scalar array holding the decoder score clipped to [0, 1].
        """
        model_sr = self.perth_net.hp.sample_rate
        if sample_rate != model_sr:
            wm_signal = resample(wm_signal, orig_sr=sample_rate, target_sr=model_sr,
                                 res_type="polyphase")

        with torch.no_grad():  # inference only
            wm_signal = _to_tensor(wm_signal, self.perth_net.device)
            wm_magspec, _phase = self.perth_net.ap.signal_to_magphase(wm_signal)
            wm_magspec = wm_magspec.to(self.perth_net.device)
            wmark_pred = self.perth_net.decoder(wm_magspec[None])[0]
            wmark_pred = wmark_pred.clip(0., 1.)
            wmark_pred = wmark_pred.round() if round else wmark_pred
        return wmark_pred.detach().cpu().numpy()
def normalize(hp, magspec, headroom_db=15):
    """Rescale a dB magnitude spectrogram relative to the configured floor.

    The floor is ``20 * log10(hp.stft_magnitude_min)``; a value at the floor
    maps to 0 and a value at ``headroom_db`` maps to 1.
    """
    floor_db = 20 * np.log10(hp.stft_magnitude_min)
    span_db = -floor_db + headroom_db
    return (magspec - floor_db) / span_db


def denormalize_spectrogram(hp, magspec, headroom_db=15):
    """Invert ``normalize``: map a rescaled spectrogram back to dB values."""
    floor_db = 20 * np.log10(hp.stft_magnitude_min)
    span_db = -floor_db + headroom_db
    return magspec * span_db + floor_db
def bwsk(k, n):
    """Return the k-th pole s_k of an order-n Butterworth transfer function.

    The pole is given in the S-domain on the unit circle; omega_c is not
    taken into account here.
    """
    arg = pi * (2 * k + n - 1) / (2 * n)
    return complex(cos(arg), sin(arg))


def bwj(k, n):
    """Return (s - s_k) * H(s) evaluated at s = s_k.

    H(s) is the Butterworth transfer function and s_k its k-th pole, so the
    result is the product of 1 / (s_k - s_m) over every other pole s_m.
    """
    # s_k does not depend on m, so compute it once instead of per iteration.
    pole_k = bwsk(k, n)
    res = complex(1, 0)
    for m in range(1, n + 1):
        if m != k:
            res /= (pole_k - bwsk(m, n))
    return res
def parse_hparam_overrides(args):
    """Replace ``args.hp`` with a ``PerthConfig`` built from defaults plus overrides.

    ``args.hp`` is read as a comma-separated list of ``name:value`` items
    (or ``None`` for no overrides). Each value is converted to the type of
    the matching attribute on ``default_hp``; for boolean parameters any
    value other than "false" (case-insensitive) is treated as True.

    Args:
        args: Object with an ``hp`` attribute, which is overwritten in place.

    Returns:
        The same ``args`` object, with ``args.hp`` set to a ``PerthConfig``.

    Raises:
        SystemExit: If an override names a parameter that ``default_hp`` lacks.
        ValueError: If an item has no ``:`` or its value cannot be converted.
    """
    hp_instance = default_hp._asdict()
    if args.hp is not None:
        for override_item in args.hp.split(","):
            # Split on the first colon only so the value may itself contain ":".
            param, value = override_item.split(":", 1)
            try:
                to_param_type = type(getattr(default_hp, param))
            except AttributeError:
                print(f"Invalid HParam Override: {param}. No matching parameter exists")
                # Non-zero status: this is an error exit, and sys.exit (unlike
                # the site-provided exit()) is always available.
                sys.exit(1)
            if to_param_type is bool:
                value = value.strip().lower() != "false"
            else:
                value = to_param_type(value)
            hp_instance[param] = value
    args.hp = PerthConfig(**hp_instance)
    return args
def audio_to_raw(wav, bit_depth=16):
    """Convert a floating-point waveform to signed-integer PCM samples.

    Samples are scaled by the largest positive value representable in
    ``bit_depth`` bits and clipped to that signed range.

    Args:
        wav: Numpy array with a floating-point dtype.
        bit_depth: Number of bits per output sample (sign bit included).

    Returns:
        Integer array; ``int16`` for the default depth, otherwise the
        narrowest of int8/int16/int32/int64 that can hold ``bit_depth`` bits.

    Raises:
        AssertionError: If ``wav`` is not a floating-point array.
    """
    assert wav.dtype.kind == "f", "This function takes floating point arrays"
    unsigned_bit_depth = bit_depth - 1
    range_min, range_max = -2 ** unsigned_bit_depth, 2 ** unsigned_bit_depth - 1

    # The output type must be wide enough for the requested depth; casting
    # e.g. 24-bit values to int16 would wrap around silently.
    if bit_depth <= 8:
        out_dtype = np.int8
    elif bit_depth <= 16:
        out_dtype = np.int16
    elif bit_depth <= 32:
        out_dtype = np.int32
    else:
        out_dtype = np.int64
    return (wav * range_max).clip(range_min, range_max).astype(out_dtype)
def flatten_watermark(watermark_vector):
    """Collapse repeated watermark readings into one value per row.

    Takes the most frequent value along axis 1 of ``watermark_vector`` and
    returns the result as a 1-D array.
    """
    most_common = mode(watermark_vector, axis=1).mode
    # Depending on the SciPy release, `mode` either keeps the reduced axis
    # (shape (rows, 1)) or drops it (shape (rows,)); reshape handles both,
    # whereas squeeze(-1) raises in the second case.
    return np.asarray(most_common).reshape(-1)
def validate_string_watermark(watermark: str) -> bool:
    """Return True when *watermark* contains only the characters "0" and "1".

    The previous implementation returned True when an invalid character was
    present, which is the opposite of what the name promises.
    NOTE(review): callers are not visible here; confirm none relied on the
    inverted result. An empty string is reported as valid.
    """
    return all(char in ("1", "0") for char in watermark)
def calculate_audio_metrics(original: np.ndarray, watermarked: np.ndarray) -> Dict[str, float]:
    """
    Calculate audio quality metrics between original and watermarked audio.

    Args:
        original: Original audio data
        watermarked: Watermarked audio data

    Returns:
        Dictionary of quality metrics:
        - snr: Signal-to-Noise Ratio (dB)
        - mse: Mean Squared Error
        - psnr: Peak Signal-to-Noise Ratio (dB)

    Raises:
        ValueError: If the two inputs differ in length.
    """
    if len(original) != len(watermarked):
        raise ValueError("Original and watermarked audio must have the same length")

    # Promote to float64 before any arithmetic: squaring integer PCM data
    # (e.g. int16) would otherwise overflow silently and corrupt every metric.
    original = np.asarray(original, dtype=np.float64)
    watermarked = np.asarray(watermarked, dtype=np.float64)

    # Calculate Mean Squared Error
    mse = np.mean((original - watermarked) ** 2)

    # Calculate Signal-to-Noise Ratio (the noise power is the MSE)
    signal_power = np.mean(original ** 2)
    snr = 10 * np.log10(signal_power / mse) if mse > 0 else float('inf')

    # Calculate Peak Signal-to-Noise Ratio
    max_value = max(np.max(np.abs(original)), np.max(np.abs(watermarked)))
    psnr = 20 * np.log10(max_value / np.sqrt(mse)) if mse > 0 else float('inf')

    return {
        'snr': snr,
        'mse': mse,
        'psnr': psnr
    }
17 | """ 18 | 19 | @abstractmethod 20 | def apply_watermark(self, wav: np.ndarray, watermark: Optional[np.ndarray] = None, 21 | sample_rate: int = 44100, **kwargs) -> np.ndarray: 22 | """ 23 | Apply a watermark to an audio signal. 24 | 25 | Args: 26 | wav: Input audio signal as numpy array 27 | watermark: Optional watermark data to embed. If None, a default watermark may be generated. 28 | sample_rate: Sample rate of the audio signal in Hz 29 | **kwargs: Additional algorithm-specific parameters 30 | 31 | Returns: 32 | Watermarked audio signal as numpy array 33 | 34 | Raises: 35 | WatermarkingException: If watermarking fails 36 | """ 37 | raise NotImplementedError() 38 | 39 | @abstractmethod 40 | def get_watermark(self, watermarked_wav: np.ndarray, sample_rate: int = 44100, 41 | watermark_length: Optional[int] = None, **kwargs) -> np.ndarray: 42 | """ 43 | Extract a watermark from a watermarked audio signal. 44 | 45 | Args: 46 | watermarked_wav: Watermarked audio signal as numpy array 47 | sample_rate: Sample rate of the audio signal in Hz 48 | watermark_length: Optional expected length of the watermark 49 | **kwargs: Additional algorithm-specific parameters 50 | 51 | Returns: 52 | Extracted watermark data as numpy array 53 | 54 | Raises: 55 | WatermarkingException: If watermark extraction fails 56 | """ 57 | raise NotImplementedError() 58 | 59 | def verify_compatibility(self, wav: np.ndarray, sample_rate: int) -> bool: 60 | """ 61 | Verify if the audio is compatible with this watermarking method. 
def load_wav(fpath, target_sr, res_algo="kaiser_best"):
    """
    Load a PCM audio file, optionally resampling it.

    :param fpath: path of the audio file to load.
    :param target_sr: expected sample rate after loading and possibly resampling. If None,
      there will be no resampling.
    :param res_algo: algorithm for resampling. If None, there will also be no resampling but if
      the target_sr is valid, the actual sample rate of the audio on disk will be checked against
      and an error will be thrown if they do not match.
    :return: tuple ``(wav, actual_sr)`` as returned by ``librosa.core.load``.
    :raises WatermarkingException: if the file's subtype does not start with "PCM".
    :raises CorruptedAudioException: if loading fails with EOFError or NoBackendError.
    """
    # Only the subtype is needed from this probe, so close the handle right
    # away instead of leaving it open until garbage collection.
    with sf.SoundFile(str(fpath)) as audio_file:
        bit_depth = audio_file.subtype
    if not bit_depth.startswith("PCM"):
        raise WatermarkingException("Unsupported Audio type for Watermarking. "
                                    "Only 16 or 24-bit PCM/WAV/AIFF audio files can be watermarked.")
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            wav, actual_sr = librosa.core.load(
                str(fpath), sr=(target_sr if res_algo else None), res_type=res_algo
            )
    except (EOFError, NoBackendError) as err:
        # Chain the cause so the underlying decoder error stays visible.
        raise CorruptedAudioException("Failed to load audio file") from err

    if target_sr is not None:
        assert actual_sr == target_sr, "Loaded audio doesn't have expected sampling rate (%s vs " \
                                       "%s, resampling_algo=%s)" % (actual_sr, target_sr, res_algo)

    return wav, actual_sr
scikit-learn>=0.22 13 | SoundFile>=0.10.3.post1 14 | sox>=1.4.0 15 | tabulate>=0.8.9 16 | tqdm>=4.61.2 17 | tensorboard>=2.10.1 18 | pyYaml>=5.4.1 19 | pydub 20 | torch==2.1.1 21 | torchaudio==2.1.1 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | from setuptools import find_packages 4 | 5 | 6 | with open("README.md", "r", encoding="utf-8") as help_file: 7 | long_description = help_file.read() 8 | 9 | requirements = [] 10 | if os.path.exists("requirements.txt"): 11 | with open("requirements.txt", "r") as f: 12 | requirements = f.read().splitlines() 13 | 14 | # Model and pretrained data files that should be included in the package 15 | bundled_data = [ 16 | "perth_net/pretrained/*/*.*", # Perth models 17 | ] 18 | 19 | setuptools.setup( 20 | name="resemble-perth", 21 | version="1.0.1", 22 | author="Resemble AI, Aditya", 23 | author_email="team@resemble.ai, aditya@resemble.ai", 24 | description="Audio Watermarking and Detection Library", 25 | long_description=long_description, 26 | long_description_content_type="text/markdown", 27 | url="https://github.com/resemble-ai/Perth", 28 | keywords=["Audio Watermarking", "Perceptual Watermarking", "Neural Networks", "Audio Processing"], 29 | project_urls={ 30 | 'Bug Reports': 'https://github.com/resemble-ai/Perth/issues', 31 | 'Source': 'https://github.com/resemble-ai/Perth', 32 | 'Documentation': 'https://github.com/resemble-ai/Perth/blob/main/README.md', 33 | }, 34 | packages=find_packages(), 35 | package_data={"perth": bundled_data}, 36 | include_package_data=True, 37 | install_requires=requirements, 38 | python_requires=">=3.8", 39 | 
classifiers=[ 40 | "Programming Language :: Python :: 3", 41 | "Programming Language :: Python :: 3.8", 42 | "Programming Language :: Python :: 3.9", 43 | "Programming Language :: Python :: 3.10", 44 | "Programming Language :: Python :: 3.11", 45 | "License :: OSI Approved :: MIT License", 46 | "Operating System :: OS Independent", 47 | "Topic :: Multimedia :: Sound/Audio", 48 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 49 | ], 50 | entry_points={ 51 | 'console_scripts': [ 52 | 'perth=perth.cli.watermark_cli:main', 53 | ], 54 | }, 55 | ) 56 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | addopts = -v -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import os 4 | import tempfile 5 | 6 | from perth import DummyWatermarker 7 | from perth.utils import calculate_audio_metrics 8 | 9 | 10 | class TestDummyWatermarker(unittest.TestCase): 11 | """Test the DummyWatermarker implementation.""" 12 | 13 | def setUp(self): 14 | """Set up test fixtures.""" 15 | self.watermarker = DummyWatermarker() 16 | # Create a simple sine wave as test audio 17 | self.sample_rate = 44100 18 | t = np.linspace(0, 1, self.sample_rate) 19 | self.test_audio = np.sin(2 * np.pi * 440 * t).astype(np.float32) 20 | 21 | def test_apply_watermark(self): 22 | """Test that apply_watermark returns an array of the correct shape.""" 23 | watermarked = self.watermarker.apply_watermark(self.test_audio, sample_rate=self.sample_rate) 24 | self.assertEqual(watermarked.shape, self.test_audio.shape) 25 | 26 | def test_get_watermark(self): 27 | """Test that 
get_watermark returns a watermark.""" 28 | watermarked = self.watermarker.apply_watermark(self.test_audio, sample_rate=self.sample_rate) 29 | watermark = self.watermarker.get_watermark(watermarked, sample_rate=self.sample_rate) 30 | self.assertIsInstance(watermark, np.ndarray) 31 | self.assertEqual(len(watermark), 32) # Default length for dummy watermarker 32 | 33 | def test_custom_watermark_length(self): 34 | """Test that get_watermark respects custom watermark length.""" 35 | watermarked = self.watermarker.apply_watermark(self.test_audio, sample_rate=self.sample_rate) 36 | custom_length = 64 37 | watermark = self.watermarker.get_watermark( 38 | watermarked, sample_rate=self.sample_rate, watermark_length=custom_length 39 | ) 40 | self.assertEqual(len(watermark), custom_length) 41 | 42 | 43 | class TestAudioMetrics(unittest.TestCase): 44 | """Test the audio metrics calculation utilities.""" 45 | 46 | def setUp(self): 47 | """Set up test fixtures.""" 48 | # Create a simple sine wave as test audio 49 | self.sample_rate = 44100 50 | t = np.linspace(0, 1, self.sample_rate) 51 | self.original = np.sin(2 * np.pi * 440 * t).astype(np.float32) 52 | 53 | # Create a slightly modified version with some noise 54 | noise = np.random.normal(0, 0.01, len(self.original)) 55 | self.modified = self.original + noise 56 | 57 | def test_calculate_metrics(self): 58 | """Test that audio metrics calculation works correctly.""" 59 | metrics = calculate_audio_metrics(self.original, self.modified) 60 | 61 | # Check that metrics are returned and have reasonable values 62 | self.assertIn('snr', metrics) 63 | self.assertIn('mse', metrics) 64 | self.assertIn('psnr', metrics) 65 | 66 | # SNR should be positive for this test case 67 | self.assertGreater(metrics['snr'], 0) 68 | 69 | # MSE should be non-zero but small 70 | self.assertGreater(metrics['mse'], 0) 71 | self.assertLess(metrics['mse'], 0.1) 72 | 73 | # PSNR should be positive and reasonably high 74 | self.assertGreater(metrics['psnr'], 0) 
75 | 76 | 77 | if __name__ == '__main__': 78 | unittest.main() --------------------------------------------------------------------------------