├── LICENSE
├── README.md
├── docs
    ├── api_reference.md
    └── getting_started.md
├── examples
    ├── advanced
    │   └── robustness_test.py
    └── basic
    │   └── watermark_audio.py
├── perth
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-310.pyc
    │   ├── __init__.cpython-311.pyc
    │   ├── dummy_watermarker.cpython-311.pyc
    │   ├── utils.cpython-311.pyc
    │   ├── watermarker.cpython-310.pyc
    │   └── watermarker.cpython-311.pyc
    ├── cli
    │   ├── __init__.py
    │   └── watermark_cli.py
    ├── config.py
    ├── dummy_watermarker.py
    ├── perth_net
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   └── __init__.cpython-311.pyc
    │   ├── perth_net_implicit
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-311.pyc
    │   │   │   ├── audio_processor.cpython-311.pyc
    │   │   │   ├── checkpoint_manager.cpython-311.pyc
    │   │   │   ├── config.cpython-311.pyc
    │   │   │   ├── perth_watermarker.cpython-311.pyc
    │   │   │   └── utils.cpython-311.pyc
    │   │   ├── audio_processor.py
    │   │   ├── checkpoint_manager.py
    │   │   ├── config.py
    │   │   ├── model
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-311.pyc
    │   │   │   │   ├── decoder.cpython-311.pyc
    │   │   │   │   ├── encoder.cpython-311.pyc
    │   │   │   │   └── perth_net.cpython-311.pyc
    │   │   │   ├── decoder.py
    │   │   │   ├── encoder.py
    │   │   │   └── perth_net.py
    │   │   ├── perth_watermarker.py
    │   │   └── utils.py
    │   └── pretrained
    │   │   └── implicit
    │   │       ├── hparams.yaml
    │   │       ├── id.txt
    │   │       └── perth_net_250000.pth.tar
    ├── utils.py
    ├── watermarker.py
    └── waveform.py
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
    ├── pytest.ini
    └── test_basic.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Resemble AI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Perth
  2 | 
  3 | **Perth** is a comprehensive Python library for audio watermarking and detection.
  4 | 
  5 | ## Overview
  6 | 
  7 | Perth enables you to embed imperceptible watermarks in audio files and later detect them, even after the audio has undergone various transformations or manipulations. The library implements multiple watermarking techniques including neural network-based approaches.
  8 | 
  9 | ## Features
 10 | 
 11 | - **Multiple Watermarking Techniques**: Including the Perth-Net Implicit neural network approach
 12 | - **Robust Watermarks**: Watermarks can survive common audio transformations like compression, resampling, and more
 13 | - **Command-Line Interface**: Easy to use CLI for quick watermarking tasks
 14 | - **Python API**: Comprehensive API for integration into your applications
 15 | - **Quality Metrics**: Tools to evaluate the signal quality of watermarked audio (SNR, MSE, and PSNR)
 16 | 
 17 | ## Installation
 18 | 
 19 | ### From PyPI (Recommended)
 20 | 
 21 | ```bash
 22 | pip install resemble-perth
 23 | ```
 24 | 
 25 | ### From Source
 26 | 
 27 | ```bash
 28 | git clone https://github.com/resemble-ai/Perth
 29 | cd Perth
 30 | pip install -e .
 31 | ```
 32 | 
 33 | ## Quick Start
 34 | 
 35 | ### Command Line Usage
 36 | 
 37 | ```bash
 38 | # Apply a watermark to an audio file
 39 | perth input.wav -o output.wav
 40 | 
 41 | # Extract a watermark from an audio file
 42 | perth input.wav --extract
 43 | ```
 44 | 
 45 | ### Python API Usage
 46 | 
 47 | #### Applying a Watermark
 48 | 
 49 | ```python
 50 | import perth
 51 | import librosa
 52 | import soundfile as sf
 53 | 
 54 | # Load audio file
 55 | wav, sr = librosa.load("input.wav", sr=None)
 56 | 
 57 | # Initialize watermarker
 58 | watermarker = perth.PerthImplicitWatermarker()
 59 | 
 60 | # Apply watermark
 61 | watermarked_audio = watermarker.apply_watermark(wav, watermark=None, sample_rate=sr)
 62 | 
 63 | # Save watermarked audio
 64 | sf.write("output.wav", watermarked_audio, sr)
 65 | ```
 66 | 
 67 | #### Extracting a Watermark
 68 | 
 69 | ```python
 70 | import perth
 71 | import librosa
 72 | 
 73 | # Load the watermarked audio
 74 | watermarked_audio, sr = librosa.load("output.wav", sr=None)
 75 | 
 76 | # Initialize watermarker (same as used for embedding)
 77 | watermarker = perth.PerthImplicitWatermarker()
 78 | 
 79 | # Extract watermark
 80 | watermark = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
 81 | print(f"Extracted watermark: {watermark}")
 82 | ```
 83 | 
 84 | ### Perth Implicit Watermarker
 85 | 
 86 | The Perth-Net Implicit watermarker uses a neural network-based approach for embedding and extracting watermarks. It's designed to be robust against various audio manipulations while maintaining high audio quality.
 87 | 
 88 | ```python
 89 | from perth.perth_net.perth_net_implicit.perth_watermarker import PerthImplicitWatermarker
 90 | 
 91 | watermarker = PerthImplicitWatermarker(device="cuda")  # Use GPU for faster processing
 92 | ```
 93 | 
 94 | ### Dummy Watermarker
 95 | 
 96 | A simple placeholder watermarker for testing and demonstration purposes.
 97 | 
 98 | ```python
 99 | from perth import DummyWatermarker
100 | 
101 | watermarker = DummyWatermarker()
102 | ```
103 | 
104 | ## Evaluating Watermarked Audio
105 | 
106 | The library includes utilities for evaluating the quality and robustness of watermarked audio:
107 | 
108 | ```python
109 | import librosa
110 | from perth.utils import calculate_audio_metrics, plot_audio_comparison
111 | 
112 | # Load original and watermarked audio
113 | original, sr = librosa.load("input.wav", sr=None)
114 | watermarked, _ = librosa.load("output.wav", sr=None)
115 | 
116 | # Calculate quality metrics
117 | metrics = calculate_audio_metrics(original, watermarked)
118 | print(f"SNR: {metrics['snr']:.2f} dB")
119 | print(f"PSNR: {metrics['psnr']:.2f} dB")
120 | 
121 | # Visualize differences
122 | plot_audio_comparison(original, watermarked, sr, output_path="comparison.png")
123 | ```
124 | 
125 | ## Contributing
126 | 
127 | Contributions are welcome! Please feel free to submit a Pull Request.
128 | 
129 | ## License
130 | 
131 | This project is licensed under the MIT License - see the LICENSE file for details.
132 | 


--------------------------------------------------------------------------------
/docs/api_reference.md:
--------------------------------------------------------------------------------
  1 | # Perth API Reference
  2 | 
  3 | This document provides detailed information about the classes and functions available in the Perth library.
  4 | 
  5 | ## Core Classes
  6 | 
  7 | ### WatermarkerBase
  8 | 
  9 | `WatermarkerBase` is the abstract base class that all watermarking implementations in Perth extend.
 10 | 
 11 | ```python
 12 | from perth import WatermarkerBase
 13 | ```
 14 | 
 15 | #### Methods
 16 | 
 17 | - **apply_watermark**(wav, watermark=None, sample_rate=44100, **kwargs)
 18 |   
 19 |   Apply a watermark to an audio signal.
 20 |   
 21 |   - **Parameters**:
 22 |     - `wav` (np.ndarray): Input audio signal as numpy array
 23 |     - `watermark` (np.ndarray, optional): Watermark data to embed. If None, a default watermark is generated.
 24 |     - `sample_rate` (int): Sample rate of the audio signal in Hz
 25 |     - `**kwargs`: Additional algorithm-specific parameters
 26 |   
 27 |   - **Returns**:
 28 |     - `np.ndarray`: Watermarked audio signal
 29 | 
 30 | - **get_watermark**(watermarked_wav, sample_rate=44100, watermark_length=None, **kwargs)
 31 |   
 32 |   Extract a watermark from a watermarked audio signal.
 33 |   
 34 |   - **Parameters**:
 35 |     - `watermarked_wav` (np.ndarray): Watermarked audio signal
 36 |     - `sample_rate` (int): Sample rate of the audio signal in Hz
 37 |     - `watermark_length` (int, optional): Expected length of the watermark
 38 |     - `**kwargs`: Additional algorithm-specific parameters
 39 |   
 40 |   - **Returns**:
 41 |     - `np.ndarray`: Extracted watermark data
 42 | 
 43 | ### PerthImplicitWatermarker
 44 | 
 45 | `PerthImplicitWatermarker` is a neural network-based watermarking implementation that uses the Perth-Net model for embedding and extracting watermarks.
 46 | 
 47 | ```python
 48 | from perth import PerthImplicitWatermarker
 49 | ```
 50 | 
 51 | #### Constructor
 52 | 
 53 | - **\_\_init\_\_**(run_name="implicit", models_dir=None, device="cpu", perth_net=None)
 54 |   
 55 |   - **Parameters**:
 56 |     - `run_name` (str): Name of the model configuration to load
 57 |     - `models_dir` (str, optional): Directory containing the model files
 58 |     - `device` (str): Device to run the model on ("cpu" or "cuda")
 59 |     - `perth_net` (PerthNet, optional): Pre-initialized PerthNet model instance
 60 | 
 61 | #### Methods
 62 | 
 63 | Inherits all methods from `WatermarkerBase` with the following implementations:
 64 | 
 65 | - **apply_watermark**(signal, watermark, sample_rate, **_)
 66 |   
 67 |   Apply a neural network-based watermark to an audio signal.
 68 |   
 69 |   - **Parameters**:
 70 |     - `signal` (np.ndarray): Input audio signal
 71 |     - `watermark` (np.ndarray, optional): Ignored (Perth-Net generates its own watermark)
 72 |     - `sample_rate` (int): Sample rate of the audio signal in Hz
 73 |   
 74 |   - **Returns**:
 75 |     - `np.ndarray`: Watermarked audio signal
 76 | 
 77 | - **get_watermark**(wm_signal, sample_rate, round=True, **_)
 78 |   
 79 |   Extract a watermark from a watermarked audio signal.
 80 |   
 81 |   - **Parameters**:
 82 |     - `wm_signal` (np.ndarray): Watermarked audio signal
 83 |     - `sample_rate` (int): Sample rate of the audio signal in Hz
 84 |     - `round` (bool): Whether to round the watermark values to binary (0 or 1)
 85 |   
 86 |   - **Returns**:
 87 |     - `np.ndarray`: Extracted watermark data
 88 | 
 89 | ## Utility Functions
 90 | 
 91 | ### Audio Processing
 92 | 
 93 | ```python
 94 | from perth.utils import load_audio, save_audio
 95 | ```
 96 | 
 97 | - **load_audio**(audio_path, sr=None)
 98 |   
 99 |   Load an audio file using librosa.
100 |   
101 |   - **Parameters**:
102 |     - `audio_path` (str): Path to the audio file
103 |     - `sr` (int, optional): Target sample rate. If None, the native sample rate is used.
104 |   
105 |   - **Returns**:
106 |     - `tuple`: (audio_data, sample_rate)
107 | 
108 | - **save_audio**(audio_data, file_path, sample_rate)
109 |   
110 |   Save audio data to a file.
111 |   
112 |   - **Parameters**:
113 |     - `audio_data` (np.ndarray): Audio data as a numpy array
114 |     - `file_path` (str): Output file path
115 |     - `sample_rate` (int): Sample rate for the audio file
116 | 
117 | ### Analysis and Visualization
118 | 
119 | ```python
120 | from perth.utils import calculate_audio_metrics, plot_audio_comparison
121 | ```
122 | 
123 | - **calculate_audio_metrics**(original, watermarked)
124 |   
125 |   Calculate audio quality metrics between original and watermarked audio.
126 |   
127 |   - **Parameters**:
128 |     - `original` (np.ndarray): Original audio data
129 |     - `watermarked` (np.ndarray): Watermarked audio data
130 |   
131 |   - **Returns**:
132 |     - `dict`: Dictionary with quality metrics:
133 |       - `snr`: Signal-to-Noise Ratio (dB)
134 |       - `mse`: Mean Squared Error
135 |       - `psnr`: Peak Signal-to-Noise Ratio (dB)
136 | 
137 | - **plot_audio_comparison**(original, watermarked, sample_rate, output_path=None)
138 |   
139 |   Plot a comparison between original and watermarked audio.
140 |   
141 |   - **Parameters**:
142 |     - `original` (np.ndarray): Original audio data
143 |     - `watermarked` (np.ndarray): Watermarked audio data
144 |     - `sample_rate` (int): Sample rate of the audio
145 |     - `output_path` (str, optional): Path to save the plot. If None, plot is shown interactively.
146 | 
147 | ## Command Line Interface
148 | 
149 | Perth provides a command-line interface through the `perth` command:
150 | 
151 | ```
152 | perth [OPTIONS] INPUT_FILE
153 | ```
154 | 
155 | ### Options
156 | 
157 | - `--output`, `-o`: Path to save the output watermarked audio file
158 | - `--method`, `-m`: Watermarking method to use (choices: perth, dummy)
159 | - `--extract`, `-e`: Extract watermark from the input file instead of applying a watermark
160 | - `--device`, `-d`: Device to use for neural network processing (choices: cpu, cuda)
161 | 


--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
 1 | # Getting Started with Perth
 2 | 
 3 | This guide will help you get started with the Perth audio watermarking library.
 4 | 
 5 | ## Installation
 6 | 
 7 | ### Prerequisites
 8 | 
 9 | Before installing Perth, make sure you have the following prerequisites:
10 | 
11 | - Python 3.8 or higher
12 | - pip package manager
13 | 
14 | For GPU acceleration (optional):
15 | - CUDA-compatible GPU
16 | - PyTorch with CUDA support
17 | 
18 | ### Install from PyPI
19 | 
20 | ```bash
21 | pip install resemble-perth
22 | ```
23 | 
24 | ### Install from Source
25 | 
26 | ```bash
27 | git clone https://github.com/resemble-ai/Perth
28 | cd Perth
29 | pip install -e .
30 | ```
31 | 
32 | ## Basic Usage
33 | 
34 | Here's a simple example of how to use Perth to watermark an audio file:
35 | 
36 | ```python
37 | import librosa
38 | import soundfile as sf
39 | from perth import PerthImplicitWatermarker
40 | 
41 | # Load audio file
42 | audio, sample_rate = librosa.load('input.wav', sr=None)
43 | 
44 | # Initialize watermarker
45 | watermarker = PerthImplicitWatermarker()
46 | 
47 | # Apply watermark
48 | watermarked_audio = watermarker.apply_watermark(audio, sample_rate=sample_rate)
49 | 
50 | # Save watermarked audio
51 | sf.write('output.wav', watermarked_audio, sample_rate)
52 | ```
53 | 
54 | To extract a watermark from an audio file:
55 | 
56 | ```python
57 | import librosa
58 | from perth import PerthImplicitWatermarker
59 | 
60 | # Load audio file
61 | audio, sample_rate = librosa.load('output.wav', sr=None)
62 | 
63 | # Initialize watermarker
64 | watermarker = PerthImplicitWatermarker()
65 | 
66 | # Extract watermark
67 | watermark = watermarker.get_watermark(audio, sample_rate=sample_rate)
68 | print(f"Extracted watermark confidence: {watermark.mean():.4f}")
69 | ```
70 | 
71 | ## Command Line Usage
72 | 
73 | Perth also provides a command-line interface for easy usage:
74 | 
75 | ```bash
76 | # Watermark an audio file
77 | perth input.wav -o output.wav
78 | 
79 | # Extract a watermark from a file
80 | perth input.wav --extract
81 | ```
82 | 
83 | Run `perth --help` for more options and information.
84 | 
85 | ## Next Steps
86 | 
87 | - Check out the [examples](../examples/) directory for more complex usage examples
88 | - See the [API Reference](./api_reference.md) for detailed information on available functions and classes
89 | - Learn about the watermarking techniques implemented in Perth (a dedicated `watermarking_techniques.md` guide is not yet included in `docs/`)


--------------------------------------------------------------------------------
/examples/advanced/robustness_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Advanced example demonstrating watermark robustness testing.
  4 | 
  5 | This script applies various audio transformations to watermarked audio 
  6 | and tests if the watermark can still be detected after these transformations.
  7 | """
  8 | import os
  9 | import argparse
 10 | import numpy as np
 11 | import librosa
 12 | import soundfile as sf
 13 | import matplotlib.pyplot as plt
 14 | from scipy.signal import resample
 15 | from tqdm import tqdm
 16 | 
 17 | from perth import PerthImplicitWatermarker
 18 | from perth.utils import calculate_audio_metrics, plot_audio_comparison
 19 | 
 20 | 
 21 | def apply_mp3_compression(audio, sr, output_path, bitrate='128k'):
 22 |     """Apply MP3 compression and decompression to audio."""
 23 |     import subprocess
 24 |     import tempfile
 25 |     
 26 |     # Save as WAV
 27 |     temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
 28 |     temp_wav.close()
 29 |     sf.write(temp_wav.name, audio, sr)
 30 |     
 31 |     # Compress to MP3
 32 |     temp_mp3 = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
 33 |     temp_mp3.close()
 34 |     subprocess.call(['ffmpeg', '-y', '-i', temp_wav.name, '-b:a', bitrate, temp_mp3.name], 
 35 |                    stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 36 |     
 37 |     # Decompress back to WAV
 38 |     temp_out = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
 39 |     temp_out.close()
 40 |     subprocess.call(['ffmpeg', '-y', '-i', temp_mp3.name, temp_out.name],
 41 |                    stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 42 |     
 43 |     # Load the processed audio
 44 |     audio_processed, sr = librosa.load(temp_out.name, sr=sr)
 45 |     
 46 |     # Clean up temporary files
 47 |     os.unlink(temp_wav.name)
 48 |     os.unlink(temp_mp3.name)
 49 |     os.unlink(temp_out.name)
 50 |     
 51 |     return audio_processed
 52 | 
 53 | 
 54 | def apply_transform(audio, sr, transform_type, **kwargs):
 55 |     """Apply various transformations to audio."""
 56 |     if transform_type == 'mp3':
 57 |         bitrate = kwargs.get('bitrate', '128k')
 58 |         return apply_mp3_compression(audio, sr, None, bitrate)
 59 |     
 60 |     elif transform_type == 'resample':
 61 |         target_sr = kwargs.get('target_sr', 16000)
 62 |         # Resample to target SR
 63 |         audio_resampled = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
 64 |         # Resample back to original SR
 65 |         audio_restored = librosa.resample(audio_resampled, orig_sr=target_sr, target_sr=sr)
 66 |         return audio_restored
 67 |     
 68 |     elif transform_type == 'noise':
 69 |         noise_level = kwargs.get('noise_level', 0.005)
 70 |         noise = np.random.normal(0, noise_level, len(audio))
 71 |         return audio + noise
 72 |     
 73 |     elif transform_type == 'clip':
 74 |         clip_level = kwargs.get('clip_level', 0.8)
 75 |         return np.clip(audio, -clip_level, clip_level)
 76 |     
 77 |     elif transform_type == 'reverse':
 78 |         # Cut a segment and reverse it
 79 |         segment_start = len(audio) // 3
 80 |         segment_end = segment_start + len(audio) // 3
 81 |         audio_mod = audio.copy()
 82 |         audio_mod[segment_start:segment_end] = audio_mod[segment_start:segment_end][::-1]
 83 |         return audio_mod
 84 |     
 85 |     else:
 86 |         raise ValueError(f"Unknown transform type: {transform_type}")
 87 | 
 88 | 
 89 | def main():
 90 |     parser = argparse.ArgumentParser(description="Test watermark robustness against various transformations")
 91 |     parser.add_argument("input_file", help="Path to the input audio file to be watermarked")
 92 |     parser.add_argument("--output_dir", "-o", default="robustness_results",
 93 |                         help="Directory to save results")
 94 |     parser.add_argument("--device", "-d", default="cpu", choices=["cpu", "cuda"],
 95 |                         help="Device to use for neural network processing")
 96 |     args = parser.parse_args()
 97 | 
 98 |     # Create output directory
 99 |     os.makedirs(args.output_dir, exist_ok=True)
100 |     
101 |     # Load audio
102 |     print(f"Loading audio: {args.input_file}")
103 |     audio, sr = librosa.load(args.input_file, sr=None)
104 |     
105 |     # Initialize watermarker
106 |     print("Initializing watermarker...")
107 |     watermarker = PerthImplicitWatermarker(device=args.device)
108 |     
109 |     # Apply watermark
110 |     print("Applying watermark...")
111 |     watermarked_audio = watermarker.apply_watermark(audio, sample_rate=sr)
112 |     
113 |     # Save watermarked audio
114 |     watermarked_path = os.path.join(args.output_dir, "watermarked.wav")
115 |     sf.write(watermarked_path, watermarked_audio, sr)
116 |     print(f"Saved watermarked audio to {watermarked_path}")
117 |     
118 |     # Extract watermark from original watermarked audio (baseline)
119 |     baseline_watermark = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
120 |     baseline_confidence = np.mean(baseline_watermark)
121 |     print(f"Baseline watermark confidence: {baseline_confidence:.4f}")
122 |     
123 |     # Define transformations to test
124 |     transformations = [
125 |         ('mp3', {'bitrate': '128k'}, 'MP3 Compression (128k)'),
126 |         ('mp3', {'bitrate': '64k'}, 'MP3 Compression (64k)'),
127 |         ('resample', {'target_sr': 16000}, 'Resample to 16kHz and back'),
128 |         ('resample', {'target_sr': 8000}, 'Resample to 8kHz and back'),
129 |         ('noise', {'noise_level': 0.001}, 'Low Noise Addition'),
130 |         ('noise', {'noise_level': 0.01}, 'High Noise Addition'),
131 |         ('clip', {'clip_level': 0.8}, 'Amplitude Clipping (0.8)'),
132 |         ('reverse', {}, 'Segment Reversal'),
133 |     ]
134 |     
135 |     # Test each transformation
136 |     results = []
137 |     
138 |     print("\nTesting watermark robustness against transformations:")
139 |     for transform_type, params, label in tqdm(transformations):
140 |         # Apply transformation
141 |         transformed_audio = apply_transform(watermarked_audio, sr, transform_type, **params)
142 |         
143 |         # Save transformed audio
144 |         transformed_path = os.path.join(args.output_dir, f"{transform_type}_transformed.wav")
145 |         sf.write(transformed_path, transformed_audio, sr)
146 |         
147 |         # Extract watermark
148 |         extracted_watermark = watermarker.get_watermark(transformed_audio, sample_rate=sr)
149 |         confidence = np.mean(extracted_watermark)
150 |         
151 |         # Calculate audio quality metrics
152 |         metrics = calculate_audio_metrics(watermarked_audio, transformed_audio)
153 |         
154 |         # Store results
155 |         results.append({
156 |             'transform': label,
157 |             'confidence': confidence,
158 |             'snr': metrics['snr'],
159 |             'success': confidence > 0.75  # Arbitrary threshold for demonstration
160 |         })
161 |         
162 |         print(f"  {label}: Confidence = {confidence:.4f}, SNR = {metrics['snr']:.2f} dB, "
163 |               f"{'PASS' if confidence > 0.75 else 'FAIL'}")
164 |     
165 |     # Plot results
166 |     fig, ax = plt.subplots(figsize=(12, 6))
167 |     
168 |     x = np.arange(len(results))
169 |     bar_width = 0.35
170 |     
171 |     # Plot confidence scores
172 |     ax.bar(x, [r['confidence'] for r in results], bar_width,
173 |            label='Watermark Confidence', color='skyblue')
174 |     
175 |     # Add threshold line
176 |     ax.axhline(y=0.75, linestyle='--', color='red', alpha=0.7, 
177 |                label='Success Threshold (0.75)')
178 |     
179 |     # Add labels and title
180 |     ax.set_xlabel('Transformation')
181 |     ax.set_ylabel('Watermark Confidence')
182 |     ax.set_title('Watermark Robustness to Various Transformations')
183 |     ax.set_xticks(x)
184 |     ax.set_xticklabels([r['transform'] for r in results], rotation=45, ha='right')
185 |     ax.legend()
186 |     
187 |     plt.tight_layout()
188 |     plt.savefig(os.path.join(args.output_dir, 'robustness_results.png'))
189 |     print(f"Results saved to {args.output_dir}")
190 | 
191 | 
192 | if __name__ == "__main__":
193 |     main()


--------------------------------------------------------------------------------
/examples/basic/watermark_audio.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Basic example of how to watermark an audio file using Perth.
 4 | """
 5 | import os
 6 | import argparse
 7 | import numpy as np
 8 | import librosa
 9 | import soundfile as sf
10 | from perth import PerthImplicitWatermarker
11 | from perth.utils import calculate_audio_metrics
12 | 
def main():
    """Watermark one audio file, verify the watermark, and print metrics.

    Reads the input path, optional output path and device from the command
    line. When no output path is given, ``_watermarked`` is inserted before
    the input file's extension. The watermarked audio is written with
    ``soundfile``, the watermark is extracted again from the in-memory
    result, and SNR/MSE/PSNR against the original are printed.
    """
    cli = argparse.ArgumentParser(description="Watermark an audio file with Perth")
    cli.add_argument("input_file", help="Path to the input audio file")
    cli.add_argument(
        "--output", "-o",
        default=None,
        help="Path to save the output watermarked audio file",
    )
    cli.add_argument(
        "--device", "-d",
        default="cpu",
        choices=["cpu", "cuda"],
        help="Device to use for neural network processing",
    )
    args = cli.parse_args()

    # Fall back to "<input stem>_watermarked<input extension>".
    if args.output is None:
        stem, suffix = os.path.splitext(args.input_file)
        args.output = f"{stem}_watermarked{suffix}"

    # Make sure the destination directory exists before writing.
    destination_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(destination_dir, exist_ok=True)

    print(f"Loading audio file: {args.input_file}")
    wav, sr = librosa.load(args.input_file, sr=None)

    print(f"Initializing Perth watermarker (device: {args.device})...")
    watermarker = PerthImplicitWatermarker(device=args.device)

    print("Applying watermark...")
    watermarked_audio = watermarker.apply_watermark(wav, watermark=None, sample_rate=sr)

    sf.write(args.output, watermarked_audio, sr)
    print(f"Watermarked audio saved to: {args.output}")

    # Round-trip check: extract the watermark from what was just produced.
    print("Verifying watermark...")
    extracted_watermark = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
    verification_confidence = np.mean(extracted_watermark)
    print(f"Watermark verification confidence: {verification_confidence:.4f}")

    # Compare the watermarked signal against the original.
    metrics = calculate_audio_metrics(wav, watermarked_audio)
    report_lines = (
        "\nAudio Quality Metrics:",
        f"  Signal-to-Noise Ratio (SNR): {metrics['snr']:.2f} dB",
        f"  Mean Squared Error (MSE): {metrics['mse']:.6f}",
        f"  Peak Signal-to-Noise Ratio (PSNR): {metrics['psnr']:.2f} dB",
    )
    for line in report_lines:
        print(line)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/perth/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Perth: Audio Watermarking and Detection Library.
 3 | 
 4 | This library provides tools and algorithms for embedding and detecting
 5 | watermarks in audio files using various techniques.
 6 | """
 7 | 
 8 | from .watermarker import WatermarkerBase, WatermarkingException
 9 | from .dummy_watermarker import DummyWatermarker
10 | 
11 | # Import specific watermarker implementations
try:
    from .perth_net.perth_net_implicit.perth_watermarker import PerthImplicitWatermarker
except ImportError:
    # The implementation could not be imported; expose None so callers can
    # test for availability.
    PerthImplicitWatermarker = None

# Names exported at the package level. The Perth-Net watermarker is listed
# only when its import above succeeded.
__all__ = ['WatermarkerBase', 'WatermarkingException', 'DummyWatermarker']
if PerthImplicitWatermarker is not None:
    __all__ += ['PerthImplicitWatermarker']

# Package metadata
__version__ = '1.0.0'
__author__ = 'Resemble AI Team'


--------------------------------------------------------------------------------
/perth/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/perth/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/__pycache__/dummy_watermarker.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/dummy_watermarker.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/utils.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/__pycache__/watermarker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/watermarker.cpython-310.pyc


--------------------------------------------------------------------------------
/perth/__pycache__/watermarker.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/__pycache__/watermarker.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/cli/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Command-line interface for the Perth library.
3 | """
4 | from .watermark_cli import main
5 | 
6 | __all__ = ['main']


--------------------------------------------------------------------------------
/perth/cli/watermark_cli.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Command line interface for Perth watermarking.
  4 | """
  5 | import argparse
  6 | import os
  7 | import sys
  8 | import numpy as np
  9 | import librosa
 10 | import soundfile as sf
 11 | from typing import Optional, List
 12 | 
 13 | from perth.perth_net.perth_net_implicit.perth_watermarker import PerthImplicitWatermarker
 14 | from perth.dummy_watermarker import DummyWatermarker
 15 | from perth.config import get_config
 16 | from perth.utils import load_audio, save_audio, calculate_audio_metrics, plot_audio_comparison
 17 | 
 18 | 
 19 | def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
 20 |     """Parse command line arguments."""
 21 |     parser = argparse.ArgumentParser(
 22 |         description="Perth - Audio Watermarking Tool",
 23 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter
 24 |     )
 25 |     
 26 |     parser.add_argument("input_file", help="Path to the input audio file")
 27 |     parser.add_argument("--output", "-o", 
 28 |                         help="Path to save the output watermarked audio file. "
 29 |                              "If not provided, appends '_watermarked' to the input filename")
 30 |     parser.add_argument("--method", "-m", choices=["perth", "dummy"], 
 31 |                         help="Watermarking method to use")
 32 |     parser.add_argument("--extract", "-e", action="store_true",
 33 |                         help="Extract watermark from the input file instead of applying a watermark")
 34 |     parser.add_argument("--device", "-d", choices=["cpu", "cuda"],
 35 |                         help="Device to use for neural network processing")
 36 |     parser.add_argument("--config", "-c", 
 37 |                         help="Path to a configuration file")
 38 |     parser.add_argument("--visualize", "-v", action="store_true",
 39 |                         help="Generate visualization of watermark effect (only when not extracting)")
 40 |     
 41 |     return parser.parse_args(args)
 42 | 
 43 | 
 44 | def main(args: Optional[List[str]] = None) -> int:
 45 |     """Main function for the watermarking CLI."""
 46 |     parsed_args = parse_args(args)
 47 |     
 48 |     # Load configuration
 49 |     config = get_config(parsed_args.config)
 50 |     
 51 |     # Override config with command line arguments if provided
 52 |     if parsed_args.method:
 53 |         config.set('general', 'default_watermarker', parsed_args.method)
 54 |     if parsed_args.device:
 55 |         config.set('perth', 'device', parsed_args.device)
 56 |     
 57 |     method = config.get('general', 'default_watermarker')
 58 |     device = config.get('perth', 'device')
 59 |     
 60 |     try:
 61 |         # Load audio file
 62 |         print(f"Loading audio file: {parsed_args.input_file}")
 63 |         wav, sr = load_audio(parsed_args.input_file)
 64 |         
 65 |         # Initialize watermarker
 66 |         if method == "perth":
 67 |             print(f"Initializing Perth watermarker (device: {device})...")
 68 |             models_dir = config.get('perth', 'models_dir')
 69 |             run_name = config.get('perth', 'run_name')
 70 |             watermarker = PerthImplicitWatermarker(
 71 |                 run_name=run_name,
 72 |                 models_dir=models_dir,
 73 |                 device=device
 74 |             )
 75 |         else:
 76 |             print("Initializing dummy watermarker...")
 77 |             watermarker = DummyWatermarker()
 78 |         
 79 |         if parsed_args.extract:
 80 |             # Extract watermark
 81 |             print("Extracting watermark...")
 82 |             watermark = watermarker.get_watermark(wav, sample_rate=sr)
 83 |             print(f"Extracted watermark: {watermark}")
 84 |             print(f"Watermark confidence: {np.mean(watermark):.4f}")
 85 |             return 0
 86 |         else:
 87 |             # Apply watermark
 88 |             print("Applying watermark...")
 89 |             original_audio = wav.copy()  # Save original for comparison
 90 |             watermarked_audio = watermarker.apply_watermark(wav, watermark=None, sample_rate=sr)
 91 |             
 92 |             # Save watermarked audio
 93 |             if parsed_args.output:
 94 |                 output_path = parsed_args.output
 95 |             else:
 96 |                 base, ext = os.path.splitext(parsed_args.input_file)
 97 |                 output_path = f"{base}_watermarked{ext}"
 98 |             
 99 |             save_audio(watermarked_audio, output_path, sr)
100 |             print(f"Watermarked audio saved to: {output_path}")
101 |             
102 |             # Verify watermark
103 |             print("Verifying watermark...")
104 |             extracted = watermarker.get_watermark(watermarked_audio, sample_rate=sr)
105 |             print(f"Watermark verification confidence: {np.mean(extracted):.4f}")
106 |             
107 |             # Calculate and display quality metrics
108 |             metrics = calculate_audio_metrics(original_audio, watermarked_audio)
109 |             print("\nAudio Quality Metrics:")
110 |             print(f"  Signal-to-Noise Ratio (SNR): {metrics['snr']:.2f} dB")
111 |             print(f"  Mean Squared Error (MSE): {metrics['mse']:.8f}")
112 |             print(f"  Peak Signal-to-Noise Ratio (PSNR): {metrics['psnr']:.2f} dB")
113 |             
114 |             # Generate visualization if requested
115 |             if parsed_args.visualize:
116 |                 viz_path = os.path.splitext(output_path)[0] + "_comparison.png"
117 |                 print(f"\nGenerating visualization to: {viz_path}")
118 |                 plot_audio_comparison(original_audio, watermarked_audio, sr, viz_path)
119 |             
120 |             return 0
121 |             
122 |     except Exception as e:
123 |         print(f"Error: {e}", file=sys.stderr)
124 |         return 1
125 | 
126 | 
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())


--------------------------------------------------------------------------------
/perth/config.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Configuration handling for Perth library.
  3 | """
  4 | import os
  5 | import yaml
  6 | from typing import Dict, Any, Optional
  7 | 
  8 | 
  9 | class Config:
 10 |     """
 11 |     Configuration management for Perth.
 12 |     
 13 |     Handles loading, saving, and accessing configuration settings for the library.
 14 |     """
 15 |     
 16 |     # Default configuration values
 17 |     _defaults = {
 18 |         # General settings
 19 |         'general': {
 20 |             'default_watermarker': 'perth',
 21 |             'verbose': True,
 22 |         },
 23 |         
 24 |         # Perth-Net settings
 25 |         'perth': {
 26 |             'device': 'cpu',
 27 |             'run_name': 'implicit',
 28 |             'models_dir': None,  # Will be set to default location in __init__
 29 |         },
 30 |         
 31 |         # Audio processing settings
 32 |         'audio': {
 33 |             'default_sample_rate': 44100,
 34 |             'normalize': True,
 35 |         },
 36 |     }
 37 |     
 38 |     def __init__(self, config_path: Optional[str] = None):
 39 |         """
 40 |         Initialize configuration with default values and optional user config.
 41 |         
 42 |         Args:
 43 |             config_path: Path to a YAML configuration file to load
 44 |         """
 45 |         # Deep copy the defaults
 46 |         self._config = {}
 47 |         for section, values in self._defaults.items():
 48 |             self._config[section] = values.copy()
 49 |         
 50 |         # Set default models directory
 51 |         self._config['perth']['models_dir'] = os.path.join(
 52 |             os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
 53 |             'perth', 'perth_net', 'pretrained'
 54 |         )
 55 |         
 56 |         # Load user config if provided
 57 |         if config_path and os.path.exists(config_path):
 58 |             self.load(config_path)
 59 |     
 60 |     def load(self, config_path: str) -> None:
 61 |         """
 62 |         Load configuration from a YAML file.
 63 |         
 64 |         Args:
 65 |             config_path: Path to a YAML configuration file
 66 |         """
 67 |         try:
 68 |             with open(config_path, 'r') as f:
 69 |                 user_config = yaml.safe_load(f)
 70 |                 
 71 |             # Merge with current config
 72 |             if user_config:
 73 |                 for section, values in user_config.items():
 74 |                     if section in self._config:
 75 |                         self._config[section].update(values)
 76 |                     else:
 77 |                         self._config[section] = values
 78 |         except Exception as e:
 79 |             print(f"Warning: Could not load config from {config_path}: {e}")
 80 |     
 81 |     def save(self, config_path: str) -> None:
 82 |         """
 83 |         Save current configuration to a YAML file.
 84 |         
 85 |         Args:
 86 |             config_path: Path to save the configuration to
 87 |         """
 88 |         os.makedirs(os.path.dirname(os.path.abspath(config_path)), exist_ok=True)
 89 |         with open(config_path, 'w') as f:
 90 |             yaml.dump(self._config, f, default_flow_style=False)
 91 |     
 92 |     def get(self, section: str, key: str, default: Any = None) -> Any:
 93 |         """
 94 |         Get a configuration value.
 95 |         
 96 |         Args:
 97 |             section: Configuration section
 98 |             key: Configuration key
 99 |             default: Default value to return if key is not found
100 |             
101 |         Returns:
102 |             Configuration value or default
103 |         """
104 |         if section in self._config and key in self._config[section]:
105 |             return self._config[section][key]
106 |         return default
107 |     
108 |     def set(self, section: str, key: str, value: Any) -> None:
109 |         """
110 |         Set a configuration value.
111 |         
112 |         Args:
113 |             section: Configuration section
114 |             key: Configuration key
115 |             value: Value to set
116 |         """
117 |         if section not in self._config:
118 |             self._config[section] = {}
119 |         self._config[section][key] = value
120 |     
121 |     def get_section(self, section: str) -> Dict[str, Any]:
122 |         """
123 |         Get an entire configuration section.
124 |         
125 |         Args:
126 |             section: Configuration section name
127 |             
128 |         Returns:
129 |             Dictionary of configuration values for the section
130 |         """
131 |         return self._config.get(section, {}).copy()
132 |     
133 |     def __str__(self) -> str:
134 |         """Return a string representation of the configuration."""
135 |         return yaml.dump(self._config, default_flow_style=False)
136 | 
137 | 
# Process-wide Config instance, created lazily by get_config().
_config_instance = None

def get_config(config_path: Optional[str] = None) -> Config:
    """
    Return the shared configuration instance, creating it on first use.

    Args:
        config_path: Optional path to a configuration file. On the first call
            it is handed to the ``Config`` constructor; on later calls a
            non-empty path is merged into the existing instance via ``load``.

    Returns:
        Config instance
    """
    global _config_instance
    if _config_instance is not None:
        if config_path:
            _config_instance.load(config_path)
        return _config_instance

    _config_instance = Config(config_path)
    return _config_instance


--------------------------------------------------------------------------------
/perth/dummy_watermarker.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from typing import Optional
 3 | 
 4 | from .watermarker import WatermarkerBase
 5 | 
 6 | 
 7 | class DummyWatermarker(WatermarkerBase):
 8 |     """
 9 |     A dummy watermarker for testing and demonstration purposes.
10 |     
11 |     This watermarker doesn't actually embed or extract real watermarks,
12 |     but serves as a placeholder implementation for testing the framework.
13 |     """
14 |     
15 |     def apply_watermark(self, wav: np.ndarray, watermark: Optional[np.ndarray] = None, 
16 |                        sample_rate: int = 44100, **kwargs) -> np.ndarray:
17 |         """
18 |         Simulates applying a watermark by simply rounding the audio signal.
19 |         
20 |         Args:
21 |             wav: Input audio signal as numpy array
22 |             watermark: Ignored in this implementation
23 |             sample_rate: Ignored in this implementation
24 |             **kwargs: Additional ignored parameters
25 |             
26 |         Returns:
27 |             The input audio with minimal modification (rounded to 5 decimal places)
28 |         """
29 |         return wav.round(5)
30 | 
31 |     def get_watermark(self, watermarked_wav: np.ndarray, sample_rate: int = 44100,
32 |                      watermark_length: Optional[int] = None, **kwargs) -> np.ndarray:
33 |         """
34 |         Simulates extracting a watermark by returning random data.
35 |         
36 |         Args:
37 |             watermarked_wav: Watermarked audio signal as numpy array
38 |             sample_rate: Ignored in this implementation
39 |             watermark_length: Length of the dummy watermark to generate
40 |             **kwargs: Additional ignored parameters
41 |             
42 |         Returns:
43 |             A random binary watermark of specified length or default 32 bits
44 |         """
45 |         length = watermark_length if watermark_length is not None else 32
46 |         return np.random.randint(0, 2, size=length).astype(np.float32)
47 | 


--------------------------------------------------------------------------------
/perth/perth_net/__init__.py:
--------------------------------------------------------------------------------
# Filesystem path of this package's bundled "pretrained" directory, as resolved
# by pkg_resources.
# NOTE(review): pkg_resources is deprecated by setuptools in favour of
# importlib.resources — consider migrating.
from pkg_resources import resource_filename
PREPACKAGED_MODELS_DIR = resource_filename(__name__, "pretrained")

# NOTE(review): this import sits below the constant, presumably because the
# watermarker module reads PREPACKAGED_MODELS_DIR from this package while it is
# being imported — confirm before reordering.
from .perth_net_implicit.perth_watermarker import PerthImplicitWatermarker
5 | 


--------------------------------------------------------------------------------
/perth/perth_net/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__init__.py


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__pycache__/audio_processor.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/audio_processor.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__pycache__/checkpoint_manager.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/checkpoint_manager.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__pycache__/config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/config.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__pycache__/perth_watermarker.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/perth_watermarker.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/__pycache__/utils.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/audio_processor.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | from torchaudio.transforms import Spectrogram, InverseSpectrogram, TimeStretch
 4 | 
 5 | import numpy as np
 6 | 
 7 | from .config import PerthConfig
 8 | from .utils import normalize, magphase_to_cx, cx_to_magphase
 9 | 
10 | 
class AudioProcessor(nn.Module):
    """
    Module wrapper for audio processing, for easy device management.

    Bundles the forward and inverse spectrogram transforms (plus a time-stretch
    transform) configured from one ``PerthConfig``.
    """

    def __init__(self, hp: PerthConfig):
        super().__init__()
        self.hp = hp

        # Map the configured window name to the torch window factory;
        # an unknown name raises KeyError.
        window_factories = {
            "hamm": torch.hamming_window,
            "hann": torch.hann_window,
            "kaiser": torch.kaiser_window
        }
        self.window_fn = window_factories[hp.window_fn]

        # Settings shared by the forward and the inverse transform.
        stft_kwargs = dict(
            n_fft=hp.n_fft,
            win_length=hp.window_size,
            hop_length=hp.hop_size,
            window_fn=self.window_fn,
            normalized=False,
        )
        # power=None: keep the raw spectrogram instead of a power spectrum.
        self.spectrogram = Spectrogram(power=None, **stft_kwargs)
        self.inv_spectrogram = InverseSpectrogram(**stft_kwargs)
        self.stretch = TimeStretch(
            hop_length=hp.hop_size,
            n_freq=hp.n_fft // 2 + 1,
        )

    def signal_to_magphase(self, signal):
        """
        Convert a waveform (numpy array or tensor) to ``(mag, phase)``.

        Numpy input is copied before being wrapped as a tensor; the signal is
        cast to float32 before the spectrogram is computed.
        """
        if isinstance(signal, np.ndarray):
            signal = torch.from_numpy(signal.copy())
        spec = self.spectrogram(signal.float())
        magnitude, phase = cx_to_magphase(self.hp, spec)
        return magnitude, phase

    def magphase_to_signal(self, mag, phase):
        """Rebuild a waveform from ``mag`` and ``phase`` via the inverse spectrogram."""
        spec = magphase_to_cx(self.hp, mag, phase)
        return self.inv_spectrogram(spec)
54 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/checkpoint_manager.py:
--------------------------------------------------------------------------------
 1 | import secrets
 2 | from pathlib import Path
 3 | 
 4 | import torch.nn
 5 | import yaml
 6 | 
 7 | from .config import PerthConfig
 8 | 
 9 | 
class CheckpointManager:
    """
    Manages the on-disk artefacts of one run under ``models_dir / run_name``:
    ``hparams.yaml``, ``id.txt`` and ``perth_net_<step>.pth.tar`` checkpoints.
    """

    def __init__(self, models_dir, run_name, dataset_hp: PerthConfig=None):
        """
        Open (or create) the run directory and resolve hyper-parameters and run id.

        Args:
            models_dir: Directory that contains one sub-directory per run.
            run_name: Name of the run sub-directory (created if missing).
            dataset_hp: Hyper-parameters of the caller. Required when the run
                has no ``hparams.yaml`` yet (they are then written to disk);
                otherwise, if given, they must equal the stored ones.

        Raises:
            AssertionError: If ``dataset_hp`` disagrees with the stored
                hyper-parameters, or is missing for a new run.
        """
        self.save_path = Path(models_dir) / run_name
        self.save_path.mkdir(exist_ok=True, parents=True)

        self.hparams_file = self.save_path.joinpath("hparams.yaml")
        if self.hparams_file.exists():
            self.hp = self.load_hparams()
            if dataset_hp is not None:
                assert self.hp == dataset_hp, \
                    f"hyper-parameters stored in {self.hparams_file} differ from dataset_hp"
        else:
            assert dataset_hp is not None, \
                f"{self.hparams_file} does not exist, so dataset_hp is required"
            self.hp = dataset_hp
            self.save_hparams()

        # Each run gets a random URL-safe identifier that is generated once
        # and then reused from id.txt.
        self.id_file = self.save_path.joinpath("id.txt")
        if self.id_file.exists():
            self.id = self.id_file.read_text()
        else:
            self.id = secrets.token_urlsafe(16)
            self.id_file.write_text(self.id)

    def load_latest(self, ext=".pth.tar"):
        """
        Load the checkpoint with the highest step number.

        Args:
            ext: File-name suffix that identifies checkpoint files.

        Returns:
            The object stored in the newest checkpoint (loaded onto the CPU),
            or ``None`` when the run directory holds no checkpoint.
        """
        def step_of(path):
            # "perth_net_250000.pth.tar" -> 250000
            return int(path.name.replace(ext, "").split("_")[-1])

        ckpts = sorted(
            (p for p in self.save_path.iterdir() if p.name.endswith(ext)),
            key=step_of,
        )
        if not ckpts:
            return None
        return torch.load(ckpts[-1], map_location="cpu")

    def load_hparams(self):
        """Read ``hparams.yaml`` and return it as a ``PerthConfig``."""
        with self.hparams_file.open("r") as hp_file:
            # NOTE(review): FullLoader is more permissive than safe_load; the
            # file only needs plain scalars, so consider yaml.safe_load if
            # hparams files can come from untrusted sources.
            return PerthConfig(**yaml.load(hp_file, Loader=yaml.FullLoader))

    def save_hparams(self):
        """Write the current hyper-parameters to ``hparams.yaml``."""
        with self.hparams_file.open("w") as hparams_file:
            hparams_file.write(yaml.dump(self.hp._asdict()))

    def save_model(self, model, step):
        """
        Save a checkpoint as ``perth_net_<step>.pth.tar`` in the run directory.

        Args:
            model: An ``nn.Module`` (its ``state_dict()`` is stored) or an
                object that is stored as-is.
            step: Training step; zero-padded to six digits in the file name.

        Raises:
            KeyboardInterrupt: Re-raised after the partially written
                checkpoint has been removed.
        """
        state = {
            "model": model.state_dict() if isinstance(model, torch.nn.Module) else model,
            "step": step,
        }
        basename = f"perth_net_{step:06d}"
        checkpoint_fpath = Path(self.save_path, f"{basename}.pth.tar")
        try:
            torch.save(state, checkpoint_fpath)
        except KeyboardInterrupt:
            # Don't leave a truncated checkpoint behind, but still honour the
            # user's interrupt instead of swallowing it.
            if checkpoint_fpath.exists():
                checkpoint_fpath.unlink()
            raise
58 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/config.py:
--------------------------------------------------------------------------------
 1 | from typing import NamedTuple
 2 | 
 3 | 
class PerthConfig(NamedTuple):
    """
    Hyper-parameters of a Perth-Net run.

    This is a NamedTuple, so field order is part of its interface.
    """
    # NOTE(review): presumably toggles Weights & Biases logging during training — confirm.
    use_wandb: bool
    # NOTE(review): presumably the training batch size — confirm.
    batch_size: int
    # Sample rate; compute_subband_freq() uses sample_rate / 2 as the top frequency.
    sample_rate: int
    # FFT size passed to the (inverse) spectrogram; n_fft // 2 + 1 frequency bins.
    n_fft: int
    # Hop length passed to the (inverse) spectrogram and the time-stretch transform.
    hop_size: int
    # Window length (win_length) passed to the (inverse) spectrogram.
    window_size: int
    # NOTE(review): training-only learning-rate scheduler switch, presumably — confirm.
    use_lr_scheduler: bool
    # NOTE(review): looks like a floor for STFT magnitudes; used by helpers not shown here — confirm.
    stft_magnitude_min: float
    # NOTE(review): presumably learning-rate bounds for training — confirm.
    min_lr: float
    max_lr: float
    # Window name understood by AudioProcessor: "hamm", "hann" or "kaiser".
    window_fn: str
    # Upper frequency of the watermarked sub-band (same unit as sample_rate);
    # see compute_subband_freq().
    max_wmark_freq: float
    # NOTE(review): presumably the hidden channel count of the encoder/decoder conv stacks — confirm.
    hidden_size: int
    # "simple" or "psychoacoustic"
    loss_type: str
20 | 
21 | 
# Default hyper-parameter set.
# With these values compute_subband_freq() yields round(1025 * 2000 / 16000) = 128 bins.
default_hp = PerthConfig(
    use_wandb=True,
    batch_size=16,
    sample_rate=32000,
    n_fft=2048,
    hop_size=320,
    window_size=2048,
    use_lr_scheduler=False,
    stft_magnitude_min=1e-9,
    min_lr=1e-5,
    max_lr=1e-4,
    window_fn="hann",
    max_wmark_freq=2000,
    hidden_size=256,
    # Alternative loss: loss_type="simple",
    loss_type="psychoacoustic",
)
39 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/__init__.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | from ..config import PerthConfig
 3 | 
 4 | 
 5 | class Conv(nn.Module):
 6 | 
 7 |     def __init__(self, i, o, k, p='auto', s=1, act=True):
 8 |         super().__init__()
 9 |         assert k % 2 == 1
10 |         if p == 'auto':
11 |             assert s == 1
12 |             p = (k - 1) // 2
13 |         self.conv = nn.Conv1d(i, o, k, padding=p, stride=s)
14 |         self.act = act
15 |         if act:
16 |             self.act = nn.LeakyReLU()
17 | 
18 |     def forward(self, x):
19 |         x = self.conv(x)
20 |         if self.act:
21 |             x = self.act(x)
22 |         return x
23 | 
24 | 
def compute_subband_freq(config: PerthConfig):
    """
    Return how many of the lowest STFT frequency bins span ``max_wmark_freq``.

    The spectrogram has ``n_fft // 2 + 1`` bins up to ``sample_rate / 2``;
    the result is that bin count scaled by ``max_wmark_freq / (sample_rate / 2)``
    and rounded to the nearest integer.
    """
    nyquist = config.sample_rate / 2
    n_bins = config.n_fft // 2 + 1
    return int(round(n_bins * config.max_wmark_freq / nyquist))
30 | 
31 | 
def magmask(magspec, p=0.05):
    """
    Mark the frames whose summed magnitude exceeds a fraction of the loudest frame.

    Args:
        magspec: Magnitude spectrogram; dim 1 is summed away and dim 2 of the
            input becomes dim 1 of the result, i.e. (B, F, T) -> (B, T).
        p: Fraction of the per-item peak frame sum used as threshold.

    Returns:
        Float tensor of shape (B, T) holding 1.0 for frames strictly above the
        threshold and 0.0 elsewhere.
    """
    frame_sum = magspec.sum(dim=1)              # (B, T)
    peak = frame_sum.max(dim=1).values          # (B,)
    cutoff = (peak * p).unsqueeze(1)            # (B, 1)
    return frame_sum.gt(cutoff).float()         # (B, T)
36 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/model/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/__pycache__/decoder.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/model/__pycache__/decoder.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/__pycache__/encoder.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/model/__pycache__/encoder.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/__pycache__/perth_net.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/perth_net_implicit/model/__pycache__/perth_net.cpython-311.pyc


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/decoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | from . import Conv
 6 | from .encoder import magmask
 7 | 
 8 | 
 9 | def _layers(subband, hidden):
10 |     return nn.Sequential(
11 |         Conv(subband, hidden, 1),
12 |         *[Conv(hidden, hidden, k=7) for _ in range(5)],
13 |         Conv(hidden, 2, k=1, act=False),
14 |     )
15 | 
16 | 
17 | def _masked_mean(x, m):
18 |     return (x * m).sum(dim=2) / m.sum(dim=2) # (B, C)
19 | 
20 | 
21 | def _lerp(x, s):
22 |     return F.interpolate(x, size=s, mode='linear', align_corners=True)
23 | 
24 | 
25 | def _nerp(x, s):
26 |     return F.interpolate(x, size=s, mode='nearest')
27 | 
28 | 
class Decoder(nn.Module):
    """
    Decodes a watermark from a magnitude spectrogram.

    Three independent conv stacks look at the sub-band at three time scales
    (stretched to 1.25x, unchanged, and compressed to 0.75x of the frame
    count); their per-item scores are blended with softmax attention.
    """

    def __init__(self, hidden, subband):
        super().__init__()
        self.subband = subband
        # multi-scale decoder
        self.slow_layers = _layers(subband, hidden)
        self.normal_layers = _layers(subband, hidden)
        self.fast_layers = _layers(subband, hidden)

    @staticmethod
    def _decode_branch(layers, feats, mask):
        """Run one conv stack and reduce its two output channels over valid frames."""
        out = layers(feats)                               # (B, 2, T_branch)
        branch_mask = _nerp(mask, out.size(2))            # (B, 1, T_branch)
        wmark = _masked_mean(out[:, 1:], branch_mask)     # (B, 1)
        attn = _masked_mean(out[:, :1], branch_mask)      # (B, 1)
        return attn, wmark

    def forward(self, magspec):
        # frame mask is computed on detached input
        mask = magmask(magspec.detach())[:, None]         # (B, 1, T)
        subband = magspec[:, :self.subband]
        _, _, T = subband.shape

        # slow branch: sub-band stretched to 125% of the frames
        slow_attn, slow_wmark = self._decode_branch(
            self.slow_layers, _lerp(subband, int(T * 1.25)), mask)

        # normal branch: sub-band as-is
        normal_attn, normal_wmark = self._decode_branch(
            self.normal_layers, subband, mask)

        # fast branch: sub-band compressed to 75% of the frames
        fast_attn, fast_wmark = self._decode_branch(
            self.fast_layers, _lerp(subband, int(T * 0.75)), mask)

        # combine branches with attention
        attn = F.softmax(
            torch.cat([slow_attn, normal_attn, fast_attn], dim=1), dim=1)   # (B, 3)
        wmarks = torch.cat([slow_wmark, normal_wmark, fast_wmark], dim=1)   # (B, 3)

        # single float for each batch item indicating confidence of watermark
        return (wmarks * attn).sum(dim=1)                                   # (B,)
81 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from . import magmask
 5 | from . import Conv
 6 | 
 7 | 
 8 | class Encoder(nn.Module):
 9 |     """
10 |     Inserts a watermark into a magnitude spectrogram.
11 |     """
12 | 
13 |     def __init__(self, hidden, subband):
14 |         super().__init__()
15 |         self.subband = subband
16 |         # residual encoder
17 |         self.layers = nn.Sequential(
18 |             Conv(self.subband, hidden, k=1),
19 |             *[Conv(hidden, hidden, k=7) for _ in range(5)],
20 |             Conv(hidden, self.subband, k=1, act=False),
21 |         )
22 | 
23 |     def forward(self, magspec):
24 |         magspec = magspec.clone()
25 | 
26 |         # create mask for valid watermark locations
27 |         mask = magmask(magspec)[:, None]
28 | 
29 |         # crop required region of spectrogram
30 |         sub_mag = magspec[:, :self.subband]
31 | 
32 |         # encode watermark as spectrogram residual
33 |         res = self.layers(sub_mag) * mask
34 | 
35 |         # add residual
36 |         magspec[:, :self.subband] += res
37 | 
38 |         # return wmarked signal and mask
39 |         return magspec, mask
40 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/model/perth_net.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | # import torchaudio
  3 | import torch.nn.functional as F
  4 | import numpy as np
  5 | from torch import nn
  6 | 
  7 | from ..audio_processor import AudioProcessor
  8 | from ..checkpoint_manager import CheckpointManager
  9 | from ..config import PerthConfig
 10 | from . import compute_subband_freq
 11 | from .encoder import Encoder
 12 | from .decoder import Decoder
 13 | # from ..utils import magphase_to_cx, cx_to_magphase
 14 | 
 15 | 
 16 | def lerp(x, size=None, scale=None):
 17 |     return F.interpolate(x, size=size, scale_factor=scale, mode='linear', align_corners=True, recompute_scale_factor=False)
 18 | 
 19 | 
 20 | def random_stretch(x):
 21 |     assert x.ndim >= 3
 22 |     r = 0.9 + 0.2 * torch.rand(1).item()
 23 |     return lerp(x, scale=r)
 24 | 
 25 | 
 26 | def _attack(mag, phase, audio_proc):
 27 |     # gaussian magspec noise
 28 |     if torch.rand(1).item() < 1/8:
 29 |         peak = mag.mean() + 3 * mag.std()
 30 |         r = torch.randn_like(mag) * 0.01 * peak
 31 |         mag = mag + r
 32 | 
 33 |     # TODO: volume?
 34 | 
 35 |     # TODO: time-domain signal noise?
 36 | 
 37 |     # # stretch TODO: numerical instability!
 38 |     # if torch.rand(1).item() < 1/8 and phase is not None:
 39 |     #     scale = 0.9 + 0.2 * torch.rand(1).item()
 40 |     #     spec = magphase_to_cx(self.hp, mag, phase)
 41 |     #     spec = audio_proc.stretch(spec, scale)
 42 |     #     mag, phase_ = cx_to_magphase(self.hp, spec)
 43 |     #     if torch.isnan(mag).any():
 44 |     #         print("WARNING: stretch failed")
 45 |     #         mag = wmarked.clone()
 46 |     #     else:
 47 |     #         phase = phase_
 48 | 
 49 |     # STFT-iSTFT cycle
 50 |     if torch.rand(1).item() < 1/4 and phase is not None:
 51 |         # # phase noise
 52 |         # if torch.rand(1).item() < 1/3:
 53 |         #     phase = phase + torch.randn_like(phase) * 0.01
 54 | 
 55 |         # iSTFT
 56 |         signal = audio_proc.magphase_to_signal(mag, phase)
 57 | 
 58 |         # # random stretch directly on signal as well
 59 |         # if torch.rand(1).item() < 1/3:
 60 |         #     signal = random_stretch(signal[None])[0]
 61 | 
 62 |         # STFT
 63 |         mag, phase = audio_proc.signal_to_magphase(signal)
 64 | 
 65 |     # random offset (NOTE: do this after phase-dependent attacks)
 66 |     if torch.rand(1).item() < 1/8:
 67 |         i = torch.randint(1, 13, (1,)).item()
 68 |         mag = torch.roll(mag, i, dims=2)
 69 | 
 70 |     # random magspec stretch (NOTE: should be near the end of attacks)
 71 |     if torch.rand(1).item() < 1/8:
 72 |         mag = random_stretch(mag)
 73 | 
 74 |     # random time masking
 75 |     # torchaudio.functional.mask_along_axis(mag, mask_param=, mask_value=mag.min().detach(), axis=2, p=0.05)
 76 | 
 77 |     return mag
 78 | 
 79 | class PerthNet(nn.Module):
 80 |     """
 81 |     PerthNet (PERceptual THreshold) watermarking model.
 82 |     Inserts and detects watermarks from a magnitude spectrogram.
 83 |     """
 84 | 
 85 |     def __init__(self, hp: PerthConfig):
 86 |         super().__init__()
 87 |         self.hp = hp
 88 |         self.subband = compute_subband_freq(hp)
 89 |         self.encoder = Encoder(hp.hidden_size, self.subband)
 90 |         self.decoder = Decoder(hp.hidden_size, self.subband)
 91 |         self.ap = AudioProcessor(hp)
 92 | 
 93 |     @property
 94 |     def device(self):
 95 |         return next(self.parameters()).device
 96 | 
 97 |     def forward(self, magspec, attack=False, phase=None):
 98 |         "Run watermarker and decoder (training)"
 99 | 
100 |         # encode watermark
101 |         wmarked, mask = self.encoder(magspec)
102 | 
103 |         # decode from un-watermarked mag
104 |         dec_input = magspec
105 |         if attack:
106 |             dec_input = _attack(dec_input, phase, self.ap)
107 |         no_wmark_pred = self.decoder(dec_input)
108 | 
109 |         # decode from watermarked mag
110 |         dec_input = wmarked
111 |         if attack:
112 |             dec_input = _attack(dec_input, phase, self.ap)
113 |         wmark_pred = self.decoder(dec_input)
114 | 
115 |         return wmarked, no_wmark_pred, wmark_pred, mask
116 | 
117 |     @staticmethod
118 |     def from_cm(cm):
119 |         perth_net = PerthNet(cm.hp)
120 |         ckpt = cm.load_latest()
121 |         assert ckpt is not None, "No checkpoint found"
122 |         perth_net.load_state_dict(ckpt["model"])
123 |         print(f"loaded PerthNet (Implicit) at step {ckpt['step']:,}")
124 |         return perth_net
125 | 
126 |     @staticmethod
127 |     def load(run_name, models_dir="saved_models"):
128 |         cm = CheckpointManager(models_dir, run_name)
129 |         return PerthNet.from_cm(cm)
130 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/perth_watermarker.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from librosa import resample
 4 | 
 5 | from .model.perth_net import PerthNet
 6 | from .. import PREPACKAGED_MODELS_DIR
 7 | from perth.watermarker import WatermarkerBase
 8 | 
 9 | 
10 | def _to_tensor(x, device):
11 |     if isinstance(x, np.ndarray):
12 |         x = torch.from_numpy(x.copy())
13 |     return x.to(dtype=torch.float, device=device)
14 | 
15 | 
class PerthImplicitWatermarker(WatermarkerBase):
    """
    Watermarker backed by a `PerthNet` model.

    Audio is resampled to the model's sample rate when needed, converted to
    magnitude/phase, passed through the encoder (to watermark) or the decoder
    (to detect), and converted back to a NumPy array.
    """

    def __init__(self, run_name:str="implicit", models_dir=PREPACKAGED_MODELS_DIR,
                 device="cpu", perth_net=None):
        """
        :param run_name: name of the run to load with `PerthNet.load`; must be
            None when `perth_net` is supplied.
        :param models_dir: directory searched for the run.
        :param device: device the model is moved to.
        :param perth_net: an already constructed model to use instead of
            loading one.
        """
        assert (run_name is None) or (perth_net is None)
        if perth_net is None:
            self.perth_net = PerthNet.load(run_name, models_dir).to(device)
        else:
            self.perth_net = perth_net.to(device)

    def apply_watermark(self, signal, sample_rate, **_):
        """
        Return a watermarked copy of `signal`.

        :param signal: audio samples (NumPy array or tensor).
        :param sample_rate: sample rate of `signal`; the result is resampled
            back to this rate when it differs from the model's rate.
        :param _: extra keyword arguments are accepted and ignored.
        :return: watermarked audio as a NumPy array.
        """
        model_sr = self.perth_net.hp.sample_rate
        change_rate = sample_rate != model_sr
        if change_rate:
            signal = resample(signal, orig_sr=sample_rate, target_sr=model_sr)

        # Inference only: skip autograd bookkeeping. The result was already
        # detached before being returned, so callers see no difference.
        with torch.no_grad():
            # split signal into magnitude and phase
            signal = _to_tensor(signal, self.perth_net.device)
            magspec, phase = self.perth_net.ap.signal_to_magphase(signal)

            # encode the watermark (the encoder expects a leading batch axis)
            magspec = magspec[None].to(self.perth_net.device)
            wm_magspec, _mask = self.perth_net.encoder(magspec)
            wm_magspec = wm_magspec[0]

            # assemble back into watermarked signal
            wm_signal = self.perth_net.ap.magphase_to_signal(wm_magspec, phase)

        wm_signal = wm_signal.detach().cpu().numpy()
        if change_rate:
            return resample(wm_signal, orig_sr=model_sr, target_sr=sample_rate)
        return wm_signal

    def get_watermark(self, wm_signal, sample_rate, round=True, **_):
        """
        Run the decoder on `wm_signal` and return its prediction.

        :param wm_signal: audio samples (NumPy array or tensor).
        :param sample_rate: sample rate of `wm_signal`.
        :param round: when true the clipped prediction is rounded.
        :param _: extra keyword arguments are accepted and ignored.
        :return: decoder output clipped to [0, 1], as a NumPy array.
        """
        model_sr = self.perth_net.hp.sample_rate
        if sample_rate != model_sr:
            wm_signal = resample(wm_signal, orig_sr=sample_rate, target_sr=model_sr,
                                 res_type="polyphase")

        # Inference only: no gradients are needed for detection.
        with torch.no_grad():
            wm_signal = _to_tensor(wm_signal, self.perth_net.device)
            wm_magspec, _phase = self.perth_net.ap.signal_to_magphase(wm_signal)
            wm_magspec = wm_magspec.to(self.perth_net.device)
            wmark_pred = self.perth_net.decoder(wm_magspec[None])[0]
            wmark_pred = wmark_pred.clip(0., 1.)
            wmark_pred = wmark_pred.round() if round else wmark_pred

        return wmark_pred.detach().cpu().numpy()
57 | 


--------------------------------------------------------------------------------
/perth/perth_net/perth_net_implicit/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Padding Helpers obtained from:
  3 | https://github.com/rwightman/pytorch-image-models/blob
  4 | /01a0e25a67305b94ea767083f4113ff002e4435c/timm/models/layers/padding.py#L12
  5 | 
  6 | This to maintain padding="same" compatibility with Tensorflow architecture.
  7 | """
  8 | 
  9 | import math
 10 | from typing import List, Tuple
 11 | import torch
 12 | import torch.nn.functional as F
 13 | 
 14 | from scipy.signal import butter
 15 | from scipy.signal import filtfilt
 16 | from math import pi, sin, cos, sqrt
 17 | from cmath import exp
 18 | import numpy as np
 19 | import sys
 20 | 
 21 | from .config import default_hp, PerthConfig
 22 | 
 23 | 
 24 | def stream(message):
 25 |     sys.stdout.write(f"\r{message}")
 26 | 
 27 | 
 28 | # Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution
 29 | def get_same_padding_transposed(x: int, k: int, s: int, d: int):
 30 |     return max((x-1) * (s-1) + (k - 1) * d, 0)
 31 | 
 32 | def get_same_padding(x: int, k: int, s: int, d: int):
 33 |     return max((math.ceil(x/s) - 1) * s + (k - 1) * d + 1 - x, 0)
 34 | 
 35 | # Dynamically pad input x with 'SAME' padding for conv with specified args
 36 | def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0):
 37 |     ih, iw = x.size()[-2:]
 38 |     pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1])
 39 |     if pad_h > 0 or pad_w > 0:
 40 |         x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value)
 41 |     return x
 42 | 
 43 | def pad_same_transposed(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0):
 44 |     ih, iw = x.size()[-2:]
 45 |     # pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding_transposed(iw, k[1], s[1], d[1])
 46 |     pad_h, pad_w = get_same_padding_transposed(ih, k[0], s[0], d[0]), get_same_padding_transposed(iw, k[1], s[1], d[1])
 47 |     if pad_h > 0 or pad_w > 0:
 48 |         x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value)
 49 |     return x
 50 | 
 51 | 
 52 | def normalize(hp, magspec, headroom_db=15):
 53 |     min_level_db = 20 * np.log10(hp.stft_magnitude_min)
 54 |     magspec = (magspec - min_level_db) / (-min_level_db + headroom_db)
 55 |     return magspec
 56 | 
 57 | def denormalize_spectrogram(hp, magspec, headroom_db=15):
 58 |     min_level_db = 20 * np.log10(hp.stft_magnitude_min)
 59 |     return magspec * (-min_level_db + headroom_db) + min_level_db
 60 | 
 61 | def magphase_to_cx(hp, magspec, phases):
 62 |     magspec = denormalize_spectrogram(hp, magspec)
 63 |     magspec = 10. ** ((magspec / 20).clip(max=10))
 64 |     phases = torch.exp(1.j * phases)
 65 |     spectrum = magspec * phases
 66 |     return spectrum
 67 | 
 68 | def cx_to_magphase(hp, spec):
 69 |     phase = torch.angle(spec)
 70 |     mag = spec.abs() # (nfreq, T)
 71 |     mag = 20 * torch.log10(mag.clip(hp.stft_magnitude_min))
 72 |     mag = normalize(hp, mag)
 73 |     return mag, phase
 74 | 
 75 | 
 76 | ## Imported from Repo
 77 | 
 78 | def butter_lowpass(cutoff, sr=16000, order=5):
 79 |     nyq = 0.5 * sr
 80 |     normal_cutoff = cutoff / nyq
 81 |     b, a = butter(order, normal_cutoff, btype='low', analog=False)
 82 |     return b, a
 83 | 
 84 | 
 85 | def butter_lowpass_filter(data, cutoff=4000, sr=16000, order=16):
 86 |     b, a = butter_lowpass(cutoff, sr, order=order)
 87 |     return filtfilt(b, a, data)
 88 | 
 89 | 
 90 | def bwsk(k, n):
 91 |     # Returns k-th pole s_k of Butterworth transfer
 92 |     # function in S-domain. Note that omega_c
 93 |     # is not taken into account here
 94 |     arg = pi * (2 * k + n - 1) / (2 * n)
 95 |     return complex(cos(arg), sin(arg))
 96 | 
 97 | 
 98 | def bwj(k, n):
 99 |     # Returns (s - s_k) * H(s), where
100 |     # H(s) - BW transfer function
101 |     # s_k  - k-th pole of H(s)
102 |     res = complex(1, 0)
103 |     for m in range(1, n + 1):
104 |         if (m == k):
105 |             continue
106 |         else:
107 |             res /= (bwsk(k, n) - bwsk(m, n))
108 |     return res
109 | 
110 | 
def bwh(n=16, fc=400, fs=16e3, length=25):
    """
    Return h(t), the Butterworth transfer function in the t-domain, sampled
    at `fs`.

    :param n: filter order (number of poles summed over).
    :param fc: cutoff frequency; enters through `omegaC = 2 * pi * fc`.
    :param fs: sampling frequency.
    :param length: duration of the response in ms.
    :return: list of `int(fs * length / 1000)` real-valued samples.
    """
    omegaC = 2 * pi * fc
    dt = 1 / fs
    number_of_samples = int(fs * length / 1000)

    # Poles and their residues depend only on n, so compute them once instead
    # of once per output sample.
    orders = range(1, n + 1)
    poles = [bwsk(k, n) for k in orders]
    residues = [bwj(k, n) for k in orders]

    result = []
    for x in range(number_of_samples):
        res = complex(0, 0)
        for pole, residue in zip(poles, residues):
            res += (exp(omegaC * x * dt / sqrt(2) * pole) * residue)
        result.append(res.real)
    return result
125 | 
126 | 
def snr(input_signal, output_signal):
    """
    Signal-to-noise ratio in dB between a reference and a processed signal.

    :param input_signal: reference signal; its energy is the signal power.
    :param output_signal: processed signal; the energy of the difference
        from the reference is the noise power.
    :return: `10 * log10(Ps / Pn)`.
    """
    signal_energy = np.sum(np.abs(input_signal ** 2))
    residual = input_signal - output_signal
    noise_energy = np.sum(np.abs(residual ** 2))
    ratio = signal_energy / noise_energy
    return 10 * np.log10(ratio)
131 | 
def parse_hparam_overrides(args):
    """
    Replace `args.hp` (a "name:value,name:value" string or None) with a
    `PerthConfig` built from `default_hp` plus the requested overrides.

    Each value is converted to the type of the matching default. Booleans
    are False only for the literal strings "False" and "false"; any other
    text is True.

    :param args: object with an `hp` attribute; it is modified in place.
    :return: the same `args` object.
    :raises ValueError: if an override item has no ':' separator.
    :raises SystemExit: with status 1 if an override names a parameter that
        is not a configuration field.
    """
    hp_instance = default_hp._asdict()
    if args.hp is not None:
        for override_item in args.hp.split(","):
            # Split on the first ':' only, so a value may itself contain ':'.
            param, separator, value = override_item.partition(":")
            if not separator:
                raise ValueError(
                    f"Invalid HParam Override: {override_item!r}. Expected the form name:value"
                )
            param = param.strip()
            # Only real configuration fields may be overridden; checking the
            # field dict rejects tuple methods/attributes such as "count".
            if param not in hp_instance:
                print(f"Invalid HParam Override: {param}. No matching parameter exists")
                # non-zero status so calling scripts can detect the failure
                sys.exit(1)
            to_param_type = type(hp_instance[param])
            if to_param_type == bool:
                value = value not in ("False", "false")
            else:
                value = to_param_type(value)
            hp_instance[param] = value
    args.hp = PerthConfig(**hp_instance)
    return args


--------------------------------------------------------------------------------
/perth/perth_net/pretrained/implicit/hparams.yaml:
--------------------------------------------------------------------------------
 1 | batch_size: 16
 2 | hidden_size: 256
 3 | hop_size: 320
 4 | loss_type: psychoacoustic
 5 | max_lr: 0.0001
 6 | max_wmark_freq: 2000
 7 | min_lr: 1.0e-05
 8 | n_fft: 2048
 9 | sample_rate: 32000
10 | stft_magnitude_min: 1.0e-09
11 | use_lr_scheduler: false
12 | use_wandb: true
13 | window_fn: hann
14 | window_size: 2048
15 | 


--------------------------------------------------------------------------------
/perth/perth_net/pretrained/implicit/id.txt:
--------------------------------------------------------------------------------
1 | Y-GfemTlfVYp3fNWkt2zgQ


--------------------------------------------------------------------------------
/perth/perth_net/pretrained/implicit/perth_net_250000.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resemble-ai/Perth/d6cc38cc9ab9da71c6b64d8c741bc13049eb881e/perth/perth_net/pretrained/implicit/perth_net_250000.pth.tar


--------------------------------------------------------------------------------
/perth/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Utility functions for audio processing and watermarking.
  3 | """
  4 | import os
  5 | import numpy as np
  6 | import librosa
  7 | import soundfile as sf
  8 | import matplotlib.pyplot as plt
  9 | from typing import Tuple, Optional, Dict, Any
 10 | from math import sqrt
 11 | from scipy.stats import mode
 12 | 
 13 | 
 14 | def _signal_to_frames(data, window_length, pad=True):
 15 |     n_samples = data.shape[-1]
 16 |     frames = []
 17 |     for idx in range(0, n_samples, window_length):
 18 |         chunk = data[idx:idx + window_length]
 19 |         if pad and chunk.shape[-1] < window_length:
 20 |             chunk = np.append(chunk, np.zeros((window_length - chunk.shape[-1])))
 21 |         frames.append(chunk)
 22 |     return frames
 23 | 
 24 | 
 25 | def _frames_to_signal(frames):
 26 |     return np.hstack(frames)
 27 | 
 28 | 
 29 | def audio_to_raw(wav, bit_depth=16):
 30 |     assert wav.dtype.kind == "f", "This function takes floating point arrays"
 31 |     unsigned_bit_depth = bit_depth - 1
 32 |     range_min, range_max = -2 ** (unsigned_bit_depth), 2 ** (unsigned_bit_depth) - 1
 33 |     return (wav * range_max).clip(range_min, range_max).astype(np.int16)
 34 | 
 35 | 
 36 | def raw_pcm16_tofloat(wav, bit_depth=16):
 37 |     unsigned_bit_depth = bit_depth - 1
 38 |     range_min, range_max = -2 ** (unsigned_bit_depth), 2 ** unsigned_bit_depth - 1
 39 |     a, b = -1., 1.
 40 |     return (a + ((wav - range_min) * (b - a)) / (range_max - range_min)).clip(-1, 1).astype(np.float32)
 41 | 
 42 | 
 43 | def formatted_watermark(watermark_list, length, wrap=True):
 44 |     assert len(watermark_list) > 0
 45 |     watermark = np.array(watermark_list)
 46 |      # Discard extra frames that don't contain the entire watermark.
 47 |     # ToDo: Implement Synchronization bits support and return watermark after correlating with synch bits.
 48 |     if len(watermark_list) % length:
 49 |         watermark = watermark[:-(len(watermark_list) % length)]
 50 |     if wrap and len(watermark) > length:
 51 |         watermark = np.array(np.split(watermark, len(watermark) // length)).T
 52 |         watermark = flatten_watermark(watermark)
 53 |     return watermark[:length]
 54 | 
 55 | 
 56 | def flatten_watermark(watermark_vector):
 57 |     return mode(watermark_vector, axis=1).mode.squeeze(-1)
 58 | 
 59 | 
 60 | def modified_binets_fibonnaci(n: int, k: float = 2.5) -> int:
 61 |     # This is a modified fibonnaci generator that can generate exponentially spaced sequences like the standard
 62 |     # fibonacci series. This function returns the standard fibonnaci sequence using Golden Ratio applying Binet's
 63 |     # Formula when k==2 (alpha -> 1.618)
 64 |     # The Watermark Accuracy (BER) is slightly more robust for k == 2.5, where alpha -> 1.3
 65 | 
 66 |     if n <= 0: return 0
 67 |     alpha = (1 + sqrt(5)) / k
 68 |     beta = (1 - sqrt(5)) / k
 69 |     return int(((alpha ** n) - (beta ** n)) / sqrt(5))
 70 | 
 71 | 
 72 | def generate_dummy_watermark(length: int):
 73 |     watermark = np.random.random((length,))
 74 |     return np.where(watermark > watermark.mean(), 1, 0)
 75 | 
 76 | 
 77 | def watermark_str_to_numpy(watermark: str) -> np.ndarray:
 78 |     return np.array([int(char) for char in watermark])
 79 | 
 80 | 
 81 | def watermark_numpy_to_str(watermark: np.ndarray) -> str:
 82 |     return ''.join(str(char) for char in watermark)
 83 | 
 84 | 
 85 | def validate_string_watermark(watermark: str) -> bool:
 86 |     return any([char not in ("1", "0") for char in watermark])
 87 | 
 88 | 
 89 | def load_audio(audio_path: str, sr: Optional[int] = None) -> Tuple[np.ndarray, int]:
 90 |     """
 91 |     Load an audio file using librosa.
 92 |     
 93 |     Args:
 94 |         audio_path: Path to the audio file
 95 |         sr: Target sample rate. If None, the native sample rate is used.
 96 |         
 97 |     Returns:
 98 |         Tuple of (audio_data, sample_rate)
 99 |     """
100 |     try:
101 |         audio, sample_rate = librosa.load(audio_path, sr=sr)
102 |         return audio, sample_rate
103 |     except Exception as e:
104 |         raise IOError(f"Could not load audio file {audio_path}: {e}")
105 | 
106 | 
def save_audio(audio_data: np.ndarray, file_path: str, sample_rate: int) -> None:
    """
    Write audio data to disk, creating the target directory if necessary.

    Args:
        audio_data: Samples to write, as a numpy array
        file_path: Destination file path
        sample_rate: Sample rate stored in the file
    """
    target_dir = os.path.dirname(os.path.abspath(file_path))
    os.makedirs(target_dir, exist_ok=True)
    sf.write(file_path, audio_data, sample_rate)
119 | 
120 | 
def plot_audio_comparison(original: np.ndarray, watermarked: np.ndarray, 
                         sample_rate: int, output_path: Optional[str] = None) -> None:
    """
    Plot a comparison between original and watermarked audio.

    Three panels are drawn: both waveforms, their difference, and a
    log-frequency spectrogram of the difference.

    Args:
        original: Original audio data
        watermarked: Watermarked audio data
        sample_rate: Sample rate of the audio
        output_path: Path to save the plot. If None, plot is shown interactively.
    """
    # `librosa.display` is a submodule that `import librosa` does not load on
    # every librosa version; import it explicitly before use.
    import librosa.display

    fig, axs = plt.subplots(3, 1, figsize=(10, 12))

    # Plot waveforms on a shared time axis derived from the original's length
    time = np.arange(len(original)) / sample_rate
    axs[0].plot(time, original, alpha=0.7, label='Original')
    axs[0].plot(time, watermarked, alpha=0.7, label='Watermarked')
    axs[0].set_title('Waveform Comparison')
    axs[0].set_xlabel('Time (s)')
    axs[0].set_ylabel('Amplitude')
    axs[0].legend()

    # Plot difference
    diff = watermarked - original
    axs[1].plot(time, diff)
    axs[1].set_title('Difference (Watermarked - Original)')
    axs[1].set_xlabel('Time (s)')
    axs[1].set_ylabel('Difference')

    # Plot spectrogram of difference, in dB relative to its maximum
    D = librosa.amplitude_to_db(
        np.abs(librosa.stft(diff)), ref=np.max
    )
    librosa.display.specshow(D, x_axis='time', y_axis='log', sr=sample_rate, ax=axs[2])
    axs[2].set_title('Spectrogram of Difference')
    axs[2].set_xlabel('Time (s)')
    axs[2].set_ylabel('Frequency (Hz)')
    fig.colorbar(axs[2].collections[0], ax=axs[2], format='%+2.0f dB')

    plt.tight_layout()
    if output_path:
        plt.savefig(output_path)
        plt.close()
    else:
        plt.show()
166 | 
167 | 
def calculate_audio_metrics(original: np.ndarray, watermarked: np.ndarray) -> Dict[str, float]:
    """
    Compare original and watermarked audio with simple quality metrics.

    Args:
        original: Original audio data
        watermarked: Watermarked audio data

    Returns:
        Dictionary with:
        - snr: Signal-to-Noise Ratio (dB); infinite when the signals match
        - mse: Mean Squared Error
        - psnr: Peak Signal-to-Noise Ratio (dB); infinite when the signals match

    Raises:
        ValueError: If the two inputs differ in length.
    """
    if len(original) != len(watermarked):
        raise ValueError("Original and watermarked audio must have the same length")

    # The mean squared error doubles as the noise power
    error = original - watermarked
    mse = np.mean(error ** 2)
    signal_power = np.mean(original ** 2)
    peak = max(np.max(np.abs(original)), np.max(np.abs(watermarked)))

    if mse > 0:
        snr = 10 * np.log10(signal_power / mse)
        psnr = 20 * np.log10(peak / np.sqrt(mse))
    else:
        snr = float('inf')
        psnr = float('inf')

    return dict(snr=snr, mse=mse, psnr=psnr)
202 | 


--------------------------------------------------------------------------------
/perth/watermarker.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from abc import ABC, abstractmethod
 3 | from typing import Optional, Dict, Any
 4 | 
 5 | 
 6 | class WatermarkingException(Exception):
 7 |     """Exception raised for errors in the watermarking process."""
 8 |     pass
 9 | 
10 | 
class WatermarkerBase(ABC):
    """
    Abstract interface shared by all audio watermarking algorithms.

    Concrete watermarkers must implement `apply_watermark` and
    `get_watermark`; `verify_compatibility` has a permissive default.
    """

    @abstractmethod
    def apply_watermark(self, wav: np.ndarray, watermark: Optional[np.ndarray] = None,
                        sample_rate: int = 44100, **kwargs) -> np.ndarray:
        """
        Embed a watermark in an audio signal.

        Args:
            wav: Audio signal to watermark, as a numpy array
            watermark: Watermark data to embed. When None, an implementation
                may generate a default watermark.
            sample_rate: Sample rate of `wav` in Hz
            **kwargs: Algorithm-specific options

        Returns:
            The watermarked audio signal as a numpy array

        Raises:
            WatermarkingException: If watermarking fails
        """
        raise NotImplementedError

    @abstractmethod
    def get_watermark(self, watermarked_wav: np.ndarray, sample_rate: int = 44100,
                      watermark_length: Optional[int] = None, **kwargs) -> np.ndarray:
        """
        Recover the watermark carried by an audio signal.

        Args:
            watermarked_wav: Watermarked audio signal as a numpy array
            sample_rate: Sample rate of `watermarked_wav` in Hz
            watermark_length: Expected watermark length, if known
            **kwargs: Algorithm-specific options

        Returns:
            The extracted watermark data as a numpy array

        Raises:
            WatermarkingException: If watermark extraction fails
        """
        raise NotImplementedError

    def verify_compatibility(self, wav: np.ndarray, sample_rate: int) -> bool:
        """
        Tell whether this watermarking method can process the given audio.

        The base implementation accepts everything.

        Args:
            wav: Audio signal as a numpy array
            sample_rate: Sample rate of `wav` in Hz

        Returns:
            True if the audio is compatible, False otherwise
        """
        return True
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/perth/waveform.py:
--------------------------------------------------------------------------------
 1 | # Borrowed from Resembletron
 2 | 
 3 | import logging
 4 | import tempfile
 5 | import warnings
 6 | from pathlib import Path
 7 | import librosa
 8 | import librosa.filters
 9 | import numpy as np
10 | import pyrubberband as pyrb
11 | import soundfile as sf
12 | from audioread import NoBackendError
13 | from pydub import AudioSegment
14 | 
15 | 
16 | logger = logging.getLogger(__name__)
17 | 
18 | 
class WatermarkingException(Exception):
    """Raised when an audio file cannot be watermarked."""
21 | 
class CorruptedAudioException(Exception):
    """Raised when an audio file cannot be decoded."""
24 | 
25 | 
def load_wav(fpath, target_sr, res_algo="kaiser_best"):
    """
    Load a PCM audio file, optionally resampling it.

    :param fpath: path of the audio file.
    :param target_sr: expected sample rate after loading and possibly resampling. If None,
    there will be no resampling.
    :param res_algo: algorithm for resampling. If None, there will also be no resampling but if
    the target_sr is valid, the actual sample rate of the audio on disk will be checked against
    and an error will be thrown if they do not match.
    :return: tuple `(wav, actual_sr)`.
    :raises WatermarkingException: if the file's subtype does not start with "PCM".
    :raises CorruptedAudioException: if the file cannot be decoded.
    """
    # Read the subtype inside a context manager so the file handle is closed
    # again instead of being left open until garbage collection.
    with sf.SoundFile(str(fpath)) as audio_file:
        subtype = audio_file.subtype
    if not subtype.startswith("PCM"):
        raise WatermarkingException("Unsupported Audio type for Watermarking. "
                                    "Only 16 or 24-bit PCM/WAV/AIFF audio files can be watermarked.")
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            wav, actual_sr = librosa.core.load(
                str(fpath), sr=(target_sr if res_algo else None), res_type=res_algo
            )
    except (EOFError, NoBackendError) as err:
        raise CorruptedAudioException("Failed to load audio file") from err

    if target_sr is not None:
        assert actual_sr == target_sr, "Loaded audio doesn't have expected sampling rate (%s vs " \
                                       "%s, resampling_algo=%s)" % (actual_sr, target_sr, res_algo)

    return wav, actual_sr
52 | 
def save_wav(wav, file_or_path, sample_rate: int, subtype="PCM_16"):
    """
    Write floating point audio to a WAV file.

    :param wav: a float32 numpy array
    :param file_or_path: destination; `Path` objects are converted to str
    :param sample_rate: sample rate written to the file
    :param subtype: soundfile subtype; for "PCM_16" the samples are scaled
        and clipped to the int16 range before writing
    """
    assert wav.dtype.kind == "f", "This function takes floating point arrays"

    # Float32 -> PCM_16 conversion
    if subtype == "PCM_16":
        int16_min, int16_max = -2 ** 15, 2 ** 15 - 1
        wav = (wav * int16_max).clip(int16_min, int16_max).astype(np.int16)

    if isinstance(file_or_path, Path):
        file_or_path = str(file_or_path)
    sf.write(file_or_path, wav, sample_rate, subtype=subtype, format="wav")
66 | 
67 | 
def pitch_shift(wav, sample_rate, semitones):
    """Shift the pitch of `wav` by `semitones` using pyrubberband."""
    shifted = pyrb.pitch_shift(wav, sample_rate, semitones)
    return shifted
70 | 
71 | 
def convert_to_mp3(wav_path, sample_rate=22050):
    """
    Encode a WAV file as a 48k-bitrate MP3 held in a temporary file.

    :param wav_path: path of the WAV file to convert.
    :param sample_rate: frame rate the audio is set to before encoding.
    :return: the `tempfile.SpooledTemporaryFile` that received the MP3 data.
    """
    resampled = AudioSegment.from_wav(wav_path).set_frame_rate(sample_rate)
    mp3_buffer = tempfile.SpooledTemporaryFile(suffix=".mp3")
    resampled.export(mp3_buffer, bitrate="48k", format="mp3")
    return mp3_buffer


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | bitstring
 2 | matplotlib
 3 | librosa>=0.7.2
 4 | numpy>=1.23.4
 5 | pandas>=1.1.5
 6 | Pillow>=9.0.1
 7 | praat-parselmouth
 8 | pydub
 9 | pyloudnorm>=0.1.0
10 | pyrubberband
11 | PyWavelets>=1.1.1
12 | scikit-learn>=0.22
13 | SoundFile>=0.10.3.post1
14 | sox>=1.4.0
15 | tabulate>=0.8.9
16 | tqdm>=4.61.2
17 | tensorboard>=2.10.1
18 | pyYaml>=5.4.1
19 | pydub
20 | torch==2.1.1
21 | torchaudio==2.1.1


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import setuptools
 3 | from setuptools import find_packages
 4 | 
 5 | 
# The README is used verbatim as the long description of the package.
with open("README.md", "r", encoding="utf-8") as help_file:
    long_description = help_file.read()

# Install requirements are read from requirements.txt when it exists; one
# list entry per line of the file. Without the file the list stays empty.
requirements = []
if os.path.exists("requirements.txt"):
    with open("requirements.txt", "r") as f:
        requirements = f.read().splitlines()

# Model and pretrained data files that should be included in the package
bundled_data = [
    "perth_net/pretrained/*/*.*",  # Perth models
]

# Package metadata, dependencies and the `perth` console-script entry point.
setuptools.setup(
    name="resemble-perth",
    version="1.0.1",
    author="Resemble AI, Aditya",
    author_email="team@resemble.ai, aditya@resemble.ai",
    description="Audio Watermarking and Detection Library",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/resemble-ai/Perth",
    keywords=["Audio Watermarking", "Perceptual Watermarking", "Neural Networks", "Audio Processing"],
    project_urls={
        'Bug Reports': 'https://github.com/resemble-ai/Perth/issues',
        'Source': 'https://github.com/resemble-ai/Perth',
        'Documentation': 'https://github.com/resemble-ai/Perth/blob/main/README.md',
    },
    packages=find_packages(),
    # the glob patterns in bundled_data are attached to the "perth" package
    package_data={"perth": bundled_data},
    include_package_data=True,
    install_requires=requirements,
    python_requires=">=3.8",
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Multimedia :: Sound/Audio",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    entry_points={
        # installs a `perth` command that calls main() in perth.cli.watermark_cli
        'console_scripts': [
            'perth=perth.cli.watermark_cli:main',
        ],
    },
)
56 | 


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | # This file sits inside tests/, which makes that directory pytest's rootdir;
2 | # testpaths entries are resolved relative to it, so "." means tests/ itself.
3 | [pytest]
4 | testpaths = .
5 | python_files = test_*.py
6 | python_classes = Test*
7 | python_functions = test_*
8 | addopts = -v


--------------------------------------------------------------------------------
/tests/test_basic.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import numpy as np
 3 | import os
 4 | import tempfile
 5 | 
 6 | from perth import DummyWatermarker
 7 | from perth.utils import calculate_audio_metrics
 8 | 
 9 | 
10 | class TestDummyWatermarker(unittest.TestCase):
11 |     """Test the DummyWatermarker implementation."""
12 |     
13 |     def setUp(self):
14 |         """Set up test fixtures."""
15 |         self.watermarker = DummyWatermarker()
16 |         # Create a simple sine wave as test audio
17 |         self.sample_rate = 44100
18 |         t = np.linspace(0, 1, self.sample_rate)
19 |         self.test_audio = np.sin(2 * np.pi * 440 * t).astype(np.float32)
20 |     
21 |     def test_apply_watermark(self):
22 |         """Test that apply_watermark returns an array of the correct shape."""
23 |         watermarked = self.watermarker.apply_watermark(self.test_audio, sample_rate=self.sample_rate)
24 |         self.assertEqual(watermarked.shape, self.test_audio.shape)
25 |     
26 |     def test_get_watermark(self):
27 |         """Test that get_watermark returns a watermark."""
28 |         watermarked = self.watermarker.apply_watermark(self.test_audio, sample_rate=self.sample_rate)
29 |         watermark = self.watermarker.get_watermark(watermarked, sample_rate=self.sample_rate)
30 |         self.assertIsInstance(watermark, np.ndarray)
31 |         self.assertEqual(len(watermark), 32)  # Default length for dummy watermarker
32 |     
33 |     def test_custom_watermark_length(self):
34 |         """Test that get_watermark respects custom watermark length."""
35 |         watermarked = self.watermarker.apply_watermark(self.test_audio, sample_rate=self.sample_rate)
36 |         custom_length = 64
37 |         watermark = self.watermarker.get_watermark(
38 |             watermarked, sample_rate=self.sample_rate, watermark_length=custom_length
39 |         )
40 |         self.assertEqual(len(watermark), custom_length)
41 | 
42 | 
43 | class TestAudioMetrics(unittest.TestCase):
44 |     """Test the audio metrics calculation utilities."""
45 |     
46 |     def setUp(self):
47 |         """Set up test fixtures."""
48 |         # Create a simple sine wave as test audio
49 |         self.sample_rate = 44100
50 |         t = np.linspace(0, 1, self.sample_rate)
51 |         self.original = np.sin(2 * np.pi * 440 * t).astype(np.float32)
52 |         
53 |         # Create a slightly modified version with some noise
54 |         noise = np.random.normal(0, 0.01, len(self.original))
55 |         self.modified = self.original + noise
56 |     
57 |     def test_calculate_metrics(self):
58 |         """Test that audio metrics calculation works correctly."""
59 |         metrics = calculate_audio_metrics(self.original, self.modified)
60 |         
61 |         # Check that metrics are returned and have reasonable values
62 |         self.assertIn('snr', metrics)
63 |         self.assertIn('mse', metrics)
64 |         self.assertIn('psnr', metrics)
65 |         
66 |         # SNR should be positive for this test case
67 |         self.assertGreater(metrics['snr'], 0)
68 |         
69 |         # MSE should be non-zero but small
70 |         self.assertGreater(metrics['mse'], 0)
71 |         self.assertLess(metrics['mse'], 0.1)
72 |         
73 |         # PSNR should be positive and reasonably high
74 |         self.assertGreater(metrics['psnr'], 0)
75 | 
76 | 
77 | if __name__ == '__main__':
78 |     unittest.main()


--------------------------------------------------------------------------------