├── conf ├── salad_bowl.yml ├── generated │ ├── cat │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── cat10 │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── saxophone │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── ivo │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── march-31 │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── lazaro-ros │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── le-poisson-steve │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ ├── sax-new │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml │ └── lazaro-ros-sep │ │ ├── coarse.yml │ │ ├── interface.yml │ │ └── c2f.yml ├── c2f.yml ├── interface.yml ├── lora │ ├── lora.yml │ └── lora-s2s.yml └── vampnet.yml ├── DEFAULT_MODEL ├── unloop ├── .gitignore ├── requirements.txt ├── max │ ├── choose_from_list.js │ ├── randint.maxpat │ ├── randrange.maxpat │ ├── paths.js │ ├── unloop.maxpat │ ├── two-gate.maxpat │ ├── pan~.maxpat │ ├── dry-wet.maxpat │ ├── panner-cleat.maxpat │ └── click.maxpat ├── _.md └── client.py ├── DEFAULT_HF_MODEL_REPO ├── TODOS ├── scratch ├── convert_to_wav.sh ├── separate_folder.sh └── rms_mask.txt ├── assets └── example.wav ├── vampnet ├── modules │ ├── __init__.py │ ├── activations.py │ └── layers.py ├── scheduler.py ├── util.py ├── __init__.py ├── mask.py ├── beats.py ├── control.py └── newmask.py ├── requirements.txt ├── update-repos.sh ├── scripts ├── utils │ ├── stage.py │ ├── remove_quiet_files.py │ ├── split_long_audio_file.py │ ├── README.md │ ├── huggingface │ │ └── push_to_repos.sh │ ├── plots.py │ ├── split.py │ ├── xeno-canto-dl.py │ ├── visualize_embeddings.py │ └── gtzan_embeddings.py └── exp │ ├── export.py │ ├── fine_tune.py │ ├── eval.py │ └── experiment.py ├── LICENSE ├── hello.py ├── setup.py ├── .gitattributes ├── token_telephone ├── ttutil.py └── vamp_helper.py ├── .gitignore └── README.md /conf/salad_bowl.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DEFAULT_MODEL: -------------------------------------------------------------------------------- 1 | default 2 | -------------------------------------------------------------------------------- /unloop/.gitignore: -------------------------------------------------------------------------------- 1 | .gradio -------------------------------------------------------------------------------- /DEFAULT_HF_MODEL_REPO: -------------------------------------------------------------------------------- 1 | hugggof/vampnet -------------------------------------------------------------------------------- /TODOS: -------------------------------------------------------------------------------- 1 | [ ] add sketch2sound finetuning -------------------------------------------------------------------------------- /scratch/convert_to_wav.sh: -------------------------------------------------------------------------------- 1 | for f in *.mp3; do ffmpeg -i "$f" "${f%.mp3}.wav"; done -------------------------------------------------------------------------------- /scratch/separate_folder.sh: -------------------------------------------------------------------------------- 1 | for f in *.mp3; do demucs "$f" --two-stems=vocals; done 2 | -------------------------------------------------------------------------------- /assets/example.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hugofloresgarcia/vampnet/HEAD/assets/example.wav -------------------------------------------------------------------------------- /unloop/requirements.txt: -------------------------------------------------------------------------------- 1 | python-osc 2 | descript-audiotools 3 | tqdm 4 | argbind 5 | gradio-client -------------------------------------------------------------------------------- /vampnet/modules/__init__.py: -------------------------------------------------------------------------------- 1 | import audiotools 2 | 3 | audiotools.ml.BaseModel.INTERN += ["vampnet.modules.**"] 4 | audiotools.ml.BaseModel.EXTERN += ["einops", "flash_attn.flash_attention", "loralib"] 5 | 6 | from .transformer import VampNet -------------------------------------------------------------------------------- /unloop/max/choose_from_list.js: -------------------------------------------------------------------------------- 1 | subdivs = [0.125, 0.25, 0.5, 1, 2, 4]; 2 | subdivs = subdivs.map(function(x) { return x; }); 3 | 4 | function bang() { 5 | var i = Math.floor(Math.random() * subdivs.length); 6 | outlet(0, subdivs[i]); 7 | } -------------------------------------------------------------------------------- /conf/generated/cat/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/cat/coarse 6 | train/AudioLoader.sources: &id001 7 | - scratch/cat-audio 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/cat10/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/cat10/coarse 6 | train/AudioLoader.sources: &id001 7 | - scratch/cat-audio-10s 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/saxophone/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/saxophone/coarse 6 | train/AudioLoader.sources: &id001 7 | - scratch/sounds 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/ivo/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/ivo/coarse 6 | train/AudioLoader.sources: &id001 7 | - ./scratch/miguel/ivo/separated 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/march-31/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/march-31/coarse 6 | train/AudioLoader.sources: &id001 7 | - sound-journal-march-31 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/lazaro-ros/coarse.yml: 
-------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/lazaro-ros/coarse 6 | train/AudioLoader.sources: &id001 7 | - ./scratch/miguel/lazaro-ros 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/le-poisson-steve/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/le-poisson-steve/coarse 6 | train/AudioLoader.sources: &id001 7 | - scratch/steve 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/sax-new/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/sax-new/coarse 6 | train/AudioLoader.sources: &id001 7 | - ./scratch/miguel/saxophone-new/ 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/vampnet.yml 3 | 4 | VampNet.n_codebooks: 14 5 | VampNet.n_conditioning_codebooks: 4 6 | 7 | VampNet.embedding_dim: 1280 8 | VampNet.n_layers: 16 9 | VampNet.n_heads: 20 10 | 11 | AudioDataset.duration: 3.0 12 | 13 | 14 | AudioDataset.loudness_cutoff: -40.0 15 | -------------------------------------------------------------------------------- /conf/generated/lazaro-ros-sep/coarse.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | fine_tune: true 4 | fine_tune_checkpoint: ./models/vampnet/coarse.pth 5 | save_path: ./runs/lazaro-ros-sep/coarse 6 | train/AudioLoader.sources: &id001 7 | - ./scratch/miguel/lazaro-ros/separated 8 | val/AudioLoader.sources: *id001 9 | -------------------------------------------------------------------------------- /conf/generated/cat/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - scratch/cat-audio 3 | Interface.coarse2fine_ckpt: ./runs/cat/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/cat/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/cat10/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - scratch/cat-audio-10s 3 | Interface.coarse2fine_ckpt: ./runs/cat10/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/cat10/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/ivo/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - ./scratch/miguel/ivo/separated 3 | Interface.coarse2fine_ckpt: 
./runs/ivo/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/ivo/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/saxophone/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - scratch/sounds 3 | Interface.coarse2fine_ckpt: ./runs/saxophone/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/saxophone/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/march-31/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - sound-journal-march-31 3 | Interface.coarse2fine_ckpt: ./runs/march-31/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/march-31/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/sax-new/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - ./scratch/miguel/saxophone-new/ 3 | Interface.coarse2fine_ckpt: ./runs/sax-new/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/sax-new/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/lazaro-ros/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - ./scratch/miguel/lazaro-ros 3 | Interface.coarse2fine_ckpt: ./runs/lazaro-ros/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/lazaro-ros/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/le-poisson-steve/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - scratch/steve 3 | Interface.coarse2fine_ckpt: ./runs/le-poisson-steve/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/le-poisson-steve/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/generated/lazaro-ros-sep/interface.yml: -------------------------------------------------------------------------------- 1 | AudioLoader.sources: 2 | - - ./scratch/miguel/lazaro-ros/separated 3 | Interface.coarse2fine_ckpt: ./runs/lazaro-ros-sep/c2f/latest/vampnet/weights.pth 4 | Interface.coarse_ckpt: ./runs/lazaro-ros-sep/coarse/latest/vampnet/weights.pth 5 | Interface.codec_ckpt: ./models/vampnet/codec.pth 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | -------------------------------------------------------------------------------- /conf/interface.yml: 
-------------------------------------------------------------------------------- 1 | Interface.coarse_ckpt: ./models/vampnet/coarse.pth 2 | Interface.coarse2fine_ckpt: ./models/vampnet/c2f.pth 3 | Interface.codec_ckpt: ./models/vampnet/codec.pth 4 | Interface.coarse_chunk_size_s: 10 5 | Interface.coarse2fine_chunk_size_s: 3 6 | Interface.wavebeat_ckpt: ./models/wavebeat.pth 7 | 8 | # AudioLoader.sources: 9 | # - /media/CHONK/null 10 | 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | argbind>=0.3.2 3 | numpy==1.23 4 | loralib 5 | wavebeat @ git+https://github.com/hugofloresgarcia/wavebeat 6 | lac @ git+https://github.com/hugofloresgarcia/lac.git 7 | descript-audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git 8 | -e git+https://github.com/audacitorch/pyharp.git@develop#egg=pyharp 9 | torch_pitch_shift 10 | gradio 11 | pydantic==2.10.6 -------------------------------------------------------------------------------- /update-repos.sh: -------------------------------------------------------------------------------- 1 | # 2 | repos=( "vampnet-music" "vampnet-percussion" "vampnet-n64" "vampnet-birds" "vampnet-choir" "vampnet-machines" "nesquik" "vampnet-opera") 3 | for repo in "${repos[@]}" 4 | do 5 | echo "Updating $repo" 6 | git remote add --fetch $repo https://huggingface.co/spaces/hugggof/$repo 7 | git push --force $repo main 8 | done 9 | 10 | # https://huggingface.co/spaces/hugggof/vampnet-music 11 | # git push --space-percussion main -------------------------------------------------------------------------------- /conf/generated/cat/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/cat/c2f 13 | train/AudioLoader.sources: &id001 14 | - scratch/cat-audio 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/cat10/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/cat10/c2f 13 | train/AudioLoader.sources: &id001 14 | - scratch/cat-audio-10s 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/ivo/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/ivo/c2f 13 | train/AudioLoader.sources: &id001 14 | - 
./scratch/miguel/ivo/separated 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/saxophone/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/saxophone/c2f 13 | train/AudioLoader.sources: &id001 14 | - scratch/sounds 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/march-31/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/march-31/c2f 13 | train/AudioLoader.sources: &id001 14 | - sound-journal-march-31 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/le-poisson-steve/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/le-poisson-steve/c2f 13 | train/AudioLoader.sources: &id001 14 | - scratch/steve 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/sax-new/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/sax-new/c2f 13 | train/AudioLoader.sources: &id001 14 | - ./scratch/miguel/saxophone-new/ 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/generated/lazaro-ros/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/lazaro-ros/c2f 13 | train/AudioLoader.sources: &id001 14 | - ./scratch/miguel/lazaro-ros 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- 
/conf/generated/lazaro-ros-sep/c2f.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/lora/lora.yml 3 | AudioDataset.duration: 3.0 4 | AudioDataset.loudness_cutoff: -40.0 5 | VampNet.embedding_dim: 1280 6 | VampNet.n_codebooks: 14 7 | VampNet.n_conditioning_codebooks: 4 8 | VampNet.n_heads: 20 9 | VampNet.n_layers: 16 10 | fine_tune: true 11 | fine_tune_checkpoint: ./models/vampnet/c2f.pth 12 | save_path: ./runs/lazaro-ros-sep/c2f 13 | train/AudioLoader.sources: &id001 14 | - ./scratch/miguel/lazaro-ros/separated 15 | val/AudioLoader.sources: *id001 16 | -------------------------------------------------------------------------------- /conf/lora/lora.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/vampnet.yml 3 | 4 | fine_tune: True 5 | 6 | train/AudioDataset.n_examples: 100000000 7 | val/AudioDataset.n_examples: 500 8 | 9 | 10 | NoamScheduler.warmup: 500 11 | 12 | batch_size: 7 13 | num_workers: 7 14 | save_iters: [2000, 4000, 10000, 20000, 40000, 100000] 15 | sample_freq: 2000 16 | val_freq: 1000 17 | 18 | AdamW.lr: 0.0001 19 | 20 | # lets us organize sound classes into folders and choose from those sound classes uniformly 21 | AudioDataset.without_replacement: False 22 | num_iters: 500000 23 | -------------------------------------------------------------------------------- /conf/lora/lora-s2s.yml: -------------------------------------------------------------------------------- 1 | $include: 2 | - conf/vampnet.yml 3 | 4 | fine_tune: True 5 | 6 | train/AudioDataset.n_examples: 100000000 7 | val/AudioDataset.n_examples: 500 8 | 9 | 10 | NoamScheduler.warmup: 500 11 | 12 | batch_size: 7 13 | num_workers: 7 14 | save_iters: [2000, 4000, 10000, 20000, 40000, 100000] 15 | sample_freq: 2000 16 | val_freq: 1000 17 | 18 | AdamW.lr: 0.0001 19 | 20 | # lets us organize sound classes into folders and choose from those sound classes uniformly 21 | AudioDataset.without_replacement: False 22 | num_iters: 500000 23 | 24 | 25 | # control signals to use as conditioning.
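# a hedged sketch of conditioning on more than one control signal (the 'centroid' key name below is an assumption, not confirmed by this repo): # Sketch2SoundController.ctrl_keys: ['rmsq16', 'centroid']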
26 | Sketch2SoundController.ctrl_keys: ['rmsq16',] 27 | 28 | -------------------------------------------------------------------------------- /scripts/utils/stage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from pathlib import Path 4 | 5 | import argbind 6 | import rich 7 | from audiotools.ml import Experiment 8 | 9 | 10 | @argbind.bind(without_prefix=True) 11 | def run( 12 | run_dir: str = os.getenv("PATH_TO_RUNS", "runs"), 13 | name: str = None, 14 | recent: bool = False, 15 | ): 16 | if recent: 17 | paths = sorted(Path(run_dir).iterdir(), key=os.path.getmtime) 18 | paths = [p.name for p in paths if p.is_dir()] 19 | if paths: 20 | name = paths[-1] 21 | 22 | with Experiment(run_dir, name) as exp: 23 | exp.snapshot() 24 | rich.print(f"Created a snapshot of {exp.parent_directory} at {exp.exp_dir}") 25 | 26 | 27 | if __name__ == "__main__": 28 | args = argbind.parse_args() 29 | with argbind.scope(args): 30 | run() 31 | -------------------------------------------------------------------------------- /scripts/utils/remove_quiet_files.py: -------------------------------------------------------------------------------- 1 | # removes files with loudness below a threshold (default: -30 dB) 2 | 3 | from pathlib import Path 4 | import shutil 5 | import audiotools as at 6 | import argbind 7 | 8 | @argbind.bind(without_prefix=True) 9 | def remove_quiet_files( 10 | src_dir: Path = None, 11 | dest_dir: Path = None, 12 | min_loudness: float = -30, 13 | ): 14 | # copy src to dest 15 | dest_dir.mkdir(parents=True, exist_ok=True) 16 | shutil.copytree(src_dir, dest_dir, dirs_exist_ok=True) 17 | 18 | audio_files = at.util.find_audio(dest_dir) 19 | for audio_file in audio_files: 20 | sig = at.AudioSignal(audio_file) 21 | if sig.loudness() < min_loudness: 22 | audio_file.unlink() 23 | print(f"removed {audio_file}") 24 | 25 | if __name__ == "__main__": 26 | args = argbind.parse_args() 27 | 28 | with argbind.scope(args): 29 | remove_quiet_files() -------------------------------------------------------------------------------- /scripts/utils/split_long_audio_file.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argbind 3 | 4 | import audiotools as at 5 | import tqdm 6 | 7 | 8 | @argbind.bind(without_prefix=True) 9 | def split_long_audio_file( 10 | file: str = None, 11 | max_chunk_size_s: int = 60*10 12 | ): 13 | file = Path(file) 14 | output_dir = file.parent / file.stem 15 | output_dir.mkdir() 16 | 17 | sig = at.AudioSignal(file) 18 | 19 | # split into chunks 20 | for i, chunk in tqdm.tqdm(enumerate(sig.windows( 21 | window_duration=max_chunk_size_s, hop_duration=max_chunk_size_s/2, 22 | preprocess=True)) 23 | ): 24 | chunk.write(output_dir / f"{i}.wav") 25 | 26 | print(f"wrote {len(list(output_dir.glob('*.wav')))} files to {output_dir}") 27 | 28 | return output_dir 29 | 30 | if __name__ == "__main__": 31 | args = argbind.parse_args() 32 | 33 | with argbind.scope(args): 34 | split_long_audio_file() -------------------------------------------------------------------------------- /scripts/utils/README.md: -------------------------------------------------------------------------------- 1 | # Scripts 2 | 3 | ## process_zip.py 4 | 5 | Some requirements that may not be installed in the docker image: 6 | * argbind 7 | * wav2wav (pip install git+https://github.com/descriptinc/lyrebird-wav2wav.git or `pip install git+https://github.com/descriptinc/lyrebird-wav2wav.git@`) 8 | 9 | ### zip folder
structure 10 | 11 | The zip folder should have the following internal structure: 12 | 13 | ``` 14 | base_folder/ 15 | test_case_1/ 16 | before.wav 17 | test_case_2/ 18 | before.wav 19 | ... 20 | test_case_n/ 21 | before.wav 22 | ``` 23 | 24 | Note: There can be issues with the output zip if the input zip folder structure is too deep or too shallow. If you want/need to use a zip file with a different folder structure, adjust this: 25 | https://github.com/descriptinc/lyrebird-wav2wav/blob/136c923ce19df03876a515ca0ed83854710cfa30/scripts/utils/process_zip.py#L28 26 | 27 | ### Execution 28 | `python process_zip.py -tag ` 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Hugo Flores García and Prem Seetharaman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /conf/vampnet.yml: -------------------------------------------------------------------------------- 1 | 2 | codec_ckpt: ./models/vampnet/codec.pth 3 | save_path: ckpt 4 | 5 | num_iters: 1000000000 6 | save_iters: [10000, 50000, 100000, 300000, 500000] 7 | val_idx: [0,1,2,3,4,5,6,7,8,9] 8 | sample_freq: 10000 9 | val_freq: 1000 10 | 11 | batch_size: 8 12 | num_workers: 10 13 | 14 | # Optimization 15 | amp: false 16 | 17 | CrossEntropyLoss.label_smoothing: 0.1 18 | 19 | AdamW.lr: 0.001 20 | 21 | NoamScheduler.factor: 2.0 22 | NoamScheduler.warmup: 10000 23 | 24 | VampNet.vocab_size: 1024 25 | VampNet.n_codebooks: 4 26 | VampNet.n_conditioning_codebooks: 0 27 | VampNet.r_cond_dim: 0 28 | VampNet.noise_mode: mask 29 | VampNet.embedding_dim: 1280 30 | VampNet.n_layers: 20 31 | VampNet.n_heads: 20 32 | VampNet.flash_attn: false 33 | VampNet.dropout: 0.1 34 | 35 | AudioLoader.relative_path: "" 36 | AudioDataset.loudness_cutoff: -30.0 37 | AudioDataset.without_replacement: true 38 | AudioLoader.shuffle: true 39 | 40 | AudioDataset.duration: 10.0 41 | 42 | train/AudioDataset.n_examples: 10000000 43 | train/AudioLoader.sources: 44 | - /media/CHONK/hugo/spotdl/audio-train 45 | 46 | val/AudioDataset.n_examples: 2000 47 | val/AudioLoader.sources: 48 | - /media/CHONK/hugo/spotdl/audio-val 49 | 50 | -------------------------------------------------------------------------------- /scripts/utils/huggingface/push_to_repos.sh: -------------------------------------------------------------------------------- 1 | # the (remote repo, model_name) pairs are: 2 | # vampnet-music (default) 3 | # vampnet-percussion (percussion) 4 | # vampnet-choir ('choir') 5 | # etc. for: 6 | # 'machines' 7 | # 'n64' 8 | # 'opera' 9 | # 'percussion' 10 | 11 | # iterate through remote, model_name pairs: 12 | # and edit the DEFAULT_MODEL file in the repo 13 | # add, commit, and push to the right remote 14 | # each remote starts with https://huggingface.co/spaces/hugggof/{repo_name} 15 | 16 | for repo in vampnet-music vampnet-percussion vampnet-choir vampnet-machines vampnet-n64 vampnet-opera 17 | do 18 | echo "repo: $repo" 19 | # get the model name from the repo 20 | model_name=$(echo $repo | cut -d'-' -f2) 21 | # if the model_name is music, set it to default 22 | if [ $model_name == "music" ]; then 23 | model_name="default" 24 | fi 25 | echo "model_name: $model_name" 26 | # remove the DEFAULT_MODEL file 27 | rm DEFAULT_MODEL 28 | # create a new DEFAULT_MODEL file with the model name 29 | echo $model_name > DEFAULT_MODEL 30 | 31 | # commit and push to the right remote 32 | git add DEFAULT_MODEL 33 | git commit -m "update DEFAULT_MODEL to $model_name" 34 | git remote remove $repo 35 | git remote add $repo https://huggingface.co/spaces/hugggof/$repo 36 | git push $repo main 37 | done -------------------------------------------------------------------------------- /hello.py: -------------------------------------------------------------------------------- 1 | import random 2 | import vampnet 3 | import audiotools as at 4 | 5 | # load the default vampnet model 6 | interface = vampnet.interface.Interface.default() 7 | 8 | # list available finetuned models 9 | finetuned_model_choices = interface.available_models() 10 | print(f"available finetuned models: {finetuned_model_choices}") 11 | 12 | # pick a random finetuned model 13 | model_choice = random.choice(finetuned_model_choices) 14 | print(f"choosing model: {model_choice}") 15 | 16 | # or pick a specific
finetuned model 17 | print(f"actually, forcing model: default") 18 | model_choice = "default" 19 | 20 | # load a finetuned model 21 | interface.load_finetuned(model_choice) 22 | 23 | # load an example audio file 24 | signal = at.AudioSignal("assets/example.wav") 25 | 26 | # get the tokens for the audio 27 | codes = interface.encode(signal) 28 | 29 | # build a mask for the audio 30 | mask = interface.build_mask( 31 | codes, signal, 32 | periodic_prompt=13, 33 | upper_codebook_mask=3, 34 | ) 35 | 36 | # generate the output tokens 37 | output_tokens = interface.vamp( 38 | codes, mask, return_mask=False, 39 | temperature=1.0, 40 | typical_filtering=False, 41 | debug=True 42 | ) 43 | 44 | # convert them to a signal 45 | output_signal = interface.decode(output_tokens) 46 | 47 | # save the output signal 48 | output_signal.write("scratch/output.wav") -------------------------------------------------------------------------------- /vampnet/scheduler.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List 3 | 4 | import torch 5 | 6 | class NoamScheduler: 7 | """OG scheduler from transformer paper: https://arxiv.org/pdf/1706.03762.pdf 8 | Implementation from Annotated Transformer: https://nlp.seas.harvard.edu/2018/04/03/attention.html 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer: torch.optim.Optimizer, 14 | d_model: int = 512, 15 | factor: float = 1.0, 16 | warmup: int = 4000, 17 | ): 18 | # Store hparams 19 | self.warmup = warmup 20 | self.factor = factor 21 | self.d_model = d_model 22 | 23 | # Initialize variables `lr` and `steps` 24 | self.lr = None 25 | self.steps = 0 26 | 27 | # Store the optimizer 28 | self.optimizer = optimizer 29 | 30 | def state_dict(self): 31 | return { 32 | key: value for key, value in self.__dict__.items() if key != "optimizer" 33 | } 34 | 35 | def load_state_dict(self, state_dict): 36 | self.__dict__.update(state_dict) 37 | 38 | def step(self): 39 | self.steps += 1 40 | self.lr = self.factor * ( 41 | self.d_model ** (-0.5) 42 | * min(self.steps ** (-0.5), self.steps * self.warmup ** (-1.5)) 43 | ) 44 | 45 | for p in self.optimizer.param_groups: 46 | p["lr"] = self.lr 47 | 48 | -------------------------------------------------------------------------------- /vampnet/util.py: -------------------------------------------------------------------------------- 1 | import tqdm 2 | 3 | import torch 4 | from einops import rearrange 5 | 6 | def scalar_to_batch_tensor(x, batch_size): 7 | return torch.tensor(x).repeat(batch_size) 8 | 9 | 10 | def parallelize( 11 | fn, 12 | *iterables, 13 | parallel: str = "thread_map", 14 | **kwargs 15 | ): 16 | if parallel == "thread_map": 17 | from tqdm.contrib.concurrent import thread_map 18 | return thread_map( 19 | fn, 20 | *iterables, 21 | **kwargs 22 | ) 23 | elif parallel == "process_map": 24 | from tqdm.contrib.concurrent import process_map 25 | return process_map( 26 | fn, 27 | *iterables, 28 | **kwargs 29 | ) 30 | elif parallel == "single": 31 | return [fn(x) for x in tqdm.tqdm(*iterables)] 32 | else: 33 | raise ValueError(f"parallel must be one of 'thread_map', 'process_map', 'single', but got {parallel}") 34 | 35 | def codebook_flatten(tokens: torch.Tensor): 36 | """ 37 | flatten a sequence of tokens from (batch, codebook, time) to (batch, codebook * time) 38 | """ 39 | return rearrange(tokens, "b c t -> b (t c)") 40 | 41 | def codebook_unflatten(flat_tokens: torch.Tensor, n_c: int = None): 42 | """ 43 | unflatten a sequence of tokens from (batch, 
codebook * time) to (batch, codebook, time) 44 | """ 45 | tokens = rearrange(flat_tokens, "b (t c) -> b c t", c=n_c) 46 | return tokens 47 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | with open("README.md") as f: 5 | long_description = f.read() 6 | 7 | setup( 8 | name="vampnet", 9 | version="0.0.1", 10 | classifiers=[ 11 | "Intended Audience :: Developers", 12 | "Natural Language :: English", 13 | "Programming Language :: Python :: 3.7", 14 | "Topic :: Artistic Software", 15 | "Topic :: Multimedia", 16 | "Topic :: Multimedia :: Sound/Audio", 17 | "Topic :: Multimedia :: Sound/Audio :: Editors", 18 | "Topic :: Software Development :: Libraries", 19 | ], 20 | description="Generative Music Modeling.", 21 | long_description=long_description, 22 | long_description_content_type="text/markdown", 23 | author="Hugo Flores García, Prem Seetharaman", 24 | author_email="hugggofloresgarcia@gmail.com", 25 | url="https://github.com/hugofloresgarcia/vampnet", 26 | license="MIT", 27 | packages=find_packages(), 28 | install_requires=[ 29 | "torch==2.4.1", 30 | "argbind>=0.3.2", 31 | "numpy==1.23", 32 | "wavebeat @ git+https://github.com/hugofloresgarcia/wavebeat", 33 | "lac @ git+https://github.com/hugofloresgarcia/lac.git", 34 | "descript-audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git", 35 | "gradio", 36 | "loralib", 37 | "torch_pitch_shift", 38 | "plotly", 39 | "pydantic==2.10.6", 40 | "spaces", 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.rar filter=lfs diff=lfs merge=lfs -text 24 | *.safetensors filter=lfs diff=lfs merge=lfs -text 25 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 26 | *.tar.* filter=lfs diff=lfs merge=lfs -text 27 | *.tar filter=lfs diff=lfs merge=lfs -text 28 | *.tflite filter=lfs diff=lfs merge=lfs -text 29 | *.tgz filter=lfs diff=lfs merge=lfs -text 30 | *.wasm filter=lfs diff=lfs merge=lfs -text 31 | *.xz filter=lfs diff=lfs merge=lfs -text 32 | *.zip filter=lfs diff=lfs merge=lfs -text 33 | *.zst filter=lfs diff=lfs merge=lfs -text 34 | *tfevents* filter=lfs diff=lfs merge=lfs -text 35 | 
-------------------------------------------------------------------------------- /token_telephone/ttutil.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | ROOT = Path(__file__).parent 4 | 5 | import numpy as np 6 | from queue import Queue 7 | 8 | # make a log file!! 9 | logfile= ROOT / "log.txt" 10 | if logfile.exists(): 11 | logfile.unlink() 12 | logging.basicConfig(filename=logfile, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S", format="%(asctime)s | %(levelname)s | %(message)s") 13 | 14 | 15 | def hsv_to_rgb(h, s, v): 16 | # from https://en.wikipedia.org/wiki/HSL_and_HSV#From_HSV 17 | c = v * s 18 | h_ = h / 60 19 | x = c * (1 - abs(h_ % 2 - 1)) 20 | m = v - c 21 | 22 | if h_ < 1: 23 | r, g, b = c, x, 0 24 | elif h_ < 2: 25 | r, g, b = x, c, 0 26 | elif h_ < 3: 27 | r, g, b = 0, c, x 28 | elif h_ < 4: 29 | r, g, b = 0, x, c 30 | elif h_ < 5: 31 | r, g, b = x, 0, c 32 | else: 33 | r, g, b = c, 0, x 34 | 35 | return r + m, g + m, b + m 36 | 37 | 38 | def dbg(*args): 39 | print(" ".join(map(str, args))) 40 | 41 | 42 | # we'll want to log on a separate thread 43 | # so that we can log without blocking the main thread 44 | 45 | # make a queue for logging 46 | log_queue = Queue() 47 | 48 | # log to a file instead of the console 49 | def log(msg): 50 | # log_queue.put(msg) 51 | logging.info(msg) 52 | pass 53 | 54 | def set_debug(debug): 55 | if debug: 56 | # print log to console 57 | logging.getLogger().addHandler(logging.StreamHandler()) 58 | 59 | 60 | def pow2db(x): 61 | return 10 * np.log10(x + 1e-6) 62 | 63 | 64 | def db2pow(x): 65 | return 10 ** (x / 10) 66 | -------------------------------------------------------------------------------- /vampnet/modules/activations.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from einops import rearrange 7 | 8 | 9 | class NewGELU(nn.Module): 10 | """ 11 | Implementation of the GELU activation function currently in Google BERT repo 12 | (identical to OpenAI GPT). 
Also see the Gaussian Error Linear Units 13 | paper: https://arxiv.org/abs/1606.08415 14 | """ 15 | 16 | def forward(self, x): 17 | return ( 18 | 0.5 19 | * x 20 | * ( 21 | 1.0 22 | + torch.tanh( 23 | math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)) 24 | ) 25 | ) 26 | ) 27 | 28 | class GatedGELU(nn.Module): 29 | def __init__(self): 30 | super().__init__() 31 | self.gelu = NewGELU() 32 | 33 | def forward(self, x, dim: int = -1): 34 | p1, p2 = x.chunk(2, dim=dim) 35 | return p1 * self.gelu(p2) 36 | 37 | class Snake1d(nn.Module): 38 | def __init__(self, channels): 39 | super().__init__() 40 | self.alpha = nn.Parameter(torch.ones(channels)) 41 | 42 | def forward(self, x): 43 | return x + (self.alpha + 1e-9).reciprocal() * torch.sin(self.alpha * x).pow(2) 44 | 45 | def get_activation(name: str = "relu"): 46 | if name == "relu": 47 | return nn.ReLU 48 | elif name == "gelu": 49 | return NewGELU 50 | elif name == "geglu": 51 | return GatedGELU 52 | elif name == "snake": 53 | return Snake1d 54 | else: 55 | raise ValueError(f"Unrecognized activation {name}") -------------------------------------------------------------------------------- /scripts/utils/plots.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | from pandas.api.types import CategoricalDtype 4 | 5 | def plot_metrics(metrics, condition_to_latex, title, color_palette): 6 | # Add a new column to your dataframe with the latex representation 7 | metrics['condition_latex'] = metrics['condition'].map(condition_to_latex) 8 | 9 | # Order condition_latex as per the condition_to_latex dictionary 10 | cat_type = CategoricalDtype(categories=condition_to_latex.values(), ordered=True) 11 | metrics['condition_latex'] = metrics['condition_latex'].astype(cat_type) 12 | 13 | # Compute mean and std for each condition for each metric 14 | grouped = metrics.groupby('condition_latex')[['mel', 'frechet']].agg(['mean', 'std']) 15 | 16 | fig, axs = plt.subplots(2, 1, figsize=(7, 5.25)) 17 | 18 | # Set the main title for the figure 19 | fig.suptitle(title, fontsize=16) 20 | 21 | # Get color for each bar in the plot 22 | bar_colors = [color_palette[condition] for condition in grouped.index] 23 | 24 | # Plot mel 25 | sns.boxplot(x='condition_latex', y='mel', data=metrics, ax=axs[0], palette=color_palette, showfliers=False) 26 | axs[0].set_ylabel('Mel Spectrogram Loss \u2190') 27 | axs[0].set_xlabel('') # Remove x-axis label 28 | axs[0].set_xticklabels(grouped.index, rotation=0, ha='center') 29 | 30 | # Plot frechet 31 | axs[1].bar(grouped.index, grouped['frechet']['mean'], yerr=grouped['frechet']['std'], color=bar_colors) 32 | axs[1].set_ylabel('FAD \u2190') 33 | axs[1].set_xlabel('') # Remove x-axis label 34 | axs[1].set_xticklabels(grouped.index, rotation=0, ha='center') 35 | 36 | # Adjust the space between plots 37 | plt.subplots_adjust(hspace=0.1) 38 | 39 | # Remove any unnecessary space around the plot 40 | plt.tight_layout(rect=[0, 0, 1, 0.96]) 41 | 42 | # Reduce the space between suptitle and the plot 43 | plt.subplots_adjust(top=0.92) -------------------------------------------------------------------------------- /scripts/utils/split.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | import shutil 4 | import os 5 | import json 6 | 7 | import argbind 8 | from tqdm import tqdm 9 | from tqdm.contrib.concurrent import thread_map 10 | 11 | from audiotools.core import 
util 12 | 13 | 14 | @argbind.bind(without_prefix=True) 15 | def train_test_split( 16 | audio_folder: str = ".", 17 | test_size: float = 0.2, 18 | seed: int = 42, 19 | ): 20 | print(f"finding audio") 21 | 22 | audio_folder = Path(audio_folder) 23 | audio_files = util.find_audio(audio_folder) 24 | print(f"found {len(audio_files)} audio files") 25 | 26 | # split according to test_size 27 | n_test = int(len(audio_files) * test_size) 28 | n_train = len(audio_files) - n_test 29 | 30 | # shuffle 31 | random.seed(seed) 32 | random.shuffle(audio_files) 33 | 34 | train_files = audio_files[:n_train] 35 | test_files = audio_files[n_train:] 36 | 37 | 38 | print(f"Train files: {len(train_files)}") 39 | print(f"Test files: {len(test_files)}") 40 | continue_ = input("Continue [yn]? ") or "n" 41 | 42 | if continue_ != "y": 43 | return 44 | 45 | for split, files in ( 46 | ("train", train_files), ("test", test_files) 47 | ): 48 | for file in tqdm(files): 49 | out_file = audio_folder.parent / f"{audio_folder.name}-{split}" / Path(file).name 50 | out_file.parent.mkdir(exist_ok=True, parents=True) 51 | try: 52 | os.symlink(file, out_file) 53 | except FileExistsError: 54 | print(f"File {out_file} already exists, skipping") 55 | 56 | # save split as json 57 | with open(Path(audio_folder) / f"{split}.json", "w") as f: 58 | json.dump([str(f) for f in files], f) 59 | 60 | 61 | 62 | if __name__ == "__main__": 63 | args = argbind.parse_args() 64 | 65 | with argbind.scope(args): 66 | train_test_split() -------------------------------------------------------------------------------- /unloop/_.md: -------------------------------------------------------------------------------- 1 | ## client side setup 2 | clone 3 | ``` 4 | git clone https://github.com/hugofloresgarcia/unsound-objects.git && cd unsound-objects 5 | git checkout unloop 6 | ``` 7 | 8 | install 9 | ``` 10 | conda create -n unsound python=3.10 11 | conda activate unsound 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | ## server side setup 16 | ssh into malleus 17 | ``` 18 | ssh bryan@malleus.cs.northwestern.edu -L 7860:localhost:7860 19 | ``` 20 | 21 | then leave the malleus window open and start up a new local window 22 | 23 | (kindly ask hugo to launch the gradio on port 7860) 24 | 25 | you can verify that the gradio is running by opening `http://localhost:7860` in your browser 26 | 27 | ## launch the gradio server (vampnet) 28 | you have to run the gradio server running the vampnet model. 29 | (on the remote machine) 30 | ```bash 31 | conda create -n vampnet python=3.10 32 | git clone https://huggingface.co/spaces/hugggof/vampnet-music && cd vampnet-music 33 | pip install -e . 34 | CUDA_VISIBLE_DEVICES=0 python app.py 35 | ``` 36 | 37 | ### launch the gradio server (s2s) 38 | you have to run the gradio server running the audit model. 39 | 40 | (on the remote machine) 41 | ```bash 42 | conda create -n audit python=3.10 43 | cd audit 44 | pip install -r requirements.txt 45 | CUDA_VISIBLE_DEVICES=0 python scripts/text2sfx/demo.py ckpts/adobe-soda/checkpoints/seethara/text2sfx/25-02-18-256ch-8s/ --model latest_ema.pth 46 | ``` 47 | 48 | or for audit-old 49 | ``` 50 | CUDA_VISIBLE_DEVICES=0 python scripts/cdit/demos/voice2sfx.py ckpts/rms-centroid-ppg/latest.pth 51 | ``` 52 | 53 | ## launch the client (laptop) 54 | then launch the client from your local terminal 55 | ``` 56 | python client.py --vampnet_url --s2s_url http://localhost:7860 57 | ``` 58 | 59 | ## max setup 60 | Then...make sure you have installed (in Max) 61 | ``` 62 | flucoma 63 | ``` 64 | 65 | MAKE SURE YOU ARE RUNNING MAX 8.
It is not compatible with Max 9. 66 | 67 | Now open up the right max patch `./max/sound-objects.maxpat`. 68 | 69 | ### text prompts 70 | NOTE: text prompts are from the list here 71 | https://universalcategorysystem.com/ 72 | https://www.dropbox.com/scl/fo/lw1i20cgsm4edsvj3awn1/AP_ZhzG3LlpfFLbX309FbOU?dl=0&e=1&preview=UCS+v8.2.1+Full+List.xlsx&rlkey=wa2onzo0difpew1nze6odztlp 73 | *** HUGO make an empty 'audio' directory in the repo! *** 74 | -------------------------------------------------------------------------------- /scripts/exp/export.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import shutil 4 | import argparse 5 | from vampnet import DEFAULT_HF_MODEL_REPO 6 | from huggingface_hub import create_repo, repo_exists, HfApi 7 | 8 | 9 | 10 | parser = argparse.ArgumentParser(description="Export the fine-tuned model to the repo") 11 | parser.add_argument( 12 | "--name", type=str, default="lazaro-ros-sep", 13 | help="name of the fine-tuned model to export" 14 | ) 15 | parser.add_argument( 16 | "--model", type=str, default="latest", 17 | help="model version to export. check runs/ for available versions" 18 | ) 19 | parser.add_argument( 20 | "--repo", type=str, default=DEFAULT_HF_MODEL_REPO, 21 | help="name of the repo to export to" 22 | ) 23 | 24 | args = parser.parse_args() 25 | name = args.name 26 | version = args.model 27 | 28 | ## 29 | print(f"~~~~~~~~~~~ vampnet export! ~~~~~~~~~~~~") 30 | print(f"exporting {name} version {version} to {args.repo}\n") 31 | 32 | run_dir = Path(f"runs/{name}") 33 | repo_dir = Path("models/vampnet") 34 | 35 | # create our repo 36 | new_repo = False 37 | if not repo_exists(args.repo): 38 | print(f"repo {args.repo} does not exist, creating it") 39 | print(f"creating a repo at {args.repo}") 40 | create_repo(args.repo) 41 | new_repo = True 42 | 43 | paths = [] 44 | for part in ("coarse", "c2f"): 45 | outdir = repo_dir / "loras" / name 46 | outdir.mkdir(parents=True, exist_ok=True) 47 | outpath = outdir / f"{part}.pth" 48 | path = run_dir / part / version / "vampnet" / "weights.pth" 49 | # path.rename(outpath) 50 | shutil.copy(path, outpath) 51 | paths.append(outpath) 52 | print(f"copied {path} to {outpath}") 53 | 54 | print(f"uploading files to {args.repo}") 55 | # upload files to the repo 56 | 57 | # if it's a new repo, let's add the default models too 58 | if new_repo: 59 | paths.extend([repo_dir / "c2f.pth", repo_dir / "coarse.pth", repo_dir / "codec.pth", repo_dir / "wavebeat.pth"]) 60 | 61 | api = HfApi() 62 | 63 | for path in paths: 64 | path_in_repo = str(path.relative_to(repo_dir)) 65 | print(f"uploading {path} to {args.repo}/{path_in_repo}") 66 | api.upload_file( 67 | path_or_fileobj=path, 68 | path_in_repo=path_in_repo, 69 | repo_id=args.repo, 70 | token=True, 71 | commit_message=f"uploading {path_in_repo}", 72 | ) 73 | 74 | 75 | print("done!!! >::0") -------------------------------------------------------------------------------- /vampnet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from . import modules 3 | from pathlib import Path 4 | from . 
import scheduler 5 | from .interface import Interface 6 | from .modules.transformer import VampNet 7 | 8 | 9 | __version__ = "0.0.1" 10 | 11 | ROOT = Path(__file__).parent.parent 12 | MODELS_DIR = ROOT / "models" / "vampnet" 13 | 14 | from huggingface_hub import hf_hub_download, HfFileSystem 15 | DEFAULT_HF_MODEL_REPO_DIR = ROOT / "DEFAULT_HF_MODEL_REPO" 16 | DEFAULT_HF_MODEL_REPO = DEFAULT_HF_MODEL_REPO_DIR.read_text().strip() 17 | # DEFAULT_HF_MODEL_REPO = "hugggof/vampnet" 18 | FS = HfFileSystem() 19 | 20 | def download_codec(): 21 | # from dac.model.dac import DAC 22 | from lac.model.lac import LAC as DAC 23 | repo_id = DEFAULT_HF_MODEL_REPO 24 | filename = "codec.pth" 25 | codec_path = hf_hub_download( 26 | repo_id=repo_id, 27 | filename=filename, 28 | subfolder=None, 29 | local_dir=MODELS_DIR 30 | ) 31 | return codec_path 32 | 33 | 34 | def download_default(): 35 | filenames = ["coarse.pth", "c2f.pth", "wavebeat.pth"] 36 | repo_id = DEFAULT_HF_MODEL_REPO 37 | paths = [] 38 | for filename in filenames: 39 | path = f"{MODELS_DIR}/{filename}" 40 | if not Path(path).exists(): 41 | print(f"{path} does not exist, downloading") 42 | FS.download(f"{repo_id}/{filename}", path) 43 | paths.append(path) 44 | 45 | # load the models 46 | return paths[0], paths[1] 47 | 48 | 49 | def download_finetuned(name, repo_id=DEFAULT_HF_MODEL_REPO): 50 | filenames = ["coarse.pth", "c2f.pth"] 51 | paths = [] 52 | for filename in filenames: 53 | path = f"{MODELS_DIR}/loras/{name}/{filename}" 54 | if not Path(path).exists(): 55 | print(f"{path} does not exist, downloading") 56 | FS.download(f"{repo_id}/loras/{name}/{filename}", path) 57 | paths.append(path) 58 | 59 | # load the models 60 | return paths[0], paths[1] 61 | 62 | def list_finetuned(repo_id=DEFAULT_HF_MODEL_REPO): 63 | diritems = FS.listdir(f"{repo_id}/loras") 64 | # iterate through all the names 65 | valid_diritems = [] 66 | for item in diritems: 67 | model_file_items = FS.listdir(item["name"]) 68 | item_names = [item["name"].split("/")[-1] for item in model_file_items] 69 | # check that theres a "c2f.pth" and "coarse.pth" in the items 70 | c2f_exists = "c2f.pth" in item_names 71 | coarse_exists = "coarse.pth" in item_names 72 | if c2f_exists and coarse_exists: 73 | valid_diritems.append(item) 74 | 75 | # get the names of the valid items 76 | names = [item["name"].split("/")[-1] for item in valid_diritems] 77 | return names 78 | 79 | 80 | -------------------------------------------------------------------------------- /scratch/rms_mask.txt: -------------------------------------------------------------------------------- 1 | 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 2 | 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 3 | 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 4 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 6 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 7 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 8 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 9 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 10 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 11 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 12 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 13 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 14 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 15 | -------------------------------------------------------------------------------- /unloop/max/randint.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 8, 6 | "minor" : 6, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 59.0, 106.0, 640.0, 480.0 ], 14 | "bglocked" : 0, 15 | "openinpresentation" : 0, 16 | "default_fontsize" : 12.0, 17 | "default_fontface" : 0, 18 | "default_fontname" : "Arial", 19 | "gridonopen" : 1, 20 | "gridsize" : [ 15.0, 15.0 ], 21 | "gridsnaponopen" : 1, 22 | "objectsnaponopen" : 1, 23 | "statusbarvisible" : 2, 24 | "toolbarvisible" : 1, 25 | "lefttoolbarpinned" : 0, 26 | "toptoolbarpinned" : 0, 27 | "righttoolbarpinned" : 0, 28 | "bottomtoolbarpinned" : 0, 29 | "toolbars_unpinned_last_save" : 0, 30 | "tallnewobj" : 0, 31 | "boxanimatetime" : 200, 32 | "enablehscroll" : 1, 33 | "enablevscroll" : 1, 34 | "devicewidth" : 0.0, 35 | "description" : "", 36 | "digest" : "", 37 | "tags" : "", 38 | "style" : "", 39 | "subpatcher_template" : "", 40 | "assistshowspatchername" : 0, 41 | "boxes" : [ { 42 | "box" : { 43 | "comment" : "", 44 | "id" : "obj-4", 45 | "index" : 1, 46 | "maxclass" : "outlet", 47 | "numinlets" : 1, 48 | "numoutlets" : 0, 49 | "patching_rect" : [ 56.0, 199.0, 30.0, 30.0 ] 50 | } 51 | 52 | } 53 | , { 54 | "box" : { 55 | "id" : "obj-3", 56 | "maxclass" : "newobj", 57 | "numinlets" : 2, 58 | "numoutlets" : 1, 59 | "outlettype" : [ "" ], 60 | "patching_rect" : [ 56.0, 127.0, 63.0, 
22.0 ], 61 | "text" : "random 1." 62 | } 63 | 64 | } 65 | , { 66 | "box" : { 67 | "comment" : "bang for a random number", 68 | "id" : "obj-2", 69 | "index" : 1, 70 | "maxclass" : "inlet", 71 | "numinlets" : 0, 72 | "numoutlets" : 1, 73 | "outlettype" : [ "bang" ], 74 | "patching_rect" : [ 56.0, 77.0, 30.0, 30.0 ] 75 | } 76 | 77 | } 78 | , { 79 | "box" : { 80 | "id" : "obj-1", 81 | "maxclass" : "newobj", 82 | "numinlets" : 6, 83 | "numoutlets" : 1, 84 | "outlettype" : [ "" ], 85 | "patching_rect" : [ 56.0, 159.0, 130.0, 22.0 ], 86 | "text" : "scale 0. 1. #1 #2 #3" 87 | } 88 | 89 | } 90 | ], 91 | "lines" : [ { 92 | "patchline" : { 93 | "destination" : [ "obj-4", 0 ], 94 | "source" : [ "obj-1", 0 ] 95 | } 96 | 97 | } 98 | , { 99 | "patchline" : { 100 | "destination" : [ "obj-3", 0 ], 101 | "source" : [ "obj-2", 0 ] 102 | } 103 | 104 | } 105 | , { 106 | "patchline" : { 107 | "destination" : [ "obj-1", 0 ], 108 | "source" : [ "obj-3", 0 ] 109 | } 110 | 111 | } 112 | ] 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /unloop/max/randrange.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 8, 6 | "minor" : 6, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 59.0, 106.0, 640.0, 480.0 ], 14 | "bglocked" : 0, 15 | "openinpresentation" : 0, 16 | "default_fontsize" : 12.0, 17 | "default_fontface" : 0, 18 | "default_fontname" : "Arial", 19 | "gridonopen" : 1, 20 | "gridsize" : [ 15.0, 15.0 ], 21 | "gridsnaponopen" : 1, 22 | "objectsnaponopen" : 1, 23 | "statusbarvisible" : 2, 24 | "toolbarvisible" : 1, 25 | "lefttoolbarpinned" : 0, 26 | "toptoolbarpinned" : 0, 27 | "righttoolbarpinned" : 0, 28 | "bottomtoolbarpinned" : 0, 29 | "toolbars_unpinned_last_save" : 0, 30 | "tallnewobj" : 0, 31 | "boxanimatetime" : 200, 32 | "enablehscroll" : 1, 33 | "enablevscroll" : 1, 34 | "devicewidth" : 0.0, 35 | "description" : "", 36 | "digest" : "", 37 | "tags" : "", 38 | "style" : "", 39 | "subpatcher_template" : "", 40 | "assistshowspatchername" : 0, 41 | "boxes" : [ { 42 | "box" : { 43 | "comment" : "", 44 | "id" : "obj-4", 45 | "index" : 0, 46 | "maxclass" : "outlet", 47 | "numinlets" : 1, 48 | "numoutlets" : 0, 49 | "patching_rect" : [ 77.0, 218.0, 30.0, 30.0 ] 50 | } 51 | 52 | } 53 | , { 54 | "box" : { 55 | "id" : "obj-3", 56 | "maxclass" : "newobj", 57 | "numinlets" : 6, 58 | "numoutlets" : 1, 59 | "outlettype" : [ "" ], 60 | "patching_rect" : [ 77.0, 177.0, 97.0, 22.0 ], 61 | "text" : "scale 0. 1. #1 #2" 62 | } 63 | 64 | } 65 | , { 66 | "box" : { 67 | "id" : "obj-2", 68 | "maxclass" : "newobj", 69 | "numinlets" : 2, 70 | "numoutlets" : 1, 71 | "outlettype" : [ "" ], 72 | "patching_rect" : [ 77.0, 137.0, 63.0, 22.0 ], 73 | "text" : "random 1." 
74 | } 75 | 76 | } 77 | , { 78 | "box" : { 79 | "comment" : "", 80 | "id" : "obj-1", 81 | "index" : 0, 82 | "maxclass" : "inlet", 83 | "numinlets" : 0, 84 | "numoutlets" : 1, 85 | "outlettype" : [ "" ], 86 | "patching_rect" : [ 77.0, 88.0, 30.0, 30.0 ] 87 | } 88 | 89 | } 90 | ], 91 | "lines" : [ { 92 | "patchline" : { 93 | "destination" : [ "obj-2", 0 ], 94 | "source" : [ "obj-1", 0 ] 95 | } 96 | 97 | } 98 | , { 99 | "patchline" : { 100 | "destination" : [ "obj-3", 0 ], 101 | "source" : [ "obj-2", 0 ] 102 | } 103 | 104 | } 105 | , { 106 | "patchline" : { 107 | "destination" : [ "obj-4", 0 ], 108 | "source" : [ "obj-3", 0 ] 109 | } 110 | 111 | } 112 | ], 113 | "dependency_cache" : [ ], 114 | "autosave" : 0 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /scripts/exp/fine_tune.py: -------------------------------------------------------------------------------- 1 | import argbind 2 | from pathlib import Path 3 | import yaml 4 | from typing import List 5 | 6 | 7 | 8 | 9 | """example output: (yaml) 10 | 11 | """ 12 | 13 | @argbind.bind(without_prefix=True, positional=True) 14 | def fine_tune(audio_files_or_folders: List[str], name: str): 15 | 16 | conf_dir = Path("conf") 17 | assert conf_dir.exists(), "conf directory not found. are you in the vampnet directory?" 18 | 19 | conf_dir = conf_dir / "generated" 20 | conf_dir.mkdir(exist_ok=True) 21 | 22 | finetune_dir = conf_dir / name 23 | finetune_dir.mkdir(exist_ok=True) 24 | 25 | finetune_c2f_conf = { 26 | "$include": ["conf/lora/lora.yml"], 27 | "fine_tune": True, 28 | "train/AudioLoader.sources": audio_files_or_folders, 29 | "val/AudioLoader.sources": audio_files_or_folders, 30 | "VampNet.n_codebooks": 14, 31 | "VampNet.n_conditioning_codebooks": 4, 32 | "VampNet.embedding_dim": 1280, 33 | "VampNet.n_layers": 16, 34 | "VampNet.n_heads": 20, 35 | "AudioDataset.duration": 3.0, 36 | "AudioDataset.loudness_cutoff": -40.0, 37 | "save_path": f"./runs/{name}/c2f", 38 | "fine_tune_checkpoint": "./models/vampnet/c2f.pth" 39 | } 40 | 41 | finetune_coarse_conf = { 42 | "$include": ["conf/lora/lora.yml"], 43 | "fine_tune": True, 44 | "train/AudioLoader.sources": audio_files_or_folders, 45 | "val/AudioLoader.sources": audio_files_or_folders, 46 | "save_path": f"./runs/{name}/coarse", 47 | "fine_tune_checkpoint": "./models/vampnet/coarse.pth" 48 | } 49 | 50 | interface_conf = { 51 | "Interface.coarse_ckpt": f"./runs/{name}/coarse/latest/vampnet/weights.pth", 52 | 53 | "Interface.coarse2fine_ckpt": f"./runs/{name}/c2f/latest/vampnet/weights.pth", 54 | "Interface.wavebeat_ckpt": "./models/wavebeat.pth", 55 | 56 | "Interface.codec_ckpt": "./models/vampnet/codec.pth", 57 | "AudioLoader.sources": [audio_files_or_folders], 58 | } 59 | 60 | # save the confs 61 | with open(finetune_dir / "c2f.yml", "w") as f: 62 | yaml.dump(finetune_c2f_conf, f) 63 | 64 | with open(finetune_dir / "coarse.yml", "w") as f: 65 | yaml.dump(finetune_coarse_conf, f) 66 | 67 | with open(finetune_dir / "interface.yml", "w") as f: 68 | yaml.dump(interface_conf, f) 69 | 70 | 71 | # print(f"generated confs in {finetune_dir}. 
72 | # run training jobs with `python scripts/exp/train.py --args.load {finetune_dir}/.yml` ") 73 | 74 | print(f"generated confs in {finetune_dir}.") 75 | print() 76 | print(f"you'll need to run two training jobs, though they can run in parallel on separate GPUs.") 77 | print(f"run the coarse job with \n\tpython scripts/exp/train.py --args.load {finetune_dir}/coarse.yml\n") 78 | print(f"run the c2f job with \n\tpython scripts/exp/train.py --args.load {finetune_dir}/c2f.yml\n") 79 | if __name__ == "__main__": 80 | args = argbind.parse_args() 81 | 82 | with argbind.scope(args): 83 | fine_tune() 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/env.sh
108 | venv/
109 | env.bak/
110 | venv.bak/
111 | 
112 | # Spyder project settings
113 | .spyderproject
114 | .spyproject
115 | 
116 | # Rope project settings
117 | .ropeproject
118 | 
119 | # mkdocs documentation
120 | /site
121 | 
122 | # mypy
123 | .mypy_cache/
124 | .dmypy.json
125 | dmypy.json
126 | 
127 | # Pyre type checker
128 | .pyre/
129 | 
130 | # Files created by experiments
131 | output/
132 | snapshot/
133 | *.m4a
134 | notebooks/scratch.ipynb
135 | notebooks/inspect.ipynb
136 | notebooks/effects.ipynb
137 | notebooks/*.ipynb
138 | notebooks/*.gif
139 | notebooks/*.wav
140 | notebooks/*.mp4
141 | *runs/
142 | boards/
143 | samples/
144 | *.ipynb
145 | 
146 | results.json
147 | metrics.csv
148 | mprofile_*
149 | mem.png
150 | 
151 | results/
152 | mprofile*
153 | *.png
154 | # do not ignore the test wav file
155 | !tests/audio/short_test_audio.wav
156 | !tests/audio/output.wav
157 | */.DS_Store
158 | .DS_Store
159 | env.sh
160 | _codebraid/
161 | **/*.html
162 | **/*.exec.md
163 | flagged/
164 | log.txt
165 | ckpt/
166 | .syncthing*
167 | tests/assets/
168 | archived/
169 | 
170 | # scratch/
171 | scratch/miguel
172 | scratch/saxophone
173 | scratch/*.wav
174 | 
175 | runs-archive
176 | lyrebird-audiotools
177 | lyrebird-audio-codec
178 | samples-*/**
179 | 
180 | gradio-outputs/
181 | samples*/
182 | models-all/
183 | models.zip
184 | .git-old
185 | 
186 | 
187 | 
188 | gtzan.zip
189 | .gtzan_emb_cache
190 | 
191 | 
192 | data/
193 | data
194 | pyharp
195 | 
196 | models/vampnet/*
197 | models/*
198 | 
199 | lib/
200 | 
201 | _outputs/
202 | debug.txt
203 | 
204 | scratch/*
205 | 
206 | .gradio
--------------------------------------------------------------------------------
/unloop/max/paths.js:
--------------------------------------------------------------------------------
1 | var pathModes = ["off", "wander", "circle", "bounce", "random"];
2 | 
3 | // Define state object
4 | var state = {
5 |     coords: [],
6 |     coordidx: 0,
7 |     mode: "off"
8 | };
9 | 
10 | // init with a random path
11 | setPath("random");
12 | 
13 | // the space ranges from -1 to 1 in x and y, z must always be 0
14 | function bang() {
15 |     // TODO: emit next xyz coordinate in path
16 |     if (state.coords.length > 0) {
17 |         outlet(0, state.coords[state.coordidx]);
18 |         state.coordidx = (state.coordidx + 1) % state.coords.length;
19 |     }
20 |     else {
21 |         post("no path to follow\n");
22 |     }
23 | }
24 | 
25 | function setPath(mode) {
26 |     if (pathModes.indexOf(mode) < 0) {
27 |         post("unknown path mode: " + mode + "\n"); return;
28 |     }
29 |     state.mode = mode;
30 | 
31 |     // generate points for the path
32 |     if (state.mode == "circle") {
33 |         // circle around in a random direction
34 |         state.coords = [];
35 |         var numPoints = Math.round(Math.random() * 100);
36 |         var angle = Math.random() * 2 * Math.PI;
37 | 
38 |         var direction = Math.random() < 0.5 ?
1 : -1; 39 | for (var i = 0; i < numPoints; i++) { 40 | state.coords.push([Math.cos(angle), Math.sin(angle), 0]); 41 | angle += 2 * Math.PI / numPoints * direction; 42 | } 43 | } 44 | else if (state.mode == "wander") { 45 | // wander around in brownian motion 46 | state.coords = []; 47 | var numPoints = Math.round(Math.random() * 100); 48 | // var x = 0; 49 | // var y = 0; 50 | // pick a random starting point within -1 and 1 51 | var x = Math.random() * 2 - 1; 52 | var y = Math.random() * 2 - 1; 53 | for (var i = 0; i < numPoints; i++) { 54 | x += Math.random() * 0.2 - 0.1; // TODO: this 0.1 controls wander amt 55 | y += Math.random() * 0.2 - 0.1; 56 | 57 | // clamp to -1 to 1 58 | x = Math.min(1, Math.max(-1, x)); 59 | y = Math.min(1, Math.max(-1, y)); 60 | state.coords.push([x, y, 0]); 61 | } 62 | } 63 | else if (state.mode == "bounce") { 64 | // bounce around two points 65 | state.coords = []; 66 | var numPoints = 2; 67 | var x = 0; 68 | var y = 0; 69 | 70 | // pick two random quadrants to place the point in 71 | quads = { 72 | 1: [1, 1], 73 | 2: [-1, 1], 74 | 3: [-1, -1], 75 | 4: [1, -1] 76 | } 77 | var quadindices = [1, 2, 3, 4]; 78 | // scramble quadindices 79 | quadindices.sort(function(a, b) { return Math.random() - 0.5; }); 80 | var quadidx1 = quadindices.pop(); 81 | var quadidx2 = quadindices.pop(); 82 | var quad1 = quads[quadidx1]; 83 | var quad2 = quads[quadidx2]; 84 | // post("quad1: " + quad1 + " quad2: " + quad2); 85 | 86 | // pick point 1, a random point in the range (0, 1), then scale by the quad 87 | var x1 = Math.random() * quad1[0]; 88 | var y1 = Math.random() * quad1[1]; 89 | 90 | // pick point 2, a random point in the range (0, 1), then scale by the quad 91 | var x2 = Math.random() * quad2[0]; 92 | var y2 = Math.random() * quad2[1]; 93 | 94 | // generate the path 95 | state.coords.push([x1, y1, 0]); 96 | state.coords.push([x2, y2, 0]); 97 | } 98 | else if (state.mode == "random") { 99 | state.coords = []; 100 | var numPoints = Math.round(Math.random() * 100) + 4; 101 | for (var i = 0; i < numPoints; i++) { 102 | state.coords.push([Math.random() * 2 - 1, Math.random() * 2 - 1, 0]); 103 | } 104 | // post("random now has " + state.coords.length + " points\n"); 105 | } 106 | else { 107 | post("unknown path mode"); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /unloop/max/unloop.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 9, 6 | "minor" : 0, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 84.0, 131.0, 1000.0, 780.0 ], 14 | "gridsize" : [ 15.0, 15.0 ], 15 | "boxes" : [ { 16 | "box" : { 17 | "bgmode" : 0, 18 | "border" : 0, 19 | "clickthrough" : 0, 20 | "enablehscroll" : 0, 21 | "enablevscroll" : 0, 22 | "id" : "obj-2", 23 | "lockeddragscroll" : 0, 24 | "lockedsize" : 0, 25 | "maxclass" : "bpatcher", 26 | "name" : "unloop-bpatcher.maxpat", 27 | "numinlets" : 2, 28 | "numoutlets" : 2, 29 | "offset" : [ 0.0, 0.0 ], 30 | "outlettype" : [ "signal", "" ], 31 | "patching_rect" : [ 100.0, 68.0, 307.0, 469.0 ], 32 | "viewvisibility" : 1 33 | } 34 | 35 | } 36 | , { 37 | "box" : { 38 | "id" : "obj-1", 39 | "maxclass" : "newobj", 40 | "numinlets" : 2, 41 | "numoutlets" : 0, 42 | "patching_rect" : [ 143.0, 616.0, 55.0, 22.0 ], 43 | "text" : "dac~ 1 2" 44 | } 45 | 46 | } 47 | ], 48 | "lines" : [ { 49 | "patchline" : { 50 | 
"destination" : [ "obj-1", 1 ], 51 | "order" : 0, 52 | "source" : [ "obj-2", 0 ] 53 | } 54 | 55 | } 56 | , { 57 | "patchline" : { 58 | "destination" : [ "obj-1", 0 ], 59 | "order" : 1, 60 | "source" : [ "obj-2", 0 ] 61 | } 62 | 63 | } 64 | ], 65 | "originid" : "pat-142", 66 | "parameters" : { 67 | "obj-2::obj-1124" : [ "morph", "dry/wet", 0 ], 68 | "obj-2::obj-1125" : [ "level[8]", "level", 0 ], 69 | "obj-2::obj-1128" : [ "gain[4]", "gain", 0 ], 70 | "obj-2::obj-1140" : [ "overdub", "overdub", 0 ], 71 | "obj-2::obj-117" : [ "live.drop", "live.drop", 0 ], 72 | "obj-2::obj-1230" : [ "speed[2]", "speed+", 0 ], 73 | "obj-2::obj-171" : [ "toggle[2]", "toggle[30]", 0 ], 74 | "obj-2::obj-295" : [ "button[1]", "button[1]", 0 ], 75 | "obj-2::obj-316" : [ "toggle[3]", "toggle[3]", 0 ], 76 | "obj-2::obj-424::obj-12" : [ "number[8]", "number[2]", 0 ], 77 | "obj-2::obj-424::obj-13" : [ "number[9]", "number[3]", 0 ], 78 | "obj-2::obj-424::obj-15" : [ "number[2]", "number[2]", 0 ], 79 | "obj-2::obj-424::obj-19" : [ "number[3]", "number[3]", 0 ], 80 | "obj-2::obj-424::obj-20" : [ "number", "number", 0 ], 81 | "obj-2::obj-424::obj-23" : [ "number[4]", "number[3]", 0 ], 82 | "obj-2::obj-424::obj-26" : [ "number[5]", "number[3]", 0 ], 83 | "obj-2::obj-424::obj-28" : [ "number[6]", "number[2]", 0 ], 84 | "obj-2::obj-424::obj-30" : [ "number[7]", "number[2]", 0 ], 85 | "obj-2::obj-424::obj-347" : [ "periodic", "periodic", 0 ], 86 | "obj-2::obj-424::obj-349" : [ "drop", "drop", 0 ], 87 | "obj-2::obj-424::obj-8" : [ "toggle", "toggle", 0 ], 88 | "obj-2::obj-54" : [ "lpf", "lpf", 0 ], 89 | "obj-2::obj-55" : [ "tapelength", "length", 0 ], 90 | "obj-2::obj-76" : [ "hpf", "hpf", 0 ], 91 | "obj-2::obj-91::obj-156" : [ "live.gain~[26]", "live.gain~", 0 ], 92 | "obj-2::obj-91::obj-162" : [ "live.gain~[25]", "live.gain~", 0 ], 93 | "parameterbanks" : { 94 | "0" : { 95 | "index" : 0, 96 | "name" : "", 97 | "parameters" : [ "-", "-", "-", "-", "-", "-", "-", "-" ] 98 | } 99 | 100 | } 101 | , 102 | "inherited_shortname" : 1 103 | } 104 | , 105 | "dependency_cache" : [ { 106 | "name" : "dry-wet.maxpat", 107 | "bootpath" : "~/projects/research/unloop-2025/vampnet/unloop/max", 108 | "patcherrelativepath" : ".", 109 | "type" : "JSON", 110 | "implicit" : 1 111 | } 112 | , { 113 | "name" : "unloop-bpatcher.maxpat", 114 | "bootpath" : "~/projects/research/unloop-2025/vampnet/unloop/max", 115 | "patcherrelativepath" : ".", 116 | "type" : "JSON", 117 | "implicit" : 1 118 | } 119 | , { 120 | "name" : "vampnet-ui.maxpat", 121 | "bootpath" : "~/projects/research/unloop-2025/vampnet/unloop/max", 122 | "patcherrelativepath" : ".", 123 | "type" : "JSON", 124 | "implicit" : 1 125 | } 126 | ], 127 | "autosave" : 0 128 | } 129 | 130 | } 131 | -------------------------------------------------------------------------------- /unloop/max/two-gate.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 8, 6 | "minor" : 6, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 59.0, 106.0, 640.0, 480.0 ], 14 | "bglocked" : 0, 15 | "openinpresentation" : 0, 16 | "default_fontsize" : 12.0, 17 | "default_fontface" : 0, 18 | "default_fontname" : "Arial", 19 | "gridonopen" : 1, 20 | "gridsize" : [ 15.0, 15.0 ], 21 | "gridsnaponopen" : 1, 22 | "objectsnaponopen" : 1, 23 | "statusbarvisible" : 2, 24 | "toolbarvisible" : 1, 25 | "lefttoolbarpinned" : 0, 26 | 
"toptoolbarpinned" : 0, 27 | "righttoolbarpinned" : 0, 28 | "bottomtoolbarpinned" : 0, 29 | "toolbars_unpinned_last_save" : 0, 30 | "tallnewobj" : 0, 31 | "boxanimatetime" : 200, 32 | "enablehscroll" : 1, 33 | "enablevscroll" : 1, 34 | "devicewidth" : 0.0, 35 | "description" : "", 36 | "digest" : "", 37 | "tags" : "", 38 | "style" : "", 39 | "subpatcher_template" : "", 40 | "assistshowspatchername" : 0, 41 | "boxes" : [ { 42 | "box" : { 43 | "comment" : "", 44 | "id" : "obj-4", 45 | "index" : 2, 46 | "maxclass" : "outlet", 47 | "numinlets" : 1, 48 | "numoutlets" : 0, 49 | "patching_rect" : [ 70.491801261901855, 137.704914093017578, 30.0, 30.0 ] 50 | } 51 | 52 | } 53 | , { 54 | "box" : { 55 | "comment" : "", 56 | "id" : "obj-3", 57 | "index" : 1, 58 | "maxclass" : "outlet", 59 | "numinlets" : 1, 60 | "numoutlets" : 0, 61 | "patching_rect" : [ 26.229507446289062, 137.704914093017578, 30.0, 30.0 ] 62 | } 63 | 64 | } 65 | , { 66 | "box" : { 67 | "comment" : "", 68 | "id" : "obj-1", 69 | "index" : 2, 70 | "maxclass" : "inlet", 71 | "numinlets" : 0, 72 | "numoutlets" : 1, 73 | "outlettype" : [ "" ], 74 | "patching_rect" : [ 70.491801261901855, 28.688523769378662, 30.0, 30.0 ] 75 | } 76 | 77 | } 78 | , { 79 | "box" : { 80 | "comment" : "", 81 | "id" : "obj-518", 82 | "index" : 1, 83 | "maxclass" : "inlet", 84 | "numinlets" : 0, 85 | "numoutlets" : 1, 86 | "outlettype" : [ "" ], 87 | "patching_rect" : [ 26.229507446289062, 28.688523769378662, 30.0, 30.0 ] 88 | } 89 | 90 | } 91 | , { 92 | "box" : { 93 | "id" : "obj-517", 94 | "maxclass" : "newobj", 95 | "numinlets" : 2, 96 | "numoutlets" : 1, 97 | "outlettype" : [ "int" ], 98 | "patching_rect" : [ 69.672129154205322, 74.590161800384521, 33.0, 22.0 ], 99 | "text" : "== 0" 100 | } 101 | 102 | } 103 | , { 104 | "box" : { 105 | "id" : "obj-516", 106 | "maxclass" : "newobj", 107 | "numinlets" : 2, 108 | "numoutlets" : 1, 109 | "outlettype" : [ "" ], 110 | "patching_rect" : [ 69.672129154205322, 106.557374000549316, 32.0, 22.0 ], 111 | "text" : "gate" 112 | } 113 | 114 | } 115 | , { 116 | "box" : { 117 | "id" : "obj-515", 118 | "maxclass" : "newobj", 119 | "numinlets" : 2, 120 | "numoutlets" : 1, 121 | "outlettype" : [ "" ], 122 | "patching_rect" : [ 26.229507446289062, 106.557374000549316, 32.0, 22.0 ], 123 | "text" : "gate" 124 | } 125 | 126 | } 127 | ], 128 | "lines" : [ { 129 | "patchline" : { 130 | "destination" : [ "obj-515", 1 ], 131 | "order" : 1, 132 | "source" : [ "obj-1", 0 ] 133 | } 134 | 135 | } 136 | , { 137 | "patchline" : { 138 | "destination" : [ "obj-516", 1 ], 139 | "order" : 0, 140 | "source" : [ "obj-1", 0 ] 141 | } 142 | 143 | } 144 | , { 145 | "patchline" : { 146 | "destination" : [ "obj-3", 0 ], 147 | "source" : [ "obj-515", 0 ] 148 | } 149 | 150 | } 151 | , { 152 | "patchline" : { 153 | "destination" : [ "obj-4", 0 ], 154 | "source" : [ "obj-516", 0 ] 155 | } 156 | 157 | } 158 | , { 159 | "patchline" : { 160 | "destination" : [ "obj-516", 0 ], 161 | "source" : [ "obj-517", 0 ] 162 | } 163 | 164 | } 165 | , { 166 | "patchline" : { 167 | "destination" : [ "obj-515", 0 ], 168 | "order" : 1, 169 | "source" : [ "obj-518", 0 ] 170 | } 171 | 172 | } 173 | , { 174 | "patchline" : { 175 | "destination" : [ "obj-517", 0 ], 176 | "order" : 0, 177 | "source" : [ "obj-518", 0 ] 178 | } 179 | 180 | } 181 | ] 182 | } 183 | 184 | } 185 | -------------------------------------------------------------------------------- /scripts/exp/eval.py: -------------------------------------------------------------------------------- 1 | from pathlib import 
Path 2 | import os 3 | from functools import partial 4 | 5 | from frechet_audio_distance import FrechetAudioDistance 6 | import pandas 7 | import argbind 8 | import torch 9 | from tqdm import tqdm 10 | 11 | import audiotools 12 | from audiotools import AudioSignal 13 | 14 | @argbind.bind(without_prefix=True) 15 | def eval( 16 | exp_dir: str = None, 17 | baseline_key: str = "baseline", 18 | audio_ext: str = ".wav", 19 | ): 20 | assert exp_dir is not None 21 | exp_dir = Path(exp_dir) 22 | assert exp_dir.exists(), f"exp_dir {exp_dir} does not exist" 23 | 24 | # set up our metrics 25 | # sisdr_loss = audiotools.metrics.distance.SISDRLoss() 26 | # stft_loss = audiotools.metrics.spectral.MultiScaleSTFTLoss() 27 | mel_loss = audiotools.metrics.spectral.MelSpectrogramLoss() 28 | frechet = FrechetAudioDistance( 29 | use_pca=False, 30 | use_activation=False, 31 | verbose=True, 32 | audio_load_worker=4, 33 | ) 34 | frechet.model.to("cuda" if torch.cuda.is_available() else "cpu") 35 | 36 | # figure out what conditions we have 37 | conditions = [d.name for d in exp_dir.iterdir() if d.is_dir()] 38 | 39 | assert baseline_key in conditions, f"baseline_key {baseline_key} not found in {exp_dir}" 40 | conditions.remove(baseline_key) 41 | 42 | print(f"Found {len(conditions)} conditions in {exp_dir}") 43 | print(f"conditions: {conditions}") 44 | 45 | baseline_dir = exp_dir / baseline_key 46 | baseline_files = sorted(list(baseline_dir.glob(f"*{audio_ext}")), key=lambda x: int(x.stem)) 47 | 48 | metrics = [] 49 | for condition in tqdm(conditions): 50 | cond_dir = exp_dir / condition 51 | cond_files = sorted(list(cond_dir.glob(f"*{audio_ext}")), key=lambda x: int(x.stem)) 52 | 53 | print(f"computing fad for {baseline_dir} and {cond_dir}") 54 | frechet_score = frechet.score(baseline_dir, cond_dir) 55 | 56 | # make sure we have the same number of files 57 | num_files = min(len(baseline_files), len(cond_files)) 58 | baseline_files = baseline_files[:num_files] 59 | cond_files = cond_files[:num_files] 60 | assert len(list(baseline_files)) == len(list(cond_files)), f"number of files in {baseline_dir} and {cond_dir} do not match. {len(list(baseline_files))} vs {len(list(cond_files))}" 61 | 62 | def process(baseline_file, cond_file): 63 | # make sure the files match (same name) 64 | assert baseline_file.stem == cond_file.stem, f"baseline file {baseline_file} and cond file {cond_file} do not match" 65 | 66 | # load the files 67 | baseline_sig = AudioSignal(str(baseline_file)) 68 | cond_sig = AudioSignal(str(cond_file)) 69 | 70 | cond_sig.resample(baseline_sig.sample_rate) 71 | cond_sig.truncate_samples(baseline_sig.length) 72 | 73 | # if our condition is inpainting, we need to trim the conditioning off 74 | if "inpaint" in condition: 75 | ctx_amt = float(condition.split("_")[-1]) 76 | ctx_samples = int(ctx_amt * baseline_sig.sample_rate) 77 | print(f"found inpainting condition. 
trimming off {ctx_samples} samples from {cond_file} and {baseline_file}") 78 | cond_sig.trim(ctx_samples, ctx_samples) 79 | baseline_sig.trim(ctx_samples, ctx_samples) 80 | 81 | return { 82 | # "sisdr": -sisdr_loss(baseline_sig, cond_sig).item(), 83 | # "stft": stft_loss(baseline_sig, cond_sig).item(), 84 | "mel": mel_loss(baseline_sig, cond_sig).item(), 85 | "frechet": frechet_score, 86 | # "visqol": vsq, 87 | "condition": condition, 88 | "file": baseline_file.stem, 89 | } 90 | 91 | print(f"processing {len(baseline_files)} files in {baseline_dir} and {cond_dir}") 92 | metrics.extend(tqdm(map(process, baseline_files, cond_files), total=len(baseline_files))) 93 | 94 | metric_keys = [k for k in metrics[0].keys() if k not in ("condition", "file")] 95 | 96 | 97 | for mk in metric_keys: 98 | stat = pandas.DataFrame(metrics) 99 | stat = stat.groupby(['condition'])[mk].agg(['mean', 'count', 'std']) 100 | stat.to_csv(exp_dir / f"stats-{mk}.csv") 101 | 102 | df = pandas.DataFrame(metrics) 103 | df.to_csv(exp_dir / "metrics-all.csv", index=False) 104 | 105 | 106 | if __name__ == "__main__": 107 | args = argbind.parse_args() 108 | 109 | with argbind.scope(args): 110 | eval() -------------------------------------------------------------------------------- /vampnet/modules/layers.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Optional 3 | from typing import Tuple 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from einops import rearrange 9 | from torch.nn.utils import weight_norm 10 | 11 | # Scripting this brings model speed up 1.4x 12 | @torch.jit.script 13 | def snake(x, alpha): 14 | shape = x.shape 15 | x = x.reshape(shape[0], shape[1], -1) 16 | x = x + (alpha + 1e-9).reciprocal() * torch.sin(alpha * x).pow(2) 17 | x = x.reshape(shape) 18 | return x 19 | 20 | 21 | class Snake1d(nn.Module): 22 | def __init__(self, channels): 23 | super().__init__() 24 | self.alpha = nn.Parameter(torch.ones(1, channels, 1)) 25 | 26 | def forward(self, x): 27 | return snake(x, self.alpha) 28 | 29 | 30 | def num_params(model): 31 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 32 | 33 | 34 | def recurse_children(module, fn): 35 | for child in module.children(): 36 | if isinstance(child, nn.ModuleList): 37 | for c in child: 38 | yield recurse_children(c, fn) 39 | if isinstance(child, nn.ModuleDict): 40 | for c in child.values(): 41 | yield recurse_children(c, fn) 42 | 43 | yield recurse_children(child, fn) 44 | yield fn(child) 45 | 46 | 47 | def WNConv1d(*args, **kwargs): 48 | return weight_norm(nn.Conv1d(*args, **kwargs)) 49 | 50 | 51 | def WNConvTranspose1d(*args, **kwargs): 52 | return weight_norm(nn.ConvTranspose1d(*args, **kwargs)) 53 | 54 | 55 | class SequentialWithFiLM(nn.Module): 56 | """ 57 | handy wrapper for nn.Sequential that allows FiLM layers to be 58 | inserted in between other layers. 
59 |     """
60 | 
61 |     def __init__(self, *layers):
62 |         super().__init__()
63 |         self.layers = nn.ModuleList(layers)
64 | 
65 |     @staticmethod
66 |     def has_film(module):
67 |         mod_has_film = any(
68 |             [res for res in recurse_children(module, lambda c: isinstance(c, FiLM))]
69 |         )
70 |         return mod_has_film
71 | 
72 |     def forward(self, x, cond):
73 |         for layer in self.layers:
74 |             if self.has_film(layer):
75 |                 x = layer(x, cond)
76 |             else:
77 |                 x = layer(x)
78 |         return x
79 | 
80 | 
81 | class FiLM(nn.Module):
82 |     def __init__(self, input_dim: int, output_dim: int):
83 |         super().__init__()
84 | 
85 |         self.input_dim = input_dim
86 |         self.output_dim = output_dim
87 | 
88 |         if input_dim > 0:
89 |             self.beta = nn.Linear(input_dim, output_dim)
90 |             self.gamma = nn.Linear(input_dim, output_dim)
91 | 
92 |     def forward(self, x, r):
93 |         if self.input_dim == 0:
94 |             return x
95 |         else:
96 |             beta, gamma = self.beta(r), self.gamma(r)
97 |             beta, gamma = (
98 |                 beta.view(x.size(0), self.output_dim, 1),
99 |                 gamma.view(x.size(0), self.output_dim, 1),
100 |             )
101 |             x = x * (gamma + 1) + beta
102 |         return x
103 | 
104 | 
105 | class CodebookEmbedding(nn.Module):
106 |     def __init__(
107 |         self,
108 |         vocab_size: int,
109 |         latent_dim: int,
110 |         n_codebooks: int,
111 |         emb_dim: int,
112 |         special_tokens: Optional[Tuple[str]] = None,
113 |     ):
114 |         super().__init__()
115 |         self.n_codebooks = n_codebooks
116 |         self.emb_dim = emb_dim
117 |         self.latent_dim = latent_dim
118 |         self.vocab_size = vocab_size
119 | 
120 |         if special_tokens is not None:
121 |             # one learned embedding per special token, per codebook
122 |             self.special = nn.ParameterDict(
123 |                 {
124 |                     tkn: nn.Parameter(torch.randn(n_codebooks, self.latent_dim))
125 |                     for tkn in special_tokens
126 |                 }
127 |             )
128 |             self.special_idxs = {
129 |                 tkn: i + vocab_size for i, tkn in enumerate(special_tokens)
130 |             }
131 | 
132 |         self.out_proj = nn.Conv1d(n_codebooks * self.latent_dim, self.emb_dim, 1)
133 | 
134 |     def from_codes(self, codes: torch.Tensor, codec):
135 |         """
136 |         get a sequence of continuous embeddings from a sequence of discrete codes.
137 |         unlike its counterpart in the original VQ-VAE, this function also adds embeddings
138 |         for any special tokens necessary for the language model, like the MASK token.
139 | """ 140 | n_codebooks = codes.shape[1] 141 | latent = [] 142 | for i in range(n_codebooks): 143 | c = codes[:, i, :] 144 | 145 | lookup_table = codec.quantizer.quantizers[i].codebook.weight 146 | if hasattr(self, "special"): 147 | special_lookup = torch.cat( 148 | [self.special[tkn][i : i + 1] for tkn in self.special], dim=0 149 | ) 150 | lookup_table = torch.cat([lookup_table, special_lookup], dim=0) 151 | 152 | l = F.embedding(c, lookup_table).transpose(1, 2) 153 | latent.append(l) 154 | 155 | latent = torch.cat(latent, dim=1) 156 | return latent 157 | 158 | def forward(self, latents: torch.Tensor): 159 | """ 160 | project a sequence of latents to a sequence of embeddings 161 | """ 162 | x = self.out_proj(latents) 163 | return x 164 | 165 | -------------------------------------------------------------------------------- /token_telephone/vamp_helper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import time 3 | import os 4 | from contextlib import contextmanager 5 | import random 6 | 7 | import numpy as np 8 | import audiotools as at 9 | from audiotools import AudioSignal 10 | import argbind 11 | import shutil 12 | import torch 13 | import yaml 14 | 15 | 16 | from vampnet.interface import Interface, signal_concat 17 | from vampnet import mask as pmask 18 | 19 | from ttutil import log 20 | 21 | # TODO: incorporate discord bot (if mem allows) 22 | # in a separate thread, send audio samples for listening 23 | # and send back the results 24 | # as well as the params for sampling 25 | # also a command that lets you clear the current signal 26 | # if you want to start over 27 | 28 | 29 | device = "cuda" if torch.cuda.is_available() else "cpu" 30 | 31 | VAMPNET_DIR = Path(".").resolve() 32 | 33 | @contextmanager 34 | def chdir(path): 35 | old_dir = os.getcwd() 36 | os.chdir(path) 37 | try: 38 | yield 39 | finally: 40 | os.chdir(old_dir) 41 | 42 | def load_interface(model_choice="default") -> Interface: 43 | with chdir(VAMPNET_DIR): 44 | 45 | 46 | # populate the model choices with any interface.yml files in the generated confs 47 | MODEL_CHOICES = { 48 | "default": { 49 | "Interface.coarse_ckpt": "models/vampnet/coarse.pth", 50 | "Interface.coarse2fine_ckpt": "models/vampnet/c2f.pth", 51 | "Interface.codec_ckpt": "models/vampnet/codec.pth", 52 | } 53 | } 54 | generated_confs = Path("conf/generated") 55 | for conf_file in generated_confs.glob("*/interface.yml"): 56 | with open(conf_file) as f: 57 | _conf = yaml.safe_load(f) 58 | 59 | # check if the coarse, c2f, and codec ckpts exist 60 | # otherwise, dont' add this model choice 61 | if not ( 62 | Path(_conf["Interface.coarse_ckpt"]).exists() and 63 | Path(_conf["Interface.coarse2fine_ckpt"]).exists() and 64 | Path(_conf["Interface.codec_ckpt"]).exists() 65 | ): 66 | continue 67 | 68 | MODEL_CHOICES[conf_file.parent.name] = _conf 69 | 70 | interface = Interface( 71 | device=device, 72 | coarse_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse_ckpt"], 73 | coarse2fine_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse2fine_ckpt"], 74 | codec_ckpt=MODEL_CHOICES[model_choice]["Interface.codec_ckpt"], 75 | ) 76 | 77 | interface.model_choices = MODEL_CHOICES 78 | interface.to("cuda" if torch.cuda.is_available() else "cpu") 79 | return interface 80 | 81 | def load_model(interface: Interface, model_choice: str): 82 | interface.reload( 83 | interface.model_choices[model_choice]["Interface.coarse_ckpt"], 84 | interface.model_choices[model_choice]["Interface.coarse2fine_ckpt"], 85 
| ) 86 | 87 | def ez_variation( 88 | interface, 89 | sig: AudioSignal, 90 | seed: int = None, 91 | model_choice: str = None, 92 | ): 93 | t0 = time.time() 94 | 95 | if seed is None: 96 | seed = int(torch.randint(0, 2**32, (1,)).item()) 97 | at.util.seed(seed) 98 | 99 | # reload the model if necessary 100 | if model_choice is not None: 101 | load_model(interface, model_choice) 102 | 103 | # SAMPLING MASK PARAMS, hard code for now, we'll prob want a more preset-ey thing for the actual thin 104 | # we probably honestly just want to oscillate between the same 4 presets 105 | # in a predictable order such that they have a predictable outcome 106 | periodic_p = random.choice([3]) 107 | n_mask_codebooks = 3 108 | sampletemp = random.choice([1.0,]) 109 | dropout = random.choice([0.0, 0.0]) 110 | 111 | top_p = None # NOTE: top p may be the culprit behind the collapse into single pitches. 112 | 113 | # parameters for the build_mask function 114 | build_mask_kwargs = dict( 115 | rand_mask_intensity=1.0, 116 | prefix_s=0.0, 117 | suffix_s=0.0, 118 | periodic_prompt=int(periodic_p), 119 | periodic_prompt2=int(periodic_p), 120 | periodic_prompt_width=1, 121 | _dropout=dropout, 122 | upper_codebook_mask=int(n_mask_codebooks), 123 | upper_codebook_mask_2=int(n_mask_codebooks), 124 | ) 125 | 126 | # parameters for the vamp function 127 | vamp_kwargs = dict( 128 | temperature=sampletemp, 129 | typical_filtering=True, 130 | typical_mass=0.15, 131 | typical_min_tokens=64, 132 | top_p=top_p, 133 | seed=seed, 134 | sample_cutoff=1.0, 135 | ) 136 | 137 | # save the mask as a txt file 138 | interface.set_chunk_size(10.0) 139 | sig, mask, codes = interface.vamp( 140 | sig, 141 | batch_size=1, 142 | feedback_steps=1, 143 | time_stretch_factor=1, 144 | build_mask_kwargs=build_mask_kwargs, 145 | vamp_kwargs=vamp_kwargs, 146 | return_mask=True, 147 | ) 148 | 149 | log(f"vamp took {time.time() - t0} seconds") 150 | return sig 151 | 152 | 153 | 154 | def main(): 155 | import tqdm 156 | 157 | interface = load_interface() 158 | sig = AudioSignal.excerpt("assets/example.wav", duration=7.0) 159 | sig = interface.preprocess(sig) 160 | sig.write('ttout/in.wav') 161 | insig = sig.clone() 162 | 163 | fdbk_every = 4 164 | fdbk = 0.5 165 | 166 | for i in tqdm.tqdm(range(1000)): 167 | sig = ez_variation(interface, sig, model_choice="orchestral") 168 | sig.write(f'ttout/out{i}.wav') 169 | 170 | 171 | if __name__ == "__main__": 172 | main() -------------------------------------------------------------------------------- /scripts/utils/xeno-canto-dl.py: -------------------------------------------------------------------------------- 1 | from xenopy import Query 2 | 3 | 4 | SPECIES = [ 5 | "American Robin", 6 | "Northern Cardinal", 7 | "Mourning Dove", 8 | "American Crow", 9 | "Baltimore Oriole", 10 | "Blue Jay", 11 | "Eastern Bluebird", 12 | "House Finch", 13 | "American Goldfinch", 14 | "House Sparrow", 15 | "Song Sparrow", 16 | "Tufted Titmouse", 17 | "White-breasted Nuthatch", 18 | "European Starling", 19 | "American Redstart", 20 | "Red-winged Blackbird", 21 | "Brown-headed Cowbird", 22 | "Common Grackle", 23 | "Boat-tailed Grackle", 24 | "Common Yellowthroat", 25 | "Northern Mockingbird", 26 | "Carolina Wren", 27 | "Eastern Meadowlark", 28 | "Chipping Sparrow", 29 | "Tree Swallow", 30 | "Barn Swallow", 31 | "Cliff Swallow", 32 | "Pine Siskin", 33 | "Indigo Bunting", 34 | "Eastern Towhee", 35 | "Carolina Chickadee", 36 | "Great Crested Flycatcher", 37 | "Eastern Wood-Pewee", 38 | "Ovenbird", 39 | "Northern Flicker", 40 | 
"Red-eyed Vireo", 41 | "American Woodcock", 42 | "Eastern Phoebe", 43 | "Downy Woodpecker", 44 | "Scarlet Tanager", 45 | "Yellow Warbler", 46 | "White-eyed Vireo", 47 | "Common Loon", 48 | "White-throated Sparrow", 49 | "Yellow-throated Vireo", 50 | "Great Blue Heron", 51 | "Belted Kingfisher", 52 | "Pied-billed Grebe", 53 | "Wild Turkey", 54 | "Wood Thrush", 55 | "Rose-breasted Grosbeak", 56 | "Field Sparrow", 57 | "Hooded Warbler", 58 | "Northern Parula", 59 | "Chestnut-sided Warbler", 60 | "Blue-winged Warbler", 61 | "Red-bellied Woodpecker", 62 | "Yellow-billed Cuckoo", 63 | "Gray Catbird", 64 | "Northern Saw-whet Owl", 65 | "Osprey", 66 | "Common Nighthawk", 67 | "Broad-winged Hawk", 68 | "Black-throated Green Warbler", 69 | "Great Horned Owl", 70 | "Common Raven", 71 | "Barred Owl", 72 | "Canada Warbler", 73 | "Magnolia Warbler", 74 | "Black-and-white Warbler", 75 | "Eastern Kingbird", 76 | "Swainson's Thrush", 77 | "Worm-eating Warbler", 78 | "Prairie Warbler", 79 | "Baltimore Oriole", 80 | "Black-throated Blue Warbler", 81 | "Louisiana Waterthrush", 82 | "Blackburnian Warbler", 83 | "Black-capped Chickadee", 84 | "Cerulean Warbler", 85 | "Red-shouldered Hawk", 86 | "Cooper's Hawk", 87 | "Yellow-throated Warbler", 88 | "Blue-headed Vireo", 89 | "Blackpoll Warbler", 90 | "Ruffed Grouse", 91 | "Kentucky Warbler", 92 | "Hermit Thrush", 93 | "Cedar Waxwing", 94 | "Eastern Screech-Owl", 95 | "Northern Goshawk", 96 | "Green Heron", 97 | "Red-tailed Hawk", 98 | "Black Vulture", 99 | "Hairy Woodpecker", 100 | "Golden-crowned Kinglet", 101 | "Ruby-crowned Kinglet", 102 | "Bicknell's Thrush", 103 | "Blue-gray Gnatcatcher", 104 | "Veery", 105 | "Pileated Woodpecker", 106 | "Purple Finch", 107 | "White-crowned Sparrow", 108 | "Snow Bunting", 109 | "Pine Grosbeak", 110 | "American Tree Sparrow", 111 | "Dark-eyed Junco", 112 | "Snowy Owl", 113 | "White-winged Crossbill", 114 | "Red Crossbill", 115 | "Common Redpoll", 116 | "Northern Shrike", 117 | "Northern Harrier", 118 | "Rough-legged Hawk", 119 | "Long-eared Owl", 120 | "Evening Grosbeak", 121 | "Northern Pintail", 122 | "American Black Duck", 123 | "Mallard", 124 | "Canvasback", 125 | "Redhead", 126 | "Ring-necked Duck", 127 | "Greater Scaup", 128 | "Lesser Scaup", 129 | "Bufflehead", 130 | "Common Goldeneye", 131 | "Hooded Merganser", 132 | "Common Merganser", 133 | "Red-breasted Merganser", 134 | "Ruddy Duck", 135 | "Wood Duck", 136 | "Gadwall", 137 | "American Wigeon", 138 | "Northern Shoveler", 139 | "Green-winged Teal", 140 | "Blue-winged Teal", 141 | "Cinnamon Teal", 142 | "Ringed Teal", 143 | "Cape Teal", 144 | "Northern Fulmar", 145 | "Yellow-billed Loon", 146 | "Red-throated Loon", 147 | "Arctic Loon", 148 | "Pacific Loon", 149 | "Horned Grebe", 150 | "Red-necked Grebe", 151 | "Eared Grebe", 152 | "Western Grebe", 153 | "Clark's Grebe", 154 | "Double-crested Cormorant", 155 | "Pelagic Cormorant", 156 | "Great Cormorant", 157 | "American White Pelican", 158 | "Brown Pelican", 159 | "Brandt's Cormorant", 160 | "Least Bittern", 161 | "Great Egret", 162 | "Snowy Egret", 163 | "Little Blue Heron", 164 | "Tricolored Heron", 165 | "Reddish Egret", 166 | "Black-crowned Night-Heron", 167 | "Yellow-crowned Night-Heron", 168 | "White Ibis", 169 | "Glossy Ibis", 170 | "Roseate Spoonbill", 171 | "Wood Stork", 172 | "Black-bellied Whistling-Duck", 173 | "Fulvous Whistling-Duck", 174 | "Greater White-fronted Goose", 175 | "Snow Goose", 176 | "Ross's Goose", 177 | "Canada Goose", 178 | "Brant", 179 | "Mute Swan", 180 | "Tundra Swan", 181 | "Whooper 
Swan", 182 | "Sandhill Crane", 183 | "Black-necked Stilt", 184 | "American Avocet", 185 | "Northern Jacana", 186 | "Greater Yellowlegs", 187 | "Lesser Yellowlegs", 188 | "Willet", 189 | "Spotted Sandpiper", 190 | "Upland Sandpiper", 191 | "Whimbrel", 192 | "Long-billed Curlew", 193 | "Marbled Godwit", 194 | "Ruddy Turnstone", 195 | "Red Knot", 196 | "Sanderling", 197 | "Semipalmated Sandpiper", 198 | "Western Sandpiper", 199 | "Least Sandpiper", 200 | "White-rumped Sandpiper", 201 | "Baird's Sandpiper", 202 | "Pectoral Sandpiper", 203 | "Dunlin", 204 | "Buff-breasted Sandpiper", 205 | "Short-billed Dowitcher", 206 | "Long-billed Dowitcher", 207 | "Common Snipe", 208 | "American Woodcock", 209 | "Wilson's Phalarope", 210 | "Red-necked Phalarope", 211 | "Red Phalarope" 212 | ] 213 | 214 | from pathlib import Path 215 | 216 | def remove_spaces(s): 217 | return s.replace(" ", "") 218 | 219 | for species in SPECIES: 220 | if Path("/media/CHONK/hugo/xeno-canto-full/" + remove_spaces(species)).exists(): 221 | continue 222 | try: 223 | q = Query( 224 | name=species, q="A", length="10-30", 225 | ) 226 | 227 | # retrieve metadata 228 | metafiles = q.retrieve_meta(verbose=True) 229 | # retrieve recordings 230 | q.retrieve_recordings(multiprocess=True, nproc=10, attempts=10, outdir="/media/CHONK/hugo/xeno-canto-full/") 231 | 232 | except: 233 | print("Failed to download " + species) 234 | continue -------------------------------------------------------------------------------- /unloop/max/pan~.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 8, 6 | "minor" : 6, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 664.0, 441.0, 640.0, 480.0 ], 14 | "bglocked" : 0, 15 | "openinpresentation" : 0, 16 | "default_fontsize" : 12.0, 17 | "default_fontface" : 0, 18 | "default_fontname" : "Arial", 19 | "gridonopen" : 1, 20 | "gridsize" : [ 15.0, 15.0 ], 21 | "gridsnaponopen" : 1, 22 | "objectsnaponopen" : 1, 23 | "statusbarvisible" : 2, 24 | "toolbarvisible" : 1, 25 | "lefttoolbarpinned" : 0, 26 | "toptoolbarpinned" : 0, 27 | "righttoolbarpinned" : 0, 28 | "bottomtoolbarpinned" : 0, 29 | "toolbars_unpinned_last_save" : 0, 30 | "tallnewobj" : 0, 31 | "boxanimatetime" : 200, 32 | "enablehscroll" : 1, 33 | "enablevscroll" : 1, 34 | "devicewidth" : 0.0, 35 | "description" : "", 36 | "digest" : "", 37 | "tags" : "", 38 | "style" : "", 39 | "subpatcher_template" : "", 40 | "assistshowspatchername" : 0, 41 | "boxes" : [ { 42 | "box" : { 43 | "id" : "obj-2", 44 | "maxclass" : "newobj", 45 | "numinlets" : 2, 46 | "numoutlets" : 1, 47 | "outlettype" : [ "signal" ], 48 | "patching_rect" : [ 241.0, 273.0, 29.5, 22.0 ], 49 | "text" : "*~" 50 | } 51 | 52 | } 53 | , { 54 | "box" : { 55 | "id" : "obj-1", 56 | "maxclass" : "newobj", 57 | "numinlets" : 2, 58 | "numoutlets" : 1, 59 | "outlettype" : [ "signal" ], 60 | "patching_rect" : [ 160.0, 273.0, 29.5, 22.0 ], 61 | "text" : "*~" 62 | } 63 | 64 | } 65 | , { 66 | "box" : { 67 | "comment" : "right signal", 68 | "id" : "obj-28", 69 | "index" : 2, 70 | "maxclass" : "outlet", 71 | "numinlets" : 1, 72 | "numoutlets" : 0, 73 | "patching_rect" : [ 241.0, 316.0, 30.0, 30.0 ] 74 | } 75 | 76 | } 77 | , { 78 | "box" : { 79 | "comment" : "left signal", 80 | "id" : "obj-27", 81 | "index" : 1, 82 | "maxclass" : "outlet", 83 | "numinlets" : 1, 84 | "numoutlets" : 0, 85 | "patching_rect" : [ 160.0, 
316.0, 30.0, 30.0 ] 86 | } 87 | 88 | } 89 | , { 90 | "box" : { 91 | "id" : "obj-26", 92 | "maxclass" : "newobj", 93 | "numinlets" : 2, 94 | "numoutlets" : 1, 95 | "outlettype" : [ "signal" ], 96 | "patching_rect" : [ 160.0, 230.0, 43.0, 22.0 ], 97 | "text" : "cycle~" 98 | } 99 | 100 | } 101 | , { 102 | "box" : { 103 | "id" : "obj-25", 104 | "maxclass" : "newobj", 105 | "numinlets" : 2, 106 | "numoutlets" : 1, 107 | "outlettype" : [ "signal" ], 108 | "patching_rect" : [ 241.0, 230.0, 43.0, 22.0 ], 109 | "text" : "cycle~" 110 | } 111 | 112 | } 113 | , { 114 | "box" : { 115 | "id" : "obj-24", 116 | "maxclass" : "newobj", 117 | "numinlets" : 2, 118 | "numoutlets" : 1, 119 | "outlettype" : [ "signal" ], 120 | "patching_rect" : [ 265.0, 182.0, 49.0, 22.0 ], 121 | "text" : "+~ 0.75" 122 | } 123 | 124 | } 125 | , { 126 | "box" : { 127 | "id" : "obj-23", 128 | "maxclass" : "newobj", 129 | "numinlets" : 2, 130 | "numoutlets" : 1, 131 | "outlettype" : [ "signal" ], 132 | "patching_rect" : [ 184.0, 151.0, 37.0, 22.0 ], 133 | "text" : "*~ #1" 134 | } 135 | 136 | } 137 | , { 138 | "box" : { 139 | "id" : "obj-22", 140 | "maxclass" : "newobj", 141 | "numinlets" : 1, 142 | "numoutlets" : 1, 143 | "outlettype" : [ "signal" ], 144 | "patching_rect" : [ 184.0, 111.0, 58.0, 22.0 ], 145 | "text" : "sig~ 0.25" 146 | } 147 | 148 | } 149 | , { 150 | "box" : { 151 | "id" : "obj-11", 152 | "maxclass" : "comment", 153 | "numinlets" : 1, 154 | "numoutlets" : 0, 155 | "patching_rect" : [ 183.0, 8.0, 60.0, 20.0 ], 156 | "text" : "pan" 157 | } 158 | 159 | } 160 | , { 161 | "box" : { 162 | "id" : "obj-10", 163 | "maxclass" : "comment", 164 | "numinlets" : 1, 165 | "numoutlets" : 0, 166 | "patching_rect" : [ 124.0, 8.0, 60.0, 20.0 ], 167 | "text" : "audio" 168 | } 169 | 170 | } 171 | , { 172 | "box" : { 173 | "comment" : "pan value (0...1)", 174 | "id" : "obj-8", 175 | "index" : 2, 176 | "maxclass" : "inlet", 177 | "numinlets" : 0, 178 | "numoutlets" : 1, 179 | "outlettype" : [ "float" ], 180 | "patching_rect" : [ 198.0, 33.0, 30.0, 30.0 ] 181 | } 182 | 183 | } 184 | , { 185 | "box" : { 186 | "comment" : "mono audio in", 187 | "id" : "obj-7", 188 | "index" : 1, 189 | "maxclass" : "inlet", 190 | "numinlets" : 0, 191 | "numoutlets" : 1, 192 | "outlettype" : [ "signal" ], 193 | "patching_rect" : [ 134.0, 33.0, 30.0, 30.0 ] 194 | } 195 | 196 | } 197 | ], 198 | "lines" : [ { 199 | "patchline" : { 200 | "destination" : [ "obj-27", 0 ], 201 | "source" : [ "obj-1", 0 ] 202 | } 203 | 204 | } 205 | , { 206 | "patchline" : { 207 | "destination" : [ "obj-28", 0 ], 208 | "source" : [ "obj-2", 0 ] 209 | } 210 | 211 | } 212 | , { 213 | "patchline" : { 214 | "destination" : [ "obj-23", 0 ], 215 | "source" : [ "obj-22", 0 ] 216 | } 217 | 218 | } 219 | , { 220 | "patchline" : { 221 | "destination" : [ "obj-24", 0 ], 222 | "order" : 0, 223 | "source" : [ "obj-23", 0 ] 224 | } 225 | 226 | } 227 | , { 228 | "patchline" : { 229 | "destination" : [ "obj-26", 1 ], 230 | "order" : 1, 231 | "source" : [ "obj-23", 0 ] 232 | } 233 | 234 | } 235 | , { 236 | "patchline" : { 237 | "destination" : [ "obj-25", 1 ], 238 | "source" : [ "obj-24", 0 ] 239 | } 240 | 241 | } 242 | , { 243 | "patchline" : { 244 | "destination" : [ "obj-2", 0 ], 245 | "source" : [ "obj-25", 0 ] 246 | } 247 | 248 | } 249 | , { 250 | "patchline" : { 251 | "destination" : [ "obj-1", 0 ], 252 | "source" : [ "obj-26", 0 ] 253 | } 254 | 255 | } 256 | , { 257 | "patchline" : { 258 | "destination" : [ "obj-1", 1 ], 259 | "order" : 1, 260 | "source" : [ "obj-7", 0 ] 261 | } 262 | 263 | } 
264 | , { 265 | "patchline" : { 266 | "destination" : [ "obj-2", 1 ], 267 | "order" : 0, 268 | "source" : [ "obj-7", 0 ] 269 | } 270 | 271 | } 272 | , { 273 | "patchline" : { 274 | "destination" : [ "obj-23", 1 ], 275 | "midpoints" : [ 207.5, 96.0, 252.0, 96.0, 252.0, 147.0, 211.5, 147.0 ], 276 | "source" : [ "obj-8", 0 ] 277 | } 278 | 279 | } 280 | ] 281 | } 282 | 283 | } 284 | -------------------------------------------------------------------------------- /unloop/max/dry-wet.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 8, 6 | "minor" : 6, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 84.0, 131.0, 640.0, 480.0 ], 14 | "bglocked" : 0, 15 | "openinpresentation" : 0, 16 | "default_fontsize" : 12.0, 17 | "default_fontface" : 0, 18 | "default_fontname" : "Arial", 19 | "gridonopen" : 1, 20 | "gridsize" : [ 15.0, 15.0 ], 21 | "gridsnaponopen" : 1, 22 | "objectsnaponopen" : 1, 23 | "statusbarvisible" : 2, 24 | "toolbarvisible" : 1, 25 | "lefttoolbarpinned" : 0, 26 | "toptoolbarpinned" : 0, 27 | "righttoolbarpinned" : 0, 28 | "bottomtoolbarpinned" : 0, 29 | "toolbars_unpinned_last_save" : 0, 30 | "tallnewobj" : 0, 31 | "boxanimatetime" : 200, 32 | "enablehscroll" : 1, 33 | "enablevscroll" : 1, 34 | "devicewidth" : 0.0, 35 | "description" : "", 36 | "digest" : "", 37 | "tags" : "", 38 | "style" : "", 39 | "subpatcher_template" : "", 40 | "assistshowspatchername" : 0, 41 | "boxes" : [ { 42 | "box" : { 43 | "id" : "obj-154", 44 | "maxclass" : "newobj", 45 | "numinlets" : 1, 46 | "numoutlets" : 1, 47 | "outlettype" : [ "" ], 48 | "patching_rect" : [ 163.354036390781403, 177.018632590770721, 39.0, 22.0 ], 49 | "presentation" : 1, 50 | "presentation_rect" : [ 1469.090271848838711, 2039.0, 39.0, 22.0 ], 51 | "text" : "atodb" 52 | } 53 | 54 | } 55 | , { 56 | "box" : { 57 | "id" : "obj-156", 58 | "lastchannelcount" : 0, 59 | "maxclass" : "live.gain~", 60 | "numinlets" : 2, 61 | "numoutlets" : 5, 62 | "outlettype" : [ "signal", "signal", "", "float", "list" ], 63 | "parameter_enable" : 1, 64 | "patching_rect" : [ 133.0, 223.0, 48.0, 136.0 ], 65 | "presentation" : 1, 66 | "presentation_rect" : [ 1469.090271848838711, 2101.0, 48.0, 136.0 ], 67 | "saved_attribute_attributes" : { 68 | "valueof" : { 69 | "parameter_longname" : "live.gain~[26]", 70 | "parameter_mmax" : 6.0, 71 | "parameter_mmin" : -70.0, 72 | "parameter_modmode" : 0, 73 | "parameter_shortname" : "live.gain~", 74 | "parameter_type" : 0, 75 | "parameter_unitstyle" : 4 76 | } 77 | 78 | } 79 | , 80 | "varname" : "live.gain~[1]" 81 | } 82 | 83 | } 84 | , { 85 | "box" : { 86 | "id" : "obj-157", 87 | "maxclass" : "newobj", 88 | "numinlets" : 2, 89 | "numoutlets" : 1, 90 | "outlettype" : [ "float" ], 91 | "patching_rect" : [ 163.354036390781403, 140.37267005443573, 29.5, 22.0 ], 92 | "presentation" : 1, 93 | "presentation_rect" : [ 1469.090271848838711, 2009.0, 29.5, 22.0 ], 94 | "text" : "!- 1." 95 | } 96 | 97 | } 98 | , { 99 | "box" : { 100 | "id" : "obj-158", 101 | "maxclass" : "newobj", 102 | "numinlets" : 6, 103 | "numoutlets" : 1, 104 | "outlettype" : [ "" ], 105 | "patching_rect" : [ 61.0, 100.0, 90.0, 22.0 ], 106 | "presentation" : 1, 107 | "presentation_rect" : [ 1397.090271848838711, 1978.0, 94.0, 22.0 ], 108 | "text" : "scale 0. 1. 1. 0." 
109 | } 110 | 111 | } 112 | , { 113 | "box" : { 114 | "id" : "obj-160", 115 | "maxclass" : "newobj", 116 | "numinlets" : 1, 117 | "numoutlets" : 1, 118 | "outlettype" : [ "" ], 119 | "patching_rect" : [ 50.0, 168.944098472595215, 39.0, 22.0 ], 120 | "presentation" : 1, 121 | "presentation_rect" : [ 1386.090271848838711, 2039.0, 39.0, 22.0 ], 122 | "text" : "atodb" 123 | } 124 | 125 | } 126 | , { 127 | "box" : { 128 | "id" : "obj-162", 129 | "lastchannelcount" : 0, 130 | "maxclass" : "live.gain~", 131 | "numinlets" : 2, 132 | "numoutlets" : 5, 133 | "outlettype" : [ "signal", "signal", "", "float", "list" ], 134 | "parameter_enable" : 1, 135 | "patching_rect" : [ 50.0, 223.0, 48.0, 136.0 ], 136 | "presentation" : 1, 137 | "presentation_rect" : [ 1386.090271848838711, 2101.0, 48.0, 136.0 ], 138 | "saved_attribute_attributes" : { 139 | "valueof" : { 140 | "parameter_longname" : "live.gain~[25]", 141 | "parameter_mmax" : 6.0, 142 | "parameter_mmin" : -70.0, 143 | "parameter_modmode" : 0, 144 | "parameter_shortname" : "live.gain~", 145 | "parameter_type" : 0, 146 | "parameter_unitstyle" : 4 147 | } 148 | 149 | } 150 | , 151 | "varname" : "live.gain~" 152 | } 153 | 154 | } 155 | , { 156 | "box" : { 157 | "comment" : "dry", 158 | "id" : "obj-216", 159 | "index" : 1, 160 | "maxclass" : "inlet", 161 | "numinlets" : 0, 162 | "numoutlets" : 1, 163 | "outlettype" : [ "signal" ], 164 | "patching_rect" : [ 25.000060151161279, 40.0, 30.0, 30.0 ] 165 | } 166 | 167 | } 168 | , { 169 | "box" : { 170 | "comment" : "mix", 171 | "id" : "obj-223", 172 | "index" : 3, 173 | "maxclass" : "inlet", 174 | "numinlets" : 0, 175 | "numoutlets" : 1, 176 | "outlettype" : [ "" ], 177 | "patching_rect" : [ 313.0, 40.0, 30.0, 30.0 ] 178 | } 179 | 180 | } 181 | , { 182 | "box" : { 183 | "comment" : "wet", 184 | "id" : "obj-227", 185 | "index" : 2, 186 | "maxclass" : "inlet", 187 | "numinlets" : 0, 188 | "numoutlets" : 1, 189 | "outlettype" : [ "signal" ], 190 | "patching_rect" : [ 133.0, 44.0, 30.0, 30.0 ] 191 | } 192 | 193 | } 194 | , { 195 | "box" : { 196 | "comment" : "", 197 | "id" : "obj-230", 198 | "index" : 1, 199 | "maxclass" : "outlet", 200 | "numinlets" : 1, 201 | "numoutlets" : 0, 202 | "patching_rect" : [ 50.0, 419.0, 30.0, 30.0 ] 203 | } 204 | 205 | } 206 | ], 207 | "lines" : [ { 208 | "patchline" : { 209 | "destination" : [ "obj-156", 0 ], 210 | "source" : [ "obj-154", 0 ] 211 | } 212 | 213 | } 214 | , { 215 | "patchline" : { 216 | "destination" : [ "obj-230", 0 ], 217 | "source" : [ "obj-156", 0 ] 218 | } 219 | 220 | } 221 | , { 222 | "patchline" : { 223 | "destination" : [ "obj-154", 0 ], 224 | "source" : [ "obj-157", 0 ] 225 | } 226 | 227 | } 228 | , { 229 | "patchline" : { 230 | "destination" : [ "obj-157", 0 ], 231 | "order" : 0, 232 | "source" : [ "obj-158", 0 ] 233 | } 234 | 235 | } 236 | , { 237 | "patchline" : { 238 | "destination" : [ "obj-160", 0 ], 239 | "order" : 1, 240 | "source" : [ "obj-158", 0 ] 241 | } 242 | 243 | } 244 | , { 245 | "patchline" : { 246 | "destination" : [ "obj-162", 0 ], 247 | "source" : [ "obj-160", 0 ] 248 | } 249 | 250 | } 251 | , { 252 | "patchline" : { 253 | "destination" : [ "obj-230", 0 ], 254 | "source" : [ "obj-162", 0 ] 255 | } 256 | 257 | } 258 | , { 259 | "patchline" : { 260 | "destination" : [ "obj-162", 0 ], 261 | "source" : [ "obj-216", 0 ] 262 | } 263 | 264 | } 265 | , { 266 | "patchline" : { 267 | "destination" : [ "obj-158", 0 ], 268 | "source" : [ "obj-223", 0 ] 269 | } 270 | 271 | } 272 | , { 273 | "patchline" : { 274 | "destination" : [ "obj-156", 0 ], 275 | 
"source" : [ "obj-227", 0 ] 276 | } 277 | 278 | } 279 | ] 280 | } 281 | 282 | } 283 | -------------------------------------------------------------------------------- /vampnet/mask.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from audiotools import AudioSignal 5 | 6 | from .util import scalar_to_batch_tensor 7 | 8 | def _gamma(r): 9 | return (r * torch.pi / 2).cos().clamp(1e-10, 1.0) 10 | 11 | def _invgamma(y): 12 | if not torch.is_tensor(y): 13 | y = torch.tensor(y)[None] 14 | return 2 * y.acos() / torch.pi 15 | 16 | def full_mask(x: torch.Tensor): 17 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 18 | return torch.ones_like(x).long() 19 | 20 | def empty_mask(x: torch.Tensor): 21 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 22 | return torch.zeros_like(x).long() 23 | 24 | def apply_mask( 25 | x: torch.Tensor, 26 | mask: torch.Tensor, 27 | mask_token: int 28 | ): 29 | assert mask.ndim == 3, "mask must be (batch, n_codebooks, seq), but got {mask.ndim}" 30 | assert mask.shape == x.shape, f"mask must be same shape as x, but got {mask.shape} and {x.shape}" 31 | assert mask.dtype == torch.long, "mask must be long dtype, but got {mask.dtype}" 32 | assert ~torch.any(mask > 1), "mask must be binary" 33 | assert ~torch.any(mask < 0), "mask must be binary" 34 | 35 | fill_x = torch.full_like(x, mask_token) 36 | x = x * (1 - mask) + fill_x * mask 37 | 38 | return x, mask 39 | 40 | def random( 41 | x: torch.Tensor, 42 | r: torch.Tensor 43 | ): 44 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 45 | if not isinstance(r, torch.Tensor): 46 | r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device) 47 | 48 | r = _gamma(r)[:, None, None] 49 | probs = torch.ones_like(x) * r 50 | 51 | mask = torch.bernoulli(probs) 52 | mask = mask.round().long() 53 | 54 | return mask 55 | 56 | def linear_random( 57 | x: torch.Tensor, 58 | r: torch.Tensor, 59 | ): 60 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 61 | if not isinstance(r, torch.Tensor): 62 | r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device).float() 63 | r = r[:, None, None] 64 | 65 | probs = torch.ones_like(x).to(x.device).float() 66 | # expand to batch and codebook dims 67 | probs = probs.expand(x.shape[0], x.shape[1], -1) 68 | probs = probs * r 69 | 70 | mask = torch.bernoulli(probs) 71 | mask = mask.round().long() 72 | 73 | return mask 74 | 75 | def inpaint(x: torch.Tensor, 76 | n_prefix, 77 | n_suffix, 78 | ): 79 | assert n_prefix is not None 80 | assert n_suffix is not None 81 | 82 | mask = full_mask(x) 83 | 84 | # if we have a prefix or suffix, set their mask prob to 0 85 | if n_prefix > 0: 86 | if not isinstance(n_prefix, torch.Tensor): 87 | n_prefix = scalar_to_batch_tensor(n_prefix, x.shape[0]).to(x.device) 88 | for i, n in enumerate(n_prefix): 89 | if n > 0: 90 | mask[i, :, :n] = 0.0 91 | if n_suffix > 0: 92 | if not isinstance(n_suffix, torch.Tensor): 93 | n_suffix = scalar_to_batch_tensor(n_suffix, x.shape[0]).to(x.device) 94 | for i, n in enumerate(n_suffix): 95 | if n > 0: 96 | mask[i, :, -n:] = 0.0 97 | 98 | 99 | return mask 100 | 101 | def periodic_mask(x: torch.Tensor, 102 | period: int,width: int = 1, 103 | random_roll=False, 104 | ): 105 | mask = full_mask(x) 106 | if period == 0: 107 | return mask 108 | 109 | if not isinstance(period, torch.Tensor): 110 | period = scalar_to_batch_tensor(period, x.shape[0]) 111 | for i, factor in enumerate(period): 112 | if factor == 0: 113 | continue 114 | 
114 |         for j in range(mask.shape[-1]):
115 |             if j % factor == 0:
116 |                 # figure out how wide the mask should be
117 |                 j_start = max(0, j - width // 2)
118 |                 j_end = min(mask.shape[-1] - 1, j + width // 2) + 1
119 |                 # draw ones for each position in the window (bernoulli with p=1.0)
120 |                 j_mask = torch.bernoulli(torch.ones(j_end - j_start))
121 |                 assert torch.all(j_mask == 1)
122 |                 j_fill = torch.ones_like(j_mask) * (1 - j_mask)
123 |                 assert torch.all(j_fill == 0)
124 |                 # fill
125 |                 mask[i, :, j_start:j_end] = j_fill
126 |     if random_roll:
127 |         # add a random offset to the mask
128 |         offset = torch.randint(0, period[0], (1,))
129 |         mask = torch.roll(mask, offset.item(), dims=-1)
130 | 
131 |     return mask
132 | 
133 | def codebook_unmask(
134 |     mask: torch.Tensor,
135 |     n_conditioning_codebooks: int
136 | ):
137 |     if n_conditioning_codebooks is None:
138 |         return mask
139 |     # if we have any conditioning codebooks, set their mask to 0
140 |     mask = mask.clone()
141 |     mask[:, :n_conditioning_codebooks, :] = 0
142 |     return mask
143 | 
144 | def codebook_mask(mask: torch.Tensor, val1: int, val2: int = None):
145 |     mask = mask.clone()
146 |     mask[:, val1:, :] = 1
147 |     # val2 = val2 or val1
148 |     # vs = torch.linspace(val1, val2, mask.shape[1])
149 |     # for t, v in enumerate(vs):
150 |     #     v = int(v)
151 |     #     mask[:, v:, t] = 1
152 | 
153 |     return mask
154 | 
155 | def mask_and(
156 |     mask1: torch.Tensor,
157 |     mask2: torch.Tensor
158 | ):
159 |     assert mask1.shape == mask2.shape, "masks must be same shape"
160 |     return torch.min(mask1, mask2)
161 | 
162 | def dropout(
163 |     mask: torch.Tensor,
164 |     p: float,
165 | ):
166 |     # randomly pick a fraction p of timesteps along the last dimension and mark them masked
167 |     tsteps = mask.shape[-1]
168 |     tsteps_to_drop = int(tsteps * p)
169 |     tsteps_to_keep = tsteps - tsteps_to_drop
170 |     idxs_to_drop = torch.randint(0, tsteps, (tsteps_to_drop,))
171 |     mask = mask.clone()
172 |     mask[:, :, idxs_to_drop] = 1
173 |     return mask.long()
174 | 
175 | 
176 | 
177 | 
178 | def mask_or(
179 |     mask1: torch.Tensor,
180 |     mask2: torch.Tensor
181 | ):
182 |     assert mask1.shape == mask2.shape, f"masks must be same shape, but got {mask1.shape} and {mask2.shape}"
183 |     assert mask1.max() <= 1, "mask1 must be binary"
184 |     assert mask2.max() <= 1, "mask2 must be binary"
185 |     assert mask1.min() >= 0, "mask1 must be binary"
186 |     assert mask2.min() >= 0, "mask2 must be binary"
187 |     return (mask1 + mask2).clamp(0, 1)
188 | 
189 | def time_stretch_mask(
190 |     x: torch.Tensor,
191 |     stretch_factor: int,
192 | ):
193 |     assert stretch_factor >= 1, "stretch factor must be >= 1"
194 |     c_seq_len = x.shape[-1]
195 |     x = x.repeat_interleave(stretch_factor, dim=-1)
196 | 
197 |     # trim back to the original length
198 |     x = x[:, :, :c_seq_len]
199 | 
200 |     mask = periodic_mask(x, stretch_factor, width=1)
201 |     return mask
202 | 
203 | def onset_mask(
204 |     sig: AudioSignal,
205 |     z: torch.Tensor,
206 |     interface,
207 |     width: int = 1,
208 | ):
209 |     import librosa
210 | 
211 |     onset_frame_idxs = librosa.onset.onset_detect(
212 |         y=sig.samples[0][0].detach().cpu().numpy(), sr=sig.sample_rate,
213 |         hop_length=interface.codec.hop_length,
214 |         backtrack=True,
215 |     )
216 |     if len(onset_frame_idxs) == 0:
217 |         print("no onsets detected")
218 |     print("onset_frame_idxs", onset_frame_idxs)
219 |     print("mask shape", z.shape)
220 | 
221 |     mask = torch.ones_like(z)
222 |     for idx in onset_frame_idxs:
223 |         mask[:, :, idx-width:idx+width] = 0
224 | 
225 |     return mask
226 | 
227 | 
228 | 
229 | if __name__ == "__main__":
230 |     sig = AudioSignal("assets/example.wav")
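    # a quick illustrative sketch of composing the masks above
    # (the token grid, vocab size, and mask token below are arbitrary demo
    # values, not constants taken from the vampnet codebase):
    z = torch.randint(0, 1024, (1, 4, 100))      # fake (batch, n_codebooks, seq) codes
    mask = periodic_mask(z, 7, width=1)          # keep every 7th timestep as a prompt
    mask = codebook_unmask(mask, 1)              # keep codebook 0 as conditioning
    mask = mask_or(mask, linear_random(z, 0.1))  # union with a sparse random mask
    z_masked, mask = apply_mask(z, mask, mask_token=1024)
    print(z_masked.shape, mask.float().mean().item())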
-------------------------------------------------------------------------------- /unloop/max/panner-cleat.maxpat: -------------------------------------------------------------------------------- 1 | { 2 | "patcher" : { 3 | "fileversion" : 1, 4 | "appversion" : { 5 | "major" : 8, 6 | "minor" : 6, 7 | "revision" : 5, 8 | "architecture" : "x64", 9 | "modernui" : 1 10 | } 11 | , 12 | "classnamespace" : "box", 13 | "rect" : [ 59.0, 106.0, 640.0, 480.0 ], 14 | "bglocked" : 0, 15 | "openinpresentation" : 0, 16 | "default_fontsize" : 12.0, 17 | "default_fontface" : 0, 18 | "default_fontname" : "Arial", 19 | "gridonopen" : 1, 20 | "gridsize" : [ 15.0, 15.0 ], 21 | "gridsnaponopen" : 1, 22 | "objectsnaponopen" : 1, 23 | "statusbarvisible" : 2, 24 | "toolbarvisible" : 1, 25 | "lefttoolbarpinned" : 0, 26 | "toptoolbarpinned" : 0, 27 | "righttoolbarpinned" : 0, 28 | "bottomtoolbarpinned" : 0, 29 | "toolbars_unpinned_last_save" : 0, 30 | "tallnewobj" : 0, 31 | "boxanimatetime" : 200, 32 | "enablehscroll" : 1, 33 | "enablevscroll" : 1, 34 | "devicewidth" : 0.0, 35 | "description" : "", 36 | "digest" : "", 37 | "tags" : "", 38 | "style" : "", 39 | "subpatcher_template" : "", 40 | "assistshowspatchername" : 0, 41 | "boxes" : [ { 42 | "box" : { 43 | "id" : "obj-3", 44 | "maxclass" : "newobj", 45 | "numinlets" : 4, 46 | "numoutlets" : 0, 47 | "patching_rect" : [ 364.666666666666629, 337.068983197212219, 75.0, 22.0 ], 48 | "text" : "dac~ 1 2 3 4" 49 | } 50 | 51 | } 52 | , { 53 | "box" : { 54 | "id" : "obj-2", 55 | "maxclass" : "newobj", 56 | "numinlets" : 3, 57 | "numoutlets" : 4, 58 | "outlettype" : [ "signal", "signal", "signal", "signal" ], 59 | "patching_rect" : [ 369.0, 305.0, 62.0, 22.0 ], 60 | "text" : "quadpan~" 61 | } 62 | 63 | } 64 | , { 65 | "box" : { 66 | "id" : "obj-211", 67 | "maxclass" : "newobj", 68 | "numinlets" : 2, 69 | "numoutlets" : 0, 70 | "patching_rect" : [ 112.0, 336.89655339717865, 55.0, 22.0 ], 71 | "text" : "dac~ 1 2" 72 | } 73 | 74 | } 75 | , { 76 | "box" : { 77 | "id" : "obj-200", 78 | "maxclass" : "newobj", 79 | "numinlets" : 6, 80 | "numoutlets" : 1, 81 | "outlettype" : [ "" ], 82 | "patching_rect" : [ 229.5, 225.0, 114.0, 22.0 ], 83 | "text" : "scale 0. 1000. -1. 1." 84 | } 85 | 86 | } 87 | , { 88 | "box" : { 89 | "id" : "obj-198", 90 | "maxclass" : "newobj", 91 | "numinlets" : 4, 92 | "numoutlets" : 2, 93 | "outlettype" : [ "signal", "signal" ], 94 | "patching_rect" : [ 112.0, 305.0, 50.5, 22.0 ], 95 | "text" : "pan2" 96 | } 97 | 98 | } 99 | , { 100 | "box" : { 101 | "id" : "obj-194", 102 | "maxclass" : "newobj", 103 | "numinlets" : 1, 104 | "numoutlets" : 2, 105 | "outlettype" : [ "float", "float" ], 106 | "patching_rect" : [ 229.5, 174.0, 74.0, 22.0 ], 107 | "text" : "unpack 0. 0." 
108 | } 109 | 110 | } 111 | , { 112 | "box" : { 113 | "color" : [ 0.2, 0.0, 0.8, 1.0 ], 114 | "id" : "obj-193", 115 | "maxclass" : "newobj", 116 | "numinlets" : 0, 117 | "numoutlets" : 1, 118 | "outlettype" : [ "" ], 119 | "patching_rect" : [ 112.0, 186.0, 92.0, 22.0 ], 120 | "text" : "r panner-choice" 121 | } 122 | 123 | } 124 | , { 125 | "box" : { 126 | "id" : "obj-185", 127 | "maxclass" : "newobj", 128 | "numinlets" : 2, 129 | "numoutlets" : 3, 130 | "outlettype" : [ "signal", "signal", "signal" ], 131 | "patching_rect" : [ 112.0, 217.0, 49.0, 22.0 ], 132 | "text" : "gate~ 3" 133 | } 134 | 135 | } 136 | , { 137 | "box" : { 138 | "id" : "obj-184", 139 | "maxclass" : "newobj", 140 | "numinlets" : 3, 141 | "numoutlets" : 0, 142 | "patching_rect" : [ 221.5, 305.0, 59.0, 22.0 ], 143 | "text" : "16panner" 144 | } 145 | 146 | } 147 | , { 148 | "box" : { 149 | "id" : "obj-43", 150 | "maxclass" : "newobj", 151 | "numinlets" : 2, 152 | "numoutlets" : 2, 153 | "outlettype" : [ "", "" ], 154 | "patching_rect" : [ 229.5, 128.0, 51.0, 22.0 ], 155 | "text" : "route xy" 156 | } 157 | 158 | } 159 | , { 160 | "box" : { 161 | "comment" : "", 162 | "id" : "obj-213", 163 | "index" : 1, 164 | "maxclass" : "inlet", 165 | "numinlets" : 0, 166 | "numoutlets" : 1, 167 | "outlettype" : [ "signal" ], 168 | "patching_rect" : [ 142.0, 76.0, 30.0, 30.0 ] 169 | } 170 | 171 | } 172 | , { 173 | "box" : { 174 | "comment" : "", 175 | "id" : "obj-214", 176 | "index" : 2, 177 | "maxclass" : "inlet", 178 | "numinlets" : 0, 179 | "numoutlets" : 1, 180 | "outlettype" : [ "" ], 181 | "patching_rect" : [ 229.5, 76.0, 30.0, 30.0 ] 182 | } 183 | 184 | } 185 | ], 186 | "lines" : [ { 187 | "patchline" : { 188 | "destination" : [ "obj-184", 0 ], 189 | "source" : [ "obj-185", 1 ] 190 | } 191 | 192 | } 193 | , { 194 | "patchline" : { 195 | "destination" : [ "obj-198", 0 ], 196 | "source" : [ "obj-185", 0 ] 197 | } 198 | 199 | } 200 | , { 201 | "patchline" : { 202 | "destination" : [ "obj-2", 0 ], 203 | "source" : [ "obj-185", 2 ] 204 | } 205 | 206 | } 207 | , { 208 | "patchline" : { 209 | "destination" : [ "obj-185", 0 ], 210 | "source" : [ "obj-193", 0 ] 211 | } 212 | 213 | } 214 | , { 215 | "patchline" : { 216 | "destination" : [ "obj-184", 2 ], 217 | "order" : 1, 218 | "source" : [ "obj-194", 1 ] 219 | } 220 | 221 | } 222 | , { 223 | "patchline" : { 224 | "destination" : [ "obj-184", 1 ], 225 | "order" : 1, 226 | "source" : [ "obj-194", 0 ] 227 | } 228 | 229 | } 230 | , { 231 | "patchline" : { 232 | "destination" : [ "obj-2", 2 ], 233 | "order" : 0, 234 | "source" : [ "obj-194", 1 ] 235 | } 236 | 237 | } 238 | , { 239 | "patchline" : { 240 | "destination" : [ "obj-2", 1 ], 241 | "order" : 0, 242 | "source" : [ "obj-194", 0 ] 243 | } 244 | 245 | } 246 | , { 247 | "patchline" : { 248 | "destination" : [ "obj-200", 0 ], 249 | "order" : 2, 250 | "source" : [ "obj-194", 0 ] 251 | } 252 | 253 | } 254 | , { 255 | "patchline" : { 256 | "destination" : [ "obj-211", 1 ], 257 | "source" : [ "obj-198", 1 ] 258 | } 259 | 260 | } 261 | , { 262 | "patchline" : { 263 | "destination" : [ "obj-211", 0 ], 264 | "source" : [ "obj-198", 0 ] 265 | } 266 | 267 | } 268 | , { 269 | "patchline" : { 270 | "destination" : [ "obj-3", 3 ], 271 | "source" : [ "obj-2", 3 ] 272 | } 273 | 274 | } 275 | , { 276 | "patchline" : { 277 | "destination" : [ "obj-3", 2 ], 278 | "source" : [ "obj-2", 2 ] 279 | } 280 | 281 | } 282 | , { 283 | "patchline" : { 284 | "destination" : [ "obj-3", 1 ], 285 | "source" : [ "obj-2", 1 ] 286 | } 287 | 288 | } 289 | , { 290 | 
"patchline" : { 291 | "destination" : [ "obj-3", 0 ], 292 | "source" : [ "obj-2", 0 ] 293 | } 294 | 295 | } 296 | , { 297 | "patchline" : { 298 | "destination" : [ "obj-198", 1 ], 299 | "source" : [ "obj-200", 0 ] 300 | } 301 | 302 | } 303 | , { 304 | "patchline" : { 305 | "destination" : [ "obj-185", 1 ], 306 | "source" : [ "obj-213", 0 ] 307 | } 308 | 309 | } 310 | , { 311 | "patchline" : { 312 | "destination" : [ "obj-43", 0 ], 313 | "source" : [ "obj-214", 0 ] 314 | } 315 | 316 | } 317 | , { 318 | "patchline" : { 319 | "destination" : [ "obj-194", 0 ], 320 | "source" : [ "obj-43", 0 ] 321 | } 322 | 323 | } 324 | ] 325 | } 326 | 327 | } 328 | -------------------------------------------------------------------------------- /scripts/exp/experiment.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | from typing import List 4 | import tempfile 5 | import subprocess 6 | 7 | import argbind 8 | from tqdm import tqdm 9 | import torch 10 | 11 | from vampnet.interface import Interface 12 | from vampnet import mask as pmask 13 | import audiotools as at 14 | 15 | Interface: Interface = argbind.bind(Interface) 16 | 17 | 18 | 19 | def calculate_bitrate( 20 | interface, num_codebooks, 21 | downsample_factor 22 | ): 23 | bit_width = 10 24 | sr = interface.codec.sample_rate 25 | hop = interface.codec.hop_size 26 | rate = (sr / hop) * ((bit_width * num_codebooks) / downsample_factor) 27 | return rate 28 | 29 | def baseline(sig, interface): 30 | return interface.preprocess(sig) 31 | 32 | def reconstructed(sig, interface): 33 | return interface.decode( 34 | interface.encode(sig) 35 | ) 36 | 37 | def coarse2fine(sig, interface): 38 | z = interface.encode(sig) 39 | z = z[:, :interface.c2f.n_conditioning_codebooks, :] 40 | 41 | z = interface.coarse_to_fine(z) 42 | return interface.decode(z) 43 | 44 | class CoarseCond: 45 | 46 | def __init__(self, num_conditioning_codebooks, downsample_factor): 47 | self.num_conditioning_codebooks = num_conditioning_codebooks 48 | self.downsample_factor = downsample_factor 49 | 50 | def __call__(self, sig, interface): 51 | z = interface.encode(sig) 52 | mask = pmask.full_mask(z) 53 | mask = pmask.codebook_unmask(mask, self.num_conditioning_codebooks) 54 | mask = pmask.periodic_mask(mask, self.downsample_factor) 55 | 56 | zv = interface.coarse_vamp(z, mask) 57 | zv = interface.coarse_to_fine(zv) 58 | return interface.decode(zv) 59 | 60 | def opus(sig, interface, bitrate=128): 61 | sig = interface.preprocess(sig) 62 | 63 | with tempfile.NamedTemporaryFile(suffix=".wav") as f: 64 | sig.write(f.name) 65 | 66 | opus_name = Path(f.name).with_suffix(".opus") 67 | # convert to opus 68 | cmd = [ 69 | "ffmpeg", "-y", "-i", f.name, 70 | "-c:a", "libopus", 71 | "-b:a", f"{bitrate}", 72 | opus_name 73 | ] 74 | subprocess.run(cmd, check=True) 75 | 76 | # convert back to wav 77 | output_name = Path(f"{f.name}-opus").with_suffix(".wav") 78 | cmd = [ 79 | "ffmpeg", "-y", "-i", opus_name, 80 | output_name 81 | ] 82 | 83 | subprocess.run(cmd, check=True) 84 | 85 | sig = at.AudioSignal( 86 | output_name, 87 | sample_rate=sig.sample_rate 88 | ) 89 | return sig 90 | 91 | def mask_ratio_1_step(ratio=1.0): 92 | def wrapper(sig, interface): 93 | z = interface.encode(sig) 94 | mask = pmask.linear_random(z, ratio) 95 | zv = interface.coarse_vamp( 96 | z, 97 | mask, 98 | sampling_steps=1, 99 | ) 100 | 101 | return interface.decode(zv) 102 | return wrapper 103 | 104 | def num_sampling_steps(num_steps=1): 105 | def 
wrapper(sig, interface: Interface):
106 |         z = interface.encode(sig)
107 |         mask = pmask.periodic_mask(z, 16)
108 |         zv = interface.coarse_vamp(
109 |             z,
110 |             mask,
111 |             sampling_steps=num_steps,
112 |         )
113 | 
114 |         zv = interface.coarse_to_fine(zv)
115 |         return interface.decode(zv)
116 |     return wrapper
117 | 
118 | def beat_mask(ctx_time):
119 |     def wrapper(sig, interface):
120 |         beat_mask = interface.make_beat_mask(
121 |             sig,
122 |             before_beat_s=ctx_time/2,
123 |             after_beat_s=ctx_time/2,
124 |             invert=True
125 |         )
126 | 
127 |         z = interface.encode(sig)
128 | 
129 |         zv = interface.coarse_vamp(
130 |             z, beat_mask
131 |         )
132 | 
133 |         zv = interface.coarse_to_fine(zv)
134 |         return interface.decode(zv)
135 |     return wrapper
136 | 
137 | def inpaint(ctx_time):
138 |     def wrapper(sig, interface: Interface):
139 |         z = interface.encode(sig)
140 |         mask = pmask.inpaint(z, interface.s2t(ctx_time), interface.s2t(ctx_time))
141 | 
142 |         zv = interface.coarse_vamp(z, mask)
143 |         zv = interface.coarse_to_fine(zv)
144 | 
145 |         return interface.decode(zv)
146 |     return wrapper
147 | 
148 | def token_noise(noise_amt):
149 |     def wrapper(sig, interface: Interface):
150 |         z = interface.encode(sig)
151 |         mask = pmask.random(z, noise_amt)
152 |         z = torch.where(
153 |             mask,
154 |             torch.randint_like(z, 0, interface.coarse.vocab_size),
155 |             z
156 |         )
157 |         return interface.decode(z)
158 |     return wrapper
159 | 
160 | EXP_REGISTRY = {}
161 | 
162 | EXP_REGISTRY["gen-compression"] = {
163 |     "baseline": baseline,
164 |     "reconstructed": reconstructed,
165 |     "coarse2fine": coarse2fine,
166 |     **{
167 |         f"{n}_codebooks_downsampled_{x}x": CoarseCond(num_conditioning_codebooks=n, downsample_factor=x)
168 |         for (n, x) in (
169 |             (1, 1),   # 1 codebook, no downsampling
170 |             (4, 4),   # 4 codebooks, downsampled 4x
171 |             (4, 16),  # 4 codebooks, downsampled 16x
172 |             (4, 32),  # 4 codebooks, downsampled 32x
173 |         )
174 |     },
175 |     **{
176 |         f"token_noise_{x}": mask_ratio_1_step(ratio=x)
177 |         for x in [0.25, 0.5, 0.75]
178 |     },
179 | 
180 | }
181 | 
182 | 
183 | EXP_REGISTRY["sampling-steps"] = {
184 |     # "codec": reconstructed,
185 |     **{f"steps_{n}": num_sampling_steps(n) for n in [1, 4, 12, 36, 64, 72]},
186 | }
187 | 
188 | 
189 | EXP_REGISTRY["musical-sampling"] = {
190 |     **{f"beat_mask_{t}": beat_mask(t) for t in [0.075]},
191 |     **{f"inpaint_{t}": inpaint(t) for t in [0.5, 1.0,]},  # multiply these by 2 (they go left and right)
192 | }
193 | 
194 | @argbind.bind(without_prefix=True)
195 | def main(
196 |     sources=[
197 |         "/media/CHONK/hugo/spotdl/val",
198 |     ],
199 |     output_dir: str = "./samples",
200 |     max_excerpts: int = 2000,
201 |     exp_type: str = "gen-compression",
202 |     seed: int = 0,
203 |     ext: List[str] = [".mp3"],
204 | ):
205 |     at.util.seed(seed)
206 |     interface = Interface()
207 | 
208 |     output_dir = Path(output_dir)
209 |     output_dir.mkdir(exist_ok=True, parents=True)
210 | 
211 |     from audiotools.data.datasets import AudioLoader, AudioDataset
212 | 
213 |     loader = AudioLoader(sources=sources, shuffle_state=seed, ext=ext)
214 |     dataset = AudioDataset(loader,
215 |                            sample_rate=interface.codec.sample_rate,
216 |                            duration=interface.coarse.chunk_size_s,
217 |                            n_examples=max_excerpts,
218 |                            without_replacement=True,
219 |                            )
220 | 
221 |     if exp_type in EXP_REGISTRY:
222 |         SAMPLE_CONDS = EXP_REGISTRY[exp_type]
223 |     else:
224 |         raise ValueError(f"Unknown exp_type {exp_type}")
225 | 
226 | 
227 |     indices = list(range(max_excerpts))
228 |     random.shuffle(indices)
229 |     for i in tqdm(indices):
230 |         # if all our files are already there, skip
231 |         done = []
232 | 
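        # resume logic: outputs land at <output_dir>/<condition_name>/<i>.wav,
        # so an excerpt is skipped only when every condition has already
        # written its file for this index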
for name in SAMPLE_CONDS: 233 | o_dir = Path(output_dir) / name 234 | done.append((o_dir / f"{i}.wav").exists()) 235 | if all(done): 236 | continue 237 | 238 | sig = dataset[i]["signal"] 239 | results = { 240 | name: cond(sig, interface).cpu() 241 | for name, cond in SAMPLE_CONDS.items() 242 | } 243 | 244 | for name, sig in results.items(): 245 | o_dir = Path(output_dir) / name 246 | o_dir.mkdir(exist_ok=True, parents=True) 247 | 248 | sig.write(o_dir / f"{i}.wav") 249 | 250 | if __name__ == "__main__": 251 | args = argbind.parse_args() 252 | 253 | with argbind.scope(args): 254 | main() 255 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: salad bowl (vampnet) 3 | emoji: 🥗 4 | colorFrom: yellow 5 | colorTo: green 6 | sdk: gradio 7 | sdk_version: 5.23.2 8 | python_version: 3.11 9 | app_file: app.py 10 | pinned: false 11 | license: cc-by-nc-4.0 12 | --- 13 | 14 | # VampNet 15 | 16 | # Table of contents 17 | 18 | - [setting up](#setting-up) 19 | - [programmatic usage](#programmatic-usage) 20 | - [launching the web app](#launching-the-web-app) 21 | - [training / fine-tuning](#training--fine-tuning) 22 | - [training a model](#training-a-model) 23 | - [debugging training](#debugging-training) 24 | - [fine-tuning](#fine-tuning) 25 | - [exporting your model](#exporting-your-model) 26 | - [unloop](#unloop) 27 | - [token telephone](#token-telephone) 28 | - [a note on argbind](#a-note-on-argbind) 29 | - [take a look at the pretrained models](#take-a-look-at-the-pretrained-models) 30 | - [licensing for pretrained models](#licensing-for-pretrained-models) 31 | 32 | ## setting up 33 | 34 | python 3.9-3.11 works well. (for example, using conda) 35 | ```bash 36 | conda create -n vampnet python=3.9 37 | conda activate vampnet 38 | ``` 39 | 40 | install VampNet 41 | 42 | ```bash 43 | git clone https://github.com/hugofloresgarcia/vampnet.git 44 | pip install -e ./vampnet 45 | ``` 46 | 47 | ## programmatic usage 48 | 49 | quick start! 50 | ```python 51 | import random 52 | import vampnet 53 | import audiotools as at 54 | 55 | # load the default vampnet model 56 | interface = vampnet.interface.Interface.default() 57 | 58 | # list available finetuned models 59 | finetuned_model_choices = interface.available_models() 60 | print(f"available finetuned models: {finetuned_model_choices}") 61 | 62 | # pick a random finetuned model 63 | model_choice = random.choice(finetuned_model_choices) 64 | print(f"choosing model: {model_choice}") 65 | 66 | # load a finetuned model 67 | interface.load_finetuned(model_choice) 68 | 69 | # load an example audio file 70 | signal = at.AudioSignal("assets/example.wav") 71 | 72 | # get the tokens for the audio 73 | codes = interface.encode(signal) 74 | 75 | # build a mask for the audio 76 | mask = interface.build_mask( 77 | codes, signal, 78 | periodic_prompt=7, 79 | upper_codebook_mask=3, 80 | ) 81 | 82 | # generate the output tokens 83 | output_tokens = interface.vamp( 84 | codes, mask, return_mask=False, 85 | temperature=1.0, 86 | typical_filtering=True, 87 | ) 88 | 89 | # convert them to a signal 90 | output_signal = interface.decode(output_tokens) 91 | 92 | # save the output signal 93 | output_signal.write("scratch/output.wav") 94 | ``` 95 | 96 | 97 | # Launching the Web app 98 | You can launch a gradio UI to play with vampnet. 
99 | 
100 | ```bash
101 | python app.py
102 | ```
103 | 
104 | # Training / Fine-tuning
105 | 
106 | ## Training a model
107 | 
108 | To train a model, run the following script:
109 | 
110 | ```bash
111 | python scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints
112 | ```
113 | 
114 | for multi-gpu training, use torchrun:
115 | 
116 | ```bash
117 | torchrun --nproc_per_node gpu scripts/exp/train.py --args.load conf/vampnet.yml --save_path path/to/ckpt
118 | ```
119 | 
120 | You can edit `conf/vampnet.yml` to change the dataset paths or any training hyperparameters.
121 | 
122 | For coarse2fine models, you can use `conf/c2f.yml` as a starting configuration.
123 | 
124 | See `python scripts/exp/train.py -h` for a list of options.
125 | 
126 | ## Debugging training
127 | 
128 | To debug training, it's easiest to run with 1 GPU and 0 workers:
129 | 
130 | ```bash
131 | CUDA_VISIBLE_DEVICES=0 python -m pdb scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints --num_workers 0
132 | ```
133 | 
134 | ## Fine-tuning
135 | 
136 | To fine-tune a model, use the script in `scripts/exp/fine_tune.py`.
137 | 
138 | for an audio folder:
139 | ```bash
140 | python scripts/exp/fine_tune.py /path/to/audio/folder
141 | ```
142 | 
143 | for multiple files:
144 | ```bash
145 | python scripts/exp/fine_tune.py "/path/to/audio1.mp3 /path/to/audio2/ /path/to/audio3.wav"
146 | ```
147 | 
148 | This creates the configuration files for a fine-tuning training job. The save paths will be set to `runs/<name>/coarse` and `runs/<name>/c2f`.
149 | 
150 | launch the coarse job:
151 | ```bash
152 | python scripts/exp/train.py --args.load conf/generated/<name>/coarse.yml
153 | ```
154 | 
155 | this will save the coarse model to `runs/<name>/coarse/ckpt/best/`.
156 | 
157 | launch the c2f job:
158 | ```bash
159 | python scripts/exp/train.py --args.load conf/generated/<name>/c2f.yml
160 | ```
161 | 
162 | ## Resuming a training/fine-tuning job from checkpoint
163 | 
164 | To resume from checkpoint, use the `--resume` flag and set `--save_path` to the checkpoint you want to resume from:
165 | ```bash
166 | python scripts/exp/train.py --args.load conf/generated/steve/coarse.yml --save_path runs/steve/coarse --resume
167 | ```
168 | 
169 | ## Exporting your model
170 | 
171 | Once your model has been fine-tuned, you can export it to a HuggingFace model.
172 | 
173 | In order to use your model in `app.py`, you will need to export it to HuggingFace.
174 | 
175 | **NOTE**: In order to export, you will need a [huggingface account](https://huggingface.co/).
176 | 
177 | Now, log in to huggingface using the command line:
178 | ```bash
179 | huggingface-cli login
180 | ```
181 | 
182 | replace the contents of the file named `./DEFAULT_HF_MODEL_REPO` with your `<hf-username>/vampnet`. A model repo will be automatically created for you with `export.py`. The default is `hugggof/vampnet`.
183 | 
184 | for example, if my username is `hugggof`, I would run the following command:
185 | ```bash
186 | echo 'hugggof/vampnet' > ./DEFAULT_HF_MODEL_REPO
187 | ```
188 | 
189 | Now, run the following command to export your model (replace `<name>` with the name of your model):
190 | 
191 | ```bash
192 | python scripts/exp/export.py --name <name> --model latest
193 | ```
194 | 
195 | Once that's done, your model should appear in the list of available models in the gradio interface.
196 | Simply run `python app.py` and select your model from the dropdown list.
197 | 
198 | 
199 | # Unloop
200 | 
201 | Make sure you have Max installed on your laptop!
202 | 203 | **NOTE**: To run unloop (with a GPU-powered server), you will need to install the vampnet repo in both your local machine and your GPU server. 204 | 205 | ## start a vampnet gradio server 206 | 207 | First, **on your GPU server**, run the gradio server: 208 | ```bash 209 | python app.py --args.load conf/interface.yml --Interface.device cuda 210 | ``` 211 | This will run a vampnet gradio API on your GPU server. Copy the address. It will be something like `https://127.0.0.1:7860/`. 212 | 213 | **IMPORTANT** Make sure that this gradio port (by default `7860`) is forwarded to your local machine, where you have Max installed. 214 | 215 | ## start the unloop gradio client 216 | Now, **on your local machine**, run the unloop gradio client. 217 | ``` 218 | cd unloop 219 | pip install -r requirements.txt 220 | python client.py --vampnet_url https://127.0.0.1:7860/ # replace with your gradio server address 221 | ``` 222 | This will start a gradio client that connects to the gradio server running on your GPU server. 223 | 224 | ## start the unloop Max patch 225 | Now, open the unloop Max patch. It's located at `unloop/max/unloop.maxpat`. 226 | 227 | In the tape controls, check the heartbeat (`<3`) to make sure the connection to the local gradio client is working. 228 | 229 | have fun! 230 | 231 | # Token Telephone 232 | 233 | Instructions forthcoming, but the sauce is in `token_telephone/tt.py` 234 | 235 | ## A note on argbind 236 | This repository relies on [argbind](https://github.com/pseeth/argbind) to manage CLIs and config files. 237 | Config files are stored in the `conf/` folder. 238 | 239 | ### Take a look at the pretrained models 240 | All the pretrained models (trained by hugo) are stored here: https://huggingface.co/hugggof/vampnet 241 | 242 | ### Licensing for Pretrained Models: 243 | The weights for the models are licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). Likewise, any VampNet models fine-tuned on the pretrained models are also licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). 244 | 245 | Download the pretrained models from [this link](https://zenodo.org/record/8136629). Then, extract the models to the `models/` folder. 
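### A quick argbind example

As a companion to the note on argbind above: the scripts in this repo all follow the same pattern. Here's a minimal sketch of it (the function, flags, and values below are made up for illustration, not taken from the codebase):

```python
import argbind

@argbind.bind(without_prefix=True)
def main(lr: float = 1e-4, save_path: str = "runs/debug"):
    # every keyword argument doubles as a CLI flag and a config-file key
    print(lr, save_path)

if __name__ == "__main__":
    # flags mirror the signature: python my_script.py --lr 3e-4 --save_path runs/x
    # configs load the same way:  python my_script.py --args.load conf/my.yml
    args = argbind.parse_args()
    with argbind.scope(args):
        main()
```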
246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /vampnet/beats.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import warnings 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | from typing import Any 7 | from typing import List 8 | from typing import Tuple 9 | from typing import Union 10 | 11 | import librosa 12 | import torch 13 | import numpy as np 14 | from audiotools import AudioSignal 15 | 16 | 17 | logging.basicConfig(level=logging.INFO) 18 | 19 | ################### 20 | # beat sync utils # 21 | ################### 22 | 23 | AGGREGATOR_REGISTRY = { 24 | "mean": np.mean, 25 | "median": np.median, 26 | "max": np.max, 27 | "min": np.min, 28 | } 29 | 30 | 31 | def list_aggregators() -> list: 32 | return list(AGGREGATOR_REGISTRY.keys()) 33 | 34 | 35 | @dataclass 36 | class TimeSegment: 37 | start: float 38 | end: float 39 | 40 | @property 41 | def duration(self): 42 | return self.end - self.start 43 | 44 | def __str__(self) -> str: 45 | return f"{self.start} - {self.end}" 46 | 47 | def find_overlapping_segment( 48 | self, segments: List["TimeSegment"] 49 | ) -> Union["TimeSegment", None]: 50 | """Find the first segment that overlaps with this segment, or None if no segment overlaps""" 51 | for s in segments: 52 | if s.start <= self.start and s.end >= self.end: 53 | return s 54 | return None 55 | 56 | 57 | def mkdir(path: Union[Path, str]) -> Path: 58 | p = Path(path) 59 | p.mkdir(parents=True, exist_ok=True) 60 | return p 61 | 62 | 63 | 64 | ################### 65 | # beat data # 66 | ################### 67 | @dataclass 68 | class BeatSegment(TimeSegment): 69 | downbeat: bool = False # if there's a downbeat on the start_time 70 | 71 | 72 | class Beats: 73 | def __init__(self, beat_times, downbeat_times): 74 | if isinstance(beat_times, np.ndarray): 75 | beat_times = beat_times.tolist() 76 | if isinstance(downbeat_times, np.ndarray): 77 | downbeat_times = downbeat_times.tolist() 78 | self._beat_times = beat_times 79 | self._downbeat_times = downbeat_times 80 | self._use_downbeats = False 81 | 82 | def use_downbeats(self, use_downbeats: bool = True): 83 | """use downbeats instead of beats when calling beat_times""" 84 | self._use_downbeats = use_downbeats 85 | 86 | def beat_segments(self, signal: AudioSignal) -> List[BeatSegment]: 87 | """ 88 | segments a song into time segments corresponding to beats. 89 | the first segment starts at 0 and ends at the first beat time. 90 | the last segment starts at the last beat time and ends at the end of the song. 
91 | """ 92 | beat_times = self._beat_times.copy() 93 | downbeat_times = self._downbeat_times 94 | beat_times.insert(0, 0) 95 | beat_times.append(signal.signal_duration) 96 | 97 | downbeat_ids = np.intersect1d(beat_times, downbeat_times, return_indices=True)[ 98 | 1 99 | ] 100 | is_downbeat = [ 101 | True if i in downbeat_ids else False for i in range(len(beat_times)) 102 | ] 103 | segments = [ 104 | BeatSegment(start_time, end_time, downbeat) 105 | for start_time, end_time, downbeat in zip( 106 | beat_times[:-1], beat_times[1:], is_downbeat 107 | ) 108 | ] 109 | return segments 110 | 111 | def get_beats(self) -> np.ndarray: 112 | """returns an array of beat times, in seconds 113 | if downbeats is True, returns an array of downbeat times, in seconds 114 | """ 115 | return np.array( 116 | self._downbeat_times if self._use_downbeats else self._beat_times 117 | ) 118 | 119 | @property 120 | def beat_times(self) -> np.ndarray: 121 | """return beat times""" 122 | return np.array(self._beat_times) 123 | 124 | @property 125 | def downbeat_times(self) -> np.ndarray: 126 | """return downbeat times""" 127 | return np.array(self._downbeat_times) 128 | 129 | def beat_times_to_feature_frames( 130 | self, signal: AudioSignal, features: np.ndarray 131 | ) -> np.ndarray: 132 | """convert beat times to frames, given an array of time-varying features""" 133 | beat_times = self.get_beats() 134 | beat_frames = ( 135 | beat_times * signal.sample_rate / signal.signal_length * features.shape[-1] 136 | ).astype(np.int64) 137 | return beat_frames 138 | 139 | def sync_features( 140 | self, feature_frames: np.ndarray, features: np.ndarray, aggregate="median" 141 | ) -> np.ndarray: 142 | """sync features to beats""" 143 | if aggregate not in AGGREGATOR_REGISTRY: 144 | raise ValueError(f"unknown aggregation method {aggregate}") 145 | 146 | return librosa.util.sync( 147 | features, feature_frames, aggregate=AGGREGATOR_REGISTRY[aggregate] 148 | ) 149 | 150 | def to_json(self) -> dict: 151 | """return beats and downbeats as json""" 152 | return { 153 | "beats": self._beat_times, 154 | "downbeats": self._downbeat_times, 155 | "use_downbeats": self._use_downbeats, 156 | } 157 | 158 | @classmethod 159 | def from_dict(cls, data: dict): 160 | """load beats and downbeats from json""" 161 | inst = cls(data["beats"], data["downbeats"]) 162 | inst.use_downbeats(data["use_downbeats"]) 163 | return inst 164 | 165 | def save(self, output_dir: Path): 166 | """save beats and downbeats to json""" 167 | mkdir(output_dir) 168 | with open(output_dir / "beats.json", "w") as f: 169 | json.dump(self.to_json(), f) 170 | 171 | @classmethod 172 | def load(cls, input_dir: Path): 173 | """load beats and downbeats from json""" 174 | beats_file = Path(input_dir) / "beats.json" 175 | with open(beats_file, "r") as f: 176 | data = json.load(f) 177 | return cls.from_dict(data) 178 | 179 | 180 | ################### 181 | # beat tracking # 182 | ################### 183 | 184 | 185 | class BeatTracker: 186 | def extract_beats(self, signal: AudioSignal) -> Tuple[np.ndarray, np.ndarray]: 187 | """extract beats from an audio signal""" 188 | raise NotImplementedError 189 | 190 | def __call__(self, signal: AudioSignal) -> Beats: 191 | """extract beats from an audio signal 192 | NOTE: if the first beat (and/or downbeat) is detected within the first 100ms of the audio, 193 | it is discarded. This is to avoid empty bins with no beat synced features in the first beat. 
194 | Args: 195 | signal (AudioSignal): signal to beat track 196 | Returns: 197 | Tuple[np.ndarray, np.ndarray]: beats and downbeats 198 | """ 199 | beats, downbeats = self.extract_beats(signal) 200 | return Beats(beats, downbeats) 201 | 202 | 203 | class WaveBeat(BeatTracker): 204 | def __init__(self, ckpt_path: str = "checkpoints/wavebeat", device: str = "cpu"): 205 | from wavebeat.dstcn import dsTCNModel 206 | 207 | model = dsTCNModel.load_from_checkpoint(ckpt_path, map_location=torch.device(device)) 208 | model.eval() 209 | 210 | self.device = device 211 | self.model = model 212 | 213 | def extract_beats(self, signal: AudioSignal) -> Tuple[np.ndarray, np.ndarray]: 214 | """returns beat and downbeat times, in seconds""" 215 | # extract beats 216 | self.model.to('cuda' if torch.cuda.is_available() else 'cpu') 217 | beats, downbeats = self.model.predict_beats_from_array( 218 | audio=signal.audio_data.squeeze(0), 219 | sr=signal.sample_rate, 220 | use_gpu=torch.cuda.is_available(), 221 | ) 222 | 223 | return beats, downbeats 224 | 225 | 226 | class MadmomBeats(BeatTracker): 227 | def __init__(self): 228 | raise NotImplementedError 229 | 230 | def extract_beats(self, signal: AudioSignal) -> Tuple[np.ndarray, np.ndarray]: 231 | """returns beat and downbeat times, in seconds""" 232 | pass 233 | 234 | 235 | BEAT_TRACKER_REGISTRY = { 236 | "wavebeat": WaveBeat, 237 | "madmom": MadmomBeats, 238 | } 239 | 240 | 241 | def list_beat_trackers() -> list: 242 | return list(BEAT_TRACKER_REGISTRY.keys()) 243 | 244 | 245 | def load_beat_tracker(beat_tracker: str, **kwargs) -> BeatTracker: 246 | if beat_tracker not in BEAT_TRACKER_REGISTRY: 247 | raise ValueError( 248 | f"Unknown beat tracker {beat_tracker}. Available: {list_beat_trackers()}" 249 | ) 250 | 251 | return BEAT_TRACKER_REGISTRY[beat_tracker](**kwargs) -------------------------------------------------------------------------------- /unloop/client.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | import shutil 4 | import json 5 | 6 | import argbind 7 | import audiotools as at 8 | from gradio_client import Client, handle_file 9 | from pythonosc.osc_server import ThreadingOSCUDPServer 10 | from pythonosc.udp_client import SimpleUDPClient 11 | from pythonosc.dispatcher import Dispatcher 12 | import torch 13 | 14 | class Timer: 15 | 16 | def __init__(self): 17 | self.times = {} 18 | 19 | def tick(self, name: str): 20 | self.times[name] = time.time() 21 | 22 | def tock(self, name: str): 23 | toc = time.time() - self.times[name] 24 | print(f"{name} took {toc} seconds") 25 | return toc 26 | 27 | def __str__(self): 28 | return str(self.times) 29 | 30 | timer = Timer() 31 | 32 | DOWNLOADS_DIR = ".gradio" 33 | 34 | def clear_file(file): 35 | file = Path(file) 36 | if file.exists(): 37 | file.unlink() 38 | 39 | 40 | class OSCManager: 41 | 42 | def __init__( 43 | self, 44 | ip: str, 45 | s_port: str, 46 | r_port: str, 47 | process_fn: callable, 48 | # param_change_callback: callable = None 49 | ): 50 | self.ip = ip 51 | self.s_port = s_port 52 | self.r_port = r_port 53 | 54 | # register the process_fn 55 | self.process_fn = process_fn 56 | 57 | print(f"will send to {ip}:{s_port}") 58 | self.client = SimpleUDPClient(ip, s_port) 59 | 60 | 61 | def start_server(self,): 62 | dispatcher = Dispatcher() 63 | dispatcher.map("/process", self.process_fn) 64 | 65 | def send_heartbeat(_, *args): 66 | # print("Received heartbeat") 67 | self.client.send_message("/heartbeat", "pong") 68 | 
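        # OSC routing used by the Max patch: /process runs the full gradio
        # round-trip (process_fn), /heartbeat answers "pong" so the patch can
        # show connectivity, /cleanup deletes a temp file it is done with, and
        # anything else falls through to the default print handler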
69 | dispatcher.map("/heartbeat", lambda a, *r: send_heartbeat(a, *r)) 70 | 71 | dispatcher.map("/cleanup", lambda a, *r: clear_file(r[0])) 72 | 73 | dispatcher.set_default_handler(lambda a, *r: print(a, r)) 74 | 75 | server = ThreadingOSCUDPServer((self.ip, self.r_port), dispatcher) 76 | print(f"Serving on {server.server_address}") 77 | server.serve_forever() 78 | 79 | def error(self, msg: str): 80 | self.client.send_message("/error", msg) 81 | 82 | def log(self, msg: str): 83 | self.client.send_message("/log", msg) 84 | 85 | 86 | class GradioOSCClient: 87 | 88 | def __init__(self, 89 | ip: str, 90 | s_port: int, r_port: int, 91 | vampnet_url: str = None, # url for vampnet 92 | ): 93 | self.osc_manager = OSCManager( 94 | ip=ip, s_port=s_port, r_port=r_port, 95 | process_fn=self.process, 96 | ) 97 | 98 | self.clients = {} 99 | if vampnet_url is not None: 100 | self.clients["vampnet"] = Client(src=vampnet_url, download_files=DOWNLOADS_DIR) 101 | 102 | assert len(self.clients) > 0, "At least one client must be specified!" 103 | 104 | self.batch_size = 2# TODO: automatically get batch size from client. 105 | 106 | self.osc_manager.log("hello from gradio client!") 107 | 108 | self.inf_idx = 0 109 | 110 | 111 | def param_changed(self, param_name, new_value): 112 | print(f"Parameter {param_name} changed to {new_value}") 113 | 114 | def vampnet_process(self, address: str, *args): 115 | client = self.clients["vampnet"] 116 | 117 | # query id --- audiofile ---- model_choice --- periodic --- drop --- seed 118 | query_id = args[0] 119 | client_type = args[1] 120 | audio_path = Path(args[2]) 121 | model_choice = args[3] 122 | periodic_p = args[4] 123 | dropout = args[5] 124 | seed = args[6] 125 | looplength_ms = args[7] 126 | typical_filter = args[8] 127 | typical_mass = args[9] 128 | typical_min_tokens = args[10] 129 | upper_codebook_mask = args[11] 130 | onset_mask_width = args[12] 131 | sampling_steps = args[13] 132 | temperature = args[14] 133 | top_p = args[15] 134 | beat_mask_ms = args[16] 135 | num_feedback_steps = args[17] 136 | 137 | if not audio_path.exists(): 138 | print(f"File {audio_path} does not exist") 139 | self.osc_manager.error(f"File {audio_path} does not exist") 140 | return 141 | 142 | sig = at.AudioSignal(audio_path) 143 | sig.to_mono() 144 | sig.sample_rate = 48000 # HOT PATCH (FIXME IN MAX: sample rate is being forced to 48k) 145 | 146 | # grab the looplength only 147 | # TODO: although I added this, 148 | # the max patch is still configured to crop anything past the looplength off 149 | # so we'll have to change that in order to make an effect. 
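        # ms -> samples: end_sample = looplength_ms * sample_rate / 1000
        # (e.g. 2000 ms at 48 kHz -> 96000 samples); the tail past the loop
        # point is held aside as cut_wav and re-appended to each output below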
150 |         end_sample = int((looplength_ms * sig.sample_rate) / 1000)
151 | 
152 |         # grab the remainder of the waveform
153 |         num_cut_samples = sig.samples.shape[-1] - end_sample
154 |         cut_wav = sig.samples[..., -num_cut_samples:]
155 | 
156 |         sig.samples = sig.samples[..., :end_sample]
157 |         # write the file back
158 |         sig.write(audio_path)
159 | 
160 |         timer.tick("predict")
161 |         print(f"Processing {address} with args {args}")
162 |         # breakpoint()
163 |         job = client.submit(
164 |             input_audio=handle_file(audio_path),
165 |             sampletemp=temperature,
166 |             top_p=top_p,
167 |             periodic_p=periodic_p,
168 |             dropout=dropout,
169 |             stretch_factor=1,
170 |             onset_mask_width=onset_mask_width,
171 |             typical_filtering=bool(typical_filter),
172 |             typical_mass=typical_mass,
173 |             typical_min_tokens=typical_min_tokens,
174 |             seed=seed,
175 |             model_choice=model_choice,
176 |             n_mask_codebooks=upper_codebook_mask,
177 |             pitch_shift_amt=0,
178 |             sample_cutoff=1.0,
179 |             sampling_steps=sampling_steps,
180 |             beat_mask_ms=int(beat_mask_ms),
181 |             num_feedback_steps=num_feedback_steps,
182 |             api_name="/vamp_1"
183 |         )
184 | 
185 |         while not job.done():
186 |             time.sleep(0.1)
187 |             self.osc_manager.client.send_message("/progress", [query_id, str(job.status().code)])
188 | 
189 |         result = job.result()
190 |         # audio_file = result
191 |         # audio_files = [audio_file] * self.batch_size
192 |         audio_files = list(result[:self.batch_size])
193 |         # if each file is missing a .wav at the end, add it
194 |         first_audio = audio_files[0]
195 |         if not first_audio.endswith(".wav"):
196 |             for audio_file in set(audio_files):
197 |                 if not audio_file.endswith(".wav"):
198 |                     shutil.move(audio_file, f"{audio_file}.wav")
199 |             # rename every entry to match the files moved above
200 |             audio_files = [f"{audio}.wav" if not audio.endswith(".wav") else audio for audio in audio_files]
201 | 
202 |         for audio_file in audio_files:
203 |             # load the file, add the cut samples back
204 |             sig = at.AudioSignal(audio_file)
205 |             sig.resample(48000)
206 |             sig.samples = torch.cat([sig.samples, cut_wav], dim=-1)
207 |             sig.write(audio_file)
208 |         seed = result[-1]
209 | 
210 |         timer.tock("predict")
211 | 
212 |         # send a message that the process is done
213 |         self.osc_manager.log(f"query {query_id} has been processed")
214 |         self.osc_manager.client.send_message("/process-result", [query_id] + audio_files)
215 | 
216 | 
217 |     def process(self, address: str, *args):
218 |         query_id = args[0]
219 |         client_type = args[1]
220 |         audio_path = Path(args[2])
221 | 
222 |         if client_type == "vampnet":
223 |             self.vampnet_process(address, *args)
224 |             return
225 |         elif client_type == "sketch2sound":
226 |             self.process_s2s(address, *args)
227 |             return
228 |         else:
229 |             raise ValueError(f"Unknown client type {client_type}")
230 | 
231 | def gradio_main(
232 |     vampnet_url: str = None
233 | ):
234 |     system = GradioOSCClient(
235 |         vampnet_url=vampnet_url,
236 |         ip="127.0.0.1", s_port=8003, r_port=8001,
237 |     )
238 | 
239 |     system.osc_manager.start_server()
240 | 
241 | 
242 | if __name__ == "__main__":
243 |     try:
244 |         gradio_main = argbind.bind(gradio_main, without_prefix=True)
245 | 
246 |         args = argbind.parse_args()
247 |         with argbind.scope(args):
248 |             gradio_main()
249 | 
250 |     except Exception as e:
251 |         import shutil
252 |         shutil.rmtree(DOWNLOADS_DIR, ignore_errors=True)
253 |         raise e
-------------------------------------------------------------------------------- /vampnet/control.py: --------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from functools
import partial 3 | from typing import Optional 4 | 5 | from torch import nn 6 | 7 | import vampnet.dsp.signal as sn 8 | from vampnet.dsp.signal import Signal 9 | from vampnet.mask import random_along_time 10 | from torch import Tensor 11 | import torch 12 | 13 | 14 | class MedianFilterAugment(nn.Module): 15 | 16 | def __init__(self, 17 | kernel_size: int, 18 | train_min: int = 1, 19 | train_max: int = 20, 20 | ): 21 | super().__init__() 22 | self.kernel_size = kernel_size 23 | self.train_min = train_min 24 | self.train_max = train_max 25 | 26 | def forward(self, x: Tensor) -> Tensor: 27 | if self.training: 28 | sizes = torch.randint( 29 | self.train_min, 30 | self.train_max, 31 | size=(x.shape[0],) 32 | ) 33 | else: 34 | sizes = self.kernel_size 35 | # print(f"median filter sizes: {sizes}") 36 | return sn.median_filter_1d(x, sizes) 37 | 38 | class RMS(nn.Module): 39 | 40 | def __init__(self, 41 | hop_length, 42 | window_length=2048, 43 | n_quantize=None, 44 | sample_rate=44100, 45 | median_filter_size: Optional[int] = None, 46 | train_median_filter_min=1, 47 | train_median_filter_max=15, 48 | ): 49 | super().__init__() 50 | 51 | self.hop_length = hop_length 52 | self.window_length = window_length 53 | self.n_quantize = n_quantize 54 | self.sample_rate = sample_rate 55 | 56 | self.mf = MedianFilterAugment( 57 | kernel_size=median_filter_size, 58 | train_min=train_median_filter_min, 59 | train_max=train_median_filter_max 60 | ) if median_filter_size is not None else None 61 | 62 | @property 63 | def dim(self): 64 | return 1 65 | 66 | def extract(self, sig: Signal) -> Tensor: 67 | rmsd = sn.rms(sig, 68 | window_length=self.window_length, 69 | hop_length=self.hop_length, 70 | )[:, :, :-1] # TODO: cutting the last frame to match DAC tokens but why :'( 71 | nb, _, _ = rmsd.shape 72 | 73 | if self.n_quantize is not None: 74 | # standardize to 0-1 75 | rmsd = (rmsd - rmsd.min()) / (rmsd.max() - rmsd.min()) 76 | 77 | # quantize to 128 steps 78 | rmsd = torch.round(rmsd * self.n_quantize) 79 | rmsd = rmsd / self.n_quantize 80 | 81 | if self.mf is not None: 82 | rmsd = self.mf(rmsd) 83 | 84 | return rmsd 85 | 86 | 87 | 88 | class HarmonicChroma(nn.Module): 89 | 90 | def __init__(self, 91 | hop_length: int, window_length: int = 4096, 92 | n_chroma: int = 48, sample_rate: int = 44100, 93 | top_n: int = 0 94 | ): 95 | super().__init__() 96 | from torchaudio.prototype.transforms import ChromaScale 97 | self.hop_length = hop_length 98 | self.window_length = window_length 99 | self.n_chroma = n_chroma 100 | self.sample_rate = sample_rate 101 | self.top_n = top_n 102 | 103 | # HUGO: this representation, as is, 104 | # encodes timbre information in the chroma 105 | # which is not what we want!!! 106 | # would a median filter help perhaps? 
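        # ChromaScale folds the (window_length // 2 + 1) STFT bins onto
        # n_chroma pitch-class bins; with the default n_chroma=48, each of the
        # 12 semitones gets 4 chroma bins (quarter-tone resolution)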
107 |         self.chroma = ChromaScale(
108 |             sample_rate=self.sample_rate,
109 |             n_freqs=self.window_length // 2 + 1,
110 |             n_chroma=self.n_chroma,
111 |             octwidth=5.0,
112 |         )
113 | 
114 |     @property
115 |     def dim(self):
116 |         return self.n_chroma
117 | 
118 |     def extract(self, sig: Signal) -> Tensor:
119 |         from vampnet.dsp.hpss import hpss
120 |         self.chroma.to(sig.wav.device)
121 | 
122 |         # spectrogram
123 |         spec = sn.stft(sig,
124 |             window_length=self.window_length,
125 |             hop_length=self.hop_length
126 |         )
127 |         # magnitude
128 |         spec = torch.abs(spec)
129 | 
130 |         # hpss
131 |         spec = hpss(spec, kernel_size=51, hard=True)[0]
132 | 
133 |         # chroma
134 |         chroma = self.chroma(spec)
135 | 
136 |         # get the rms of this spec
137 |         rms_d = sn.rms_from_spec(
138 |             spec, window_length=self.window_length
139 |         )
140 | 
141 |         # convert the rms to db
142 |         rms_d = 10 * torch.log10(rms_d + 1e-7)
143 | 
144 |         # make a mask based on the rms < -40
145 |         mask = torch.where(rms_d < -40, torch.zeros_like(rms_d), torch.ones_like(rms_d))
146 | 
147 |         # zero out chroma values below 100 (empirical threshold; provenance unclear)
148 |         chroma = torch.where(chroma < 100, torch.zeros_like(chroma), chroma)
149 | 
150 |         # Get the top_n values and indices along the chroma dimension (-2)
151 |         if self.top_n > 0:
152 |             _, topk_indices = torch.topk(chroma, self.top_n, dim=-2)
153 | 
154 |             # Create a mask for the top_n values
155 |             topk_mask = torch.zeros_like(chroma).scatter_(-2, topk_indices, 1.0)
156 | 
157 |             # Retain only the top_n values
158 |             chroma = chroma * topk_mask
159 | 
160 |         # apply the mask
161 |         chroma = chroma * mask.unsqueeze(-2)
162 | 
163 |         # Apply softmax along dim=-2
164 |         if self.top_n > 0:
165 |             chroma = torch.nn.functional.softmax(chroma, dim=-2)
166 | 
167 |         # mask out any timesteps whose chroma have all equal values (all 0s before softmax)
168 |         # TODO: i did this with chatgpt, there's gotta be a better way
169 |         chroma_mean = chroma.mean(dim=-2, keepdim=True)
170 |         chroma_diff = torch.abs(chroma - chroma_mean)
171 |         equal_mask = torch.all(chroma_diff < 1e-6, dim=-2, keepdim=True)
172 | 
173 |         # Set chroma values to zero for timesteps with all equal values
174 |         chroma = torch.where(equal_mask, torch.zeros_like(chroma), chroma)
175 | 
176 | 
177 |         return chroma[:, 0, :, :-1]  # mono only :( FIX ME!
178 | 
179 | 
180 | # TODO: try harmonic mel?
181 | 
182 | CONTROLLERS = {
183 |     "rms": RMS,
184 |     "rmsq128": partial(RMS, n_quantize=128),
185 |     "rmsq16": partial(RMS, n_quantize=16),
186 |     "rms-median": partial(RMS, median_filter_size=5),
187 |     "rmsq16-median": partial(RMS, n_quantize=16, median_filter_size=3),
188 |     "hchroma": HarmonicChroma,
189 |     "hchroma-12c-top2": partial(HarmonicChroma, n_chroma=12, top_n=2),  # TODO: refactor me. If this works, this should just be named hchroma.
190 |     "hchroma-36c-top3": partial(HarmonicChroma, n_chroma=36, top_n=3)   # TODO: refactor me. If this works, this should just be named hchroma.
191 | }
192 | 
193 | class Sketch2SoundController(nn.Module):
194 | 
195 |     def __init__(
196 |         self,
197 |         ctrl_keys: list[str],
198 |         hop_length: int,
199 |         sample_rate: int,
200 |     ):
201 |         super().__init__()
202 | 
203 |         assert all([k in CONTROLLERS for k in ctrl_keys]), f"got an unsupported control key in {ctrl_keys}!\n supported: {CONTROLLERS.keys()}"
204 | 
205 |         self.hop_length = hop_length
206 |         self.ctrl_keys = ctrl_keys
207 |         self.sample_rate = sample_rate
208 | 
209 |         self.controllers = {
210 |             k: CONTROLLERS[k](hop_length=hop_length, sample_rate=sample_rate)
211 |             for k in self.ctrl_keys
212 |         }
213 | 
214 |     @property
215 |     def ctrl_dims(self) -> dict[str, int]:
216 |         return {
217 |             k: controller.dim for k, controller in self.controllers.items()
218 |         }
219 | 
220 |     def extract(self, sig: Signal) -> dict[str, Tensor]:
221 |         ctrls = {
222 |             k: controller.extract(sig) for k, controller in self.controllers.items()
223 |         }
224 |         return ctrls
225 | 
226 |     def random_mask(self, ctrls: dict[str, Tensor], r: float):
227 |         masks = {}
228 |         for k, ctrl in ctrls.items():
229 |             masks[k] = 1 - random_along_time(ctrl, r)
230 |         return masks
231 | 
232 |     def empty_mask(self, ctrls: dict[str, Tensor]):
233 |         first_key = next(iter(ctrls))
234 |         mask = torch.zeros_like(ctrls[first_key])
235 |         return {k: mask for k in ctrls}
236 | 
237 | 
238 | def test_controller():
239 |     controller = Sketch2SoundController(
240 |         ctrl_keys=["rms-median", "rms", "rmsq128"],
241 |         hop_length=512,
242 |         sample_rate=44100
243 |     )
244 |     controller.train()
245 |     # sig = sn.read_from_file("assets/example.wav")
246 |     # sig = sn.read_from_file("/Users/hugo/Downloads/DCS_SE_FullChoir_ScaleUpDown06_A2_DYN.wav")
247 |     # sig = sn.excerpt('/Users/hugo/Downloads/(guitarra - hugo mix) bubararu - tambor negro.wav', offset=0, duration=10)
248 |     sig = sn.read_from_file("assets/voice-prompt.wav")
249 |     ctrls = controller.extract(sig)
250 |     print(f"given sig of shape {sig.wav.shape}, extracted controls: {ctrls}")
251 | 
252 |     # print the whole thing
253 |     # torch.set_printoptions(profile="full")
254 |     # print(ctrls["hchroma"][0][0][:, 200:210])
255 | 
256 |     # imshow the chroma
257 |     import matplotlib.pyplot as plt
258 | 
259 |     # lay out the four subplots (spectrogram on top, three rms variants below)
260 |     fig, (ax1, ax2, ax3, ax4) = plt.subplots(
261 |         4, 1,
262 |         sharex=True,
263 |     )
264 | 
265 |     # Display the spectrogram on the top
266 |     ax1.imshow(sn.stft(sig, hop_length=512, window_length=2048).abs()[0][0].cpu().log().numpy(), aspect='auto', origin='lower')
267 |     # display rms on the bottom
268 |     ax2.plot(ctrls["rms-median"][0][0])
269 |     ax3.plot(ctrls["rms"][0][0])
270 |     ax4.plot(ctrls["rmsq128"][0][0])
271 | 
272 |     plt.tight_layout()  # Ensure proper spacing
273 |     plt.savefig("img.png")
274 | 
275 | 
276 | if __name__ == "__main__":
277 |     test_controller()
-------------------------------------------------------------------------------- /scripts/utils/visualize_embeddings.py: --------------------------------------------------------------------------------
1 | """
2 | TODO: train a linear probe
3 | usage:
4 |     python visualize_embeddings.py --args.load conf/interface.yml --Interface.device cuda --path_to_audio /path/to/audio/labels --output_dir /path/to/output
5 | """
6 | from pathlib import Path
7 | from typing import List
8 | 
9 | import audiotools as at
10 | from audiotools import AudioSignal
11 | import argbind
12 | import torch
13 | import numpy as np
14 | import zipfile
15 | import json
16 | 
17 | from vampnet.interface import Interface
18 | import tqdm
19 | 
20 | # bind the Interface to argbind
21 | Interface = argbind.bind(Interface)
22 | 
23 | DEBUG = False
24 | 
25 | 
26 | def smart_plotly_export(fig, save_path: Path):
27 |     img_format = save_path.suffix[1:]
28 |     if img_format == "html":
29 |         fig.write_html(save_path)
30 |     elif img_format == 'bytes':
31 |         return fig.to_image(format='png')
32 |     # TODO: come back and make this prettier
33 |     elif img_format == 'numpy':
34 |         import io
35 |         from PIL import Image
36 | 
37 |         def plotly_fig2array(fig):
38 |             # convert Plotly fig to an array
39 |             fig_bytes = fig.to_image(format="png", width=1200, height=700)
40 |             buf = io.BytesIO(fig_bytes)
41 |             img = Image.open(buf)
42 |             return np.asarray(img)
43 | 
44 |         return plotly_fig2array(fig)
45 |     elif img_format in ('jpeg', 'png', 'webp'):
46 |         fig.write_image(save_path)
47 |     else:
48 |         raise ValueError("invalid image format")
49 | 
50 | 
51 | def dim_reduce(annotated_embeddings, layer, output_dir, n_components=3, method="tsne"):
52 |     """
53 |     dimensionality reduction for visualization!
54 |     saves an html plotly figure to output_dir
55 |     parameters:
56 |         annotated_embeddings (list): AnnotatedEmbedding objects to reduce; each embedding has shape (layers, features)
57 |         layer (int): which model layer's embeddings to project
58 |         output_dir (Path): directory where the figure gets saved
59 |         n_components (int): number of projection dimensions (2 or 3)
60 |         method (str): umap, tsne, or pca
61 |     returns:
62 |         the exported figure (see smart_plotly_export)
63 |     """
64 |     import pandas as pd
65 |     import plotly.express as px
66 | 
67 |     fig_name = f"vampnet-embeddings-layer={layer}"
68 |     fig_title = f"{fig_name}_{method}"
69 |     save_path = (output_dir / fig_name).with_suffix(".html")
70 | 
71 |     if method == "umap":
72 |         from umap import UMAP
73 |         reducer = UMAP(n_components=n_components)
74 |     elif method == "tsne":
75 |         from sklearn.manifold import TSNE
76 | 
77 |         reducer = TSNE(n_components=n_components)
78 |     elif method == "pca":
79 |         from sklearn.decomposition import PCA
80 | 
81 |         reducer = PCA(n_components=n_components)
82 |     else:
83 |         raise ValueError(f"invalid method: {method}")
84 | 
85 |     labels = [emb.label for emb in annotated_embeddings]
86 |     names = [emb.filename for emb in annotated_embeddings]
87 |     embs = [emb.embedding for emb in annotated_embeddings]
88 |     embs_at_layer = np.stack(embs)[:, layer, :]
89 |     projs = reducer.fit_transform(embs_at_layer)
90 | 
91 |     df = pd.DataFrame(
92 |         {
93 |             "label": labels,
94 |             "name": names,
95 |             "x": projs[:, 0],
96 |             "y": projs[:, 1],
97 |         }
98 |     )
99 |     if n_components == 2:
100 |         fig = px.scatter(
101 |             df, x="x", y="y", color="label", hover_name="name", title=fig_title,
102 |         )
103 | 
104 |     elif n_components == 3:
105 |         df['z'] = projs[:, 2]
106 |         fig = px.scatter_3d(
107 |             df, x="x", y="y", z="z", color="label", hover_name="name", title=fig_title
108 |         )
109 |     else:
110 |         raise ValueError(f"can't plot {n_components} components")
111 | 
112 |     fig.update_traces(
113 |         marker=dict(size=6, line=dict(width=1, color="DarkSlateGrey")),
114 |         selector=dict(mode="markers"),
115 |     )
116 | 
117 |     return smart_plotly_export(fig, save_path)
118 | 
119 | 
120 | 
121 | # per JukeMIR, we want the embeddings from the middle layer?
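# note on shapes: vampnet(..., return_activations=True) returns per-layer
# activations shaped [n_layers, batch, time, dim]; vampnet_embed below
# mean-pools over time to leave one [n_layers, dim] matrix per clip, and
# dim_reduce() above slices out a single layer before projecting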
122 | def vampnet_embed(sig: AudioSignal, interface: Interface, layer=10): 123 | with torch.inference_mode(): 124 | # preprocess the signal 125 | sig = interface.preprocess(sig) 126 | 127 | # get the coarse vampnet model 128 | vampnet = interface.coarse 129 | 130 | # get the tokens 131 | z = interface.encode(sig)[:, :vampnet.n_codebooks, :] 132 | z_latents = vampnet.embedding.from_codes(z, interface.codec) 133 | 134 | # do a forward pass through the model, get the embeddings 135 | _z, embeddings = vampnet(z_latents, return_activations=True) 136 | # print(f"got embeddings with shape {embeddings.shape}") 137 | # [layer, batch, time, n_dims] 138 | # [20, 1, 600ish, 768] 139 | 140 | 141 | # squeeze batch dim (1 bc layer should be dim 0) 142 | assert embeddings.shape[1] == 1, f"expected batch dim to be 1, got {embeddings.shape[0]}" 143 | embeddings = embeddings.squeeze(1) 144 | 145 | num_layers = embeddings.shape[0] 146 | assert layer < num_layers, f"layer {layer} is out of bounds for model with {num_layers} layers" 147 | 148 | # do meanpooling over the time dimension 149 | embeddings = embeddings.mean(dim=-2) 150 | # [20, 768] 151 | 152 | # return the embeddings 153 | return embeddings 154 | 155 | from dataclasses import dataclass, fields 156 | @dataclass 157 | class AnnotatedEmbedding: 158 | label: str 159 | filename: str 160 | embedding: np.ndarray 161 | 162 | def save(self, path): 163 | """Save the Embedding object to a given path as a zip file.""" 164 | with zipfile.ZipFile(path, 'w') as archive: 165 | 166 | # Save numpy array 167 | with archive.open('embedding.npy', 'w') as f: 168 | np.save(f, self.embedding) 169 | 170 | # Save non-numpy data as json 171 | non_numpy_data = {f.name: getattr(self, f.name) for f in fields(self) if f.name != 'embedding'} 172 | with archive.open('data.json', 'w') as f: 173 | f.write(json.dumps(non_numpy_data).encode('utf-8')) 174 | 175 | @classmethod 176 | def load(cls, path): 177 | """Load the Embedding object from a given zip path.""" 178 | with zipfile.ZipFile(path, 'r') as archive: 179 | 180 | # Load numpy array 181 | with archive.open('embedding.npy') as f: 182 | embedding = np.load(f) 183 | 184 | # Load non-numpy data from json 185 | with archive.open('data.json') as f: 186 | data = json.loads(f.read().decode('utf-8')) 187 | 188 | return cls(embedding=embedding, **data) 189 | 190 | 191 | @argbind.bind(without_prefix=True) 192 | def main( 193 | path_to_audio: str = None, 194 | cache_dir: str = "./.emb_cache", 195 | output_dir: str = "./vampnet_embeddings", 196 | layers: List[int] = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19], 197 | method: str = "tsne", 198 | n_components: int = 2, 199 | ): 200 | path_to_audio = Path(path_to_audio) 201 | assert path_to_audio.exists(), f"{path_to_audio} does not exist" 202 | 203 | cache_dir = Path(cache_dir) 204 | output_dir = Path(output_dir) 205 | output_dir.mkdir(exist_ok=True, parents=True) 206 | 207 | # load our interface 208 | # argbind will automatically load the default config, 209 | interface = Interface() 210 | 211 | # we expect path_to_audio to consist of a folder for each label, so let's get the list of labels 212 | labels = [Path(x).name for x in path_to_audio.iterdir() if x.is_dir()] 213 | print(f"Found {len(labels)} labels") 214 | print(f"labels: {labels}") 215 | 216 | # collect audio files, labels, and embeddings 217 | annotated_embeddings = [] 218 | for label in labels: 219 | audio_files = list(at.util.find_audio(path_to_audio / label)) 220 | print(f"Found {len(audio_files)} audio files for label {label}") 221 | 
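        # embeddings are cached one zip per (label, file) pair (embedding.npy
        # plus data.json, written by AnnotatedEmbedding.save above), so re-runs
        # skip the expensive forward pass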
222 |         for audio_file in tqdm.tqdm(audio_files, desc=f"embedding label {label}"):
223 |             # check if we have a cached embedding for this file
224 |             cached_path = cache_dir / f"{label}_{audio_file.stem}.emb"
225 |             if cached_path.exists():
226 |                 # if so, load it
227 |                 if DEBUG:
228 |                     print(f"loading cached embedding for {cached_path.stem}")
229 |                 embedding = AnnotatedEmbedding.load(cached_path)
230 |             else:
231 |                 try:
232 |                     sig = AudioSignal(audio_file)
233 |                 except Exception as e:
234 |                     print(f"failed to load {audio_file.name} with error {e}")
235 |                     print(f"skipping {audio_file.name}")
236 |                     continue
237 | 
238 |                 # gets the embedding
239 |                 emb = vampnet_embed(sig, interface).cpu().numpy()
240 | 
241 |                 # create an embedding we can save/load
242 |                 embedding = AnnotatedEmbedding(
243 |                     label=label, filename=audio_file.name, embedding=emb
244 |                 )
245 | 
246 |                 # cache the embeddings
247 |                 cached_path.parent.mkdir(exist_ok=True, parents=True)
248 |                 embedding.save(cached_path)
249 |             annotated_embeddings.append(embedding)
250 | 
251 |     # now, let's do a dim reduction on the embeddings and visualize them.
252 |     for layer in tqdm.tqdm(layers, desc="dim reduction"):
253 |         dim_reduce(
254 |             annotated_embeddings,
255 |             layer,
256 |             output_dir=output_dir,
257 |             n_components=n_components,
258 |             method=method,
259 |         )
260 | 
261 | 
262 | if __name__ == "__main__":
263 |     args = argbind.parse_args()
264 |     with argbind.scope(args):
265 |         main()
266 | 
-------------------------------------------------------------------------------- /scripts/utils/gtzan_embeddings.py: --------------------------------------------------------------------------------
1 | """
2 | TODO: train a linear probe
3 | usage:
4 |     python gtzan_embeddings.py --args.load conf/interface.yml --Interface.device cuda --path_to_gtzan /path/to/gtzan/genres_original --output_dir /path/to/output
5 | """
6 | from pathlib import Path
7 | from typing import List
8 | 
9 | import audiotools as at
10 | from audiotools import AudioSignal
11 | import argbind
12 | import torch
13 | import numpy as np
14 | import zipfile
15 | import json
16 | 
17 | from vampnet.interface import Interface
18 | import tqdm
19 | 
20 | # bind the Interface to argbind
21 | Interface = argbind.bind(Interface)
22 | 
23 | DEBUG = False
24 | 
25 | def smart_plotly_export(fig, save_path):
26 |     img_format = save_path.split('.')[-1]
27 |     if img_format == 'html':
28 |         fig.write_html(save_path)
29 |     elif img_format == 'bytes':
30 |         return fig.to_image(format='png')
31 |     #TODO: come back and make this prettier
32 |     elif img_format == 'numpy':
33 |         import io
34 |         from PIL import Image
35 | 
36 |         def plotly_fig2array(fig):
37 |             # convert Plotly fig to an array
38 |             fig_bytes = fig.to_image(format="png", width=1200, height=700)
39 |             buf = io.BytesIO(fig_bytes)
40 |             img = Image.open(buf)
41 |             return np.asarray(img)
42 | 
43 |         return plotly_fig2array(fig)
44 |     elif img_format in ('jpeg', 'png', 'webp'):
45 |         fig.write_image(save_path)
46 |     else:
47 |         raise ValueError("invalid image format")
48 | 
49 | def dim_reduce(emb, labels, save_path, n_components=3, method='tsne', title=''):
50 |     """
51 |     dimensionality reduction for visualization!
52 |     saves an html plotly figure to save_path
53 |     parameters:
54 |         emb (np.ndarray): the samples to be reduced with shape (samples, features)
55 |         labels (list): list of labels for embedding
56 |         save_path (str): path where the figure will be saved (format inferred from the extension)
57 |         method (str): umap, tsne, or pca
58 |         title (str): title for the figure
59 |     returns:
60 |         the return value of smart_plotly_export (None for html)
61 |     """
62 |     import pandas as pd
63 |     import plotly.express as px
64 |     if method == 'umap':
65 |         from umap import UMAP
66 |         reducer = UMAP(n_components=n_components)
67 |     elif method == 'tsne':
68 |         from sklearn.manifold import TSNE
69 |         reducer = TSNE(n_components=n_components)
70 |     elif method == 'pca':
71 |         from sklearn.decomposition import PCA
72 |         reducer = PCA(n_components=n_components)
73 |     else:
74 |         raise ValueError(f"invalid method: {method}")
75 | 
76 |     proj = reducer.fit_transform(emb)
77 | 
78 |     if n_components == 2:
79 |         df = pd.DataFrame(dict(
80 |             x=proj[:, 0],
81 |             y=proj[:, 1],
82 |             instrument=labels
83 |         ))
84 |         fig = px.scatter(df, x='x', y='y', color='instrument',
85 |                          title=title+f"_{method}")
86 | 
87 |     elif n_components == 3:
88 |         df = pd.DataFrame(dict(
89 |             x=proj[:, 0],
90 |             y=proj[:, 1],
91 |             z=proj[:, 2],
92 |             instrument=labels
93 |         ))
94 |         fig = px.scatter_3d(df, x='x', y='y', z='z',
95 |                             color='instrument',
96 |                             title=title)
97 |     else:
98 |         raise ValueError("can't plot more than 3 components")
99 | 
100 |     fig.update_traces(marker=dict(size=6,
101 |                                   line=dict(width=1,
102 |                                             color='DarkSlateGrey')),
103 |                       selector=dict(mode='markers'))
104 | 
105 |     return smart_plotly_export(fig, save_path)
106 | 
107 | 
108 | 
109 | # per JukeMIR, we want the embeddings from the middle layer?
110 | def vampnet_embed(sig: AudioSignal, interface: Interface, layer=10):
111 |     with torch.inference_mode():
112 |         # preprocess the signal
113 |         sig = interface.preprocess(sig)
114 | 
115 |         # get the coarse vampnet model
116 |         vampnet = interface.coarse
117 | 
118 |         # get the tokens
119 |         z = interface.encode(sig)[:, :vampnet.n_codebooks, :]
120 |         z_latents = vampnet.embedding.from_codes(z, interface.codec)
121 | 
122 |         # do a forward pass through the model, get the embeddings
123 |         _z, embeddings = vampnet(z_latents, return_activations=True)
124 |         # print(f"got embeddings with shape {embeddings.shape}")
125 |         # [layer, batch, time, n_dims]
126 |         # [20, 1, 600ish, 768]
127 | 
128 | 
129 |         # squeeze batch dim (1 bc layer should be dim 0)
130 |         assert embeddings.shape[1] == 1, f"expected batch dim to be 1, got {embeddings.shape[1]}"
131 |         embeddings = embeddings.squeeze(1)
132 | 
133 |         num_layers = embeddings.shape[0]
134 |         assert layer < num_layers, f"layer {layer} is out of bounds for model with {num_layers} layers"
135 | 
136 |         # do meanpooling over the time dimension
137 |         embeddings = embeddings.mean(dim=-2)
138 |         # [20, 768]
139 | 
140 |         # return the embeddings
141 |         return embeddings
142 | 
143 | from dataclasses import dataclass, fields
144 | @dataclass
145 | class Embedding:
146 |     genre: str
147 |     filename: str
148 |     embedding: np.ndarray
149 | 
150 |     def save(self, path):
151 |         """Save the Embedding object to a given path as a zip file."""
152 |         with zipfile.ZipFile(path, 'w') as archive:
153 | 
154 |             # Save numpy array
155 |             with archive.open('embedding.npy', 'w') as f:
156 |                 np.save(f, self.embedding)
157 | 
158 |             # Save non-numpy data as json
159 |             non_numpy_data = {f.name: getattr(self, f.name) for f in fields(self) if f.name != 'embedding'}
160 |             with archive.open('data.json', 'w') as f:
161 | 
f.write(json.dumps(non_numpy_data).encode('utf-8')) 162 | 163 | @classmethod 164 | def load(cls, path): 165 | """Load the Embedding object from a given zip path.""" 166 | with zipfile.ZipFile(path, 'r') as archive: 167 | 168 | # Load numpy array 169 | with archive.open('embedding.npy') as f: 170 | embedding = np.load(f) 171 | 172 | # Load non-numpy data from json 173 | with archive.open('data.json') as f: 174 | data = json.loads(f.read().decode('utf-8')) 175 | 176 | return cls(embedding=embedding, **data) 177 | 178 | 179 | @argbind.bind(without_prefix=True) 180 | def main( 181 | path_to_gtzan: str = None, 182 | cache_dir: str = "./.gtzan_emb_cache", 183 | output_dir: str = "./gtzan_vampnet_embeddings", 184 | layers: List[int] = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19] 185 | ): 186 | path_to_gtzan = Path(path_to_gtzan) 187 | assert path_to_gtzan.exists(), f"{path_to_gtzan} does not exist" 188 | 189 | cache_dir = Path(cache_dir) 190 | output_dir = Path(output_dir) 191 | output_dir.mkdir(exist_ok=True, parents=True) 192 | 193 | # load our interface 194 | # argbind will automatically load the default config, 195 | interface = Interface() 196 | 197 | # gtzan should have a folder for each genre, so let's get the list of genres 198 | genres = [Path(x).name for x in path_to_gtzan.iterdir() if x.is_dir()] 199 | print(f"Found {len(genres)} genres") 200 | print(f"genres: {genres}") 201 | 202 | # collect audio files, genres, and embeddings 203 | data = [] 204 | for genre in genres: 205 | audio_files = list(at.util.find_audio(path_to_gtzan / genre)) 206 | print(f"Found {len(audio_files)} audio files for genre {genre}") 207 | 208 | for audio_file in tqdm.tqdm(audio_files, desc=f"embedding genre {genre}"): 209 | # check if we have a cached embedding for this file 210 | cached_path = (cache_dir / f"{genre}_{audio_file.stem}.emb") 211 | if cached_path.exists(): 212 | # if so, load it 213 | if DEBUG: 214 | print(f"loading cached embedding for {cached_path.stem}") 215 | embedding = Embedding.load(cached_path) 216 | else: 217 | try: 218 | sig = AudioSignal(audio_file) 219 | except Exception as e: 220 | print(f"failed to load {audio_file.name} with error {e}") 221 | print(f"skipping {audio_file.name}") 222 | continue 223 | 224 | # gets the embedding 225 | emb = vampnet_embed(sig, interface).cpu().numpy() 226 | 227 | # create an embedding we can save/load 228 | embedding = Embedding( 229 | genre=genre, 230 | filename=audio_file.name, 231 | embedding=emb 232 | ) 233 | 234 | # cache the embeddings 235 | cached_path.parent.mkdir(exist_ok=True, parents=True) 236 | embedding.save(cached_path) 237 | data.append(embedding) 238 | 239 | # now, let's do a dim reduction on the embeddings 240 | # and visualize them. 
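    # (shape note: np.stack below yields (n_files, n_layers, n_dims); each plot
    #  slices out one layer, so a separate t-SNE projection is fit per layer.)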
241 | 242 | # collect a list of embeddings and labels 243 | embeddings = [d.embedding for d in data] 244 | labels = [d.genre for d in data] 245 | 246 | # convert the embeddings to a numpy array 247 | embeddings = np.stack(embeddings) 248 | 249 | # do dimensionality reduction for each layer we're given 250 | for layer in tqdm.tqdm(layers, desc="dim reduction"): 251 | dim_reduce( 252 | embeddings[:, layer, :], labels, 253 | save_path=str(output_dir / f'vampnet-gtzan-layer={layer}.html'), 254 | n_components=2, method='tsne', 255 | title=f'vampnet-gtzan-layer={layer}' 256 | ) 257 | 258 | 259 | 260 | 261 | if __name__ == "__main__": 262 | args = argbind.parse_args() 263 | with argbind.scope(args): 264 | main() -------------------------------------------------------------------------------- /vampnet/newmask.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | 5 | from .util import scalar_to_batch_tensor 6 | 7 | def _gamma(r): 8 | return (r * torch.pi / 2).cos().clamp(1e-10, 1.0) 9 | 10 | def _invgamma(y): 11 | if not torch.is_tensor(y): 12 | y = torch.tensor(y)[None] 13 | return 2 * y.acos() / torch.pi 14 | 15 | def full_mask(x: torch.Tensor): 16 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 17 | return torch.ones_like(x).int() 18 | 19 | def empty_mask(x: torch.Tensor): 20 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 21 | return torch.zeros_like(x).int() 22 | 23 | def apply_mask( 24 | x: torch.Tensor, 25 | mask: torch.Tensor, 26 | mask_token: int 27 | ): 28 | assert mask.ndim == 3, f"mask must be (batch, n_codebooks, seq), but got {mask.ndim}" 29 | assert mask.shape == x.shape, f"mask must be same shape as x, but got {mask.shape} and {x.shape}" 30 | assert mask.dtype == torch.int, f"mask must be int dtype, but got {mask.dtype}" 31 | assert ~torch.any(mask > 1), "mask must be binary" 32 | assert ~torch.any(mask < 0), "mask must be binary" 33 | mask = mask.int() 34 | 35 | fill_x = torch.full_like(x, mask_token) 36 | x = x * (1 - mask) + fill_x * mask 37 | 38 | return x 39 | 40 | def random( 41 | x: torch.Tensor, 42 | r: torch.Tensor 43 | ): 44 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 45 | if not isinstance(r, torch.Tensor): 46 | r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device) 47 | 48 | r = _gamma(r)[:, None, None] 49 | probs = torch.ones_like(x) * r 50 | 51 | mask = torch.bernoulli(probs) 52 | mask = mask.round().int() 53 | 54 | return mask, torch.zeros_like(mask).bool() 55 | 56 | def random_along_time(x: torch.Tensor, r: torch.Tensor): 57 | assert x.ndim == 3, "x must be (batch, channel, seq)" 58 | if not isinstance(r, torch.Tensor): 59 | r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device) 60 | 61 | x = x[:, 0, :] 62 | r = _gamma(r)[:, None] 63 | probs = torch.ones_like(x) * r 64 | 65 | mask = torch.bernoulli(probs) 66 | mask = mask.round().int() 67 | 68 | return mask 69 | 70 | 71 | def stemgen_random(x: torch.Tensor, r: torch.Tensor): 72 | assert x.ndim == 3, "x must be (batch, n_codebooks, seq)" 73 | if not isinstance(r, torch.Tensor): 74 | r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device) 75 | 76 | # Assuming x is your input tensor and r is the probability for the Bernoulli distribution 77 | nb, nc, nt = x.shape 78 | 79 | # Randomly sample a codebook level to infer for each item in the batch 80 | c = torch.randint(0, nc, (nb,)).to(x.device) 81 | 82 | # Create a mask tensor of the same shape as x, initially filled with ones 83 | mask = 
torch.ones_like(x).long().to(x.device)
84 |     ignore_indices_mask = torch.zeros_like(x).long().to(x.device)
85 | 
86 |     # Iterate over each item in the batch
87 |     for i in range(nb):
88 |         # Create the Bernoulli mask for the sampled level
89 |         level_mask = torch.bernoulli(torch.ones(nt).to(x.device) * r[i]).long()
90 | 
91 |         # Apply the mask to the sampled level
92 |         mask[i, c[i]] = level_mask
93 | 
94 |         # All levels below the sampled level are unmasked (zeros)
95 |         mask[i, :c[i]] = 0
96 |         ignore_indices_mask[i, :c[i]] = 1
97 | 
98 |         # All levels above the sampled level are masked (ones)
99 |         mask[i, c[i]+1:] = 1
100 |         ignore_indices_mask[i, c[i]+1:] = 1
101 | 
102 |     # save a debug mask to np txt
103 |     # import numpy as np
104 |     # np.savetxt("mask.txt", mask[0].cpu().numpy(), fmt="%d")
105 |     # np.savetxt("ignore_indices_mask.txt", ignore_indices_mask[0].cpu().numpy(), fmt="%d")
106 | 
107 |     return mask.int(), ignore_indices_mask.bool()
108 | 
109 | 
110 | def hugo_random(x: torch.Tensor, r: torch.Tensor):
111 |     assert x.ndim == 3, "x must be (batch, n_codebooks, seq)"
112 |     if not isinstance(r, torch.Tensor):
113 |         r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device).float()
114 | 
115 |     r = _gamma(r)[:, None, None]
116 | 
117 |     nb, nc, nt = x.shape
118 | 
119 |     probs = torch.ones_like(x) * r
120 |     mask = torch.bernoulli(probs)
121 |     # alternatively, the mask level could be the cumsum of the mask
122 |     mask = mask.round().long().to(x.device)
123 |     mask_levels = nc - mask.sum(dim=1) - 1
124 | 
125 |     # create a new mask, where all levels below the mask level are masked
126 |     # shape (nb, nc, nt) where new_mask[i, CB:, t] = 1, CB = mask_level[i, t]
127 |     # mask = mask_levels[:, :, None] > torch.arange(nc)[None, None, :]
128 |     mask = (mask_levels[:, None, :] < torch.arange(nc, device=x.device)[None, :, None]).long()
129 | 
130 |     ignore_levels = mask_levels + 1
131 |     ignore_indices_mask = (ignore_levels[:, None, :] < torch.arange(nc, device=x.device)[None, :, None]).long()
132 | 
133 |     # for _b in range(nb):
134 |     #     for _t in range(nt):
135 |     #         for _c in range(nc):
136 |     #             if mask[_b, _c, _t] == 1:
137 |     #                 mask[_b, _c:, _t] = 1
138 |     #                 ignore_indices_mask[_b, _c + 1:, _t] = 1
139 |     #                 break
140 | 
141 |     return mask.long(), ignore_indices_mask.bool()
142 | 
143 | 
144 | def better_cond_random_but_not_working(x: torch.Tensor, r: torch.Tensor):
145 |     assert x.ndim == 3, "x must be (batch, n_codebooks, seq)"
146 |     if not isinstance(r, torch.Tensor):
147 |         r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device).float()
148 | 
149 |     r = _gamma(r)[:, None, None]
150 | 
151 |     nb, nc, nt = x.shape
152 | 
153 |     probs = torch.ones_like(x) * r
154 |     mask = torch.bernoulli(probs)
155 | 
156 |     mask = mask.round().long().to(x.device)
157 | 
158 |     # there cannot be anything unmasked if there's a masked token
159 |     # in the same timestep and below it
160 |     for i in range(nb):
161 |         for j in range(nc):
162 |             for t in range(nt):
163 |                 if mask[i, j, t] == 1:
164 |                     mask[i, j:, t] = 1
165 |                     break
166 | 
167 |     # an ignore indices mask, since we can truly only predict one token
168 |     # per timestep
169 |     ignore_indices = torch.zeros_like(x)
170 |     for i in range(nb):
171 |         for j in range(nc):
172 |             for t in range(nt):
173 |                 if mask[i, j, t] == 1:
174 |                     ignore_indices[i, j, t+1:] = 1
175 |                     break
176 |     return mask.int(), ignore_indices
177 | 
178 | 
179 | @torch.jit.script_if_tracing
180 | def linear_random(
181 |     x: torch.Tensor,
182 |     r: torch.Tensor,
183 | ):
184 |     assert x.ndim == 3, "x must be (batch, n_codebooks, seq)"
185 |     if not
isinstance(r, torch.Tensor):
186 |         r = scalar_to_batch_tensor(r, x.shape[0]).to(x.device).float()
187 |     r = r[:, None, None]
188 | 
189 |     probs = torch.ones_like(x).to(x.device).float()
190 |     # expand to batch and codebook dims
191 |     probs = probs.expand(x.shape[0], x.shape[1], -1)
192 |     probs = probs * r
193 | 
194 |     mask = torch.bernoulli(probs)
195 |     mask = mask.round().int()
196 | 
197 |     return mask
198 | 
199 | @torch.jit.script_if_tracing
200 | def inpaint(x: torch.Tensor, n_prefix: int, n_suffix: int,):
201 |     assert n_prefix is not None
202 |     assert n_suffix is not None
203 | 
204 |     mask = full_mask(x)
205 | 
206 |     # if we have a prefix or suffix, set their mask prob to 0
207 |     if n_prefix > 0:
208 |         if not isinstance(n_prefix, torch.Tensor):
209 |             n_prefix = scalar_to_batch_tensor(n_prefix, x.shape[0]).to(x.device)
210 |         for i, n in enumerate(n_prefix):
211 |             if n > 0:
212 |                 mask[i, :, :n] = 0.0
213 |     if n_suffix > 0:
214 |         if not isinstance(n_suffix, torch.Tensor):
215 |             n_suffix = scalar_to_batch_tensor(n_suffix, x.shape[0]).to(x.device)
216 |         for i, n in enumerate(n_suffix):
217 |             if n > 0:
218 |                 mask[i, :, -n:] = 0.0
219 |     return mask
220 | 
221 | @torch.jit.script_if_tracing
222 | def periodic_mask(x: torch.Tensor, period: int,
223 |                   width: int = 1, random_roll: bool = False,):
224 |     mask = full_mask(x)
225 |     if period == 0:
226 |         return full_mask(x)
227 | 
228 |     if not isinstance(period, torch.Tensor):
229 |         period = scalar_to_batch_tensor(period, x.shape[0])
230 |     if period.ndim == 0:
231 |         period = period[None]
232 | 
233 |     for i, factor in enumerate(period):
234 |         if factor == 0:
235 |             continue
236 |         for j in range(mask.shape[-1]):
237 |             if j % factor == 0:
238 |                 # figure out how wide the mask should be
239 |                 j_start = max(0, j - width // 2 )
240 |                 j_end = min(mask.shape[-1] - 1, j + width // 2 ) + 1
241 |                 # bernoulli with p=1 always unmasks these positions (the asserts pin that down)
242 |                 j_mask = torch.bernoulli(torch.ones(j_end - j_start))
243 |                 assert torch.all(j_mask == 1)
244 |                 j_fill = torch.ones_like(j_mask) * (1 - j_mask)
245 |                 assert torch.all(j_fill == 0)
246 |                 # fill
247 |                 mask[i, :, j_start:j_end] = j_fill
248 | 
249 |     return mask
250 | 
251 | def codebook_unmask(
252 |     mask: torch.Tensor,
253 |     n_conditioning_codebooks: int
254 | ):
255 |     if n_conditioning_codebooks is None:
256 |         return mask
257 |     # if we have any conditioning codebooks, set their mask to 0
258 |     mask = mask.clone()
259 |     mask[:, :n_conditioning_codebooks, :] = 0
260 |     return mask
261 | 
262 | def codebook_mask(mask: torch.Tensor, val1: int, val2: int = None):
263 |     mask = mask.clone()
264 |     mask[:, val1:, :] = 1
265 |     # val2 = val2 or val1
266 |     # vs = torch.linspace(val1, val2, mask.shape[1])
267 |     # for t, v in enumerate(vs):
268 |     #     v = int(v)
269 |     #     mask[:, v:, t] = 1
270 | 
271 |     return mask
272 | 
273 | @torch.jit.script_if_tracing
274 | def mask_and(
275 |     mask1: torch.Tensor,
276 |     mask2: torch.Tensor
277 | ):
278 |     assert mask1.shape == mask2.shape, "masks must be same shape"
279 |     return torch.min(mask1, mask2)
280 | 
281 | def drop_ones(mask: torch.Tensor, p: float):
282 |     oldshp = mask.shape
283 |     mask = mask.view(-1)
284 | 
285 |     # find ones idxs
286 |     ones_idxs = torch.where(mask == 1)[0]
287 |     # shuffle idxs
288 |     ones_idxs_idxs = torch.randperm(len(ones_idxs))
289 |     ones_idxs = ones_idxs[ones_idxs_idxs]
290 |     # drop p% of ones
291 |     ones_idxs = ones_idxs[:int(len(ones_idxs) * p)]
292 |     # set those idxs to 0
293 |     mask[ones_idxs] = 0
294 | 
295 |     mask = mask.view(oldshp)
296 |     return mask
297 | 
298 | 
299 | def mask_or(
300 | 
    mask1: torch.Tensor,
301 |     mask2: torch.Tensor
302 | ):
303 |     assert mask1.shape == mask2.shape, f"masks must be same shape, but got {mask1.shape} and {mask2.shape}"
304 |     assert mask1.max() <= 1, "mask1 must be binary"
305 |     assert mask2.max() <= 1, "mask2 must be binary"
306 |     assert mask1.min() >= 0, "mask1 must be binary"
307 |     assert mask2.min() >= 0, "mask2 must be binary"
308 |     return (mask1 + mask2).clamp(0, 1)
309 | 
310 | def time_stretch_mask(
311 |     x: torch.Tensor,
312 |     stretch_factor: int,
313 | ):
314 |     assert stretch_factor >= 1, "stretch factor must be >= 1"
315 |     c_seq_len = x.shape[-1]
316 |     x = x.repeat_interleave(stretch_factor, dim=-1)
317 | 
318 |     # trim back to the original length
319 |     x = x[:, :, :c_seq_len]
320 | 
321 |     mask = periodic_mask(x, stretch_factor, width=1)
322 |     return mask
323 | 
324 | def onset_mask(
325 |     onset_frame_idxs: torch.Tensor,
326 |     z: torch.Tensor,
327 |     width: int = 1,
328 | ):
329 |     if len(onset_frame_idxs) == 0:
330 |         print("no onsets detected")
331 |     # print("onset_frame_idxs", onset_frame_idxs)
332 |     # print("mask shape", z.shape)
333 | 
334 |     mask = torch.ones_like(z).int()
335 |     for idx in onset_frame_idxs:
336 |         mask[:, :, max(idx - width, 0):idx + width] = 0  # clamp the start so early onsets don't wrap around
337 | 
338 |     return mask.int()
339 | 
340 | def tria_mask(
341 |     codes: torch.Tensor,
342 |     min_amt: float = 0.1,
343 |     max_amt: float = 0.4,
344 | ):
345 |     """
346 |     unmasks a prefix of the codes tensor,
347 |     in the range provided
348 |     """
349 | 
350 |     mask = full_mask(codes)
351 |     nb, nc, nt = codes.shape
352 |     for i in range(nb):
353 |         amt = torch.rand(1) * (max_amt - min_amt) + min_amt
354 |         amt = int(amt * nt)
355 |         mask[i, :, :amt] = 0
356 | 
357 |     return mask
358 | 
359 | 
360 | 
361 | 
362 | 
363 | 
364 | if __name__ == "__main__":
365 |     from audiotools import AudioSignal  # not imported at module level; only needed for this smoke test
366 |     sig = AudioSignal("assets/example.wav")
-------------------------------------------------------------------------------- /unloop/max/click.maxpat: --------------------------------------------------------------------------------
1 | {
2 | "patcher" : {
3 | "fileversion" : 1,
4 | "appversion" : {
5 | "major" : 8,
6 | "minor" : 6,
7 | "revision" : 5,
8 | "architecture" : "x64",
9 | "modernui" : 1
10 | }
11 | ,
12 | "classnamespace" : "box",
13 | "rect" : [ 59.0, 106.0, 640.0, 480.0 ],
14 | "bglocked" : 0,
15 | "openinpresentation" : 0,
16 | "default_fontsize" : 12.0,
17 | "default_fontface" : 0,
18 | "default_fontname" : "Arial",
19 | "gridonopen" : 1,
20 | "gridsize" : [ 15.0, 15.0 ],
21 | "gridsnaponopen" : 1,
22 | "objectsnaponopen" : 1,
23 | "statusbarvisible" : 2,
24 | "toolbarvisible" : 1,
25 | "lefttoolbarpinned" : 0,
26 | "toptoolbarpinned" : 0,
27 | "righttoolbarpinned" : 0,
28 | "bottomtoolbarpinned" : 0,
29 | "toolbars_unpinned_last_save" : 0,
30 | "tallnewobj" : 0,
31 | "boxanimatetime" : 200,
32 | "enablehscroll" : 1,
33 | "enablevscroll" : 1,
34 | "devicewidth" : 0.0,
35 | "description" : "",
36 | "digest" : "",
37 | "tags" : "",
38 | "style" : "",
39 | "subpatcher_template" : "",
40 | "assistshowspatchername" : 0,
41 | "boxes" : [ {
42 | "box" : {
43 | "id" : "obj-323",
44 | "maxclass" : "newobj",
45 | "numinlets" : 2,
46 | "numoutlets" : 1,
47 | "outlettype" : [ "" ],
48 | "patching_rect" : [ 129.751264274120331, 104.0, 32.0, 22.0 ],
49 | "text" : "gate"
50 | }
51 | 
52 | }
53 | , {
54 | "box" : {
55 | "id" : "obj-321",
56 | "maxclass" : "newobj",
57 | "numinlets" : 2,
58 | "numoutlets" : 1,
59 | "outlettype" : [ "" ],
60 | "patching_rect" : [ 59.239391028881073, 100.0, 32.0, 22.0 ],
61 | "text" : "gate"
62 | }
63 | 
64 | }
65 | , {
66 | "box"
: { 67 | "format" : 6, 68 | "id" : "obj-319", 69 | "maxclass" : "flonum", 70 | "numinlets" : 1, 71 | "numoutlets" : 2, 72 | "outlettype" : [ "", "bang" ], 73 | "parameter_enable" : 0, 74 | "patching_rect" : [ 108.0, 266.418817639350891, 50.0, 22.0 ] 75 | } 76 | 77 | } 78 | , { 79 | "box" : { 80 | "id" : "obj-317", 81 | "maxclass" : "newobj", 82 | "numinlets" : 1, 83 | "numoutlets" : 2, 84 | "outlettype" : [ "bang", "bang" ], 85 | "patching_rect" : [ 129.751264274120331, 135.0, 32.0, 22.0 ], 86 | "text" : "t b b" 87 | } 88 | 89 | } 90 | , { 91 | "box" : { 92 | "id" : "obj-316", 93 | "maxclass" : "newobj", 94 | "numinlets" : 1, 95 | "numoutlets" : 2, 96 | "outlettype" : [ "bang", "bang" ], 97 | "patching_rect" : [ 59.239391028881073, 129.0, 32.0, 22.0 ], 98 | "text" : "t b b" 99 | } 100 | 101 | } 102 | , { 103 | "box" : { 104 | "id" : "obj-311", 105 | "maxclass" : "message", 106 | "numinlets" : 2, 107 | "numoutlets" : 1, 108 | "outlettype" : [ "" ], 109 | "patching_rect" : [ 150.735043168067932, 223.683774471282959, 29.5, 22.0 ], 110 | "text" : "600" 111 | } 112 | 113 | } 114 | , { 115 | "box" : { 116 | "id" : "obj-310", 117 | "maxclass" : "message", 118 | "numinlets" : 2, 119 | "numoutlets" : 1, 120 | "outlettype" : [ "" ], 121 | "patching_rect" : [ 108.0, 223.683774471282959, 29.5, 22.0 ], 122 | "text" : "400" 123 | } 124 | 125 | } 126 | , { 127 | "box" : { 128 | "id" : "obj-307", 129 | "maxclass" : "newobj", 130 | "numinlets" : 1, 131 | "numoutlets" : 1, 132 | "outlettype" : [ "" ], 133 | "patching_rect" : [ 579.222337603569031, 292.05984354019165, 77.0, 22.0 ], 134 | "text" : "loadmess 80" 135 | } 136 | 137 | } 138 | , { 139 | "box" : { 140 | "id" : "obj-284", 141 | "maxclass" : "newobj", 142 | "numinlets" : 2, 143 | "numoutlets" : 1, 144 | "outlettype" : [ "int" ], 145 | "patching_rect" : [ 509.991567671298981, 297.188048720359802, 29.5, 22.0 ], 146 | "text" : "* 8" 147 | } 148 | 149 | } 150 | , { 151 | "box" : { 152 | "id" : "obj-286", 153 | "maxclass" : "newobj", 154 | "numinlets" : 4, 155 | "numoutlets" : 1, 156 | "outlettype" : [ "signal" ], 157 | "patching_rect" : [ 451.871908962726593, 353.598305702209473, 106.0, 22.0 ], 158 | "text" : "reson~ 1. 100. 10." 159 | } 160 | 161 | } 162 | , { 163 | "box" : { 164 | "id" : "obj-281", 165 | "maxclass" : "newobj", 166 | "numinlets" : 2, 167 | "numoutlets" : 1, 168 | "outlettype" : [ "signal" ], 169 | "patching_rect" : [ 50.0, 432.230785131454468, 40.0, 22.0 ], 170 | "text" : "*~ 40." 171 | } 172 | 173 | } 174 | , { 175 | "box" : { 176 | "format" : 6, 177 | "id" : "obj-279", 178 | "maxclass" : "flonum", 179 | "numinlets" : 1, 180 | "numoutlets" : 2, 181 | "outlettype" : [ "", "bang" ], 182 | "parameter_enable" : 0, 183 | "patching_rect" : [ 579.222337603569031, 319.410271167755127, 50.0, 22.0 ] 184 | } 185 | 186 | } 187 | , { 188 | "box" : { 189 | "id" : "obj-272", 190 | "maxclass" : "newobj", 191 | "numinlets" : 2, 192 | "numoutlets" : 1, 193 | "outlettype" : [ "int" ], 194 | "patching_rect" : [ 380.795777916908264, 300.606852173805237, 29.5, 22.0 ], 195 | "text" : "* 4" 196 | } 197 | 198 | } 199 | , { 200 | "box" : { 201 | "id" : "obj-275", 202 | "maxclass" : "newobj", 203 | "numinlets" : 4, 204 | "numoutlets" : 1, 205 | "outlettype" : [ "signal" ], 206 | "patching_rect" : [ 322.795777916908264, 356.66199141740799, 106.0, 22.0 ], 207 | "text" : "reson~ 1. 100. 10." 
208 | } 209 | 210 | } 211 | , { 212 | "box" : { 213 | "id" : "obj-264", 214 | "maxclass" : "newobj", 215 | "numinlets" : 2, 216 | "numoutlets" : 1, 217 | "outlettype" : [ "int" ], 218 | "patching_rect" : [ 247.598402619361877, 300.606852173805237, 29.5, 22.0 ], 219 | "text" : "* 2" 220 | } 221 | 222 | } 223 | , { 224 | "box" : { 225 | "id" : "obj-259", 226 | "maxclass" : "newobj", 227 | "numinlets" : 4, 228 | "numoutlets" : 1, 229 | "outlettype" : [ "signal" ], 230 | "patching_rect" : [ 189.47874391078949, 356.66199141740799, 106.0, 22.0 ], 231 | "text" : "reson~ 1. 100. 10." 232 | } 233 | 234 | } 235 | , { 236 | "box" : { 237 | "id" : "obj-200", 238 | "maxclass" : "newobj", 239 | "numinlets" : 1, 240 | "numoutlets" : 1, 241 | "outlettype" : [ "signal" ], 242 | "patching_rect" : [ 50.0, 266.418817639350891, 39.0, 22.0 ], 243 | "text" : "click~" 244 | } 245 | 246 | } 247 | , { 248 | "box" : { 249 | "id" : "obj-4", 250 | "maxclass" : "newobj", 251 | "numinlets" : 4, 252 | "numoutlets" : 1, 253 | "outlettype" : [ "signal" ], 254 | "patching_rect" : [ 50.0, 356.66199141740799, 106.0, 22.0 ], 255 | "text" : "reson~ 1. 100. 10." 256 | } 257 | 258 | } 259 | , { 260 | "box" : { 261 | "comment" : "1 enables click, 0 disables.", 262 | "id" : "obj-324", 263 | "index" : 1, 264 | "maxclass" : "inlet", 265 | "numinlets" : 0, 266 | "numoutlets" : 1, 267 | "outlettype" : [ "int" ], 268 | "patching_rect" : [ 59.239391028881073, 40.0, 30.0, 30.0 ] 269 | } 270 | 271 | } 272 | , { 273 | "box" : { 274 | "comment" : "bang for beat", 275 | "id" : "obj-325", 276 | "index" : 2, 277 | "maxclass" : "inlet", 278 | "numinlets" : 0, 279 | "numoutlets" : 1, 280 | "outlettype" : [ "bang" ], 281 | "patching_rect" : [ 104.239391028881073, 40.0, 30.0, 30.0 ] 282 | } 283 | 284 | } 285 | , { 286 | "box" : { 287 | "comment" : "bang for downbeat", 288 | "id" : "obj-326", 289 | "index" : 3, 290 | "maxclass" : "inlet", 291 | "numinlets" : 0, 292 | "numoutlets" : 1, 293 | "outlettype" : [ "bang" ], 294 | "patching_rect" : [ 175.239391028881073, 40.0, 30.0, 30.0 ] 295 | } 296 | 297 | } 298 | , { 299 | "box" : { 300 | "comment" : "signal out", 301 | "id" : "obj-327", 302 | "index" : 1, 303 | "maxclass" : "outlet", 304 | "numinlets" : 1, 305 | "numoutlets" : 0, 306 | "patching_rect" : [ 49.999889028881171, 514.23083500000007, 30.0, 30.0 ] 307 | } 308 | 309 | } 310 | ], 311 | "lines" : [ { 312 | "patchline" : { 313 | "destination" : [ "obj-259", 0 ], 314 | "order" : 1, 315 | "source" : [ "obj-200", 0 ] 316 | } 317 | 318 | } 319 | , { 320 | "patchline" : { 321 | "destination" : [ "obj-275", 0 ], 322 | "order" : 0, 323 | "source" : [ "obj-200", 0 ] 324 | } 325 | 326 | } 327 | , { 328 | "patchline" : { 329 | "destination" : [ "obj-4", 0 ], 330 | "order" : 2, 331 | "source" : [ "obj-200", 0 ] 332 | } 333 | 334 | } 335 | , { 336 | "patchline" : { 337 | "destination" : [ "obj-281", 0 ], 338 | "source" : [ "obj-259", 0 ] 339 | } 340 | 341 | } 342 | , { 343 | "patchline" : { 344 | "destination" : [ "obj-259", 2 ], 345 | "source" : [ "obj-264", 0 ] 346 | } 347 | 348 | } 349 | , { 350 | "patchline" : { 351 | "destination" : [ "obj-275", 2 ], 352 | "source" : [ "obj-272", 0 ] 353 | } 354 | 355 | } 356 | , { 357 | "patchline" : { 358 | "destination" : [ "obj-281", 0 ], 359 | "source" : [ "obj-275", 0 ] 360 | } 361 | 362 | } 363 | , { 364 | "patchline" : { 365 | "destination" : [ "obj-259", 3 ], 366 | "order" : 2, 367 | "source" : [ "obj-279", 0 ] 368 | } 369 | 370 | } 371 | , { 372 | "patchline" : { 373 | "destination" : [ "obj-275", 3 ], 374 | 
"order" : 1, 375 | "source" : [ "obj-279", 0 ] 376 | } 377 | 378 | } 379 | , { 380 | "patchline" : { 381 | "destination" : [ "obj-286", 3 ], 382 | "order" : 0, 383 | "source" : [ "obj-279", 0 ] 384 | } 385 | 386 | } 387 | , { 388 | "patchline" : { 389 | "destination" : [ "obj-4", 3 ], 390 | "order" : 3, 391 | "source" : [ "obj-279", 0 ] 392 | } 393 | 394 | } 395 | , { 396 | "patchline" : { 397 | "destination" : [ "obj-327", 0 ], 398 | "source" : [ "obj-281", 0 ] 399 | } 400 | 401 | } 402 | , { 403 | "patchline" : { 404 | "destination" : [ "obj-286", 2 ], 405 | "source" : [ "obj-284", 0 ] 406 | } 407 | 408 | } 409 | , { 410 | "patchline" : { 411 | "destination" : [ "obj-281", 0 ], 412 | "source" : [ "obj-286", 0 ] 413 | } 414 | 415 | } 416 | , { 417 | "patchline" : { 418 | "destination" : [ "obj-279", 0 ], 419 | "source" : [ "obj-307", 0 ] 420 | } 421 | 422 | } 423 | , { 424 | "patchline" : { 425 | "destination" : [ "obj-319", 0 ], 426 | "source" : [ "obj-310", 0 ] 427 | } 428 | 429 | } 430 | , { 431 | "patchline" : { 432 | "destination" : [ "obj-319", 0 ], 433 | "source" : [ "obj-311", 0 ] 434 | } 435 | 436 | } 437 | , { 438 | "patchline" : { 439 | "destination" : [ "obj-200", 0 ], 440 | "source" : [ "obj-316", 1 ] 441 | } 442 | 443 | } 444 | , { 445 | "patchline" : { 446 | "destination" : [ "obj-310", 0 ], 447 | "source" : [ "obj-316", 0 ] 448 | } 449 | 450 | } 451 | , { 452 | "patchline" : { 453 | "destination" : [ "obj-200", 0 ], 454 | "source" : [ "obj-317", 0 ] 455 | } 456 | 457 | } 458 | , { 459 | "patchline" : { 460 | "destination" : [ "obj-311", 0 ], 461 | "source" : [ "obj-317", 1 ] 462 | } 463 | 464 | } 465 | , { 466 | "patchline" : { 467 | "destination" : [ "obj-264", 0 ], 468 | "order" : 2, 469 | "source" : [ "obj-319", 0 ] 470 | } 471 | 472 | } 473 | , { 474 | "patchline" : { 475 | "destination" : [ "obj-272", 0 ], 476 | "order" : 1, 477 | "source" : [ "obj-319", 0 ] 478 | } 479 | 480 | } 481 | , { 482 | "patchline" : { 483 | "destination" : [ "obj-284", 0 ], 484 | "order" : 0, 485 | "source" : [ "obj-319", 0 ] 486 | } 487 | 488 | } 489 | , { 490 | "patchline" : { 491 | "destination" : [ "obj-4", 2 ], 492 | "order" : 3, 493 | "source" : [ "obj-319", 0 ] 494 | } 495 | 496 | } 497 | , { 498 | "patchline" : { 499 | "destination" : [ "obj-316", 0 ], 500 | "source" : [ "obj-321", 0 ] 501 | } 502 | 503 | } 504 | , { 505 | "patchline" : { 506 | "destination" : [ "obj-317", 0 ], 507 | "source" : [ "obj-323", 0 ] 508 | } 509 | 510 | } 511 | , { 512 | "patchline" : { 513 | "destination" : [ "obj-321", 0 ], 514 | "order" : 1, 515 | "source" : [ "obj-324", 0 ] 516 | } 517 | 518 | } 519 | , { 520 | "patchline" : { 521 | "destination" : [ "obj-323", 0 ], 522 | "order" : 0, 523 | "source" : [ "obj-324", 0 ] 524 | } 525 | 526 | } 527 | , { 528 | "patchline" : { 529 | "destination" : [ "obj-321", 1 ], 530 | "source" : [ "obj-325", 0 ] 531 | } 532 | 533 | } 534 | , { 535 | "patchline" : { 536 | "destination" : [ "obj-323", 1 ], 537 | "source" : [ "obj-326", 0 ] 538 | } 539 | 540 | } 541 | , { 542 | "patchline" : { 543 | "destination" : [ "obj-281", 0 ], 544 | "source" : [ "obj-4", 0 ] 545 | } 546 | 547 | } 548 | ] 549 | } 550 | 551 | } 552 | --------------------------------------------------------------------------------