├── html ├── favicon.ico ├── recordings │ └── .keep ├── images │ ├── mic-off.png │ ├── mic-on.png │ ├── speaker-off.png │ └── speaker-on.png ├── worker-decoder.js ├── worker-encoder.js ├── lib.js ├── lyrics.css ├── full-instructions.html ├── lyrics.js ├── audiochunk.js ├── net.js ├── audio-worklet.js └── index.html ├── requirements.txt ├── audio └── README ├── start_stress_servers.sh ├── util.py ├── LICENSE ├── stress.py ├── unit-stress.py ├── upload.py ├── demetronome.py ├── .gitignore ├── stress_helper.py ├── shm.py ├── NOTES ├── README.md ├── server_wrapper.py └── server.py /html/favicon.ico: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /html/recordings/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.0 2 | opuslib==3.0.1 3 | SharedArray==3.2.1 4 | twilio==6.50.1 5 | -------------------------------------------------------------------------------- /html/images/mic-off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeffkaufman/bucket-brigade/HEAD/html/images/mic-off.png -------------------------------------------------------------------------------- /html/images/mic-on.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeffkaufman/bucket-brigade/HEAD/html/images/mic-on.png -------------------------------------------------------------------------------- /html/images/speaker-off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeffkaufman/bucket-brigade/HEAD/html/images/speaker-off.png 
-------------------------------------------------------------------------------- /html/images/speaker-on.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeffkaufman/bucket-brigade/HEAD/html/images/speaker-on.png -------------------------------------------------------------------------------- /audio/README: -------------------------------------------------------------------------------- 1 | Files manually updated here may be used as backing tracks 2 | 3 | All files should be saved as mono wav at 48kHz 4 | -------------------------------------------------------------------------------- /html/worker-decoder.js: -------------------------------------------------------------------------------- 1 | addEventListener('error', (event) => { 2 | event.preventDefault(); 3 | let {name, message, stack, unpreventable} = event.error ?? {}; 4 | [name, message, stack] = [name, message, stack].map(String); 5 | unpreventable = Boolean(unpreventable); 6 | postMessage({ 7 | type: "exception", 8 | exception: {name, message, stack, unpreventable}, 9 | }); 10 | }); 11 | addEventListener('unhandledrejection', (event) => { 12 | event.preventDefault(); 13 | throw event.reason; 14 | }); 15 | importScripts('opusjs/decoder.js') 16 | -------------------------------------------------------------------------------- /html/worker-encoder.js: -------------------------------------------------------------------------------- 1 | addEventListener('error', (event) => { 2 | event.preventDefault(); 3 | let {name, message, stack, unpreventable} = event.error ?? 
{}; 4 | [name, message, stack] = [name, message, stack].map(String); 5 | unpreventable = Boolean(unpreventable); 6 | postMessage({ 7 | type: "exception", 8 | exception: {name, message, stack, unpreventable}, 9 | }); 10 | }); 11 | addEventListener('unhandledrejection', (event) => { 12 | event.preventDefault(); 13 | throw event.reason; 14 | }); 15 | importScripts('opusjs/encoder.js') 16 | -------------------------------------------------------------------------------- /html/lib.js: -------------------------------------------------------------------------------- 1 | var log_counts = {} 2 | export function log_every(n, tag, ...args) { 3 | if (tag.constructor != String) { 4 | console.error("In log_every, tag must be a string! Got:", n, tag, args); 5 | return; 6 | } 7 | 8 | if (log_counts[tag] === undefined) { 9 | log_counts[tag] = 0; 10 | } 11 | if (log_counts[tag] % n == 0) { 12 | console.debug("<" + tag + "/" + n + ">", ...args); 13 | } 14 | log_counts[tag]++; 15 | } 16 | 17 | export function check(condition, message, ...rest) { 18 | if (!condition) { 19 | console.error(message, ...rest); 20 | throw new Error(message); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /html/lyrics.css: -------------------------------------------------------------------------------- 1 | #lyrics { 2 | background: #ffc; 3 | margin: auto; 4 | } 5 | 6 | .lyrics { 7 | white-space: pre-wrap; 8 | color: #4aa; 9 | } 10 | 11 | .clicked { 12 | color: black; 13 | text-shadow: 0 0 1px yellow; 14 | } 15 | 16 | .heard { 17 | color: black; 18 | text-shadow: 1px 1px 1px red; 19 | } 20 | 21 | #lyriccontrols { 22 | display: none; 23 | } 24 | 25 | #lyricButton { 26 | width: 30vw; 27 | background: #aaa; 28 | text-decoration: none; 29 | box-shadow: 2px 2px 1px #666; 30 | display: none; 31 | text-align: center; 32 | padding: 9vw 0; 33 | margin: 4px auto; 34 | } 35 | 36 | #lyrics textarea { 37 | font-size: 7pt; 38 | } 39 | 
-------------------------------------------------------------------------------- /start_stress_servers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # usage: either: 3 | # unsharded: ./start_stress_servers.sh 4 | # sharded: ./start_stress_servers.sh {1..8} 5 | 6 | trap ctrl_c INT 7 | 8 | function ctrl_c() { 9 | echo 10 | echo shutting down... 11 | killall uwsgi 12 | killall python3 13 | exit 14 | } 15 | 16 | if [[ $# -gt 1 ]]; then 17 | SEGMENTS="" 18 | for i in $@; do 19 | SEGMENTS+=" stress0$i" 20 | done 21 | 22 | python3 shm.py $SEGMENTS & 23 | 24 | for i in $@; do 25 | uwsgi --http :810$i --wsgi-file \ 26 | server_wrapper.py --threads=1 --processes=1 --disable-logging \ 27 | --declare-option 'segment=$1' --segment=stress0$i & 28 | done 29 | else 30 | uwsgi --http :8101 --wsgi-file \ 31 | server_wrapper.py --threads=1 --processes=1 --disable-logging & 32 | fi 33 | 34 | echo running... 35 | while true; do read; done 36 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | import json 4 | 5 | AUDIO_DIR = os.path.join(os.path.dirname(__file__), "audio") 6 | BACKING_TRACK_UPLOAD_FNAME = os.path.join(AUDIO_DIR, "User Upload") 7 | IMAGE_UPLOAD_FNAME = os.path.join( 8 | os.path.dirname(__file__), "html", "user-upload-image") 9 | 10 | def die500(start_response, e): 11 | # This is slightly sketchy: this assumes we are currently in the middle 12 | # of an exception handler for the exception e (which happens to be 13 | # true.) 
14 | trb = traceback.format_exc().encode("utf-8") 15 | start_response('500 Internal Server Error', [ 16 | ('Content-Type', 'text/plain'), 17 | ("Access-Control-Allow-Origin", "*"), 18 | ("Access-Control-Max-Age", "86400"), 19 | ("Access-Control-Expose-Headers", "X-Audio-Metadata"), 20 | ("X-Audio-Metadata", json.dumps({ 21 | "kill_client": True, 22 | "message": str(e) 23 | }))]) 24 | return trb, 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Glenn Willen and Jeff Kaufman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /html/full-instructions.html: -------------------------------------------------------------------------------- 1 | 2 | Full Instructions 3 | 8 | 9 |

Full Instructions

10 | 11 | This is a program (source code) 13 | that allows multiple people to make music together over the 14 | internet. You can think of it like one person recording their voice 15 | onto a cassette tape, mailing it to the next person who adds their voice, 16 | mailing it to the next person who adds their voice, etc. Except it's 17 | much faster than using the post! 18 | 19 |

20 | 21 | The audio offset controls where you are in the series of people. The 22 | larger your offset, the farther along you are in the chain, the more 23 | people will be ahead for you to hear, and the fewer people will be 24 | behind to hear you. If you're with a group of people, figure out now 25 | who will be at which position. A good place to start is people at 0, 26 | 10, 20, 30, 40, etc. Numbers over 100 generally won't work because 27 | the server buffer isn't large enough. Eventually, we're planning to 28 | tune this so delays can be much smaller. 29 | -------------------------------------------------------------------------------- /stress.py: -------------------------------------------------------------------------------- 1 | # run as: python3 stress.py 2 | 3 | import sys 4 | import subprocess 5 | import opuslib 6 | import numpy as np 7 | import tempfile 8 | import random 9 | from multiprocessing import Pool 10 | import time 11 | import json 12 | 13 | PACKET_INTERVAL = 0.6 # 600ms 14 | 15 | def summarize(timing): 16 | return min(timing), max(timing), sum(timing)/len(timing) 17 | 18 | def run(n_workers, users_per_client, n_rounds, url, should_sleep): 19 | n_workers = int(n_workers) 20 | 21 | processes = [] 22 | for i in range(n_workers): 23 | processes.append(subprocess.Popen( 24 | ["python3", "stress_helper.py", n_rounds, users_per_client, 25 | "stress%s" % i, url, should_sleep], 26 | stdout=subprocess.PIPE)) 27 | timings = [] 28 | for process in processes: 29 | process.wait() 30 | result_text = process.stdout.read() 31 | try: 32 | timings.append(json.loads(result_text)) 33 | except: 34 | print("Failure:", result_text) 35 | 36 | should_sleep = {"sleep": True, 37 | "nosleep": False}[should_sleep] 38 | if should_sleep: 39 | all_timings = [] 40 | for timings in timings: 41 | all_timings.extend(timings) 42 | print("[min=%.0f max=%.0f avg=%.0f]" % summarize(all_timings)) 43 | else: 44 | total = 0 45 | for timing in timings: 46 | est = PACKET_INTERVAL * 1000 * 
len(timing) / sum(timing) 47 | print("est %.0f clients" % est) 48 | total += est 49 | print("total: %.0f clients" % total) 50 | 51 | if __name__ == "__main__": 52 | run(*sys.argv[1:]) 53 | -------------------------------------------------------------------------------- /unit-stress.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import random 4 | import numpy as np 5 | import server 6 | import server_wrapper 7 | import opuslib 8 | 9 | PACKET_INTERVAL = 0.6 # 600ms 10 | PACKET_SAMPLES = int(server.SAMPLE_RATE * PACKET_INTERVAL) 11 | 12 | enc = opuslib.Encoder( 13 | server.SAMPLE_RATE, server_wrapper.CHANNELS, opuslib.APPLICATION_AUDIO) 14 | zeros = np.zeros(PACKET_SAMPLES).reshape( 15 | [-1, server_wrapper.OPUS_FRAME_SAMPLES]) 16 | 17 | data = server_wrapper.pack_multi([ 18 | np.frombuffer( 19 | enc.encode_float(packet.tobytes(), server_wrapper.OPUS_FRAME_SAMPLES), 20 | np.uint8) 21 | for packet in zeros]).tobytes() 22 | 23 | userid = int(random.random()*10000000) 24 | username = "unitstress" 25 | 26 | def query_string(): 27 | return "read_clock=%s&userid=%s&username=%s" % ( 28 | (server.calculate_server_clock(), 29 | userid, 30 | username)) 31 | 32 | def fake_outer_request(): 33 | server_wrapper.handle_post( 34 | userid, 35 | PACKET_SAMPLES, 36 | data, 37 | [], 38 | query_string()) 39 | 40 | def fake_inner_request(): 41 | server.handle_post( 42 | data, 43 | [], 44 | query_string()) 45 | 46 | def stress(): 47 | for i in range(3): 48 | start = time.time() 49 | n_requests = 1000 50 | for i in range(n_requests): 51 | fake_request() 52 | end = time.time() 53 | 54 | each_s = (end-start)/n_requests 55 | 56 | print("%.2fms each; est %.0f clients" % ( 57 | each_s*1000, 58 | PACKET_INTERVAL/each_s)) 59 | 60 | def setup(args): 61 | global fake_request 62 | 63 | if "inner" in args: 64 | fake_request = fake_inner_request 65 | else: 66 | fake_request = fake_outer_request 67 | 68 | if __name__ == "__main__": 
69 | setup(sys.argv[1:]) 70 | stress() 71 | -------------------------------------------------------------------------------- /upload.py: -------------------------------------------------------------------------------- 1 | import util 2 | import tempfile 3 | import subprocess 4 | import traceback 5 | import sys 6 | import urllib.parse 7 | 8 | def decode_and_save_backing_track(in_data_raw): 9 | with tempfile.NamedTemporaryFile() as tmp_upload: 10 | tmp_upload.write(in_data_raw) 11 | tmp_upload.flush() 12 | 13 | subprocess.check_call([ 14 | "sox", 15 | "-t", "mp3", tmp_upload.name, 16 | "-r", "48000", 17 | "-t", "wav", util.BACKING_TRACK_UPLOAD_FNAME, 18 | "remix", "1"]) 19 | 20 | def save_image(in_data_raw): 21 | with open(util.IMAGE_UPLOAD_FNAME, "wb") as outf: 22 | outf.write(in_data_raw) 23 | outf.flush() 24 | 25 | def application(environ, start_response): 26 | try: 27 | content_length = int(environ.get('CONTENT_LENGTH', 0)) 28 | in_data_raw = environ['wsgi.input'].read(content_length) 29 | 30 | query_string = environ['QUERY_STRING'] 31 | 32 | if len(query_string) > 0: 33 | query_params = urllib.parse.parse_qs(query_string, strict_parsing=True) 34 | else: 35 | query_params = {} 36 | 37 | uploadType, = query_params.get("type", [None]) 38 | if uploadType == "backingTrack": 39 | decode_and_save_backing_track(in_data_raw) 40 | elif uploadType == "image": 41 | save_image(in_data_raw) 42 | else: 43 | raise Exception("unknown uploadType %s" % uploadType) 44 | 45 | start_response('200 OK', [("Content-Type", "text/plain")]) 46 | return b"ok", 47 | except Exception as e: 48 | print("ERROR:", query_string, "\n", traceback.\ 49 | format_exc(), file=sys.stderr) 50 | return util.die500(start_response, e) 51 | 52 | def serve(): 53 | from wsgiref.simple_server import make_server 54 | make_server(b'',8082,application).serve_forever() 55 | 56 | if __name__ == "__main__": 57 | serve() 58 | -------------------------------------------------------------------------------- 
/demetronome.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import wave 5 | 6 | # Remove the metronome from a recording. 7 | # 8 | # This could be a lot smarter: the beats are a consistent number of samples 9 | # apart, for example, and we could detect them by looking for samples that are 10 | # way apart from their neighbors instead of just ones that are very high. But 11 | # this works on the sample, so no need to do any more for now. 12 | 13 | input_fname, output_fname, threshold = sys.argv[1:] 14 | threshold = int(threshold) # try 13000 or so 15 | 16 | input_wave = wave.open(input_fname, mode='rb') 17 | output_wave = wave.open(output_fname, mode='wb') 18 | 19 | CHANNELS=1 20 | WIDTH=2 21 | 22 | assert input_wave.getnchannels() == CHANNELS 23 | assert input_wave.getsampwidth() == WIDTH 24 | output_wave.setnchannels(input_wave.getnchannels()) 25 | output_wave.setsampwidth(input_wave.getsampwidth()) 26 | output_wave.setframerate(input_wave.getframerate()) 27 | 28 | l = 0 29 | samples = [] 30 | while f := input_wave.readframes(1024): 31 | l += len(f) 32 | prev = None 33 | for i, s in enumerate(f): 34 | if i % 2 == 0: 35 | prev = s 36 | else: 37 | sample = int.from_bytes([prev, s], 38 | byteorder="little", 39 | signed=True) 40 | samples.append(sample) 41 | 42 | metronome_values = [] 43 | for sample in samples: 44 | if sample > threshold: 45 | metronome_values.append(sample) 46 | metronome_average = round(sum(metronome_values) / len(metronome_values)) 47 | 48 | new_samples = [] 49 | for sample in samples: 50 | if sample > threshold: 51 | sample -= metronome_average 52 | new_samples.append(sample) 53 | 54 | new_frames = [] 55 | for sample in new_samples: 56 | new_frames.extend(sample.to_bytes(byteorder="little", 57 | signed=True, 58 | length=WIDTH)) 59 | 60 | output_wave.writeframes(bytes(new_frames)) 61 | 
-------------------------------------------------------------------------------- /html/lyrics.js: -------------------------------------------------------------------------------- 1 | import { start_hooks, stop_hooks, event_hooks, declare_event, init_events } from './app.js'; 2 | 3 | let lyrics = "Hands chip the |flint, light the |fire, skin the |kill\n|Feet move the |tribe track the |herd with a |will \nHuman-|kind |struggles, on the |edge of histo |ry\n|Time to settle |down, time to |grow, time to |bree|eed..\n|Plow tills the |soil, plants the |seed, pray for |rain\n|Scythe reaps the | wheat, to the |mill, to grind the |grain\n|Towns.. and.. |cities spread to |empire over-|night\n|Hands keep |building as we |chant the ancient |rite...".split('|'); 4 | 5 | let button = document.getElementById('lyricButton'); 6 | let holder = document.getElementById('lyricHolder'); 7 | let dbgbox = document.getElementById('lyricDbg'); 8 | let ctrlCb = document.getElementById('lyricCtrlCb'); 9 | let spans = {} 10 | let lyricsCur = 0; 11 | 12 | function dbg(txt) { 13 | let div = document.createElement('div'); 14 | div.innerText = txt 15 | dbgbox.appendChild(div); 16 | } 17 | 18 | document.lyric_dbg_cb = dbg; 19 | 20 | function addSpan(lid, txt) { 21 | let span = document.createElement('span'); 22 | span.innerText = txt; 23 | span.className = 'lyrics'; 24 | holder.appendChild(span); 25 | spans[lid] = span; 26 | } 27 | 28 | start_hooks.push( ()=>{ 29 | holder.innerHTML = ''; 30 | spans = {}; 31 | let aos = document.getElementById('audioOffset').value; 32 | let ctrl = ctrlCb.checked; 33 | if (ctrl) { 34 | button.style.display = 'block'; 35 | lyricsCur = 0; 36 | init_events(); 37 | } else { 38 | button.style.display = 'none'; 39 | for (let i=-Math.floor(Math.min(aos,10)); i<0; i++) { 40 | addSpan(i, (-i)+'...'+(i==-1?'\n':'')); 41 | } 42 | } 43 | for (let i=0; i{ 49 | button.style.display = 'none'; 50 | }); 51 | 52 | button.addEventListener("mousedown", ()=>{ 53 | 
console.log('mousedown'); 54 | spans[lyricsCur].className = 'lyrics clicked'; 55 | declare_event(lyricsCur); 56 | if (lyricsCur == 0) { 57 | for (let i=1; i<=10; i++) { 58 | declare_event(lyricsCur-i,i); 59 | } 60 | } 61 | lyricsCur++; 62 | }); 63 | 64 | event_hooks.push( (lid)=>{ 65 | console.info("event hook invoked "+lid); 66 | if (spans[lid]) { 67 | spans[lid].className='lyrics heard'; 68 | console.info("colored span "+lid); 69 | } else { 70 | console.info("no span "+lid); 71 | } 72 | }); 73 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Emacs backup files 132 | *~ 133 | 134 | # Bucket-brigade-specific files 135 | html/recordings 136 | !html/recordings/.keep 137 | html/local-style.css 138 | html/user-upload-image 139 | -------------------------------------------------------------------------------- /stress_helper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import opuslib 4 | import numpy as np 5 | import server 6 | import server_wrapper 7 | import random 8 | import time 9 | import requests 10 | import json 11 | import wave 12 | 13 | PACKET_INTERVAL = 0.6 # 600ms 14 | PACKET_SAMPLES = int(server.SAMPLE_RATE * PACKET_INTERVAL) 15 | OFFSET = 12 16 | READ_WRITE_OFFSET = 2 17 | 18 | enc = opuslib.Encoder( 19 | server.SAMPLE_RATE, server_wrapper.CHANNELS, opuslib.APPLICATION_AUDIO) 20 | 21 | def stress(n_rounds, users_per_client, worker_name, url, should_sleep): 22 | n_rounds = int(n_rounds) 23 | users_per_client = int(users_per_client) 24 | should_sleep = {"sleep": True, 25 | "nosleep": False}[should_sleep] 26 | 27 | if should_sleep: 28 | # avoid having everyone at the same offset 29 | time.sleep(random.random() * PACKET_INTERVAL) 30 | 31 | with wave.open("stress.wav") as inf: 32 | if inf.getnchannels() != 1: 33 | raise Exception( 34 | "wrong number of channels on %s" % state.requested_track) 35 | if inf.getsampwidth() != 2: 36 | raise 
Exception( 37 | "wrong sample width on %s" % state.requested_track) 38 | if inf.getframerate() != 48000: 39 | raise Exception( 40 | "wrong sample rate on %s" % state.requested_track) 41 | 42 | audio_data = np.frombuffer( 43 | inf.readframes(-1), np.int16).astype(np.float32) / (2**15) 44 | audio_data = audio_data[:PACKET_SAMPLES] 45 | audio_packets = audio_data.reshape([-1, server_wrapper.OPUS_FRAME_SAMPLES]) 46 | 47 | data = server_wrapper.pack_multi([ 48 | np.frombuffer( 49 | enc.encode_float(packet.tobytes(), server_wrapper.OPUS_FRAME_SAMPLES), 50 | np.uint8) 51 | for packet in audio_packets]).tobytes() 52 | 53 | s = requests.Session() 54 | 55 | userid = int(random.random()*10000000) 56 | timing = [] 57 | full_start = int(time.time()) 58 | clock_start = int((time.time() - OFFSET) * server.SAMPLE_RATE) 59 | for i in range(n_rounds): 60 | start = time.time() 61 | 62 | ts = clock_start + PACKET_SAMPLES * (i//users_per_client) 63 | resp = s.post( 64 | url='%s?read_clock=%s&write_clock=%s&userid=%s%s&username=%s' 65 | % (url, ts, ts - (READ_WRITE_OFFSET * server.SAMPLE_RATE), userid, i%users_per_client, worker_name), 66 | data=data, 67 | headers={ 68 | 'Content-Type': 'application/octet-stream', 69 | 'Accept-Encoding': 'gzip', 70 | }) 71 | if resp.status_code != 200: 72 | print("got: %s (%s)" % (resp.status_code, resp.content)) 73 | 74 | end = time.time() 75 | 76 | duration = end-start 77 | timing.append(duration*1000) 78 | 79 | full_duration = end - full_start 80 | expected_full_elapsed = (i//users_per_client) * PACKET_INTERVAL 81 | 82 | if should_sleep: 83 | if full_duration < expected_full_elapsed: 84 | time.sleep(expected_full_elapsed - full_duration) 85 | 86 | print(json.dumps(timing)) 87 | 88 | if __name__ == "__main__": 89 | stress(*sys.argv[1:]) 90 | -------------------------------------------------------------------------------- /shm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
SharedArray # pip install SharedArray 3 | import sys 4 | import time 5 | import struct 6 | import server 7 | import json 8 | import traceback 9 | 10 | CLIENT_SLEEP_S = 1/10000 #0.1ms 11 | SERVER_SLEEP_S = 1/10000 #0.1ms 12 | 13 | MESSAGE_TYPE_POST = 1 14 | MESSAGE_TYPE_RESPONSE = 2 15 | 16 | # Buffer layout: 17 | # 1 byte: status 18 | # 2 bytes: json length 19 | # N bytes: json 20 | # 4 bytes: data length 21 | # N bytes: data 22 | MAX_JSON_LENGTH = 10000 23 | MAX_DATA_LENGTH = 199998 24 | BUFFER_SIZE = 1 + 2 + MAX_JSON_LENGTH + 4 + MAX_DATA_LENGTH 25 | 26 | def attach_or_create(name): 27 | name = "shm://" + name 28 | 29 | try: 30 | return SharedArray.attach(name) 31 | except Exception: 32 | pass 33 | 34 | return SharedArray.create(name, BUFFER_SIZE, dtype=np.uint8) 35 | 36 | def server_turn(buf): 37 | return buf[0] == MESSAGE_TYPE_POST 38 | 39 | def encode_json_and_data(buf, json_raw, data, throw_exceptions): 40 | data = data.view(dtype=np.uint8) 41 | 42 | index = 1 43 | 44 | json_raw_bytes = json_raw.encode("utf-8") 45 | 46 | errormsg = None 47 | if len(json_raw_bytes) > MAX_JSON_LENGTH: 48 | errormsg = "json too long: %s" % len(json_raw_bytes) 49 | elif len(data) > MAX_DATA_LENGTH: 50 | errormsg = "data too long: %s" % len(data) 51 | 52 | if errormsg: 53 | if throw_exceptions: 54 | raise Exception(errormsg) 55 | else: 56 | json_raw_bytes = json.dumps({"error": errormsg}).encode("utf-8") 57 | data = np.zeros(0, dtype=np.uint8) 58 | 59 | buf[index : index + 2] = memoryview(struct.pack("H", len(json_raw_bytes))) 60 | index += 2 61 | 62 | buf[index : index + len(json_raw_bytes)] = memoryview(json_raw_bytes) 63 | index += len(json_raw_bytes) 64 | 65 | buf[index : index + 4] = memoryview(struct.pack("I", len(data))) 66 | index += 4 67 | 68 | buf[index : index + len(data)] = data 69 | 70 | def decode_json_and_data(buf): 71 | index = 1 72 | 73 | json_length, = buf[index : index + 2].view(dtype=np.uint16) 74 | index += 2 75 | 76 | if json_length > MAX_JSON_LENGTH: 77 | 
raise Exception("bad json length %s" % json_length) 78 | 79 | json_raw = buf[index : index + json_length].tobytes() 80 | index += json_length 81 | 82 | data_length, = buf[index : index + 4].view(dtype=np.uint32) 83 | index += 4 84 | 85 | if data_length > MAX_DATA_LENGTH: 86 | raise Exception("bad data length %s" % data_length) 87 | 88 | data = buf[index : index + data_length].view(np.uint8) 89 | 90 | return json_raw, data 91 | 92 | class ShmServer: 93 | @staticmethod 94 | def post(buf): 95 | try: 96 | in_json_raw, in_data = decode_json_and_data(buf) 97 | out_json_raw, out_data = server.handle_json_post(in_json_raw, in_data) 98 | encode_json_and_data(buf, out_json_raw, out_data, 99 | throw_exceptions=False) 100 | except Exception as e: 101 | encode_json_and_data(buf, json.dumps( 102 | {"error": str(e), "inner_bt": traceback.format_exc()} 103 | ), np.zeros(0, dtype=np.uint8), throw_exceptions=False) 104 | 105 | @staticmethod 106 | def run(buffer_names): 107 | buffers = [attach_or_create(buffer_name) for buffer_name in buffer_names] 108 | 109 | while True: 110 | didAction = False 111 | for buf in buffers: 112 | if server_turn(buf): 113 | ShmServer.post(buf) 114 | buf[0] = MESSAGE_TYPE_RESPONSE 115 | didAction = True 116 | if not didAction: 117 | time.sleep(SERVER_SLEEP_S) 118 | 119 | class ShmClient: 120 | def __init__(self, shm_name): 121 | self.buf = attach_or_create(shm_name) 122 | 123 | def handle_post(self, in_json_raw, in_data): 124 | encode_json_and_data(self.buf, in_json_raw, in_data, throw_exceptions=True) 125 | self.buf[0] = MESSAGE_TYPE_POST 126 | 127 | self.wait_resp_() 128 | 129 | return decode_json_and_data(self.buf) 130 | 131 | def wait_resp_(self): 132 | while server_turn(self.buf): 133 | time.sleep(CLIENT_SLEEP_S) 134 | 135 | class FakeClient: 136 | def handle_post(self, in_json_raw, in_data): 137 | out_json_raw, out_data = server.handle_json_post(in_json_raw, in_data) 138 | return out_json_raw.encode("utf-8"), out_data 139 | 140 | if __name__ == 
"__main__": 141 | ShmServer.run(sys.argv[1:]) 142 | -------------------------------------------------------------------------------- /NOTES: -------------------------------------------------------------------------------- 1 | To do / to fix: 2 | * Using a single request to send-and-then-receive data adds variable latency to the received data, since it has to wait in line behind the sent data (and there's no easy way to know how long it waited.) 3 | * Subtle issue: The relative precision of unsynchronized computer clocks is no better than 1ppm (generally 2-10x worse, IIRC.) 4 | * Use request headers instead of query params so that URL doesn't change (forcing CORS preflight to be redone). 5 | 6 | Debugging notes from 2020-07-22: 7 | * Weird as fuck: sometimes after it lags, when it comes back the pitch 8 | is too high? ??? ?????? I have seen it too high by a consistent ~3% 9 | for many seconds. (1/2 a semitone roughly) 10 | * iiiinteresting, I can repro this using "main app thread" loopback. 11 | * this matches with what we see in the visualizer, which is skipping _inside_ a batch sent to the server. 12 | * Clients seem to get further and further behind. I think they are generally stable except when weird shit is happening, but weird shit causes them to slip, and eventually they slip too far and die. Connecting multiple clients seems to make this happen much faster but I can't tell if that's inherent, or just because it increases lossage due to bandwidth, CPU, etc. 13 | * This may be specific to the bluetooth headset actually, and it may be related to the sporadic glitching I was getting with them many versions ago, which usually goes away if I close and reopen the device. 14 | * Hmmmmmm, under heavy load (3 clients), my request payloads seem to end up empty, even when the source is constant audio data (hamilton). That doesn't make any sense. 
15 | 16 | Debugging notes from 2020-07-23 w/ jefftk: 17 | * should add an alert if something goes wrong with the fixed-offset relationship between the read and write clocks going to the server 18 | * a speaker-to-mic echo test with clicks seemed quite smooth, which is cool 19 | 20 | Debugging notes 2020-07-24: 21 | * NOTE that changing the latency compensation at runtime messes up the continuity of the buffer in the audioworklet and I think virtually guarantees it will report overflow/underflow after wraparound. 22 | * Would be good if latency window did not grow when the software / network glitches out 23 | 24 | Debugging notes 2020-07-26: 25 | * Automatic latency calibration is absolutely mandatory to have 26 | * For further testing, in case of any doubt, multiple options would be good to make sure there's something that works for everyone, as long as they're all easy to try and it's obvious if it worked. 27 | * Noise is obnoxiously additive 28 | * I think a lot of this is quantization noise from naive downsampling to 8-bit. Some is just background noise. (Jeff suggests gating.) 29 | * No point in sending audio if we know it's silent OR we know we're the caboose 30 | * "Snap to caboose" feature would be nice 31 | * Ray thinks it would be cool for people e.g. with iphones to be able to just hear stuff and not send any stuff (since sending stuff doesn't work.) 32 | * No point in receiving audio if we're not going to do anything with it, give us a way to just not request it. 33 | * If things lag, sometimes we start up with an unexpectedly high "client total time consumed" from the very beginning. (~9s vs ~5s.) [I _think_ this makes sense due to how we're managing our server connection, but I need to think more about it and how to fix it.] 34 | * I saw at one point that a client was experiencing "cascading read slippage", i.e. it was getting later and later (and slippage getting larger) on every request. I don't understand what could cause this. 
I took a profile using the dev tools profiler, and there's a lot of weird stuff in it, but I don't really know how to read it. 35 | * The profile contains multiple tabs sharing threads, which makes sense in retrospect, but makes things confusing, and seems like it COULD be somehow related to the actual problem. 36 | * Comparing to a non-slippage profile: Rendering of "frames" starts to take longer and longer. Hundreds, then thousands of ms. (With CPU time dozens of ms, up to over 100.) In the healthy state it takes consistently <10 ms and a decent fraction of that is CPU time. I don't know whether this is real or an artifact. 37 | * Our tab's "frames" show up as stretching from a point when the OTHER tab completes a network request, until a point where we do. That makes no sense to me. I'm not sure whether it's an artifact but I think it could be. 38 | *** UGH *** I may be screwing myself with my testing method. The blocking limit of XHRs per domain is SHARED if I have many window to the same domain open. So I can rapidly run out if they overlap. The offending XHRs causing the apparent priority inversion are from DIFFERENT WINDOWS. The multiple entries on the audioworklet thread are also. [NOTE: During the offending tests we did not ever run out of parallel XHRs that I could see, but this is still a problem.] 39 | * I can't figure out whether the apparent interaction between windows is real or an artifact, whether the "long frames" are real or a devtools bug, if they really are long, if that's a chrome bug or my bug somehow. 40 | * It kind of seems like the XHRs are just happening at a consistent but too-slow interval. If the "long frames" are an artifact, then this would seemingly be caused by process not getting called often enough, or otherwise somehow us not having the target amount of data to send until later? 
41 | * Could our process be getting starved or something by the other window's process, which kicks in if they happen to drift into alignment or something? 42 | * We seem to be running very close to 3/4 target speed, which is .... weird unless something changed the sample rate or something broke in Web Audio. 43 | 44 | *** We should start tracking things like how often our callbacks get called, how much data we accumulate, and whether it seems to match the purported sample rate. *** 45 | 46 | * Random note: "go to chrome://flags, search for worklet and enable the flag named "Use realtime priority thread for Audio Worklet" 47 | 48 | * NOTE: test and fix in firefox 49 | 50 | * Perhaps we can get audiocontext latency more stable if we request a specific value (perhaps we can measure and then request higher?) 51 | 52 | * make calibration user interface nicer and easier to use -- guide people through volume settings, max out our click volume but warn them not to hurt their ears, see if automatic works, then suggest manual. Tell them when we think it's done. 53 | * allow calibrating any time, whether or not connected to server. allow stopping 54 | and starting at any time without destroying audio context. 55 | * ideally, allow changing server offset and such without reconnecting 56 | * getting everyone set up with offsets and so forth is really obnoxious, and then 57 | having to do the jump-to-end thing or it otherwise being hard to hear what we did 58 | * it would be good to be able to 'admin' configure server settings like short wrapping, and clearing on/off. 
59 | * would be nice to be able to admin-force people to offsets (this requires being 60 | able to change offsets dynamically at all) 61 | 62 | * Notes from 2020-08-05 testing: 63 | * We really need a way to test audio I/O at the beginning before doing ANYTHING else, as several people had problems 64 | * (Debian Chrome, the default device had weird behavior, it displayed as "default" with no description and for at least one person it didn't seem to work for input at first, and then changing the input selection possibly caused output to stop working) 65 | * Background noise remains annoying 66 | 67 | Time constants: 68 | * Audioworklet "time quantum": 128 samples @ 44,100Hz ~= 3ms 69 | * Our send buffer: SAMPLE_BATCH_SIZE * 128 samples ~= 290ms 70 | 71 | Sources of latency to account for: 72 | * Sending: 73 | * "Outside world latency": 74 | * Head-to-mic acoustic latency: <= 3ms (about 1ms/ft) 75 | * [Optional] bluetooth latency: 100-200 ms 76 | * System/JS audio processing latency: dozens of ms? 
77 | * Buffer latency into audioworklet: ~3ms 78 | * Client side latency: 79 | * Buffer latency (our code): ~290ms 80 | * Network/backend latency: 81 | * XHR TCP connection establishment: ~1.5x RTT (unless conn is reused) 82 | * [Optional] wait for single-threaded HTTP server to be free 83 | * Upload time: Send buffer size / upload bandwidth 84 | * Receiving: 85 | * [Time from XHR start until receiving begins] 86 | * This is time we have to compensate for when deciding which audio to ask for, but not inherently latency in getting it 87 | * Network/backend latency: 88 | * Download time 89 | * Client side latency: 90 | * Buffer latency (our code) 91 | * "Outside world latency": 92 | * System/JS audio processing latency 93 | * Optional bluetooth latency 94 | * Speaker-to-head acoustic latency 95 | 96 | -------------------------------------------------------------------------------- /html/audiochunk.js: -------------------------------------------------------------------------------- 1 | import {check} from './lib.js'; 2 | 3 | const CLOCK_SERVER = Symbol("CLOCK_SERVER"); 4 | const CLOCK_CLIENT = Symbol("CLOCK_CLIENT"); 5 | 6 | export class ClockReference { 7 | constructor({ sample_rate }) { 8 | check(this.side !== undefined, "Cannot directly construct abstract base class ClockReference"); 9 | check(sample_rate !== undefined, "Must provide sample_rate as a named argument"); 10 | check(Number.isInteger(sample_rate), "sample_rate must be integer"); 11 | 12 | this.sample_rate = sample_rate; 13 | this.type = this.constructor.name; 14 | } 15 | 16 | equals(other) { 17 | return this.side == other.side && this.sample_rate == other.sample_rate; 18 | } 19 | } 20 | function thaw_clock_reference(o) { 21 | if (o.type == "ServerClockReference") { 22 | return new ServerClockReference({ 23 | sample_rate: o.sample_rate 24 | }); 25 | } else { 26 | return new ClientClockReference({ 27 | sample_rate: o.sample_rate 28 | }); 29 | } 30 | } 31 | 32 | export class ServerClockReference extends 
ClockReference { 33 | get side() { return CLOCK_SERVER; } 34 | } 35 | 36 | export class ClientClockReference extends ClockReference { 37 | get side() { return CLOCK_CLIENT; } 38 | } 39 | 40 | export class ClockInterval { 41 | constructor({ reference, end, length }) { 42 | check(reference !== undefined, "Must provide reference as a named argument"); 43 | check(Number.isInteger(end), "end must be an integer (measured in samples)", end); 44 | check(Number.isInteger(length), "length must be an integer (measured in samples)", length); 45 | check(reference instanceof ClockReference, "reference must be a ClockReference", reference); 46 | 47 | this.end = end; 48 | this.length = length; 49 | this.reference = reference; 50 | } 51 | 52 | get sample_rate() { 53 | return this.reference.sample_rate; 54 | } 55 | 56 | get length_seconds() { 57 | return this.length / this.sample_rate; 58 | } 59 | 60 | get start() { 61 | return this.end - this.length; 62 | } 63 | } 64 | function thaw_clock_interval(o) { 65 | if (o === undefined) { 66 | return o; 67 | } 68 | return new ClockInterval({ 69 | reference: thaw_clock_reference(o.reference), 70 | end: o.end, 71 | length: o.length 72 | }); 73 | } 74 | 75 | export class AudioChunkBase { 76 | constructor({ data, interval }) { 77 | check(data !== undefined && interval !== undefined, "Must provide data and interval as named arguments"); 78 | check(interval instanceof ClockInterval, "interval must be a ClockInterval"); 79 | 80 | this.data = data; 81 | this.interval = interval; 82 | this.type = this.constructor.name; 83 | } 84 | 85 | check_clock_reference(clock_reference) { 86 | if (!clock_reference.equals(this.reference)) { 87 | throw new Error("Clock references unequal in AudioChunk.check_clock_reference"); 88 | } 89 | } 90 | 91 | get start() { return this.interval.start; } 92 | get end() { return this.interval.end; } 93 | get length() { return this.interval.length; } 94 | get length_seconds() { return this.interval.length_seconds; } 95 | get 
reference() { return this.interval.reference; } 96 | get sample_rate() { return this.interval.sample_rate; } 97 | } 98 | 99 | export function thaw_audio_chunk_base(o) { 100 | if (o.type == "AudioChunk") { 101 | return new AudioChunk({ 102 | data: o.data, 103 | interval: thaw_clock_interval(o.interval), 104 | }); 105 | } else { 106 | return new CompressedAudioChunk({ 107 | data: o.data, 108 | interval: thaw_clock_interval(o.interval), 109 | }); 110 | } 111 | } 112 | 113 | 114 | // This would more correctly be named UncompressedAudioChunk, but the shorter name is nicer. 115 | export class AudioChunk extends AudioChunkBase { 116 | constructor({ data, interval }) { 117 | super({ data, interval }); 118 | 119 | check(interval.reference instanceof ClientClockReference, "uncompressed audio chunks must be referenced to the client clock"); 120 | check(data instanceof Float32Array, "uncompressed audio data must be a Float32Array"); 121 | check(data.length == interval.length, "interval length must match uncompressed data length"); 122 | } 123 | } 124 | 125 | export class CompressedAudioChunk extends AudioChunkBase { 126 | constructor({ data, interval }) { 127 | super({ data, interval }); 128 | 129 | check(data instanceof Uint8Array, "compressed audio data must be a Uint8Array"); 130 | check(interval.reference instanceof ServerClockReference, "compressed audio chunks must be referenced to the server clock"); 131 | } 132 | } 133 | 134 | export class PlaceholderChunk { 135 | constructor({ reference, length, interval }){ 136 | check(reference !== undefined && length !== undefined, "Must provide reference and length as named arguments"); 137 | check(reference instanceof ClockReference, "reference must be a ClockReference"); 138 | check(Number.isInteger(length), "length must be an integer"); 139 | if (interval !== undefined) { 140 | check(interval.length == length, "interval must match length"); 141 | check(interval.reference == reference, "interval must match reference"); 142 | } 
143 | 144 | this.reference = reference; 145 | this.length = length; 146 | this.interval = interval; 147 | this.data = new Float32Array(length); // This exists for convenience but is always all zeros 148 | this.type = this.constructor.name; 149 | } 150 | 151 | check_clock_reference(clock_reference) { 152 | if (!clock_reference.equals(this.reference)) { 153 | throw new Error("Clock references unequal in PlaceholderChunk.check_clock_reference"); 154 | } 155 | } 156 | 157 | get start() { return this.interval.start; } 158 | get end() { return this.interval.end; } 159 | get length_seconds() { return this.interval.length_seconds; } 160 | get sample_rate() { return this.reference.sample_rate; } 161 | } 162 | 163 | export function thaw_placeholder_chunk(o) { 164 | return new PlaceholderChunk({ 165 | reference: thaw_clock_reference(o.reference), 166 | length: o.length, 167 | interval: thaw_clock_interval(o.interval), 168 | }); 169 | } 170 | 171 | function concat_typed_arrays(arrays, _constructor) { 172 | if (arrays.length == 0 && _constructor === undefined) { 173 | throw new Error("cannot concat zero arrays without constructor provided"); 174 | } 175 | var constructor = _constructor || arrays[0].constructor; 176 | var total_len = 0; 177 | arrays.forEach((a) => { 178 | if (a.constructor !== constructor) { 179 | throw new Error("must concat arrays of same type"); 180 | } 181 | total_len += a.length; 182 | }); 183 | var result = new constructor(total_len); 184 | var result_idx = 0; 185 | arrays.forEach((a) => { 186 | result.set(a, result_idx); 187 | result_idx += a.length; 188 | }); 189 | return result; 190 | } 191 | 192 | export function concat_chunks(chunks, _reference) { 193 | check(chunks instanceof Array, "Must provide Array of chunks", chunks); 194 | check(chunks.length != 0 || _reference !== undefined, "Cannot concat zero chunks without clock reference provided"); 195 | 196 | var reference = _reference || chunks[0].reference; 197 | var arrays = []; 198 | 199 | // 
PlaceholderChunks have no timing information (and all zeros for samples) 200 | var placeholder = (chunks[0] instanceof PlaceholderChunk); 201 | 202 | for (var i = 0; i < chunks.length; ++i) { 203 | check((chunks[i] instanceof PlaceholderChunk) || (chunks[i] instanceof AudioChunk), "can only use concat_chunks on (uncompressed or placeholder) audio chunks", chunks); 204 | chunks[i].check_clock_reference(reference); 205 | arrays.push(chunks[i].data); 206 | 207 | if (i != 0 && !placeholder) { 208 | check(!(chunks[i] instanceof PlaceholderChunk), "Cannot switch from audio chunk back to placeholder chunk"); 209 | check(chunks[i-1].end == chunks[i].start, "Cannot concat non-contiguous chunks"); 210 | } 211 | placeholder = (chunks[i] instanceof PlaceholderChunk); 212 | } 213 | 214 | var big_array = concat_typed_arrays(arrays); 215 | if (placeholder) { 216 | return new PlaceholderChunk({ 217 | reference, 218 | length: big_array.length 219 | }); 220 | } else { 221 | var interval = new ClockInterval({ 222 | reference, 223 | end: chunks[chunks.length - 1].end, 224 | length: big_array.length, 225 | }); 226 | return new AudioChunk({ 227 | interval, 228 | data: big_array 229 | }); 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bucket Brigade 2 | 3 | Bucket-brigade singing implementation 4 | 5 | ## Local Development 6 | 7 | (These are "orthodox python way" instructions. If you skip the 8 | "virtualenv venv" and the ". venv/bin/activate", you will install the 9 | dependencies in your global Python environment. This is probably 10 | fine.) 11 | 12 | ``` 13 | git clone https://github.com/jeffkaufman/bucket-brigade.git 14 | cd bucket-brigade 15 | virtualenv venv # optional 16 | . 
venv/bin/activate # optional 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | If you're on a Mac, you will need to install the Opus C library: 21 | 22 | ``` 23 | brew install opus-tools 24 | ``` 25 | 26 | Now, you will need two separate terminals (or screen/tmux sessions 27 | or similar.) 28 | 29 | Serve the static files: 30 | ``` 31 | cd html/ 32 | python -mhttp.server 33 | ``` 34 | 35 | Serve the app backend: 36 | ``` 37 | ./server_wrapper.py 38 | ``` 39 | 40 | The static file server will run on http://localhost:8000/ . The app 41 | server will run on http://localhost:8081/ . 42 | 43 | If you go to http://localhost:8000/ , hopefully the app should 44 | work. The client ordinarily wants the app server to be running on the 45 | same host as the static file server, at the path "/api/". However, as 46 | a special case, it will automatically notice when it's running on 47 | localhost, and switch to assuming the app server is at 48 | http://localhost:8081/ instead. 49 | 50 | When the app is running in this mode, Chrome will be slightly upset 51 | that the static files and the app server have different origins, and 52 | it will send a CORS preflight before every single request. Since the 53 | app makes many requests per second, this can cause weird performance 54 | issues. 55 | 56 | The production approach is to use nginx as both a static fileserver, 57 | and a reverse proxy for the app server, on the same port. This 58 | eliminates the CORS issue, but if it's not running on localhost, it 59 | requires using https://, which requires a certificate. (Chrome will 60 | not allow a website served over http to use the microphone.) 61 | 62 | There's probably a workable configuration using nginx on 63 | localhost. The app isn't currently set up for that, but it could be. 64 | 65 | ## Backing Tracks 66 | 67 | Backing tracks are 16-bit 1-channel 48k wav files. 
You can make one with: 68 | 69 | $ sox input.mp3 -r 48000 output.wav remix 1 70 | 71 | This should look like: 72 | 73 | $ soxi output.wav 74 | Channels : 1 75 | Sample Rate : 48000 76 | Precision : 16-bit 77 | Sample Encoding: 16-bit Signed Integer PCM 78 | 79 | ## Running an Instance 80 | 81 | If you want to run an instance, you need a server. There are many 82 | companies that offer Virtual Private Servers (VPSes), with different 83 | trade-offs. This project is almost entirely limited by CPU, for 84 | encoding and decoding audio, which means there's no reason to get an 85 | instance with large amounts of memory. 86 | 87 | If you want to support up to about 60 users, any single core server 88 | should be fine. The public instance is running on Amazon Lightsail, 89 | with their smallest server (512 MB RAM, 1 vCPU, 20 GB SSD, 90 | $3.50/month). 91 | 92 | It is possible to support much larger numbers of users, but you'll 93 | need a lot of cores. If you're interested in doing this, you will 94 | probably also need to customize the UI, since one video call for 100s 95 | of users is not going to work. See 96 | https://github.com/dspeyer/ritualEngine for an example of this kind of 97 | customization. 98 | 99 | ## Configuring a Server 100 | 101 | These instructions are verified for a fresh Ubuntu 20.04 LTS install. 
102 | 103 | ### Install Dependencies 104 | ``` 105 | sudo apt update 106 | sudo apt upgrade 107 | sudo apt install python3-distutils uuid-dev libcap-dev libpcre3-dev \ 108 | nginx python3-pip emacs letsencrypt opus-tools \ 109 | python3-certbot-nginx sox libsox-fmt-mp3 110 | sudo python3 -mpip install uwsgi 111 | mkdir ~/src 112 | cd ~/src && git clone https://github.com/jeffkaufman/bucket-brigade.git 113 | sudo usermod -a -G www-data ubuntu 114 | sudo chgrp www-data /home/ubuntu/src/bucket-brigade 115 | chmod g+rwxs /home/ubuntu/src/bucket-brigade 116 | cd ~/src/bucket-brigade && sudo python3 -mpip install -r requirements.txt 117 | mkdir ~/src/bucket-brigade/recordings 118 | # also populate ~/src/bucket-brigade/secrets.json 119 | ``` 120 | 121 | If you get: 122 | 123 | ``` 124 | ./src/shared_array_create.c:24:10: fatal error: numpy/arrayobject.h: No such file or directory 125 | 24 | #include 126 | | ^~~~~~~~~~~~~~~~~~~~~ 127 | compilation terminated. 128 | ``` 129 | 130 | This means that pip tried to install SharedArray before numpy. Fix it with: 131 | 132 | ``` 133 | sudo python3 -mpip uninstall SharedArray 134 | sudo python3 -mpip install -r requirements.txt 135 | ``` 136 | 137 | ### Twilio Setup 138 | 139 | While the singing component does not require any external integration, 140 | the video call component to support the default interface 141 | does. 
You will need to sign up for a Twilio account, and then fill out 142 | `~/src/bucket-brigade/secrets.json` as: 143 | 144 | ``` 145 | { 146 | "twilio": { 147 | "account_sid": "...", 148 | "api_key": "...", 149 | "api_secret": "...", 150 | "room": "You can name your room anything" 151 | } 152 | } 153 | ``` 154 | 155 | ### Theming 156 | 157 | You can change the colors as you like, by creating 158 | `~/src/bucket-brigade/local-style.css` with something like: 159 | 160 | ``` 161 | :root { 162 | --theme-light: rgb(255, 247, 248); 163 | --theme-medium: rgb(255, 227, 229); 164 | --theme-dark: rgb(252, 169, 179); 165 | } 166 | ``` 167 | 168 | If there are other changes you would like to make, PRs to make the 169 | styling easier to override are welcome. 170 | 171 | ### Nginx Config 172 | 173 | We mark absolutely everything as uncachable, because at least for now 174 | that's easier than managing it and the savings from proper caching are 175 | tiny. 176 | 177 | In /etc/nginx/sites-available/default add: 178 | 179 | ``` 180 | add_header Cache-Control no-cache; 181 | ``` 182 | 183 | ### Uploader Configuration 184 | 185 | To support people uploading backing tracks, in `/etc/systemd/system/` 186 | create `echo-uploader.service` as: 187 | 188 | ``` 189 | [Unit] 190 | Description=uWSGI echo uploader 191 | 192 | [Service] 193 | WorkingDirectory=/home/ubuntu/src/bucket-brigade 194 | ExecStart=/usr/local/bin/uwsgi --socket :7201 --wsgi-file /home/ubuntu/src/bucket-brigade/upload.py --logto /var/log/echo-uploader.log 195 | Restart=always 196 | KillSignal=SIGQUIT 197 | Type=notify 198 | NotifyAccess=all 199 | 200 | [Install] 201 | WantedBy=multi-user.target 202 | ``` 203 | 204 | Then run `sudo systemctl enable echo-uploader`. 
205 | 206 | In /etc/nginx/sites-available/default add: 207 | 208 | ``` 209 | location /upload { 210 | include uwsgi_params; 211 | uwsgi_pass 127.0.0.1:7201; 212 | client_max_body_size 16M; 213 | } 214 | ``` 215 | 216 | ### Simple Configuration 217 | 218 | Handles up to ~60users. 219 | 220 | In `/etc/systemd/system/` create `uwsgi-echo-01.service` as: 221 | 222 | ``` 223 | [Unit] 224 | Description=uWSGI echo 225 | 226 | [Service] 227 | WorkingDirectory=/home/ubuntu/src/bucket-brigade 228 | ExecStart=/usr/local/bin/uwsgi --socket :7101 --wsgi-file /home/ubuntu/src/bucket-brigade/server_wrapper.py --logto /var/log/uwsgi-echo-01.log 229 | Restart=always 230 | KillSignal=SIGQUIT 231 | Type=notify 232 | NotifyAccess=all 233 | 234 | [Install] 235 | WantedBy=multi-user.target 236 | ``` 237 | 238 | Then run `sudo systemctl enable uwsgi-echo-01`. 239 | 240 | In /etc/nginx/sites-available/default add: 241 | 242 | ``` 243 | location /api { 244 | include uwsgi_params; 245 | uwsgi_pass 127.0.0.1:7101; 246 | } 247 | ``` 248 | 249 | ### Sharded Configuration 250 | 251 | Handles up to ~1000 users, at ~60/core. The instructions below assume 252 | you are using a 12 core machine: one core for nginx, one core for 253 | bucket brigade, and ten cores for the shards. 
254 | 255 | In /etc/systemd/system/ create ten files as `uwsgi-echo-01.service` 256 | through `uwsgi-echo-10.service`: 257 | 258 | ``` 259 | [Unit] 260 | Description=uWSGI echo 261 | 262 | [Service] 263 | WorkingDirectory=/home/ubuntu/src/bucket-brigade 264 | ExecStart=/usr/local/bin/uwsgi --socket :7101 --wsgi-file /home/ubuntu/src/bucket-brigade/server_wrapper.py --logto /var/log/uwsgi-echo-01.log --declare-option 'segment=$1' --segment=echo01 265 | Restart=always 266 | KillSignal=SIGQUIT 267 | Type=notify 268 | NotifyAccess=all 269 | Environment=PYTHONUNBUFFERED=1 270 | 271 | [Install] 272 | WantedBy=multi-user.target 273 | ``` 274 | 275 | In /etc/systemd/system/ create one file as `echo-shm.service`: 276 | 277 | ``` 278 | [Unit] 279 | Description=Echo Shared Memory Server 280 | 281 | [Service] 282 | Type=simple 283 | WorkingDirectory=/home/ubuntu/src/bucket-brigade 284 | ExecStart=/usr/bin/python3 /home/ubuntu/src/bucket-brigade/shm.py echo01 echo02 echo03 echo04 echo05 echo06 echo07 echo08 echo09 echo10 285 | Restart=always 286 | KillSignal=SIGQUIT 287 | NotifyAccess=all 288 | Environment=PYTHONUNBUFFERED=1 289 | 290 | [Install] 291 | WantedBy=multi-user.target 292 | ``` 293 | 294 | Then run `sudo systemctl enable uwsgi-echo-0{1,2,3,4,5,6,7,8,9} ; sudo systemctl enable uwsgi-echo-10 echo-shm`. 295 | 296 | In /etc/nginx/sites-available/default add: 297 | 298 | ``` 299 | location /api/01 { 300 | include uwsgi_params; 301 | uwsgi_pass 127.0.0.1:7101; 302 | } 303 | location /api/02 { 304 | include uwsgi_params; 305 | uwsgi_pass 127.0.0.1:7102; 306 | } 307 | ... 308 | location /api/10 { 309 | include uwsgi_params; 310 | uwsgi_pass 127.0.0.1:7110; 311 | } 312 | 313 | location /api { 314 | error_page 418 = @shardone; 315 | error_page 419 = @shardtwo; 316 | ... 317 | error_page 427 = @shardten; 318 | 319 | if ( $arg_userid ~ "^1" ) { return 418; } 320 | if ( $arg_userid ~ "^2" ) { return 419; } 321 | ... 
322 | if ( $arg_userid ~ "^0" ) { return 427; } 323 | return 418; 324 | } 325 | 326 | location @shardone { 327 | include uwsgi_params; 328 | uwsgi_pass 127.0.0.1:7101; 329 | } 330 | location @shardtwo { 331 | include uwsgi_params; 332 | uwsgi_pass 127.0.0.1:7102; 333 | } 334 | ... 335 | location @shardten { 336 | include uwsgi_params; 337 | uwsgi_pass 127.0.0.1:7110; 338 | } 339 | ``` 340 | 341 | ## Deploying 342 | 343 | Any time you modify your service files you'll need to run: 344 | 345 | sudo systemctl daemon-reload 346 | 347 | Any time you have new code to run on the server, run either: 348 | 349 | ``` 350 | # Simple 351 | cd ~/src/bucket-brigade && git pull && sudo systemctl restart uwsgi-echo-01 352 | 353 | # Sharded 354 | cd ~/src/bucket-brigade && git pull && sudo systemctl restart uwsgi-echo-01 uwsgi-echo-02 uwsgi-echo-03 uwsgi-echo-04 uwsgi-echo-05 uwsgi-echo-06 uwsgi-echo-07 uwsgi-echo-08 uwsgi-echo-09 uwsgi-echo-10 echo-shm 355 | ``` 356 | 357 | ## Auto Restart 358 | 359 | There is somewhat strange behavior when this has been running for a long time. 360 | I'm currently too lazy to debug this, so I've programmed it to automatically 361 | restart every day at 7AM GMT (2AM or 3AM Eastern): 362 | 363 | ``` 364 | $ sudo crontab -e 365 | 0 7 * * * /bin/systemctl restart uwsgi-echo-01 366 | ``` 367 | 368 | ### Logs 369 | 370 | #### Simple 371 | ``` 372 | tail -f /var/log/uwsgi-echo-01.log 373 | ``` 374 | 375 | #### Sharded 376 | ``` 377 | tail -f /var/log/uwsgi-echo-01.log 378 | tail -f /var/log/uwsgi-echo-02.log 379 | ... 380 | tail -f /var/log/uwsgi-echo-10.log 381 | journalctl -u echo-shm.service -n 1000 382 | ``` 383 | 384 | ## Profiling 385 | 386 | The server creates a cProfile profiler by default, but doesn't enable 387 | it. To start profiling, hit the `/start_profile` endpoint; to stop, 388 | hit `/stop_profile`, and to see the results hit `/get_profile`. 389 | 390 | These are GET requests so you can do them from a browser easily. 
I 391 | expect them to be idempotent (i.e. hitting them repeatedly is 392 | harmless), but this still violates good sense by having side effects 393 | in a GET request, so weird things may happen if the browser does 394 | prefetching or something. Be ye warned. 395 | 396 | Be careful if using this in production; the profiler has significant 397 | overhead. Don't leave it running. 398 | 399 | ## Demetronome 400 | 401 | The metronome is recorded, but maybe you don't want that. The demetronome.py 402 | script removes metronome beats. If you have an example file where it's not 403 | working file a bug and share the file: it could be a lot more sophisticated but 404 | I don't want to get into that until I have an example of a case where it's 405 | needed. 406 | -------------------------------------------------------------------------------- /server_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import json 5 | import urllib.parse 6 | import numpy as np # type:ignore 7 | import opuslib # type:ignore 8 | import time 9 | import struct 10 | import traceback 11 | 12 | try: 13 | import uwsgi 14 | except Exception: 15 | # only available in app, not in shell 16 | uwsgi = None 17 | 18 | import SharedArray # pip install SharedArray 19 | 20 | sys.path.append(os.path.dirname(__file__)) # for finding our files 21 | import server 22 | import shm 23 | import util 24 | 25 | from typing import Any, Dict, List, Tuple 26 | 27 | import cProfile 28 | import pstats 29 | import io 30 | 31 | LOG_DIR = os.path.join(os.path.dirname(__file__), "logs") 32 | try: 33 | os.mkdir(LOG_DIR) 34 | except FileExistsError: 35 | pass 36 | 37 | pr = cProfile.Profile() 38 | # enable for just a moment so the profile object isn't empty 39 | pr.enable() 40 | pr.disable() 41 | 42 | CHANNELS = 1 43 | 44 | OPUS_FRAME_MS = 60 45 | OPUS_FRAME_SAMPLES = server.SAMPLE_RATE // 1000 * OPUS_FRAME_MS 46 | 
OPUS_BYTES_PER_SAMPLE = 4 # float32 47 | OPUS_FRAME_BYTES = OPUS_FRAME_SAMPLES * CHANNELS * OPUS_BYTES_PER_SAMPLE 48 | 49 | # TODO: have a system for cleaning up users when we haven't heard for them in 50 | # a long time, so we don't just accumulate encoders indefinitely. 51 | users = {} # userid -> (enc, dec) 52 | 53 | # This will become either a shm.ShmClient or a shm.FakeClient, depending on 54 | # whether we're in sharded mode or not. 55 | backend = None 56 | 57 | def pack_multi(packets) -> Any: 58 | encoded_length = 1 59 | for p in packets: 60 | encoded_length += 2 + len(p) 61 | outdata = np.zeros(encoded_length, np.uint8) 62 | outdata[0] = len(packets) 63 | idx = 1 64 | for p in packets: 65 | if p.dtype != np.uint8: 66 | raise Exception("pack_multi only accepts uint8") 67 | outdata[idx] = len(p) >> 8 68 | outdata[idx + 1] = len(p) % 256 69 | idx += 2 70 | outdata[idx:idx+len(p)] = p 71 | idx += len(p) 72 | return outdata 73 | 74 | def unpack_multi(data) -> List[Any]: 75 | if data.dtype != np.uint8: 76 | raise Exception("unpack_multi only accepts uint8") 77 | packet_count = data[0] 78 | data_idx = 1 79 | result = [] 80 | for i in range(packet_count): 81 | length = (data[data_idx] << 8) + data[data_idx + 1] 82 | data_idx += 2 83 | packet = data[data_idx:data_idx+length] 84 | data_idx += length 85 | result.append(packet) 86 | return result 87 | 88 | def calculate_volume(in_data): 89 | return np.sqrt(np.mean(in_data**2)) 90 | 91 | def handle_post_special(query_string): 92 | data, x_audio_metadata = handle_json_post(np.zeros(0), query_string, {}) 93 | return data.tobytes(), x_audio_metadata 94 | 95 | def handle_post(userid, n_samples, in_data_raw, 96 | query_string, client_address=None) -> Tuple[Any, str]: 97 | if not userid.isdigit(): 98 | raise ValueError("UserID must be numeric; got: %r"%userid) 99 | try: 100 | enc, dec = users[userid] 101 | except KeyError: 102 | enc = opuslib.Encoder(server.SAMPLE_RATE, CHANNELS, 103 | opuslib.APPLICATION_AUDIO) 104 | dec = 
opuslib.Decoder(server.SAMPLE_RATE, CHANNELS) 105 | users[userid] = enc, dec 106 | 107 | post_body = np.frombuffer(in_data_raw, dtype=np.uint8) 108 | parsed_params = urllib.parse.parse_qs(query_string, strict_parsing=True) 109 | json_len, = parsed_params.get("json_len", [None]) 110 | if json_len: 111 | json_len = int(json_len) 112 | json_kvs = json.loads(post_body[:json_len].tobytes().decode('utf8')) 113 | in_data = post_body[json_len:] 114 | else: 115 | in_data = post_body 116 | json_kvs = {} 117 | 118 | # If the user does not send us any data, we will treat it as silence of length n_samples. This is useful if they are just starting up. 119 | client_no_data = len(in_data)==0 120 | if client_no_data: 121 | if n_samples == 0: 122 | raise ValueError("Must provide either n_samples or data") 123 | in_data = np.zeros(n_samples, np.float32) 124 | else: 125 | packets = unpack_multi(in_data) 126 | decoded = [] 127 | for p in packets: 128 | d = dec.decode_float(p.tobytes(), OPUS_FRAME_SAMPLES, decode_fec=False) 129 | decoded.append(np.frombuffer(d, np.float32)) 130 | in_data = np.concatenate(decoded) 131 | 132 | # Sending n_samples is optional if data is sent, but in case of both they must match 133 | if n_samples == 0: 134 | n_samples = len(in_data) 135 | if n_samples != len(in_data): 136 | raise ValueError("Client is confused about how many samples it sent (got %s expected %s" % (n_samples, len(in_data))) 137 | 138 | rms_volume = calculate_volume(in_data) 139 | # This is only safe because query_string is guaranteed to already contain 140 | # at least the userid parameter. 
141 | query_string += '&rms_volume=%s'%rms_volume 142 | 143 | data, x_audio_metadata = handle_json_post( 144 | in_data, query_string, json_kvs, 145 | client_address=client_address) 146 | 147 | # Divide data into user_summary and raw audio data 148 | n_users_in_summary, = struct.unpack(">H", data[:2]) 149 | user_summary_n_bytes = server.summary_length(n_users_in_summary) 150 | 151 | user_summary = data[:user_summary_n_bytes] 152 | raw_audio = data[user_summary_n_bytes:].view(np.float32) 153 | 154 | # Encode raw audio 155 | packets = raw_audio.reshape([-1, OPUS_FRAME_SAMPLES]) 156 | encoded = [] 157 | for p in packets: 158 | e = np.frombuffer(enc.encode_float(p.tobytes(), OPUS_FRAME_SAMPLES), np.uint8) 159 | encoded.append(e) 160 | compressed_audio = pack_multi(encoded) 161 | 162 | # Combine user_summary and compressed audio data 163 | data = np.append(user_summary, compressed_audio) 164 | 165 | with open(os.path.join(LOG_DIR, userid), "a") as log_file: 166 | log_file.write("%d %.8f\n"%( 167 | time.time(), 168 | -1 if client_no_data else rms_volume)) 169 | 170 | return data.tobytes(), x_audio_metadata 171 | 172 | def handle_json_post(in_data, query_string, json_kvs, client_address=None): 173 | json_kvs.update({ 174 | "query_string": query_string, 175 | "client_address": client_address, 176 | }) 177 | out_json_raw, out_data = backend.handle_post(json.dumps(json_kvs), in_data) 178 | 179 | out_json = json.loads(out_json_raw) 180 | 181 | if "error" in out_json: 182 | inner_bt = "" 183 | if "inner_bt" in out_json: 184 | inner_bt = "\nBackend error details: " + out_json["inner_bt"] 185 | raise Exception(out_json["error"] + inner_bt) 186 | 187 | return out_data, out_json["x-audio-metadata"] 188 | 189 | def get_telemetry(start_response) -> None: 190 | in_json = { 191 | "request": "get_telemetry" 192 | } 193 | out_json_raw, _ = backend.handle_post(json.dumps(in_json), np.zeros(0)) 194 | start_response( 195 | '200 OK', 196 | [("Access-Control-Allow-Origin", "*"), 197 | 
("Access-Control-Max-Age", "86400"), 198 | ("Access-Control-Expose-Headers", "X-Audio-Metadata"), 199 | ("Content-Type", "application/json")]) 200 | return out_json_raw, 201 | 202 | 203 | def do_OPTIONS(environ, start_response) -> None: 204 | start_response( 205 | '200 OK', 206 | [("Access-Control-Allow-Origin", "*"), 207 | ("Access-Control-Max-Age", "86400")]) 208 | return b'', 209 | 210 | # GET requests do not require any specific parameters. Primarily they are used 211 | # when a client is starting up, to retrieve the server's current time. The 212 | # use of them to start and stop profiling is kind of gross and should really 213 | # be a POST, but it's purely for debugging so it's not a big issue. 214 | def do_GET(environ, start_response) -> None: 215 | global pr 216 | 217 | if environ.get('PATH_INFO', '') == "/api/start_profile": 218 | pr.enable() 219 | start_response('200 OK', []) 220 | return b'profiling enabled', 221 | 222 | if environ.get('PATH_INFO', '') == "/api/stop_profile": 223 | pr.disable() 224 | start_response('200 OK', []) 225 | return b'profiling disabled', 226 | 227 | if environ.get('PATH_INFO', '') == "/api/get_profile": 228 | s = io.StringIO() 229 | ps = pstats.Stats(pr, stream=s).sort_stats('tottime') 230 | ps.print_stats() 231 | start_response('200 OK', []) 232 | return s.getvalue().encode("utf-8"), 233 | 234 | if environ.get('PATH_INFO', '') == "/api/telemetry": 235 | return get_telemetry(start_response) 236 | 237 | server_clock = server.calculate_server_clock() 238 | 239 | start_response( 240 | '200 OK', 241 | [("Access-Control-Allow-Origin", "*"), 242 | ("Access-Control-Max-Age", "86400"), 243 | ("Access-Control-Expose-Headers", "X-Audio-Metadata"), 244 | ("X-Audio-Metadata", json.dumps({ 245 | "server_clock": server_clock, 246 | "server_sample_rate": server.SAMPLE_RATE, 247 | "server_version": server.SERVER_VERSION, 248 | "server_branch": server.SERVER_BRANCH, 249 | })), 250 | ("Content-Type", "application/octet-stream")]) 251 | # If we 
give a 0-byte response, Chrome Dev Tools gives a misleading error (see https://stackoverflow.com/questions/57477805/why-do-i-get-fetch-failed-loading-when-it-actually-worked) 252 | return b'ok', 253 | 254 | # POST requests absolutely must have a numeric user_id for all requests which 255 | # make it as far as handle_post; such requests must be associated with a user 256 | # or there's nothing we can do with them, and they will fail. 257 | # There are a few exceptions for "special" requests not associated with a 258 | # specific user, which are handled right here. 259 | def do_POST(environ, start_response) -> None: 260 | content_length = int(environ.get('CONTENT_LENGTH', 0)) 261 | in_data_raw = environ['wsgi.input'].read(content_length) 262 | 263 | query_string = environ['QUERY_STRING'] 264 | 265 | try: 266 | client_address = environ['HTTP_X_FORWARDED_FOR'] 267 | except KeyError: 268 | client_address = environ.get('REMOTE_ADDR', "") 269 | # XXX: do something with it 270 | 271 | # For some reason parse_qs can't handle an empty query string 272 | if len(query_string) > 0: 273 | query_params = urllib.parse.parse_qs(query_string, strict_parsing=True) 274 | else: 275 | query_params = {} 276 | 277 | userid = None 278 | try: 279 | userid, = query_params.get("userid", (None,)) 280 | 281 | n_samples, = query_params.get("n_samples", ("0",)) 282 | n_samples = int(n_samples) 283 | 284 | if (userid is None) and (len(in_data_raw) > 0 or n_samples != 0): 285 | raise Exception("Can't send non-user request with audio data.") 286 | 287 | reset_user_state, = query_params.get("reset_user_state", (None,)) 288 | if reset_user_state and userid and (userid in users): 289 | del users[userid] 290 | 291 | if userid is not None: 292 | data, x_audio_metadata = handle_post(userid, n_samples, in_data_raw, query_string, client_address=client_address) 293 | else: 294 | data, x_audio_metadata = handle_post_special(query_string) 295 | except Exception as e: 296 | # Clear out stale session 297 | if 
userid and (userid in users): 298 | del users[userid] 299 | # Log it 300 | print("Request raised exception!\nParams:", query_string, "\n", traceback.format_exc(), file=sys.stderr) 301 | return util.die500(start_response, e) 302 | 303 | combined_data = x_audio_metadata.encode('utf-8') + data 304 | 305 | simple_x_audio_metadata = json.dumps({ 306 | "metadata_len": len(x_audio_metadata) 307 | }) 308 | 309 | start_response( 310 | '200 OK', 311 | [("Access-Control-Allow-Origin", "*"), 312 | ("Access-Control-Max-Age", "86400"), 313 | ("Access-Control-Expose-Headers", "X-Audio-Metadata"), 314 | ("X-Audio-Metadata", simple_x_audio_metadata), 315 | ("Content-Type", "application/octet-stream")]) 316 | return combined_data, 317 | 318 | def application(environ, start_response): 319 | global backend 320 | 321 | if backend is None: 322 | if uwsgi is not None and 'segment' in uwsgi.opt: 323 | shm_name = uwsgi.opt['segment'] 324 | if shm_name: 325 | backend = shm.ShmClient(shm_name.decode("utf-8")) 326 | 327 | # If that didn't work, we're not sharded. 328 | if backend is None: 329 | backend = shm.FakeClient() 330 | 331 | return {"GET": do_GET, 332 | "POST": do_POST, 333 | "OPTIONS": do_OPTIONS}[environ["REQUEST_METHOD"]]( 334 | environ, start_response) 335 | 336 | def serve(): 337 | from wsgiref.simple_server import make_server 338 | make_server(b'',8081,application).serve_forever() 339 | 340 | if __name__ == "__main__": 341 | serve() 342 | -------------------------------------------------------------------------------- /html/net.js: -------------------------------------------------------------------------------- 1 | import {check} from './lib.js'; 2 | import {AudioChunk, PlaceholderChunk, CompressedAudioChunk, ServerClockReference, ClockInterval} from './audiochunk.js' 3 | 4 | // This gates all the logs that put references to REALLY HUGE objects into the console 5 | // very frequently. 
When this is on, having the console open eventually causes the 6 | // browser to lag severely and dev tools to lag/hang/crash. Don't use this unless 7 | // you actually need it. 8 | const LOG_ULTRA_VERBOSE = false; 9 | // XXX: 10 | console.debug = () => {} 11 | 12 | class ServerConnectionBase { 13 | constructor() {} 14 | 15 | // This is how much notional time we take up between getting audio and sending it back, server-to-server. ("Notional" because the flow of samples is not continuous, so for most purposes the size of the chunks we send to the server must be added to this.) 16 | get client_window_time() { 17 | if (!this.running || !this.read_clock || !this.write_clock || !this.clock_reference.sample_rate) { 18 | return undefined; 19 | } 20 | return (this.read_clock - this.write_clock) / this.clock_reference.sample_rate; 21 | } 22 | 23 | // This is how far behind our target place in the audio stream we are. This must be added to the value above, to find out how closely it's safe to follow behind where we are _aiming_ to be. This value should be small and relatively stable, or something has gone wrong. 
24 | get clientReadSlippage() { 25 | if (!this.running) { 26 | return undefined; 27 | } 28 | return (this.last_server_clock - this.read_clock - this.audio_offset) / this.clock_reference.sample_rate; 29 | } 30 | } 31 | 32 | export class ServerConnection extends ServerConnectionBase { 33 | constructor({ target_url, audio_offset_seconds, userid, receive_cb, failure_cb }) { 34 | super(); 35 | 36 | check( 37 | target_url !== undefined && 38 | audio_offset_seconds !== undefined && 39 | userid !== undefined, 40 | "target_url, audio_offset_seconds, userid, receive_cb must be provided as named parameters"); 41 | check(target_url instanceof URL, "target_url must be a URL"); 42 | check(typeof audio_offset_seconds == "number", "audio_offset_seconds must be a number"); 43 | check(Number.isInteger(userid), "userid must be an integer") 44 | 45 | this.target_url = target_url; 46 | this.audio_offset_seconds = audio_offset_seconds; 47 | this.read_clock = null; 48 | this.write_clock = null; 49 | this.userid = userid; 50 | this.send_metadata = {}; 51 | this.json_kvs = {} 52 | this.running = false; 53 | this.receive_cb = receive_cb; 54 | this.failure_cb = failure_cb; 55 | } 56 | 57 | async start() { 58 | if (this.running || this.starting) { 59 | console.warn("ServerConnection already started, ignoring"); 60 | return; 61 | } 62 | this.starting = true; 63 | 64 | const server_clock_data = await query_server_clock(this.target_url); 65 | if (!server_clock_data || !this.starting) { 66 | return false; 67 | } 68 | var { server_clock, server_sample_rate } = server_clock_data; 69 | 70 | this.clock_reference = new ServerClockReference({ sample_rate: server_sample_rate }); 71 | this.audio_offset = this.audio_offset_seconds * server_sample_rate; 72 | this.read_clock = server_clock - this.audio_offset; 73 | this.running = true; 74 | this.starting = false; 75 | return true; 76 | } 77 | 78 | stop() { 79 | this.starting = false; 80 | this.running = false; 81 | } 82 | 83 | set_metadata(send_metadata) { 
84 | this.send_metadata = send_metadata; 85 | } 86 | 87 | set_json_kv(key, value) { 88 | this.json_kvs[key] = value; 89 | } 90 | 91 | send(chunk) { 92 | if (!this.running) { 93 | console.warn("Not sending to server because not running"); 94 | return; 95 | } 96 | chunk.check_clock_reference(this.clock_reference); 97 | var chunk_data = null; 98 | 99 | if (!(chunk instanceof PlaceholderChunk)) { 100 | chunk_data = chunk.data; 101 | 102 | if (this.write_clock === null) { 103 | this.write_clock = chunk.start; 104 | } 105 | check(this.write_clock == chunk.start, "Trying to send non-contiguous chunk to server"); 106 | // Remember: 107 | // * Our convention is clock at the END; 108 | // * We implicitly request as many samples we send, so the more we're sending, the further ahead we need to read from. 109 | // * For the VERY first request, this means we have to start the clock BEFORE we start accumulating audio to send. 110 | this.write_clock += chunk.length; // ... = chunk.end; 111 | } 112 | this.read_clock += chunk.length; 113 | 114 | // These could change while we're asleep 115 | var saved_read_clock = this.read_clock; 116 | var saved_write_clock = this.write_clock; 117 | 118 | let json_kvs_str = JSON.stringify(this.json_kvs); 119 | if (json_kvs_str === "{}") { 120 | json_kvs_str = null; 121 | } 122 | this.json_kvs = {}; 123 | samples_to_server(chunk_data, this.target_url, json_kvs_str, { 124 | read_clock: this.read_clock, 125 | write_clock: this.write_clock, 126 | n_samples: chunk.length, 127 | userid: this.userid, 128 | ... 
this.send_metadata 129 | }).then(this.server_response.bind(this), this.server_failure.bind(this)); 130 | } 131 | 132 | server_failure(e) { 133 | console.warn("Failure talking to server:", e); 134 | this.failure_cb(); 135 | this.stop(); 136 | return; 137 | } 138 | 139 | server_response(response) { 140 | if (!response) { 141 | this.server_failure("No server response"); 142 | return; 143 | } 144 | if (!this.running) { 145 | console.warn("ServerConnection stopped while waiting for response from server"); 146 | return; 147 | } 148 | 149 | var metadata = response.metadata; 150 | try { 151 | check(this.server_sample_rate == metadata.sample_rate, "wrong sample rate from server"); 152 | // XXX check(saved_read_clock == metadata.client_read_clock, "wrong read clock from server"); 153 | // XXX check(saved_write_clock === null || saved_write_clock == metadata.client_write_clock, "wrong write clock from server"); 154 | } catch(e) { 155 | this.server_failure(e); 156 | return; 157 | } 158 | 159 | this.last_server_clock = metadata.server_clock; 160 | 161 | var result_interval = new ClockInterval({ 162 | reference: this.clock_reference, 163 | end: metadata.client_read_clock, 164 | length: metadata.n_samples, 165 | }); 166 | 167 | metadata.user_summary = []; 168 | 169 | let data = response.data; 170 | if (data.byteLength > 0) { 171 | const users_in_summary = 172 | new DataView(data).getUint16(0, /*littleEndian=*/false); 173 | const utf8decoder = new TextDecoder(); 174 | 175 | let pos = 2; 176 | for (var user_index = 0; user_index < users_in_summary; 177 | user_index++) { 178 | // getUint64 doesn't exist, but we know here that it's < MAX_SAFE_INT 179 | const useridView = new DataView(data.slice(pos, pos + 8)); 180 | const left = useridView.getUint32(0, /*littleEndian=*/false); 181 | const right = useridView.getUint32(4, /*littleEndian=*/false); 182 | const userid = "" + (2**32*left + right); 183 | pos += 8; 184 | 185 | let name = ""; 186 | try { 187 | name = 
utf8decoder.decode(data.slice(pos, pos + 32)).replace(/\0/g, ""); 188 | } catch {} 189 | pos += 32; 190 | 191 | const mic_volume = 192 | new DataView(data.slice(pos, pos + 4)).getFloat32(0); 193 | pos += 4; 194 | 195 | const rms_volume = 196 | new DataView(data.slice(pos, pos + 4)).getFloat32(0); 197 | pos += 4; 198 | 199 | const delay = 200 | new DataView(data.slice(pos, pos + 2)).getUint16( 201 | 0, /*littleEndian=*/false); 202 | pos += 2; 203 | 204 | const bits = 205 | new DataView(data.slice(pos, pos + 1)).getUint8(0); 206 | const muted = bits & 0b00000001; 207 | const is_monitored = bits & 0b00000010; 208 | pos += 1; 209 | 210 | metadata.user_summary.push([ 211 | delay, name, mic_volume, userid, rms_volume, muted, is_monitored]); 212 | } 213 | data = data.slice(pos); 214 | } 215 | 216 | data = new Uint8Array(data) 217 | this.receive_cb({ 218 | epoch: this.app_epoch, 219 | metadata, 220 | chunk: new CompressedAudioChunk({ 221 | interval: result_interval, 222 | data 223 | }) 224 | }); 225 | } 226 | } 227 | 228 | // XXX this is not great, we will just hang around chaining 1s promises forever until the server comes back up... maybe that's what we want? but there's no higher-level control over the process. 
229 | function fetch_with_retry(resource, init) { 230 | return fetch(resource, init).catch(async () => { 231 | await new Promise((resolve) => { 232 | console.warn("fetch_with_retry failed, waiting 1s", resource); 233 | setTimeout(resolve, 1000); 234 | }); 235 | return fetch_with_retry(resource, init); 236 | }); 237 | } 238 | 239 | export async function query_server_clock(target_url) { 240 | var request_time_ms = Date.now(); 241 | const fetch_init = {method: "get", cache: "no-store"}; 242 | const fetch_result = await fetch(target_url, fetch_init) 243 | // Retry immediately on first failure; wait one second after subsequent ones 244 | .catch(() => { 245 | console.warn("First fetch failed in query_server_clock, retrying"); 246 | return fetch_with_retry(target_url, fetch_init) 247 | }); 248 | 249 | if (!fetch_result.ok) { 250 | throw({ 251 | message: 'Server request gave an error. ' + 252 | 'Talk to whoever is running things, or ' + 253 | 'refresh and try again.', 254 | unpreventable: true, 255 | }); 256 | } 257 | 258 | // We need one-way latency; dividing by 2 is unprincipled but probably close enough. 259 | // XXX: This is not actually correct. We should really be using the roundtrip latency here. Because we want to know not "what is the server clock now", but "what will the server clock be by the time my request reaches the server." 260 | // Proposed alternative: 261 | /* 262 | var request_time_samples = Math.round(request_time_ms * sample_rate / 1000.0); 263 | var metadata = JSON.parse(fetch_result.headers.get("X-Audio-Metadata")); 264 | // Add this to "our time now" to yield "server time when it gets our request." 265 | server_sample_offset = metadata["server_clock"] - request_time_samples; 266 | // Note: In the presence of network jitter, our message can get to the server either before or after the target server moment. This means that if our target server moment is "now", our actual requested moment could end up in the future. 
Someone on one side or the other has to deal with this. But in general if we are requesting "now" it means we do not expect to get audio data at all, so it should be okay for us to never ask for audio data in the case (and it should be ok for the server to give us zeros for "future" data, since we should never have asked, but that's what _would_ be there.) 267 | */ 268 | // Ref: https://github.com/jeffkaufman/bucket-brigade/issues/38 269 | var server_latency_ms = (Date.now() - request_time_ms) / 2.0; // Wrong, see above 270 | var metadata = JSON.parse(fetch_result.headers.get("X-Audio-Metadata")); 271 | console.debug("query_server_clock got metadata:", metadata); 272 | var server_sample_rate = parseInt(metadata["server_sample_rate"], 10); 273 | var server_clock = Math.round(metadata["server_clock"] + server_latency_ms * server_sample_rate / 1000.0); 274 | console.info("Server clock is estimated to be:", server_clock, " (", metadata["server_clock"], "+", server_latency_ms * server_sample_rate / 1000.0); 275 | return { server_clock, server_sample_rate }; 276 | } 277 | 278 | var xhrs_inflight = 0; 279 | export async function samples_to_server( 280 | outdata, target_url, json_kvs_str, send_metadata) { 281 | console.debug("samples_to_server send_metadata:", send_metadata, 282 | "json_kvs_str", json_kvs_str); 283 | if (outdata === null) { 284 | outdata = new Uint8Array(); 285 | } 286 | 287 | if (json_kvs_str) { 288 | const json_kvs_bytes = (new TextEncoder()).encode(json_kvs_str); 289 | send_metadata.json_len = json_kvs_bytes.length; 290 | 291 | const combined_outdata = 292 | new Uint8Array(json_kvs_bytes.length + outdata.length); 293 | combined_outdata.set(json_kvs_bytes); 294 | combined_outdata.set(outdata, json_kvs_bytes.length); 295 | outdata = combined_outdata; 296 | } 297 | 298 | return new Promise((resolve, reject) => { 299 | var xhr = new XMLHttpRequest(); 300 | xhr.onerror = () => { 301 | reject("xhr.onerror fired"); 302 | } 303 | xhr.onreadystatechange = () => 
{ 304 | if (xhr.readyState == 4 /* done*/) { 305 | handle_xhr_result(xhr, resolve, reject); 306 | } 307 | }; 308 | xhr.debug_id = Date.now(); 309 | 310 | var params = new URLSearchParams(); 311 | 312 | // Going forward, I would like to simplify by: 313 | // (1) using the same names for parameters on the server and the client 314 | // (2) only setting parameters if we want to send them, and always sending them as-is 315 | // The below has been carefully crafted to preserve the exact behavior we had before, when we had a separate "if" statement for every single parameter. 316 | 317 | const param_map = { 318 | chats: 'chat', 319 | requestedLeadPosition: 'request_lead', 320 | markStartSinging: 'mark_start_singing', 321 | markStopSinging: 'mark_stop_singing', 322 | globalVolume: 'volume', 323 | backingVolume: 'backing_volume', 324 | micVolumes: 'mic_volume', 325 | backingTrack: 'track', 326 | loopback_mode: 'loopback', 327 | } 328 | 329 | const skip_params = [] 330 | const truthy_params = ['track', 'monitor']; 331 | const nonnull_params = ['write_clock', 'volume', 'backing_volume', 'bpm', 'repeats', 'bpr']; 332 | const stringify_params = ['chat', 'mic_volume', 'event_data', 'client_telemetry']; 333 | const flag_params = ['request_lead', 'mark_start_singing', 'mark_stop_singing']; 334 | 335 | for (var k in send_metadata) { 336 | var v = send_metadata[k]; 337 | //console.log("BEFORE MAPPING:", k, v); 338 | 339 | if (k in param_map) { 340 | k = param_map[k]; 341 | } 342 | 343 | var send_v = v; 344 | if (skip_params.includes(k)) 345 | continue; 346 | if (truthy_params.includes(k) && !v) 347 | continue; 348 | if (nonnull_params.includes(k) && v === null) 349 | continue; 350 | if (stringify_params.includes(k)) 351 | send_v = JSON.stringify(v); 352 | if (flag_params.includes(k)) 353 | send_v = '1'; 354 | if (k == "loopback") { 355 | if (v == "server") { 356 | console.debug("SPAM", "looping back samples at server"); 357 | send_v = true; 358 | } else { 359 | continue; 360 | } 
361 | } 362 | 363 | //console.log("AFTER MAPPING:", k, send_v); 364 | // Default is to send the parameter exactly as we received it 365 | params.set(k, send_v); 366 | } 367 | 368 | target_url.search = params.toString(); 369 | 370 | // Arbitrary cap; browser cap is 8(?) after which they queue 371 | if (xhrs_inflight >= 4) { 372 | console.warn("NOT SENDING XHR w/ ID:", xhr.debug_id, " due to limit -- already in flight:", xhrs_inflight); 373 | return resolve(null); 374 | } 375 | 376 | console.debug("SPAM", "Sending XHR w/ ID:", xhr.debug_id, "already in flight:", xhrs_inflight++, "; data size:", outdata.length); 377 | xhr.open("POST", target_url, true); 378 | xhr.responseType = "arraybuffer"; 379 | xhr.send(outdata); 380 | if (LOG_ULTRA_VERBOSE) { 381 | console.debug("SPAM", "... XHR sent."); 382 | } 383 | }); 384 | } 385 | 386 | // Only called when readystate is 4 (done) 387 | function handle_xhr_result(xhr, resolve, reject) { 388 | --xhrs_inflight; 389 | 390 | if (xhr.status == 200) { 391 | let metadata = JSON.parse(xhr.getResponseHeader("X-Audio-Metadata")); 392 | let data = xhr.response; 393 | 394 | const metadata_len = metadata.metadata_len; 395 | if (metadata_len) { 396 | metadata = JSON.parse(new TextDecoder('utf8').decode( 397 | data.slice(0, metadata_len))); 398 | data = data.slice(metadata_len); 399 | } 400 | 401 | if (LOG_ULTRA_VERBOSE) { 402 | console.debug("SPAM", "metadata:", metadata); 403 | console.debug("SPAM", "Got XHR response w/ ID:", xhr.debug_id, "result:", xhr.response, " -- still in flight:", xhrs_inflight); 404 | } 405 | 406 | return resolve({metadata, data}); 407 | } else { 408 | console.error("XHR failed w/ ID:", xhr.debug_id, "stopping:", xhr, " -- still in flight:", xhrs_inflight); 409 | var metadata_raw = xhr.getResponseHeader("X-Audio-Metadata"); 410 | 411 | if (metadata_raw) { 412 | try { 413 | var metadata = JSON.parse(metadata_raw); 414 | console.warning("metadata on failed XHR:", metadata); 415 | if (metadata.kill_client) { 416 | 
console.error("Received kill from server:", metadata.message); 417 | return reject("Received kill from server: " + metadata.message); 418 | } 419 | } catch { /* ignore JSON parse failure when already failing */ } 420 | } 421 | 422 | return reject("XHR failed w/ status " + xhr.status); 423 | } 424 | } 425 | -------------------------------------------------------------------------------- /html/audio-worklet.js: -------------------------------------------------------------------------------- 1 | // This gates all the logs that put references to REALLY HUGE objects into the console 2 | // very frequently. When this is on, having the console open eventually causes the 3 | // browser to lag severely and dev tools to lag/hang/crash. Don't use this unless 4 | // you actually need it. 5 | const LOG_ULTRA_VERBOSE = false; 6 | 7 | console.info("Audio worklet module loading"); 8 | 9 | // XXX start copy-pasted imports from lib.js 10 | 11 | var log_counts = {} 12 | function log_every(n, tag, ...args) { 13 | if (tag.constructor != String) { 14 | console.error("In log_every, tag must be a string! 
Got:", n, tag, args); 15 | return; 16 | } 17 | 18 | if (log_counts[tag] === undefined) { 19 | log_counts[tag] = 0; 20 | } 21 | if (log_counts[tag] % n == 0) { 22 | console.debug("<" + tag + "/" + n + ">", ...args); 23 | } 24 | log_counts[tag]++; 25 | } 26 | 27 | function check(condition, message, ...rest) { 28 | if (!condition) { 29 | console.error(message, ...rest); 30 | throw new Error(message); 31 | } 32 | } 33 | 34 | // XXX start copy-pasted imports from audiochunk.js 35 | 36 | class ClockReference { 37 | constructor({ sample_rate }) { 38 | check(sample_rate !== undefined, "Must provide sample_rate as a named argument"); 39 | check(Number.isInteger(sample_rate), "sample_rate must be integer"); 40 | 41 | this.sample_rate = sample_rate; 42 | this.type = this.constructor.name; 43 | } 44 | 45 | equals(other) { 46 | return this.side == other.side && this.sample_rate == other.sample_rate; 47 | } 48 | } 49 | 50 | function thaw_clock_reference(o) { 51 | return new ClockReference({ 52 | sample_rate: o.sample_rate 53 | }); 54 | } 55 | 56 | class ClockInterval { 57 | constructor({ reference, end, length }) { 58 | check(reference !== undefined, "Must provide reference as a named argument"); 59 | check(Number.isInteger(end), "end must be an integer (measured in samples)", end); 60 | check(Number.isInteger(length), "length must be an integer (measured in samples)", length); 61 | check(reference instanceof ClockReference, "reference must be a ClockReference", reference); 62 | 63 | this.end = end; 64 | this.length = length; 65 | this.reference = reference; 66 | } 67 | 68 | get sample_rate() { 69 | return this.reference.sample_rate; 70 | } 71 | 72 | get length_seconds() { 73 | return this.length / this.sample_rate; 74 | } 75 | 76 | get start() { 77 | return this.end - this.length; 78 | } 79 | } 80 | 81 | function thaw_clock_interval(o) { 82 | if (o === undefined) { 83 | return o; 84 | } 85 | return new ClockInterval({ 86 | reference: thaw_clock_reference(o.reference), 87 | end: 
o.end, 88 | length: o.length 89 | }); 90 | } 91 | class AudioChunk { 92 | constructor({ data, interval }) { 93 | check(data !== undefined && interval !== undefined, "Must provide data and interval as named arguments"); 94 | check(interval instanceof ClockInterval, "interval must be a ClockInterval"); 95 | check(interval.reference instanceof ClockReference, "reference must be a ClockReference"); 96 | check(data instanceof Float32Array, "uncompressed audio data must be a Float32Array"); 97 | check(data.length == interval.length, "interval length must match uncompressed data length"); 98 | this.data = data; 99 | this.interval = interval; 100 | this.type = this.constructor.name; 101 | } 102 | 103 | check_clock_reference(clock_reference) { 104 | if (!clock_reference.equals(this.reference)) { 105 | throw new Error("Clock references unequal in AudioChunk.check_clock_reference"); 106 | } 107 | } 108 | 109 | get start() { return this.interval.start; } 110 | get end() { return this.interval.end; } 111 | get length() { return this.interval.length; } 112 | get length_seconds() { return this.interval.length_seconds; } 113 | get reference() { return this.interval.reference; } 114 | get sample_rate() { return this.interval.sample_rate; } 115 | } 116 | function thaw_audio_chunk(o) { 117 | return new AudioChunk({ 118 | data: o.data, 119 | interval: thaw_clock_interval(o.interval), 120 | }); 121 | } 122 | 123 | class PlaceholderChunk { 124 | constructor({ reference, length, interval }){ 125 | check(reference !== undefined && length !== undefined, "Must provide reference and length as named arguments"); 126 | check(reference instanceof ClockReference, "reference must be a ClockReference"); 127 | check(Number.isInteger(length), "length must be an integer"); 128 | if (interval !== undefined) { 129 | check(interval.length == length, "interval must match length"); 130 | check(interval.reference == reference, "interval must match reference"); 131 | } 132 | 133 | this.reference = 
reference; 134 | this.length = length; 135 | this.interval = interval; 136 | this.data = new Float32Array(length); // This exists for convenience but is always all zeros 137 | this.type = this.constructor.name; 138 | } 139 | 140 | check_clock_reference(clock_reference) { 141 | if (!clock_reference.equals(this.reference)) { 142 | throw new Error("Clock references unequal in PlaceholderChunk.check_clock_reference"); 143 | } 144 | } 145 | 146 | get start() { return this.interval.start; } 147 | get end() { return this.interval.end; } 148 | get length_seconds() { return this.interval.length_seconds; } 149 | get sample_rate() { return this.reference.sample_rate; } 150 | } 151 | 152 | function thaw_placeholder_chunk(o) { 153 | return new PlaceholderChunk({ 154 | reference: thaw_clock_reference(o.reference), 155 | length: o.length, 156 | interval: thaw_clock_interval(o.interval), 157 | }); 158 | } 159 | 160 | // XXX end copy-pasted imports 161 | const FRAME_SIZE = 128; // by Web Audio API spec 162 | 163 | class ClockedRingBuffer { 164 | constructor(len_seconds, leadin_seconds, clock_reference, port) { 165 | if (leadin_seconds > len_seconds) { 166 | // Note that even getting close is likely to result in failure. 167 | console.error("leadin time must not exceed size"); 168 | throw new Error("leadin time must not exceed size"); 169 | } 170 | // Before the first write, all reads will be zero. After the first write, 171 | // the first leadin_samples read will be zero, then real reads will start. 172 | // (This allows a buffer to build up.) 173 | 174 | // Round both to FRAME_SIZE. 
175 | this.leadin_samples = Math.round(leadin_seconds * sampleRate / FRAME_SIZE) * FRAME_SIZE; 176 | this.len = Math.round(len_seconds * sampleRate / FRAME_SIZE) * FRAME_SIZE; 177 | 178 | this.read_clock = null; 179 | this.buf = new Float32Array(this.len); 180 | this.buf.fill(NaN); 181 | 182 | if (clock_reference.sample_rate !== sampleRate) { 183 | throw new Error("clock_reference has wrong sample rate in ClockedRingBuffer constructor"); 184 | } 185 | this.clock_reference = clock_reference; 186 | 187 | this.port = port; 188 | 189 | // For debugging, mostly 190 | this.buffered_data = 0; 191 | this.last_write_clock = null; 192 | } 193 | 194 | // Note: We can get writes out of order, so having space left is 195 | // no guarantee that a given write will succeed. 196 | space_left() { 197 | return this.len - this.buffered_data; 198 | } 199 | 200 | real_offset(offset) { 201 | var len = this.len; 202 | // Hack to handle negative numbers (just in case) 203 | var real_offset = ((offset % len) + len) % len; 204 | 205 | if (!(real_offset >= 0 && real_offset < len)) { 206 | console.error("Bad offset:", offset); 207 | throw "Bad offset:" + offset; 208 | } 209 | return real_offset; 210 | } 211 | 212 | read_into(buf) { 213 | //console.debug("Reading chunk of size", buf.length); 214 | if (this.read_clock === null) { 215 | buf.fill(0); 216 | return new PlaceholderChunk({ 217 | reference: this.clock_reference, 218 | length: buf.length 219 | }); 220 | } 221 | 222 | var interval = new ClockInterval({ 223 | reference: this.clock_reference, 224 | end: this.read_clock + buf.length, 225 | length: buf.length 226 | }); 227 | var chunk = new AudioChunk({ data: buf, interval }); 228 | var errors = []; 229 | let underflowed = false; 230 | for (var i = 0; i < chunk.data.length; i++) { 231 | var sample = this.read(chunk.interval.start + i); 232 | if (typeof sample === "number") { 233 | chunk.data[i] = sample; 234 | } else if (sample === null) { 235 | chunk.data[i] = 0; 236 | underflowed = true; 
237 | } else { 238 | chunk.data[i] = 0; 239 | errors.push(sample); 240 | } 241 | } 242 | if (underflowed) { 243 | this.port.postMessage({type: "underflow"}); 244 | } 245 | if (errors.length > 0) { 246 | var err_uniq = Array.from(new Set(errors)); 247 | console.error("Errors while reading chunk", interval, err_uniq); 248 | throw new Error("Failed to read audio chunk from buffer in worklet because: " + JSON.stringify(err_uniq)); 249 | } 250 | return chunk; 251 | } 252 | 253 | read() { 254 | if (LOG_ULTRA_VERBOSE) { 255 | log_every(128000, "buf_read", "leadin_samples:", this.leadin_samples, "read_clock:", this.read_clock, "buffered_data:", this.buffered_data, "space_left:", this.space_left()); 256 | } 257 | if (this.read_clock === null) { 258 | return "no read clock" ; 259 | } 260 | if (this.leadin_samples > 0) { 261 | this.read_clock++; 262 | this.leadin_samples--; 263 | return 0; 264 | } 265 | var val = this.buf[this.real_offset(this.read_clock)]; 266 | if (isNaN(val)) { 267 | // XXX TODO: Seeing an underflow should make us allocate more client slack .... but that's tricky because it will cause a noticeable glitch on the server as our window expands (but at this point it's probably too late to prevent that) 268 | // * It would also make sense to instead just try to drop some audio and recover. (Although audio trapped in the audiocontext pipeline buffers cannot be dropped without restarting the whole thing.) 
269 | // XXX this used to be an error log 270 | log_every(12800, "buf_read underflow", "Buffer underflow :-( leadin_samples:", this.leadin_samples, "read_clock:", this.read_clock, "buffered_data:", this.buffered_data, "space_left:", this.space_left(), "last_write_clock:", this.last_write_clock); 271 | this.read_clock++; 272 | this.buffered_data--; 273 | return null; 274 | } 275 | this.buf[this.real_offset(this.read_clock)] = NaN; // Mostly for debugging 276 | this.read_clock++; 277 | this.buffered_data--; 278 | return val; 279 | } 280 | 281 | write_chunk(chunk) { 282 | // console.debug("SPAM", "Writing chunk of size", chunk.length); 283 | chunk.check_clock_reference(this.clock_reference); 284 | for (var i = 0; i < chunk.data.length; i++) { 285 | this.write(chunk.data[i], chunk.start + i); 286 | } 287 | } 288 | 289 | // XXX: fix performance (take an entire slice at once) 290 | write(value, write_clock) { 291 | check(write_clock == Math.round(write_clock), "write_clock not an integer?!", write_clock); 292 | if (this.last_write_clock !== null) { 293 | if (write_clock != this.last_write_clock + 1) { 294 | // Ostensibly we allow this, but I think it should never happen and is always a bug... 295 | console.error("Write clock not incrementing?! 
Last write clock:", this.last_write_clock, ", new write clock:", write_clock, ", difference from expected:", write_clock - (this.last_write_clock + 1)); 296 | throw new Exception("Write clock skipped or went backwards"); 297 | } 298 | } 299 | this.last_write_clock = write_clock; 300 | // XXX(slow): lib.log_every(12800, "buf_write", "write_clock:", write_clock, "read_clock:", this.read_clock, "buffered_data:", this.buffered_data, "space_left:", this.space_left()); 301 | if (this.read_clock === null) { 302 | // It should be acceptable for this to end up negative 303 | this.read_clock = write_clock - this.leadin_samples; 304 | } 305 | if (this.space_left() == 0) { 306 | // This is a "true" buffer overflow, we have actually run completely out of buffer. 307 | console.error("Buffer overflow :-( write_clock:", write_clock, "read_clock:", this.read_clock, "buffered_data:", this.buffered_data, "space_left:", this.space_left()); 308 | throw new Error("Buffer overflow"); 309 | } 310 | if (!isNaN(this.buf[this.real_offset(write_clock)])) { 311 | // This is a "false" buffer overflow -- we are overwriting some past data that the reader skipped over (presumably due to an underflow.) Just write it anyway. (XXX: this should never happen I think, and I never observe it.) 
312 | // XXX this used to be a warning log 313 | log_every(12800, "sorta_overflow", "Writing over existing buffered data; write_clock:", write_clock, "read_clock:", this.read_clock, "buffered_data:", this.buffered_data, "space_left:", this.space_left()); 314 | } 315 | if (this.buffered_data >= 0) { 316 | this.buf[this.real_offset(write_clock)] = value; 317 | } else { 318 | // Don't write into the buffer if we're behind the read pointer, it will just fuck us up later when we wrap around to it 319 | log_every(12800, "compensated_underflow", "Compensating for underflow by discarding data until we reach the read pointer"); 320 | } 321 | this.buffered_data++; 322 | } 323 | } 324 | 325 | class LatencyCalibrator { 326 | constructor() { 327 | // State related to peak detection processing: 328 | // clicks 329 | this.click_index = 0; 330 | this.beat_index = 0; 331 | const bpm = 105; 332 | this.click_frame_interval = 333 | Math.round(sampleRate / FRAME_SIZE * 60 / bpm); 334 | this.click_index_samples = 0; 335 | this.click_length_samples = sampleRate / 64; 336 | 337 | // peak detection 338 | this.window = []; 339 | this.last_peak = Date.now(); 340 | this.background_noise = 0; 341 | this.background_samples = []; 342 | this.max_background_samples = sampleRate * 3 / FRAME_SIZE; // 3s 343 | this.frames_since_last_beat = 0; 344 | 345 | // tuning params 346 | this.peak_ratio = 10; 347 | this.min_peak_interval_ms = 200; 348 | this.window_size_samples = 20; 349 | this.click_interval_samples = 3000; 350 | this.min_n_latencies = 3; 351 | 352 | this.latencies = []; 353 | } 354 | 355 | detect_peak(index, now) { 356 | var abs_sum = 0; 357 | for (var i = 0; i < this.window.length; i++) { 358 | abs_sum += Math.abs(this.window[i]); 359 | } 360 | 361 | if (abs_sum / this.window.length > 362 | this.background_noise / (this.background_samples.length*FRAME_SIZE) * this.peak_ratio && 363 | now - this.last_peak > this.min_peak_interval_ms) { 364 | this.last_peak = now; 365 | var latency_samples = 
index + 128*this.frames_since_last_beat; 366 | var latency_ms = 1000.0 * latency_samples / sampleRate; 367 | if (latency_ms > 500) { 368 | latency_ms -= 1000; 369 | } 370 | 371 | this.latencies.push(latency_ms); 372 | const msg = { 373 | "type": "latency_estimate", 374 | "samples": this.latencies.length, 375 | } 376 | 377 | if (this.latencies.length >= this.min_n_latencies) { 378 | this.sorted_latencies = this.latencies.slice(); 379 | this.sorted_latencies.sort((a, b) => a-b); 380 | msg.p25 = this.sorted_latencies[Math.round(this.latencies.length * 0.25)]; 381 | msg.p50 = this.sorted_latencies[Math.round(this.latencies.length * 0.5)]; 382 | msg.p75 = this.sorted_latencies[Math.round(this.latencies.length * 0.75)]; 383 | } 384 | return msg; 385 | } 386 | 387 | return null; 388 | } 389 | 390 | process_latency_measurement(input, output, click_volume) { 391 | this.click_index++; 392 | var is_beat = this.click_index % this.click_frame_interval == 0; 393 | if (is_beat) { 394 | this.frames_since_last_beat = 0; 395 | this.click_index_samples = 0; 396 | this.beat_index++; 397 | } else { 398 | this.frames_since_last_beat++; 399 | } 400 | 401 | const freq = 1024; 402 | const period = sampleRate / freq; 403 | 404 | for (var k = 0; k < output.length; k++) { 405 | if (this.click_index_samples < this.click_length_samples) { 406 | output[k] = click_volume * Math.sin(Math.PI * 2 * this.click_index_samples / period); 407 | this.click_index_samples++; 408 | } else { 409 | output[k] = 0; 410 | } 411 | } 412 | 413 | var now = Date.now(); 414 | var noise = 0; 415 | var final_result = null; 416 | for (var i = 0 ; i < input.length; i++) { 417 | noise += Math.abs(input[i]); 418 | 419 | this.window.push(input[i]); 420 | if (this.window.length > this.window_size_samples) { 421 | this.window.shift(); 422 | } 423 | 424 | if (this.background_noise > 0) { 425 | var result = this.detect_peak(i, now); 426 | if (result !== null) { 427 | final_result = result; 428 | } 429 | } 430 | } 431 | 432 | 
this.background_samples.push(noise); 433 | this.background_noise += noise; 434 | if (this.background_samples.length > this.max_background_samples) { 435 | // Note: if this ends up using too much CPU we can use a circular buffer. 436 | this.background_noise -= Math.abs(this.background_samples.shift()); 437 | } 438 | 439 | if (this.beat_index > 1 && this.background_noise == 0) { 440 | final_result = {type: "no_mic_input"}; 441 | } 442 | 443 | return final_result; 444 | } 445 | } 446 | 447 | class VolumeCalibrator { 448 | constructor() { 449 | this.volumes = []; 450 | this.block_volumes = []; 451 | this.finished = false; 452 | } 453 | 454 | process_volume_measurement(input) { 455 | if (this.finished) { 456 | return null; 457 | } 458 | 459 | let volume = 0; 460 | for (var i = 0 ; i < input.length; i++) { 461 | volume += Math.abs(input[i]); 462 | } 463 | this.volumes.push(volume / input.length); 464 | 465 | if (this.volumes.length == 100) { 466 | var block_volume = 0; 467 | for (var i = 0; i < this.volumes.length; i++) { 468 | block_volume += this.volumes[i]; 469 | } 470 | block_volume = block_volume / this.volumes.length; 471 | this.block_volumes.push(block_volume / this.volumes.length); 472 | this.volumes = []; 473 | 474 | // About 5s. 
475 | if (this.block_volumes.length == 18) { 476 | this.finished = true; 477 | this.block_volumes.sort((a,b) => a-b); 478 | 479 | // 90th percentile volume 480 | const volume_90th = 481 | this.block_volumes[Math.trunc(this.block_volumes.length * .9)] 482 | 483 | const target_avg = 0.0004; 484 | let input_gain = Math.min(target_avg / volume_90th, 10); 485 | console.info("90th percentile avg volume: " + volume_90th + 486 | "; input_gain: " + input_gain); 487 | 488 | return { 489 | "type": "input_gain", 490 | "input_gain": input_gain 491 | } 492 | } else { 493 | return { 494 | "type": "current_volume", 495 | "volume": block_volume 496 | } 497 | } 498 | } 499 | 500 | return null; 501 | } 502 | } 503 | 504 | class Player extends AudioWorkletProcessor { 505 | constructor () { 506 | super(); 507 | this.try_do(() => { 508 | console.info("Audio worklet object constructing"); 509 | this.ready = false; 510 | this.port.onmessage = (event) => { 511 | this.try_do(() => { 512 | this.handle_message(event); 513 | }); 514 | }; 515 | this.clock_reference = new ClockReference({ sample_rate: sampleRate }); 516 | this.local_latency = 150 * sampleRate / 1000; // rough initial guess (150ms) 517 | this.click_volume = 0; 518 | this.input_gain = 1.0; 519 | 520 | // List of { time, cb }, sorted in time order so we can efficiently 521 | // check for the next one to fire. See insert_time_callback. 522 | this.time_callbacks = []; 523 | 524 | // sound effects 525 | this.chime_sample_index = null; 526 | this.chime_length = sampleRate / 4; 527 | this.chime_volume = 0.05; 528 | this.chime_frequency = 440; // Hz 529 | this.chime_raise = 2000; // Hz 530 | }) 531 | } 532 | 533 | try_do(callback) { 534 | try { 535 | callback(); 536 | } catch (err) { 537 | let {name, message, stack, unpreventable} = err ?? 
{}; 538 | [name, message, stack] = [name, message, stack].map(String); 539 | unpreventable = Boolean(unpreventable); 540 | this.port.postMessage({ 541 | type: "exception", 542 | exception: {name, message, stack, unpreventable}, 543 | }); 544 | } 545 | } 546 | 547 | insert_time_callback(time, cb) { 548 | // We need to maintain the sorted order of `this.time_callbacks`. 549 | // Events are normally given to us in order, so we start at the end. 550 | for (var i = this.time_callbacks.length - 1; i >= 0; --i) { 551 | if (time > this.time_callbacks[i].time) { 552 | this.time_callbacks.splice(i + 1, 0, { time, cb }); 553 | break; 554 | } 555 | } 556 | if (i == -1) { 557 | this.time_callbacks.splice(0, 0, { time, cb }); 558 | } 559 | 560 | console.log("inserted", time, cb, "time_callbacks is now", this.time_callbacks); 561 | } 562 | 563 | handle_message(event) { 564 | var msg = event.data; 565 | // console.debug("VERYSPAM", "handle_message in audioworklet:", msg); 566 | 567 | if (msg.type == "audio_params") { 568 | // Reset and/or set up everything. 569 | this.latency_calibrator = null; 570 | this.latency_measurement_mode = false; 571 | this.volume_measurement_mode = false; 572 | 573 | this.epoch = msg.epoch; 574 | 575 | this.synthetic_source = msg.synthetic_source; 576 | this.click_interval = msg.click_interval; 577 | this.loopback_mode = msg.loopback_mode; 578 | 579 | // This is _extra_ slack on top of the size of the server request. 580 | this.client_slack = .500; // 500 ms? 
581 | 582 | // 15 seconds of total buffer, `this.client_slack` seconds of leadin 583 | this.play_buffer = new ClockedRingBuffer(15, this.client_slack, this.clock_reference, this.port); 584 | 585 | this.ready = true; 586 | this.port.postMessage({ 587 | type: "ready", 588 | cookie: msg.cookie, 589 | }); 590 | return; 591 | } else if (msg.type == "stop") { 592 | this.ready = false; 593 | return; 594 | } else if (msg.type == "local_latency") { 595 | this.local_latency = msg.local_latency; 596 | return; 597 | } else if (msg.type == "input_gain") { 598 | this.input_gain = msg.input_gain; 599 | return; 600 | } else if (msg.type == "latency_estimation_mode") { 601 | console.debug("latency estimation mode in worklet:", msg.enabled); 602 | this.latency_measurement_mode = msg.enabled; 603 | if (this.latency_measurement_mode) { 604 | this.latency_calibrator = new LatencyCalibrator(); 605 | } else { 606 | this.latency_calibrator = null; 607 | } 608 | return; 609 | } else if (msg.type == "ignore_input") { 610 | if (this.play_buffer && this.play_buffer.read_clock) { 611 | // This violates an invariant, and will cause an assertion failure elsewhere later if it happens, so blow up now instead. 
612 | throw new Error("Not allowed to start ignoring input after the clock has already started!"); 613 | } 614 | this.ignore_input = msg.enabled; 615 | return; 616 | } else if (msg.type == "volume_estimation_mode") { 617 | this.volume_measurement_mode = msg.enabled; 618 | if (this.volume_measurement_mode) { 619 | this.volume_calibrator = new VolumeCalibrator(); 620 | } else { 621 | this.volume_calibrator = null; 622 | } 623 | return; 624 | } else if (msg.type == "play_chime") { 625 | if (this.chime_sample_index == null) { 626 | this.chime_sample_index = 0; 627 | } 628 | return; 629 | } else if (msg.type == "mic_pause_mode") { 630 | this.mic_pause_mode = msg.enabled; 631 | return; 632 | } else if (msg.type == "speaker_pause_mode") { 633 | this.speaker_pause_mode = msg.enabled; 634 | return; 635 | } else if (msg.type == "click_volume_change") { 636 | this.set_click_volume(msg.value/100); 637 | return; 638 | } else if (msg.type == "request_cur_clock") { 639 | this.port.postMessage({ 640 | type: "cur_clock", 641 | clock: this.play_buffer.read_clock 642 | }); 643 | return; 644 | } else if (msg.type == "set_alarm") { 645 | console.info("audio worklet setting alarm", msg); 646 | let cb = ()=>{ this.port.postMessage({type:"alarm",time:msg.time }) }; 647 | this.insert_time_callback(msg.time, cb); 648 | return; 649 | } else if (msg.type == "clear_alarms") { 650 | this.time_callbacks = []; 651 | return; 652 | } else if (!this.ready) { 653 | console.error("received message before ready:", msg); 654 | return; 655 | } else if (msg.type != "samples_in") { 656 | console.error("Unknown message:", msg); 657 | return; 658 | } 659 | 660 | var chunk = thaw_audio_chunk(msg.chunk); 661 | this.play_buffer.write_chunk(chunk); 662 | // console.debug("VERYSPAM", "new play buffer:", this.play_buffer); 663 | } 664 | 665 | set_click_volume(linear_volume) { 666 | // https://www.dr-lex.be/info-stuff/volumecontrols.html 667 | this.click_volume = Math.exp(6.908 * linear_volume)/1000; 668 | } 669 | 
670 | synthesize_clicks(input, interval) { 671 | // console.debug("VERYSPAM", "synthesizing clicks"); 672 | if (!this.synthetic_source_counter) { 673 | console.info("Starting up clicks"); 674 | this.synthetic_source_counter = 0; 675 | } 676 | 677 | var sound_level = 0.0; 678 | if (this.synthetic_source_counter % Math.round(sampleRate * interval / FRAME_SIZE) == 0) { 679 | sound_level = this.click_volume; 680 | } 681 | 682 | // This is probably not very kosher... 683 | for (var i = 0; i < input.length; i++) { 684 | input[i] = sound_level; 685 | } 686 | this.synthetic_source_counter++; 687 | } 688 | 689 | decimal_sine(x) { // 0 to 1 -> sine 690 | return Math.sin(x*Math.PI*2); 691 | } 692 | 693 | maybe_write_chime(output) { 694 | if (this.chime_sample_index == null) { 695 | return; 696 | } 697 | 698 | for (var i = 0; i < output.length; i++) { 699 | const raise_amount = 700 | Math.abs(this.chime_length - this.chime_sample_index * 2) / 701 | this.chime_length; 702 | const pos = this.chime_sample_index * 703 | (this.chime_frequency + 704 | this.chime_raise * raise_amount) / 705 | sampleRate; 706 | output[i] += (this.chime_volume * this.decimal_sine(pos)); 707 | 708 | this.chime_sample_index++; 709 | if (this.chime_sample_index >= this.chime_length) { 710 | this.chime_sample_index = null; 711 | return; 712 | } 713 | } 714 | } 715 | 716 | process_normal(input, output) { 717 | //// console.debug("VERYSPAM", "process_normal:", input); 718 | if (this.synthetic_source == "CLICKS") { 719 | this.synthesize_clicks(input, this.click_interval); 720 | } 721 | 722 | if (this.loopback_mode === "worklet") { 723 | // Send input straight to output and do nothing else with it (only for debugging) 724 | output.set(input); 725 | } else { 726 | // Normal input/output handling 727 | var play_chunk = this.play_buffer.read_into(output); 728 | 729 | this.maybe_write_chime(output); 730 | 731 | // console.debug("VERYSPAM", "about to play chunk:", play_chunk); 732 | 733 | if (this.synthetic_source 
== "ECHO") { 734 | // This is the "opposite" of local loopback: There, we take whatever 735 | // we hear on the mic and send to the speaker, whereas here we take 736 | // whatever we're about to send to the speaker, and pretend we 737 | // heard it on the mic. (This has zero latency.) 738 | input.set(play_chunk.data()); 739 | } 740 | 741 | if (!(play_chunk instanceof PlaceholderChunk)) { 742 | while ((this.time_callbacks.length > 0) && 743 | (play_chunk.end > this.time_callbacks[0].time)) { 744 | console.log("firing time_callbacks[0]:", 745 | this.time_callbacks[0], 746 | "time_callbacks is now", 747 | JSON.stringify(this.time_callbacks)); 748 | this.time_callbacks.shift().cb(); 749 | } 750 | } 751 | 752 | var mic_chunk = null; 753 | if (!(play_chunk instanceof PlaceholderChunk) && !this.ignore_input) { 754 | var interval = new ClockInterval({ 755 | reference: play_chunk.reference, 756 | length: input.length, 757 | // This is where the magic happens: outgoing chunks are timestamped NOT 758 | // with when we got them, but with when we got the incoming audio 759 | // that aligns them. 
760 | end: play_chunk.end - this.local_latency, 761 | }); 762 | 763 | for (var i = 0; i < input.length; i++) { 764 | input[i] *= this.input_gain; 765 | } 766 | 767 | mic_chunk = new AudioChunk({ 768 | data: input, 769 | interval 770 | }); 771 | } else { 772 | mic_chunk = new PlaceholderChunk({ 773 | reference: play_chunk.reference, 774 | length: input.length 775 | }); 776 | } 777 | 778 | // console.debug("VERYSPAM", "about to return heard chunk:", mic_chunk); 779 | this.port.postMessage({ 780 | epoch: this.epoch, 781 | jank: this.acc_err, 782 | jank_over: Date.now() - this.acc_err_since, 783 | dropped_calls: this.dropped_calls, 784 | type: "samples_out", 785 | chunk: mic_chunk, 786 | }); // XXX don't transfer , [mic_chunk.data.buffer]); 787 | // End normal handling 788 | } 789 | } 790 | 791 | profile_web_audio() { 792 | var now_ms = Date.now(); 793 | this.calls += 1; 794 | const process_history_len = 100; 795 | if (this.process_history_ms === undefined) { 796 | this.bad_sample_rate = 0; 797 | this.acc_err = 0; 798 | this.acc_err_since = now_ms; 799 | this.calls = 0; 800 | this.process_history_ms = new Array(process_history_len).fill(NaN); 801 | } else if (!isNaN(this.process_history_ms[0])) { 802 | var interval = now_ms - this.process_history_ms[0]; 803 | var total_interval = now_ms - this.acc_err_since; 804 | var target_interval = process_history_len * 128 * 1000 / sampleRate; 805 | var err = interval - target_interval; 806 | var eff_rate = process_history_len * 128 * 1000 / interval; 807 | this.acc_err += err / process_history_len; 808 | var target_calls = total_interval * sampleRate / 1000 / 128; 809 | this.dropped_calls = target_calls - this.calls 810 | if (LOG_ULTRA_VERBOSE) { 811 | log_every(500, "profile_web_audio", total_interval, target_calls, this.calls, this.dropped_calls, sampleRate, eff_rate, this.process_history_ms[0], now_ms, interval, target_interval, err, this.acc_err, this.acc_err / (128 * 1000 / 22050 /* XXX... 
*/)); 812 | } 813 | 814 | // other parameters of interest 815 | // XXX // console.debug("VERYSPAM", currentTime, currentFrame, /* getOutputTimestamp(), performanceTime, contextTime*/); 816 | 817 | if (eff_rate < 0.75 * sampleRate) { 818 | if (this.bad_sample_rate == 0) { 819 | console.warn("BAD SAMPLE RATE, WEB AUDIO BUG? Should be", sampleRate, "but seeing", eff_rate, ". :-("); 820 | } 821 | this.bad_sample_rate += 1; 822 | if (this.bad_sample_rate % 2000 == 0) { 823 | console.warn("SAMPLE RATE STILL BAD. Should be", sampleRate, "but seeing", eff_rate, ". :-("); 824 | // Ask the main app to reload the audio input device 825 | /* XXX this.killed = true; 826 | throw { 827 | message: "Your computer's audio system is lagging a lot, which is breaking the app. Please disconnect any bluetooth headphones or speakers, close unnecessary apps / reduce load on your computer, then refresh the page and try again.", 828 | unpreventable: true, 829 | }; */ 830 | } 831 | } 832 | } 833 | this.process_history_ms.push(now_ms); 834 | this.process_history_ms.shift(); 835 | } 836 | 837 | process(inputs, outputs) { 838 | let keep_alive = false; 839 | this.try_do(() => { 840 | // Gather some stats, and restart if things look wonky for too long. 841 | this.profile_web_audio() 842 | 843 | if (this.killed) { 844 | return; 845 | } 846 | if (!this.ready) { 847 | keep_alive = true; 848 | return; 849 | } 850 | 851 | if (!inputs || !inputs[0] || !inputs[0][0] || !outputs || !outputs[0] || !outputs[0][0]) { 852 | // Firefox has `inputs[0] == []` sometimes when first starting up; it 853 | // may or may not arguably be permitted by the standard; in any case 854 | // we ignore it and continue. 
855 | // * https://bugzilla.mozilla.org/show_bug.cgi?id=1629478 856 | keep_alive = true; 857 | return; 858 | } 859 | var input = inputs[0][0]; 860 | var output = outputs[0][0]; 861 | 862 | if (this.latency_measurement_mode) { 863 | var calibration_result = this.latency_calibrator.process_latency_measurement(input, output, this.click_volume); 864 | if (calibration_result !== null) { 865 | calibration_result.jank = this.acc_err; 866 | this.port.postMessage(calibration_result); 867 | } 868 | // Don't even send or receive audio in this mode. 869 | } else if (this.volume_measurement_mode) { 870 | var calibration_result = this.volume_calibrator.process_volume_measurement(input); 871 | if (calibration_result !== null) { 872 | this.input_gain = calibration_result.input_gain; 873 | this.port.postMessage(calibration_result); 874 | } 875 | output = new Float32Array(output.length); 876 | } else { 877 | if (this.mic_pause_mode || this.ignore_input) { 878 | // Mute the microphone by replacing the input with zeros. 879 | input = new Float32Array(input.length); 880 | } 881 | if (this.speaker_pause_mode) { 882 | // Mute the speaker by setting the output to empty. 883 | output = new Float32Array(output.length); 884 | } 885 | this.process_normal(input, output); 886 | // Hack: If we've fallen behind, pretend we were called some extra times to skip a bit of audio until we catch up. This will audibly glitch (but there is an extremely high likelihood that we actually just did anyway, to get here.) 887 | if (this.dropped_calls > 125 /* arbitrary */) { 888 | // Don't do too many at once, because sometimes lag can be temporary, and we don't want to overshoot too much. 889 | // Do up to 5, but no more than required to get us down to 100. 
890 | var calls_to_make_up = Math.min(this.dropped_calls - 100, 5); 891 | console.warn("Making up for lost time by throwing away some audio: calls_to_make_up =", calls_to_make_up, "total dropped calls =", this.dropped_calls); 892 | while (calls_to_make_up > 0) { 893 | calls_to_make_up -= 1; 894 | this.calls += 1; 895 | this.dropped_calls -= 1; 896 | this.process_normal(input, output); 897 | } 898 | this.warned_overcomp = 0; 899 | this.port.postMessage({ 900 | type: "audio_lag", 901 | }); 902 | } 903 | if (this.dropped_calls < this.warned_overcomp - 10) { 904 | this.warned_overcomp = this.dropped_calls; 905 | console.warn("Whoops, we overcompensated for call drops, we're now ahead by:", -this.dropped_calls); 906 | } 907 | } 908 | // Handle stereo output by cloning mono output. 909 | for (var chan = 1; chan < outputs[0].length; chan++) { 910 | outputs[0][chan].set(outputs[0][0]); 911 | } 912 | keep_alive = true; 913 | }); 914 | return keep_alive; 915 | } 916 | } 917 | 918 | registerProcessor('player', Player); 919 | -------------------------------------------------------------------------------- /html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Bucket Brigade 6 | 661 | 662 | 663 | 666 | 667 | 668 | 669 | 670 | 671 |

672 |
uploading...
673 |
674 | 675 |
676 | 678 |
679 |
Sorry, too many lyrics
680 | 681 | 682 |
683 |
684 | 685 |
686 | 687 |
688 | 694 | 695 |
696 | 697 | 698 |
699 |
700 | 701 |
702 |
703 | 704 | Home 705 | Help 706 | About 707 | Reserve 708 | Recordings 709 | Advanced 710 | Debug 711 |
712 | 713 |
714 | 715 |
716 | 717 | There are enough people that your mic is muted for conversation. You 718 | can unmute by changing "mic" to "on" in the lower left, or by holding 719 | the spacebar while you talk.
721 | 722 |
723 |
724 |

Best to leave these alone, since they do affect everyone

725 | 726 | Beats per Minute: 727 | (Enables metronome, set to 0 to disable) 728 |

729 | Repeats (for rounds): 730 |

731 | Beats Per Repeat (for rounds): 732 |

733 | 734 |

735 | 736 |

Mixing Console

737 | 738 |
739 |
740 | Hear Solos Mix: 741 |
742 | 743 |
744 | 745 |
746 | 747 |
748 |
749 | 750 |

Settings that only affect you

751 | 752 | Disable tutorial: 753 | 754 |

755 | Disable latency measurement: 756 | 757 |

758 | API path: 759 | 760 |

761 | Upload path: 762 | 763 |

764 | Loopback mode: 765 | 771 |

772 | Presentation mode: 773 | 774 | 775 | 776 |

Settings that affect everyone

777 | Leave these alone unless you know what you're doing 778 | 779 |

780 | 781 | Disable auto gain: 782 | 783 |

784 | Disable video during songs: 785 | 786 |
787 |

(Clicks BPM if selected: )
788 |
789 | Test event contents: 790 | Offset (optional): 791 | 792 |
793 |

794 | Global Volume Control (0-2): 795 | 796 |

797 | Backing Track Volume Control (0-2): 798 | 799 |

800 | 801 |

Debug Info

802 | Sample rate: 803 | 804 |
805 | Input / output peak absolute amplitude: 806 | 807 | 808 |
809 | Input gain (scalar): 810 | 811 |
812 | This client total time consumed (s): 813 | 814 |
815 | This client read slippage (s): 816 | 817 |
818 | Total time to next client [more than 3 is bad] (s): 819 | 820 |
821 | Batch size (ms): 822 | 823 |
824 | Client Latency (ms): 825 | 826 |
827 | Web Audio Jank (initial) (ms): 828 | 829 |
830 | "True" (de-janked) Latency (ms): 831 | 832 |
833 | Web Audio Jank (current) (ms): 834 | 835 | 836 | 837 |
838 |
839 | 840 | 841 |
842 |
843 |
844 | 845 | If no one currently has Bucket Brigade reserved, feel free to use it. 846 | 847 |

848 | 849 | If you would like to reserve Bucket Brigade, which you are welcome to 850 | do for whatever sort of event you would like to hold here, send a 851 | calendar invitation to 852 | gsc268k1lu78lbvfbhphdr0cs4@group.calendar.google.com. For 853 | example, you can add that email address as a guest for an event in 854 | Google Calendar. 855 | 856 |

857 | 858 | By default, events are not password-protected. If 859 | you would like to require a password, enter one: . Include in the description field of your 863 | calendar event. You and your guests will need to enter the password 864 | exactly as you typed it. 865 | 866 |

867 | 868 | Let your guests know in advance they'll need: 869 | 870 |

    871 |
  • A computer (not a phone or tablet) 872 |
  • Up-to-date Chrome, Firefox, Edge, or Safari 873 |
  • Ideally, wired headphones. For multiple people, headphone splitters are ~$5 and work well (2-way, 5-way). 874 |
875 | 876 |
877 | 878 | 879 |
880 |
881 | 882 |
883 |
884 |
885 | 886 |
887 |
888 | 889 | Bucket Brigade will be shutting down 2024-12-05 (more 891 | details). 892 | 893 |

894 | 895 | Bucket Brigade is an open source program for making music over the Internet. Glenn, Jeff, and others 899 | develop and maintain it on a "best effort" basis. 900 | 901 |

902 | 903 | It is free to use, but does cost ~25¢/person/hour 905 | to run. If you end up using it much, it would be nice if you would 906 | send us a small amount of money to cover server expenses: paypal.me/jefftkaufman. We'll 908 | credit you on the page with our expenses and 910 | contributions, unless you ask to be anonymous. 911 | 912 |

913 | 914 | The code is open 916 | source, and if you would be interested in running your own 917 | instance we'd be happy to help you get set up. It is also possible to 918 | use as a library, and in that form it has handled an event with over 200 920 | users. 921 | 922 |

923 | 924 | If you have questions or run into problems, file 926 | an issue or write to bucketbrigade@googlegroups.com. 927 | 928 |

929 |
930 | 931 |
932 |
933 | 934 |

Someone is much too quiet during songs

935 | 936 | Volume levels during songs are separate from volume levels while 937 | talking, so someone can be a normal volume in the video call portion 938 | but too quiet while singing. This often happens if someone 939 | accidentally makes an uncharacteristically loud noise, or switches 940 | microphones. They should refresh the page and recalibrate. 941 | 942 |

Someone is consistently off the beat by the same amount.

943 | 944 | If someone is consistently a fraction of a beat early or late, their 945 | client side latency adjustment has gotten off. Most likely, they 946 | switched microphones or headphones without recalibrating. They should 947 | refresh the page and recalibrate. 948 | 949 |

950 | 951 | On the other hand, if someone is inconsistently off the beat, sometimes 952 | early and sometimes late, they are probably just not singing/playing their 953 | best. They could mute their mic or move to a late bucket. 954 | 955 |

956 | 957 | Figuring out which of these is going on is awkward. Please be kind to 958 | each other! A good default is to recalibrate if in doubt, to quickly 959 | rule out a technical problem. 960 | 961 |

Audio is cutting in and out

962 | 963 | Someone's network connection isn't up to the task. They should 964 | probably try moving closer to the router, or even switching to a wired 965 | connection. But is it your connection or someone else's? 966 | 967 |
    968 | 969 |
  • If your "internet" meter, at the bottom of the page, isn't 970 | showing all six bars, it's probably yours.

  • 971 | 972 |
  • If the audio is fully dropping out, and there are multiple 973 | people you should be hearing, and then it's yours.

  • 974 | 975 |
  • If one of the people ahead of you is dropping out, but you can 976 | still hear other people when that happens, it's theirs.

  • 977 | 978 |
  • If you're in the second bucket, only expecting to be hearing 979 | one person ahead of you, it could be either you or them. If you jump 980 | to a later bucket you should be able to tell the difference.

  • 981 | 982 |
983 | 984 |

I keep seeing "Due to lag, your singing is not included"

985 | 986 | This happens most often if your computer is overloaded. Try closing 987 | other things running on your computer, including background tabs. 988 | 989 |

Something else weird is going on

990 | 991 | If you'd like Jeff to drop into your call and have a look, you're 992 | welcome to text 617-871-0237 between 8am and 10pm. This is best 993 | effort; I'll come if I'm not busy. 994 | 995 |

996 | 997 | Alternatively, fill out the form on 999 | github. Please give as much information as you can, so I can try 1000 | and trigger your problem and figure out why it wasn't doing what it 1001 | should. 1002 | 1003 |

1004 |
1005 | 1006 |
1007 |
1008 | 1009 |

1010 | It looks like your browser is too old. Try using the most 1011 | recent version of Chrome, Firefox, Edge, or Safari. 1012 |

1013 | 1014 |

1015 | While we don't recommend trying to continue anyway, because it will 1016 | probably fail silently and just be frustrating for you, if you want 1017 | you can continue: 1018 | 1019 | 1020 |

1021 |
1022 | 1023 |
1024 |
1025 | 1026 |

1027 | It looks like you are on a mobile browser. This is not recommended: 1028 | we haven't found any mobile devices that are able to keep up with 1029 | the strain of encoding and decoding audio in JavaScript. 1030 |

1031 | 1032 |

1033 | 1034 | While we don't recommend trying to continue anyway, because it will 1035 | probably fail silently and just be frustrating for you, if you want 1036 | you can continue: 1037 | 1038 | 1039 |

1040 |
1041 |
1042 | 1043 |
1044 | We're sorry, the room is currently full. Please try again later! 1045 |
1046 | 1047 |
1048 |
1049 | 1050 | Bucket Brigade will be shutting down 2024-12-05 (more 1052 | details). 1053 | 1054 |

1055 | 1056 | Welcome! This is a program for singing with people over the 1057 | Internet. 1058 | 1059 |

1060 |
1061 | 1062 | Before we start, a few questions: 1063 | 1064 |

1065 | 1066 |

1067 | What's your name? 1068 | 1069 |
* we need to know what to call you
1070 |
1071 | 1072 |
1073 | What's the password for this event? 1074 | 1075 |
* that is not the password. If you were invited, 1076 | please check in with the organizer.
1077 |
1078 | 1079 |
1080 | Are you planning on singing or just listening? 1081 | 1082 | 1083 |
1084 |
1085 | Are you using headphones? 1086 | 1087 | 1088 |
1089 | 1090 |
1091 | Do you have wired headphones that you could easily use? 1092 | 1093 | 1094 |
1095 | 1096 |
1097 | Are your headphones wired? 1098 | 1099 | 1100 |
1101 |
1102 | 1103 |
1104 | Great! Please attach them and then refresh this page. 1105 |
1106 | 1107 |
1108 | That's OK! Using headphones helps reduce noise, but it still works OK 1109 | if a few people aren't wearing headphones. 1110 | 1111 |
1112 | 1113 |
1114 | Wireless headphones have large and inconsistent latency. Please 1115 | detach them and refresh this page. 1116 |
1117 | 1118 |
1119 | Wired headphones are ideal! 1120 |

1121 | 1122 |

1123 |
1124 | 1125 |
1126 | Last step! We just need to sort out cameras. 1127 | 1128 |
No camera found
1129 |

1130 | 1131 | 1132 | 1133 |

1134 |

loading...
1135 |
1136 | 1137 |
1138 | 1139 |
1140 | No microphones found. Possibly you do have a microphone, but the 1141 | browser is not allowing access for privacy reasons? 1142 |
1143 | 1144 | 1145 |
1146 | Input device: 1147 | 1150 |
1151 | 1152 |
1153 | 1154 |

1155 | 1156 | In the next step we are going to need to make some really loud beeps to 1157 | calibrate latency. 1158 | 1159 |

1160 | 1161 | Please turn your volume all the way up. 1162 | 1163 | Take your headphones off your head, and position the earpieces as 1164 | close as possible to your mic. On a mac laptop this is near the "esc" 1165 | key. Press "start" when ready. 1166 | 1167 |

1168 | 1169 |
1170 |

1171 | 1172 | If you've changed something about your audio setup since your last 1173 | visit, press "recalibrate". Otherwise press "start" to begin. 1174 | 1175 |

1176 | 1177 | 1178 | 1179 |

1180 | 1181 | 1182 | 1183 |
1184 | 1185 |
1186 | You should be hearing some beeps. 1187 | 1188 |
    1189 |
  • If you don't hear anything, is your speaker unmuted? 1190 |
  • If you do hear beeps but #beeps detected is not counting up, 1191 | turn up the volume. 1192 |
  • If the beeps are very loud but it's still not counting up, 1193 | try refreshing the page and changing your input device. 1194 |
  • Other things to try: 1195 |
      1196 |
    • Restarting your browser. 1197 |
    • Restarting your computer. 1198 |
    • Switching to another browser (Chrome, Firefox, Edge, or Safari) 1199 |
    • Refreshing the page and choosing "Only Listening" to skip calibration. 1200 |
    1201 |
1202 |
1203 | 1204 |
1205 | Mic isn't producing any audio. Is your input device set correctly? 1206 |
1207 | 1208 | 1209 |

1210 | 1211 |

1212 |
1213 |
1214 | 1215 | Unable to calibrate latency. Please make sure that the microphone is 1216 | able to hear the speaker. 1217 |
1218 |
1219 | 1220 | 1221 |
1222 |
1223 | 1224 | It's fine to continue without calibration if you just want to sing along, 1225 | but your audio can't be sent out for everyone else to hear unless we 1226 | know exactly how much client-side latency you have. 1227 | 1228 |
1229 | 1230 | ...
1231 | #beeps detected 1232 | 1233 | ...
1234 | latency
1235 |
1236 | 1237 | ...
1238 | variance 1239 |
1240 |
1241 | 1242 | 1243 | 1244 |

1245 | 1246 | Click volume: 1248 | 1249 |

1250 | 1251 |
1252 | 1253 |
1254 | Your latency is now calibrated! If at any point you change 1255 | something about your audio setup, such as switching to a different 1256 | microphone, please refresh the page and recalibrate. 1257 | 1258 |

1259 | 1260 | Now we need to calibrate your volume. This will only apply to music 1261 | making; the video call portion automatically adjusts to your current 1262 | volume. You probably want to turn your speaker volume back down. 1263 | Pick something you're comfortable singing, start singing, and then 1264 | click . 1266 | 1267 |

1268 | 1269 |

1270 | Volume: ... 1271 |
1272 | 1273 |
1274 |
1275 | 1276 |
1277 |
1278 | Lost connectivity to the server. Trying to reconnect… 1279 |
1280 | 1281 |
1282 | Are you still here? If so, move the mouse or something. 1283 |
1284 | 1285 |
1286 | The metronome is set to . 1287 | Rounds are configured, with repeats of beats each. 1288 | To disable, set to zero on the Advanced Settings tab. 1289 |
1290 | 1291 |
1292 | 1293 | 1294 | 1295 | 1296 | Image Uploaded. 1297 | Image Upload Failed. 1298 | 1299 | 1300 | Track Uploaded. 1301 | Track Upload Failed. 1302 | 1303 |
1304 |
1305 | 1306 |
1307 | Due to lag, your singing is not included 1308 | 1309 |
1310 |
1311 | 1312 |
1313 | 1314 |
1315 | 1316 |
1317 |
1318 | 1319 | Each person is in a bucket. You can hear the people in earlier 1320 | buckets, and you can be heard by the people in later buckets. You 1321 | can't hear other people in your own bucket. You can move between 1322 | buckets by pressing the "join" button in the upper right. 1323 | 1324 |

1325 | 1326 | If you are a strong singer or know the song well you might choose 1327 | an earlier bucket, while if you are less confident you might 1328 | choose a later one. Similarly, if you are in a noisy environment 1329 | or are not wearing headphones, you might also choose a later one. 1330 | 1331 |

1332 | 1333 | 1336 | 1337 |
1338 | 1339 |
1340 |
1341 | 1342 |
1343 | 1344 |
1345 | 1346 |
1347 | 1348 |
1349 | 1350 |
1351 |
1352 |
1353 |
1354 | 1355 |
1356 | 1357 |
1358 |
1359 | 1360 |
1361 |
1362 | mic: 1363 |
1364 | 1365 | 1367 | 1368 |
1369 |
1370 | 1371 |
1372 | 1373 | 1374 |
As a spectator, your singing is not included
1375 |
1376 | 1377 |
1378 | 1379 | 1380 |
1381 | 1382 |
1383 |
1384 | 1385 |
1386 | 1387 |
1388 | Internet: 1389 |
1390 | 1391 |
1392 |
1393 |
1394 |
1395 | 1396 | 1397 |
Time Travel: 1398 |
s
1399 |
1400 | 1401 |
1402 | 1403 | 1404 | 1405 |
1406 |

This app has crashed. We're really sorry :-(

1407 |
1408 |

Please file a bug with the following information; it will help us fix it.

1409 | 1410 |

Then refresh the page and try again.

1411 |
1412 |
1413 | 1414 | 1502 | 1503 | 1504 | 1505 | 1513 | 1514 | 1515 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import pprint 5 | 6 | import json 7 | import urllib.parse 8 | import time 9 | import numpy as np # type:ignore 10 | import random 11 | import math 12 | import os 13 | import logging 14 | import wave 15 | import threading 16 | import datetime 17 | import struct 18 | import subprocess 19 | import copy 20 | import sys 21 | import string 22 | 23 | from typing import Any, Dict, List, Tuple, Iterable 24 | 25 | sys.path.append(os.path.dirname(__file__)) # for finding our files 26 | import util 27 | 28 | logging.basicConfig(filename='server.log',level=logging.DEBUG) 29 | 30 | # big-endian 31 | # 8 userid: uint64 32 | # 32 name: 32 bytes of utf8, '\0' padded 33 | # 4 mic_volume: float32, 34 | # 4 rms_volume: float32 35 | # 2 delay: uint16 36 | # 1 muted: uint8 37 | BINARY_USER_CONFIG_FORMAT = struct.Struct(">Q32sffHB") 38 | 39 | FRAME_SIZE = 128 40 | 41 | N_IMAGINARY_USERS = 0 # for debugging user summary + mixing console performance 42 | 43 | SUPPORT_SERVER_CONTROL = False 44 | 45 | # The maximum number of users to allow to join. This is enforced on a 46 | # best-effort basis by the client. If many people are calibrating at 47 | # the same time this will be exceeded, because we only check before 48 | # calibration. 49 | # 50 | # In stress testing, the server seems to do fine with 61 users, but 51 | # the video call might change that (stress test includes no video). 52 | MAX_USERS = 35 # XXX needs tuning 53 | 54 | try: 55 | # Grab these on startup, when they are very very likely to be the actual 56 | # running version. 
try:
    # Grab these on startup, when they are very very likely to be the actual
    # running version.
    SERVER_VERSION = subprocess.check_output(
        ["git", "rev-parse", "--short", "HEAD"]).strip().decode("utf-8")
    SERVER_BRANCH = subprocess.check_output(
        ["git", "rev-parse", "--abbrev-ref", "HEAD"]).strip().decode("utf-8")
except Exception:
    SERVER_VERSION = "unknown"
    SERVER_BRANCH = "unknown"

SERVER_STARTUP_TIME = int(time.time())

ENABLE_TWILIO = True
SECRETS_FNAME = "secrets.json"

# Twilio is only available when a secrets file is present.
secrets = {}
if os.path.exists(SECRETS_FNAME):
    with open(SECRETS_FNAME) as inf:
        secrets = json.loads(inf.read())
else:
    ENABLE_TWILIO = False

if ENABLE_TWILIO:
    from twilio.jwt.access_token import AccessToken
    from twilio.jwt.access_token.grants import VideoGrant


class State():
    """Server-wide mutable session state; reset() restores song defaults."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.server_controlled = False

        self.last_request_clock = None
        self.last_cleared_clock = None
        self.global_volume = 1.2
        self.backing_volume = 1.0
        self.song_end_clock = 0
        self.song_start_clock = 0
        self.requested_track: Any = None

        # Metronome configuration.
        self.bpm = 0
        self.repeats = 0
        self.bpr = 0
        self.leftover_beat_samples = 0

        self.first_bucket = DELAY_INTERVAL

        self.leader = None

        self.backing_track: Any = np.zeros(0)
        self.backing_track_index = 0
        self.backing_track_type = ""

        self.max_position = DELAY_INTERVAL * LAYERING_DEPTH

        self.disable_auto_gain = False
        self.disable_song_video = False

        self.lyrics = ""
        self.image = None

        if recorder:
            recorder.reset()


def friendly_volume_to_scalar(volume):
    """Map a perceptual volume (0..~2, 1 = unity) to a linear gain scalar.

    Uses the exponential mapping from
    https://www.dr-lex.be/info-stuff/volumecontrols.html
    """
    if volume < 0.0000001:
        return 0
    return math.exp(6.908 * volume) / 1000


def scalar_to_friendly_volume(scalar):
    """Inverse of friendly_volume_to_scalar: linear gain -> perceptual volume."""
    if scalar < 0.0001:
        return 0
    return math.log(scalar * 1000) / 6.908
LEADER_BOOST = friendly_volume_to_scalar(1.1)

QUEUE_SECONDS = 120

SAMPLE_RATE = 48000

# How often to print status updates.  With no requests coming in no status
# update will be printed.
STATUS_PRINT_INTERVAL_S = 10

# Leave this much space between users.  Ideally this would be very short,
# but it needs to be long enough to cover "client total time consumed" or
# else people won't all hear each other.
DELAY_INTERVAL = 3  # 3s; keep in sync with demo.js:DELAY_INTERVAL and
                    # index.html:audioOffset

# How many links to use for the chain of users before starting to double up.
LAYERING_DEPTH = 6  # keep in sync with demo.js:N_BUCKETS and
                    # index.html:audioOffset

# If we have not heard from a user in this long, forget all about them.
USER_LIFETIME_SAMPLES = SAMPLE_RATE * 60 * 60  # 1hr

# If we have not heard from a user in this long, don't consider them a
# current user.
USER_INACTIVE_SAMPLES = SAMPLE_RATE * 5  # 5s

# Force rounding to a multiple of FRAME_SIZE.
QUEUE_LENGTH = (QUEUE_SECONDS * SAMPLE_RATE // FRAME_SIZE * FRAME_SIZE)

# Ring buffers, all indexed by server clock modulo QUEUE_LENGTH.
audio_queue = np.zeros(QUEUE_LENGTH, np.float32)
backing_queue = np.zeros(QUEUE_LENGTH, np.float32)
monitor_queue = np.zeros(QUEUE_LENGTH, np.float32)
n_people_queue = np.zeros(QUEUE_LENGTH, np.int16)


def clear_whole_buffer():
    """Zero every ring buffer in place."""
    for queue in (audio_queue, backing_queue, monitor_queue, n_people_queue):
        queue.fill(0)
173 | N_PHANTOM_PEOPLE = 2 174 | 175 | 176 | RECORDINGS_DIRNAME = "recordings" 177 | RECORDINGS_DIR = os.path.join( 178 | os.path.dirname(__file__), "html", RECORDINGS_DIRNAME) 179 | RECORDING_LISTING_HTML = os.path.join(RECORDINGS_DIR, "index.html") 180 | RECORDING_N_TO_KEEP = 20 # keep most recent only 181 | RECORDING_MAX_S = 60*60 # 1hr 182 | RECORDING_MAX_SAMPLES = RECORDING_MAX_S * SAMPLE_RATE 183 | 184 | RECORDING_ENABLED = True 185 | 186 | class Recorder: 187 | def __init__(self): 188 | self.out = None 189 | self.written = 0 190 | self.last_clock = None 191 | 192 | @staticmethod 193 | def recording_fname(): 194 | return os.path.join( 195 | RECORDINGS_DIR, 196 | datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S.wav')) 197 | 198 | @staticmethod 199 | def read_offset(): 200 | # While we want to read from the specified clock position in 201 | # the buffer, we don't want to do that until enough time has 202 | # passed that whoever was going to sing has had a chance to. 203 | return (state.max_position + DELAY_INTERVAL*2)*SAMPLE_RATE 204 | 205 | def start_(self): 206 | self.cleanup_() 207 | 208 | self.out = wave.open(self.recording_fname(), mode='wb') 209 | self.out.setnchannels(1) 210 | self.out.setsampwidth(2) 211 | self.out.setframerate(SAMPLE_RATE) 212 | 213 | self.written = 0 214 | self.last_clock = state.song_start_clock - 1 215 | 216 | def end_(self): 217 | self.out.close() 218 | self.out = None 219 | 220 | def write_(self, samples): 221 | self.out.writeframes((samples * 2**14).astype(np.int16)) 222 | self.written += len(samples) 223 | 224 | def reset(self): 225 | if self.out: 226 | self.end_() 227 | 228 | def maybe_write(self, server_clock): 229 | if self.out: 230 | pass 231 | elif (state.song_start_clock and 232 | (state.song_start_clock + self.read_offset() < 233 | server_clock < 234 | state.song_start_clock + self.read_offset() + SAMPLE_RATE*5)): 235 | self.start_() 236 | else: 237 | return 238 | 239 | # Write any samples the desk that are now ready for 
writing. 240 | # - The first unwritten sample is last_clock + 1 241 | # - The last eligible sample is server_clock + read_offset 242 | # - Unless song_end_clock comes first 243 | begin = self.last_clock + 1 244 | end = server_clock - self.read_offset() 245 | ready_to_close = False 246 | if state.song_end_clock and state.song_end_clock < end: 247 | end = state.song_end_clock 248 | ready_to_close = True 249 | 250 | n_samples = end - begin 251 | if n_samples > QUEUE_LENGTH: 252 | # Something has gone horribly wrong, probably involving 253 | # losing and regaining connectivity in the middle of a 254 | # song. Oh well. 255 | n_samples = QUEUE_LENGTH 256 | 257 | if n_samples > 0: 258 | self.write_(fix_volume( 259 | wrap_get(audio_queue, begin, n_samples), 260 | wrap_get(backing_queue, begin, n_samples), 261 | wrap_get(n_people_queue, begin, n_samples))) 262 | self.last_clock += n_samples 263 | 264 | if ready_to_close or self.written > RECORDING_MAX_SAMPLES: 265 | self.end_() 266 | self.update_directory_listing_() 267 | 268 | def update_directory_listing_(self): 269 | with open(RECORDING_LISTING_HTML, 'w') as outf: 270 | def w(s): 271 | outf.write(s) 272 | outf.write("\n") 273 | w("All times (and dates!) 
are in UTC.") 274 | w("") 283 | w("Because these files are large we only keep the most recent %s" % 284 | RECORDING_N_TO_KEEP) 285 | 286 | def cleanup_(self): 287 | recordings = os.listdir(RECORDINGS_DIR) 288 | if len(recordings) <= RECORDING_N_TO_KEEP: 289 | return 290 | 291 | for fname in sorted(recordings)[:-RECORDING_N_TO_KEEP]: 292 | if fname != ".keep": 293 | os.remove(os.path.join(RECORDINGS_DIR, fname)) 294 | self.update_directory_listing_() 295 | 296 | recorder = Recorder() if RECORDING_ENABLED else None 297 | 298 | state = State() 299 | 300 | events: Dict[str, str] = {} 301 | 302 | tracks = [] 303 | def populate_tracks() -> None: 304 | for track in sorted(os.listdir(util.AUDIO_DIR)): 305 | if track != "README": 306 | tracks.append(track) 307 | 308 | populate_tracks() 309 | 310 | def insert_event(evid, clock) -> None: 311 | events[evid] = clock 312 | 313 | def calculate_server_clock(): 314 | # Note: This will eventually create a precision problem for the JS 315 | # clients, which are using floats. Specifically, at 44100 Hz, it will 316 | # fail on February 17, 5206. 317 | return int(time.time() * SAMPLE_RATE) 318 | 319 | class User: 320 | def __init__(self, userid, name, last_heard_server_clock, delay_samples) -> None: 321 | self.list_keys = ["chats"] 322 | 323 | self.userid = userid 324 | if len(name) > 32: 325 | name = name[:29] + "..." 
class User:
    """One connected client and the per-user data the mixer needs."""

    def __init__(self, userid, name, last_heard_server_clock, delay_samples) -> None:
        # Keys whose queued outgoing values accumulate in a list instead of
        # being overwritten; see send().
        self.list_keys = ["chats"]

        self.userid = userid
        # Truncate over-long names so they fit the 32-byte name slot of
        # BINARY_USER_CONFIG_FORMAT.
        if len(name) > 32:
            name = name[:29] + "..."
        self.name = name
        self.last_heard_server_clock = last_heard_server_clock
        self.delay_samples = delay_samples

        self.mic_volume = 1.0
        self.scaled_mic_volume = 1.0
        self.last_write_clock = None
        self.is_monitored = False
        self.is_monitoring = False
        self.rms_volume = 0

        self.backing_volume = 1.0

        # For debugging purposes only
        self.last_seen_read_clock = None
        self.last_seen_write_clock = None
        self.last_n_samples = None

        self.client_address = None  # Last IP we saw them from
        self.client_telemetry = {}  # unstructured info from client

        self.in_spectator_mode = False
        self.muted = False

        self.mark_sent()

        # Push current global settings down to the new client.
        self.send("bpm", state.bpm)
        self.send("repeats", state.repeats)
        self.send("bpr", state.bpr)
        self.send("tracks", tracks)
        self.send("first_bucket", state.first_bucket)
        self.send("globalVolume",
                  scalar_to_friendly_volume(state.global_volume))
        self.send("backingVolume",
                  scalar_to_friendly_volume(state.backing_volume))
        if state.disable_song_video:
            self.send("disableSongVideo", state.disable_song_video)
        if state.lyrics:
            self.send("lyrics", state.lyrics)
        if state.image:
            self.send("image", state.image)

    def allocate_twilio_token(self):
        """Mint a Twilio video-room access token and queue it to the client."""
        token = AccessToken(secrets["twilio"]["account_sid"],
                            secrets["twilio"]["api_key"],
                            secrets["twilio"]["api_secret"],
                            identity=self.userid)

        # Create a Video grant and add to token
        video_grant = VideoGrant(room=secrets["twilio"]["room"])
        token.add_grant(video_grant)

        jwt = token.to_jwt()
        # Older twilio versions return bytes, newer return str.
        if isinstance(jwt, bytes):
            jwt = jwt.decode('utf8')
        self.send("twilio_token", jwt)

    # XXX: Are we sure we do not need to clear any of the other state across reconnects???
    def flush(self) -> None:
        """Delete any state that shouldn't be persisted across reconnects"""
        self.last_seen_read_clock = None
        self.last_seen_write_clock = None

    def send(self, key, value):
        """Queue a key/value update for the next response to this client."""
        if key in self.list_keys:
            self.to_send[key].append(value)
        else:
            self.to_send[key] = value

    def mark_sent(self):
        """Reset the outgoing-update queue after it has been delivered."""
        self.to_send = {}
        for key in self.list_keys:
            self.to_send[key] = []

    def update_client_telemetry(self, nct):
        """Merge a new client telemetry dict into what we already have."""
        if not isinstance(nct, dict):
            raise Exception("New client telemetry not a dict:", nct)
        merge_into_dict(self.client_telemetry, nct)

def merge_into_dict(a, b):
    """Recursively merge b into a: lists extend, dicts recurse, else b wins."""
    for k in b:
        if k not in a:
            a[k] = b[k]
        elif isinstance(a[k], list) and isinstance(b[k], list):
            a[k] += b[k]
        elif isinstance(a[k], dict) and isinstance(b[k], dict):
            merge_into_dict(a[k], b[k])
        else:
            a[k] = b[k]

users: Dict[str, Any] = {}  # userid -> User
imaginary_users = []

def sendall(key, value, exclude=None):
    """Queue a key/value update for every active user, minus excluded ids."""
    for user in active_users():
        if not exclude or user.userid not in exclude:
            user.send(key, value)

def wrap_get(queue, start, len_vals) -> Any:
    """Read len_vals samples from the ring buffer starting at clock `start`."""
    start_in_queue = start % len(queue)

    if start_in_queue + len_vals <= len(queue):
        return np.copy(queue[start_in_queue:(start_in_queue+len_vals)])
    else:
        # The read wraps past the end of the buffer; stitch the two pieces.
        second_section_size = (start_in_queue + len_vals) % len(queue)
        first_section_size = len_vals - second_section_size
        assert second_section_size > 0
        assert first_section_size > 0

        return np.concatenate([
            queue[start_in_queue:(start_in_queue+first_section_size)],
            queue[0:second_section_size]
        ])

def wrap_assign(queue, start, vals) -> None:
    """Write vals into the ring buffer starting at clock `start`."""
    assert len(vals) <= len(queue)
    start_in_queue = start % len(queue)

    if start_in_queue + len(vals) <= len(queue):
        queue[start_in_queue:(start_in_queue+len(vals))] = vals
    else:
        # The write wraps past the end of the buffer; split it in two.
        second_section_size = (start_in_queue + len(vals)) % len(queue)
        first_section_size = len(vals) - second_section_size
        assert second_section_size > 0
        assert first_section_size > 0

        queue[start_in_queue:(start_in_queue+first_section_size)] = vals[:first_section_size]
        queue[0:second_section_size] = vals[first_section_size:]
def run_backing_track() -> None:
    """Load state.requested_track (mono 16-bit 48kHz wav) into the state.

    The requested track is consumed: requested_track is cleared afterwards.
    """
    if state.requested_track in tracks:
        with wave.open(os.path.join(util.AUDIO_DIR, state.requested_track)) as inf:
            if inf.getnchannels() != 1:
                raise Exception(
                    "wrong number of channels on %s" % state.requested_track)
            if inf.getsampwidth() != 2:
                raise Exception(
                    "wrong sample width on %s" % state.requested_track)
            if inf.getframerate() != 48000:
                raise Exception(
                    "wrong sample rate on %s" % state.requested_track)

            state.backing_track = np.frombuffer(
                inf.readframes(-1), np.int16).astype(np.float32) / (2**15)
            state.backing_track *= 0.8  # turn it down a bit
            state.backing_track_index = 0
            state.backing_track_type = "Backing Track"

    # Backing track is used only once.
    state.requested_track = None

def assign_delays(userid_lead) -> None:
    """Place the leader, followers, and spectators into delay buckets."""
    initial_position = 0

    # When rounds are configured the leader starts later, so they can hear
    # the repeats that precede them.
    repeat_length_s = None
    if state.bpr and state.bpm and state.repeats:
        beat_length_s = 60 / state.bpm
        repeat_length_s = beat_length_s * state.bpr
        initial_position += int(repeat_length_s * state.repeats)

    if initial_position > 90:
        initial_position = 90

    real_users = [user for user in active_users()
                  if user not in imaginary_users]
    leader = users[userid_lead]
    spectators = [user for user in real_users
                  if user.in_spectator_mode and user.userid != userid_lead]
    followers = [user for user in real_users
                 if not user.in_spectator_mode and user.userid != userid_lead]

    # Only the leader goes in bucket #1
    state.first_bucket = initial_position + DELAY_INTERVAL
    leader.send("delay_seconds", state.first_bucket)
    sendall("first_bucket", state.first_bucket)

    max_follow_buckets = LAYERING_DEPTH - 1
    print("max_follow_buckets: %s" % max_follow_buckets)
    if repeat_length_s:
        print("repeat_length_s: %s" % repeat_length_s)
        layers_audible_to_leader = repeat_length_s // DELAY_INTERVAL
        print("layers_audible_to_leader: %s" % layers_audible_to_leader)
        if layers_audible_to_leader < 1:
            layers_audible_to_leader = 1
        max_follow_buckets = min(max_follow_buckets, layers_audible_to_leader)
        print("max_follow_buckets: %s" % max_follow_buckets)

    n_follow_buckets = int(max(min(max_follow_buckets, len(followers)), 1))
    print("n_follow_buckets: %s" % n_follow_buckets)

    follow_positions = [
        initial_position + (x+2)*DELAY_INTERVAL
        for x in range(n_follow_buckets)]
    state.max_position = follow_positions[-1]
    if n_follow_buckets < max_follow_buckets:
        state.max_position += DELAY_INTERVAL

    # Spectators all go in the last bucket.
    for spectator in spectators:
        spectator.send("delay_seconds", state.max_position)

    # Distribute followers randomly between the remaining buckets.
    for i, (_, user) in enumerate(sorted(
            [(random.random(), follower)
             for follower in followers])):
        user.send("delay_seconds",
                  follow_positions[(len(follow_positions) - 1 - i) %
                                   len(follow_positions)])

def update_users(userid, username, server_clock, client_read_clock) -> None:
    """Record that we heard from this user, creating them if needed."""
    # Debug-only synthetic users, controlled by N_IMAGINARY_USERS.
    while len(imaginary_users) < N_IMAGINARY_USERS:
        imaginary_user = User(
            str(random.randint(0, 2**32)),
            "imaginary_%s" % (len(imaginary_users)),
            server_clock,
            SAMPLE_RATE * 7)
        imaginary_users.append(imaginary_user)
        #imaginary_user.delay_samples = (
        #    SAMPLE_RATE *
        #    DELAY_INTERVAL *
        #    random.randint(1,LAYERING_DEPTH))
        imaginary_user.delay_samples = (
            SAMPLE_RATE *
            DELAY_INTERVAL * (len(imaginary_users) + 1))
        users[imaginary_user.userid] = imaginary_user

    for user in imaginary_users:
        user.last_heard_server_clock = server_clock
        user.rms_volume = random.random() / 10

    # Delete expired users BEFORE adding us to the list, so that our session
    # will correctly reset if we are the next customer after we've been gone
    # for awhile.
    clean_users(server_clock)

    delay_samples = server_clock - client_read_clock
    if userid not in users:
        users[userid] = User(userid, username, server_clock, delay_samples)
        if ENABLE_TWILIO:
            users[userid].allocate_twilio_token()

    users[userid].last_heard_server_clock = server_clock
    users[userid].delay_samples = delay_samples
    users[userid].name = username
def clean_users(server_clock) -> None:
    """Forget users we have not heard from in USER_LIFETIME_SAMPLES."""
    to_delete = []
    for userid, user in users.items():
        age_samples = server_clock - user.last_heard_server_clock
        if age_samples > USER_LIFETIME_SAMPLES:
            to_delete.append(userid)
    for userid in to_delete:
        del users[userid]

    # If we have ever seen a server-to-server request, we never reset state,
    # because the Ritual Engine server may need to perform operations when no
    # users are present.
    if not active_users() and not state.server_controlled:
        state.reset()

def samples_to_position(samples):
    """Convert a sample count to a whole-second bucket position."""
    return round(samples / SAMPLE_RATE)

def jump_user_after(user, position):
    """Move user one DELAY_INTERVAL after `position`, unless already there."""
    target = position + DELAY_INTERVAL
    if target == samples_to_position(user.delay_samples):
        return
    user.send("delay_seconds", target)

def max_monitor_position():
    """Return the bucket position of the most-delayed monitored user."""
    max_delay_samples = 0
    for user in active_users():
        if user.is_monitored and user.delay_samples > max_delay_samples:
            max_delay_samples = user.delay_samples
    return samples_to_position(max_delay_samples)

def jump_to_latest_monitored_user(user):
    """Place user just after the most-delayed monitored user, if any."""
    # (The unused computation of the user's current position was removed.)
    max_pos = max_monitor_position()
    if max_pos > 0:
        jump_user_after(user, max_pos)

def jump_monitors_to_latest_monitored_user():
    """Re-seat every monitoring user behind the latest monitored user."""
    for user in active_users():
        if user.is_monitoring:
            jump_to_latest_monitored_user(user)

def active_users():
    """Users heard from within the last USER_INACTIVE_SAMPLES."""
    server_clock = calculate_server_clock()
    return [
        user for user in users.values()
        if server_clock - user.last_heard_server_clock < USER_INACTIVE_SAMPLES]
def user_summary(requested_user_summary) -> List[Any]:
    """Sorted per-user tuples for the mixing console; [] unless requested."""
    if not requested_user_summary:
        return []

    return sorted(
        (round(user.delay_samples / SAMPLE_RATE),
         user.name,
         user.mic_volume,
         user.userid,
         user.rms_volume,
         user.muted,
         user.is_monitored)
        for user in active_users())

def summary_length(n_users_in_summary):
    """Byte length of a binary summary: uint16 count + one record per user."""
    return 2 + n_users_in_summary * BINARY_USER_CONFIG_FORMAT.size

def binary_user_summary(summary):
    """
    Encode the user summary compactly.

    number of users: uint16
    repeat:
      BINARY_USER_CONFIG_FORMAT

    Each user is 60 bytes, so 1000 users is ~50k. We could be more
    compact by only sending names if they have changed.
    """
    packed = [struct.pack(">H", len(summary))]
    for delay, name, mic_volume, userid, rms_volume, muted, is_monitored in summary:
        # delay is encoded as a uint16, so clamp it into range.
        delay = min(max(delay, 0), 0xffff)

        flags = 0
        if muted:
            flags |= 0b00000001
        if is_monitored:
            flags |= 0b00000010

        packed.append(
            BINARY_USER_CONFIG_FORMAT.pack(
                int(userid),
                name.encode('utf8'),
                mic_volume,
                rms_volume,
                delay,
                flags))
    resp = np.frombuffer(b"".join(packed), dtype=np.uint8)

    if len(resp) != summary_length(len(summary)):
        raise Exception("Data for %s users encoded to %s bytes, expected %s",
                        len(summary), len(resp), summary_length(len(summary)))

    return resp
649 | """ 650 | binary_summaries = [struct.pack(">H", len(summary))] 651 | for delay, name, mic_volume, userid, rms_volume, muted, is_monitored in summary: 652 | # delay is encoded as a uint16 653 | if delay < 0: 654 | delay = 0 655 | elif delay > 0xffff: 656 | delay = 0xffff 657 | 658 | bits = 0 659 | if muted: 660 | bits += 0b00000001 661 | if is_monitored: 662 | bits += 0b00000010 663 | 664 | binary_summaries.append( 665 | BINARY_USER_CONFIG_FORMAT.pack( 666 | int(userid), 667 | name.encode('utf8'), 668 | mic_volume, 669 | rms_volume, 670 | delay, 671 | bits)) 672 | resp = np.frombuffer(b"".join(binary_summaries), dtype=np.uint8) 673 | 674 | if len(resp) != summary_length(len(summary)): 675 | raise Exception("Data for %s users encoded to %s bytes, expected %s", 676 | len(summary), len(resp), summary_length(len(summary))) 677 | 678 | return resp 679 | 680 | def write_metronome(clear_index, clear_samples): 681 | metronome_samples = np.zeros(clear_samples, np.float32) 682 | 683 | if state.bpm: 684 | beat_samples = SAMPLE_RATE * 60 // state.bpm 685 | 686 | # We now want to mark a beat at positions matching 687 | # leftover_beat_samples + N*beat_samples 688 | # It is possible that we will write no beats, and instead will 689 | # just decrease leftover_beat_samples. 
def backfill_metronome():
    """Fill song_start_clock..last_cleared_clock with the current beat."""
    write_metronome(state.song_start_clock,
                    state.last_cleared_clock - state.song_start_clock)

def update_audio(pos, n_samples, in_data, is_monitored):
    """Mix in_data into the rings at pos and bump the people counter."""
    mixed = wrap_get(audio_queue, pos, n_samples) + in_data
    wrap_assign(audio_queue, pos, mixed)

    if is_monitored:
        wrap_assign(monitor_queue,
                    pos,
                    wrap_get(monitor_queue, pos, n_samples) + in_data)

    head_count = wrap_get(n_people_queue, pos, n_samples)
    wrap_assign(n_people_queue, pos,
                head_count + np.ones(n_samples, np.int16))

def repeat_length_samples():
    """Length of one configured round/repeat, in samples."""
    beat_length_s = 60 / state.bpm
    return int(beat_length_s * state.bpr * SAMPLE_RATE)

def fix_volume(data, backing_data, n_people, user_backing_volume=1.0):
    """Apply auto-gain, mix in the backing track, and apply global volume."""
    # We could scale volume by having n_people be the number of
    # earlier people and then scale by a simple 1/n_people. But a
    # curve of (1 + X) / (n_people + X) falls a bit less
    # dramatically and should sound better.
    #
    # Compare:
    # https://www.wolframalpha.com/input/?i=graph+%281%29+%2F+%28x%29+from+1+to+10
    # https://www.wolframalpha.com/input/?i=graph+%281%2B3%29+%2F+%28x%2B3%29+from+1+to+10
    if not state.disable_auto_gain:
        data *= ((1 + N_PHANTOM_PEOPLE) / (n_people + N_PHANTOM_PEOPLE)) ** 0.5
    data += (
        backing_data *
        (state.backing_volume * (1 if state.bpm > 0 else 0.2)) *
        user_backing_volume
    )
    data *= state.global_volume
    return data
def get_telemetry():
    """Assemble a server + per-client debugging snapshot as a dict."""
    clients = {}
    for user in users.values():
        c = {}
        raw = copy.deepcopy(user.__dict__)
        del raw["list_keys"]  # redundant

        try:
            c["client_time_to_next_client_samples"] = (
                raw["last_heard_server_clock"]
                - raw["last_seen_write_clock"]
                - raw["client_telemetry"]["audio_offset"]
                + raw["last_n_samples"])
            c["client_time_to_next_client_seconds"] = (
                c["client_time_to_next_client_samples"] / SAMPLE_RATE)
        except Exception:
            # Best-effort: any of these clocks/telemetry fields may be
            # missing or None.  (Was a bare except, which would also have
            # swallowed KeyboardInterrupt/SystemExit.)
            pass

        c["raw"] = raw
        clients[user.userid] = c

    now = time.time()
    result = {
        "request_time": now,
        "server": {
            "server_startup_time": SERVER_STARTUP_TIME,
            "server_uptime": int(now) - SERVER_STARTUP_TIME,
            "server_version": SERVER_VERSION,
            "server_branch": SERVER_BRANCH,
            "server_clock": calculate_server_clock(),
            "server_sample_rate": SAMPLE_RATE,
            "n_connected_users": len(active_users()),
            "queue_size": QUEUE_LENGTH / FRAME_SIZE,  # in 128-sample frames
            "events": get_events_to_send(),
            "state": copy.deepcopy(state.__dict__),  # XXX: refine this / dedupe
        },
        "clients": clients
        # XXX: missing client IPs, what else
    }
    del result["server"]["state"]["backing_track"]  # XXX: ok but we really shouldn't have copied it in the first place
    return result

def handle_json_post(in_json_raw, in_data):
    """Dispatch a JSON-wrapped POST: telemetry requests or a normal post."""
    in_json = json.loads(in_json_raw)

    if in_json.get("request", None):
        if in_json["request"] == "get_telemetry":
            result = get_telemetry()
            return json.dumps(result), np.zeros(0)
        else:
            return json.dumps(
                {"error": "unknown request " + in_json["request"]}), np.zeros(0)

    out_data, x_audio_metadata = handle_post(in_json, in_data)

    return json.dumps({
        "x-audio-metadata": x_audio_metadata,
    }), out_data
in_json.get("request", None): 784 | if in_json["request"] == "get_telemetry": 785 | result = get_telemetry() 786 | return json.dumps(result), np.zeros(0) 787 | else: 788 | return json.dumps({"error": "unknown request " + in_json["request"]}), np.zeros(0) 789 | 790 | out_data, x_audio_metadata = handle_post(in_json, in_data) 791 | 792 | return json.dumps({ 793 | "x-audio-metadata": x_audio_metadata, 794 | }), out_data 795 | 796 | def end_song(): 797 | state.leader = None 798 | state.backing_track_type = "" 799 | sendall("backing_track_type", state.backing_track_type) 800 | 801 | # Handle special operations that do not require a user (although they may 802 | # optionally support one), but can be done server-to-server as well. 803 | def handle_special(query_params, server_clock, user=None, client_read_clock=None): 804 | volume = query_params.get("volume", None) 805 | if volume: 806 | state.global_volume = friendly_volume_to_scalar(float(volume)) 807 | sendall("globalVolume", scalar_to_friendly_volume(state.global_volume)) 808 | 809 | backing_volume = query_params.get("backing_volume", None) 810 | if backing_volume: 811 | state.backing_volume = friendly_volume_to_scalar(float(backing_volume)) 812 | sendall("backingVolume", scalar_to_friendly_volume(state.backing_volume)) 813 | 814 | msg_chats = query_params.get("chat", None) 815 | if msg_chats: 816 | for msg_chat in json.loads(msg_chats): 817 | if user is not None: 818 | sendall("chats", (user.name, msg_chat), exclude=[user.userid]) 819 | else: 820 | sendall("chats", ("[ANNOUNCEMENT]", msg_chat)) 821 | 822 | mic_volume = query_params.get("mic_volume", None) 823 | if mic_volume: 824 | for other_userid, new_mic_volume in json.loads(mic_volume): 825 | if other_userid in users: 826 | if new_mic_volume > 2: 827 | new_mic_volume = 2 828 | elif new_mic_volume < 0: 829 | new_mic_volume = 0 830 | 831 | users[other_userid].mic_volume = new_mic_volume 832 | 833 | # Make 1 be unity 834 | users[other_userid].scaled_mic_volume = 
friendly_volume_to_scalar( 835 | new_mic_volume * 0.5) / friendly_volume_to_scalar(0.5) 836 | 837 | requested_track = query_params.get("track", None) 838 | if requested_track: 839 | state.requested_track = requested_track 840 | 841 | if query_params.get("mark_start_singing", None): 842 | clear_whole_buffer() 843 | events.clear() 844 | 845 | # XXX: There is some confusion over exactly where the start marker should go, but it should be a value that we are guaranteed to have, so the song doesn't fail to start. (So not the write clock.) 846 | if client_read_clock is not None: 847 | state.song_start_clock = client_read_clock 848 | else: 849 | state.song_start_clock = server_clock 850 | state.song_end_clock = 0 851 | 852 | state.backing_track_type = "" 853 | if state.bpm > 0: 854 | backfill_metronome() 855 | state.backing_track_type = "Metronome" 856 | elif state.requested_track: 857 | run_backing_track() 858 | # These must be separate from song_start/end_clock, because they 859 | # are used for video sync and must be EXACTLY at the moment the 860 | # backing track starts/ends, not merely close. 
861 | #insert_event("backingTrackStart", server_clock) 862 | #insert_event("backingTrackEnd", server_clock + len(state.backing_track)) 863 | sendall("backing_track_type", state.backing_track_type) 864 | 865 | if query_params.get("mark_stop_singing", None): 866 | # stop the backing track from playing, if it's still going 867 | state.backing_track_index = len(state.backing_track) 868 | 869 | if user is not None: 870 | if user.userid == state.leader: 871 | state.song_end_clock = user.last_write_clock 872 | else: 873 | # halt singing, end it immediately 874 | state.song_end_clock = 1 875 | state.song_start_clock = 1 876 | else: 877 | state.song_end_clock = server_clock 878 | 879 | end_song() 880 | 881 | if query_params.get("clear_events", None): 882 | events.clear() 883 | 884 | try: 885 | new_events = json.loads(query_params.get("event_data", "")) 886 | except (KeyError, json.decoder.JSONDecodeError) as e: 887 | new_events = [] 888 | if type(new_events) != list: 889 | new_events = [] 890 | 891 | for ev in new_events: 892 | insert_event(ev["evid"], ev["clock"]) 893 | 894 | disableAutoGain = query_params.get("disableAutoGain", None) 895 | if disableAutoGain: 896 | state.disable_auto_gain = disableAutoGain == "1" 897 | 898 | disableSongVideo = query_params.get("disableSongVideo", None) 899 | if disableSongVideo: 900 | state.disable_song_video = disableSongVideo == "1" 901 | sendall("disableSongVideo", state.disable_song_video) 902 | 903 | # If we are running under Ritual Engine, disable functionality that is not 904 | # required in that setting, and would be disruptive if triggered by 905 | # accident. 
906 | if not state.server_controlled: 907 | bpm = query_params.get("bpm", None) 908 | if bpm is not None: 909 | state.bpm = bpm 910 | sendall("bpm", state.bpm) 911 | 912 | repeats = query_params.get("repeats", None) 913 | if repeats is not None: 914 | state.repeats = repeats 915 | sendall("repeats", state.repeats) 916 | 917 | bpr = query_params.get("bpr", None) 918 | if bpr is not None: 919 | state.bpr = bpr 920 | sendall("bpr", state.bpr) 921 | 922 | # Do some format conversions and strip the unnecessary nesting layer that urllib 923 | # query parsing applies 924 | INT_PARAMS = ["write_clock", "read_clock", "bpm", "repeats", "bpr"] 925 | def clean_query_params(params): 926 | clean_params = {} 927 | for (k, v) in params.items(): 928 | if (not isinstance(v, list)) or (not len(v) == 1): 929 | raise ValueError("Duplicate query parameters are not allowed.") 930 | if k in INT_PARAMS: 931 | clean_params[k] = int(v[0]) 932 | else: 933 | clean_params[k] = v[0] 934 | return clean_params 935 | 936 | def extract_params(params, keys): 937 | result = [None] * len(params) 938 | for k in keys: 939 | result 940 | 941 | def get_events_to_send() -> Any: 942 | return [{"evid": i[0], "clock": i[1]} for i in events.items()] 943 | 944 | def handle_post(in_json, in_data) -> Tuple[Any, str]: 945 | in_data = in_data.view(dtype=np.float32) 946 | 947 | raw_params = {} 948 | # For some reason urllib can't handle the query_string being empty 949 | query_string = in_json["query_string"] 950 | if query_string: 951 | raw_params = urllib.parse.parse_qs(query_string, strict_parsing=True) 952 | query_params = clean_query_params(raw_params) 953 | 954 | action = query_params.get("action", None) 955 | if action == "status": 956 | rsp = { 957 | "n_connected_users": len(active_users()), 958 | "max_users": MAX_USERS, 959 | } 960 | if "instance_name" in secrets: 961 | rsp["instance_name"] = secrets["instance_name"] 962 | return np.zeros(0, np.uint8), json.dumps(rsp) 963 | 964 | userid = 
query_params.get("userid", None) 965 | if userid is not None: 966 | if int(userid) < 0 or int(userid) > 0xffff_ffff_ffff_ffff: 967 | raise ValueError("Userid must be a uint64") 968 | 969 | server_clock = calculate_server_clock() 970 | requested_user_summary = query_params.get("user_summary", None) 971 | 972 | # Prevent weirdness on the very first request since startup 973 | if state.last_request_clock is None: 974 | state.last_request_clock = server_clock 975 | 976 | # Prevent weirdness if it's been a very long time since we heard from 977 | # anybody. Never try to clear more than the entire length of the buffer. 978 | if server_clock - state.last_request_clock > QUEUE_LENGTH: 979 | state.last_request_clock = server_clock - QUEUE_LENGTH 980 | 981 | # NOTE: If we go a long time without a request, and there is a backing 982 | # track running, weird but harmless things will happen. This scenario is 983 | # unlikely and probably not worth correcting. 984 | 985 | # Audio from clients is summed, so we need to clear the circular 986 | # buffer ahead of them. The range we are clearing was "in the 987 | # future" as of the last request, and we never touch the future, 988 | # so nothing has touched it yet "this time around". 
989 | clear_samples = min(server_clock - state.last_request_clock, QUEUE_LENGTH) 990 | clear_index = state.last_request_clock 991 | wrap_assign( 992 | n_people_queue, clear_index, np.zeros(clear_samples, np.int16)) 993 | wrap_assign( 994 | monitor_queue, clear_index, np.zeros(clear_samples, np.float32)) 995 | wrap_assign( 996 | audio_queue, clear_index, np.zeros(clear_samples, np.float32)) 997 | state.last_cleared_clock = clear_index + clear_samples 998 | 999 | max_backing_track_samples = len(state.backing_track) - state.backing_track_index 1000 | backing_track_samples = min(max_backing_track_samples, clear_samples) 1001 | if backing_track_samples > 0: 1002 | wrap_assign( 1003 | backing_queue, clear_index, state.backing_track[ 1004 | state.backing_track_index : 1005 | state.backing_track_index + backing_track_samples]) 1006 | state.backing_track_index += backing_track_samples 1007 | clear_samples -= backing_track_samples 1008 | clear_index += backing_track_samples 1009 | 1010 | if state.backing_track_index == len(state.backing_track): 1011 | # the song has ended, mark it so 1012 | state.song_end_clock = clear_index 1013 | end_song() 1014 | 1015 | if clear_samples > 0: 1016 | if state.bpm > 0: 1017 | write_metronome(clear_index, clear_samples) 1018 | else: 1019 | wrap_assign( 1020 | backing_queue, clear_index, 1021 | np.zeros(clear_samples, np.float32)) 1022 | 1023 | saved_last_request_clock = state.last_request_clock 1024 | state.last_request_clock = server_clock 1025 | 1026 | # Handle server-to-server requests: 1027 | if userid is None and SUPPORT_SERVER_CONTROL: 1028 | # If we ever get a server_to_server request, we switch off certain 1029 | # automatic behavior that's troublesome in the Ritual Engine setting. 1030 | state.server_controlled = True 1031 | 1032 | # If we start a song triggered from here, mark its start at the current 1033 | # server clock, since we don't have a user clock to start at. 
(I'm 1034 | # not sure this clock is used for anything other than the bucket 1035 | # brigade app countdown.) 1036 | handle_special(query_params, server_clock) 1037 | 1038 | # abbreviated non-user metadata 1039 | x_audio_metadata = { 1040 | "server_clock": server_clock, 1041 | "server_sample_rate": SAMPLE_RATE, 1042 | "song_end_clock": state.song_end_clock, 1043 | "song_start_clock": state.song_start_clock, 1044 | "last_request_clock": state.last_request_clock, 1045 | "n_connected_users": len(active_users()), 1046 | "queue_size": QUEUE_LENGTH / FRAME_SIZE, # in 128-sample frames 1047 | "events": get_events_to_send(), 1048 | "leader": state.leader, 1049 | } 1050 | return np.zeros(0, np.uint8), json.dumps(x_audio_metadata) 1051 | 1052 | # NOTE NOTE NOTE: 1053 | # * All `clock` variables are measured in samples. 1054 | # * All `clock` variables represent the END of an interval, NOT the 1055 | # beginning. It's arbitrary which one to use, but you have to be 1056 | # consistent, and trust me that it's slightly nicer this way. 1057 | 1058 | if recorder: 1059 | recorder.maybe_write(server_clock) 1060 | 1061 | client_write_clock = query_params.get("write_clock", None) 1062 | client_read_clock = query_params.get("read_clock", None) 1063 | if client_read_clock is None: 1064 | raise ValueError("no client read clock") 1065 | 1066 | if client_read_clock > server_clock: 1067 | raise ValueError("Attempted to read %s samples into the future" % ( 1068 | client_read_clock - server_clock)) 1069 | 1070 | username = query_params.get("username", None) 1071 | if not username: 1072 | username = "" 1073 | 1074 | # We used to do this by looking for missing client_write_clock, but that may be true on multiple requests, whereas this is only the first one. 
1075 | if query_params.get("reset_user_state", None): 1076 | # New session, write some debug info to disk 1077 | logging.debug("*** New client:" + str(query_params) + "\n\n") 1078 | 1079 | if userid in users: 1080 | # Delete any state that shouldn't be persisted. 1081 | users[userid].flush() 1082 | 1083 | update_users(userid, username, server_clock, client_read_clock) 1084 | user = users[userid] 1085 | 1086 | if "client_address" in in_json: 1087 | user.client_address = in_json["client_address"] 1088 | 1089 | user.in_spectator_mode = query_params.get("spectator", None) 1090 | user.muted = query_params.get("muted", None) == "1" 1091 | 1092 | client_telemetry = query_params.get("client_telemetry", None) 1093 | if client_telemetry: 1094 | user.update_client_telemetry(json.loads(client_telemetry)) 1095 | 1096 | rms_volume = query_params.get("rms_volume", None) 1097 | if rms_volume: 1098 | user.rms_volume = float(rms_volume) 1099 | 1100 | user_backing_volume = query_params.get("user_backing_volume", None) 1101 | if user_backing_volume: 1102 | user.backing_volume = friendly_volume_to_scalar( 1103 | float(user_backing_volume)) 1104 | 1105 | if "lyrics" in in_json: 1106 | state.lyrics = in_json["lyrics"] 1107 | sendall("lyrics", state.lyrics) 1108 | 1109 | if query_params.get("image", None): 1110 | state.image = ''.join(random.choices(string.ascii_uppercase, k=10)) 1111 | sendall("image", state.image) 1112 | 1113 | # If we are running under Ritual Engine, disable functionality that is not 1114 | # required in that setting, and would be disruptive if triggered by 1115 | # accident. 
1116 | if query_params.get("request_lead", None) and not state.server_controlled: 1117 | assign_delays(userid) 1118 | state.leader = userid 1119 | state.image = "" 1120 | sendall("image", "") 1121 | state.lyrics = "" 1122 | sendall("lyrics", "") 1123 | 1124 | # Handle all operations that do not require a userid 1125 | handle_special(query_params, server_clock, user, client_read_clock) 1126 | 1127 | user.is_monitoring = query_params.get("hear_monitor", False) 1128 | monitor_userid = query_params.get("monitor", None) 1129 | changedMonitoring = False 1130 | if monitor_userid and monitor_userid in users: 1131 | users[monitor_userid].is_monitored = True 1132 | changedMonitoring = True 1133 | user.is_monitoring = True 1134 | 1135 | unmonitor_userid = query_params.get("unmonitor", None) 1136 | if unmonitor_userid and unmonitor_userid in users: 1137 | users[unmonitor_userid].is_monitored = False 1138 | changedMonitoring = True 1139 | 1140 | if changedMonitoring: 1141 | jump_monitors_to_latest_monitored_user() 1142 | if query_params.get("begin_monitor", False): 1143 | jump_to_latest_monitored_user(user) 1144 | 1145 | ### XXX: Debugging note: We used to do clearing of the buffer here, but now 1146 | ### we do it above, closer to the top of the function. 1147 | 1148 | n_samples = len(in_data) 1149 | user.last_n_samples = n_samples 1150 | 1151 | if client_write_clock is None: 1152 | pass 1153 | elif client_write_clock - n_samples < server_clock - QUEUE_LENGTH: 1154 | # Client is too far behind and going to wrap the buffer. :-( 1155 | raise ValueError("Client's write clock is too far in the past") 1156 | else: 1157 | if user.last_seen_write_clock is not None: 1158 | # Since Opus is stateful, we cannot receive or send audio out-of-order; if 1159 | # a client tries to do that, we force them to reconnect. 
1160 | if client_write_clock - n_samples != user.last_seen_write_clock: 1161 | raise ValueError( 1162 | f'Client write clock desync (' 1163 | f'{client_write_clock - n_samples} - ' 1164 | f'{user.last_seen_write_clock} = ' 1165 | f'{client_write_clock - n_samples - user.last_seen_write_clock})') 1166 | 1167 | user.last_seen_write_clock = client_write_clock 1168 | if client_write_clock is not None: 1169 | user.last_write_clock = client_write_clock 1170 | 1171 | in_data *= user.scaled_mic_volume 1172 | if state.leader == user.userid: 1173 | in_data *= LEADER_BOOST 1174 | 1175 | # XXX: I'm not sure we consider this desirable for ritual engine? 1176 | # Don't keep any input unless a song is in progress. 1177 | if (state.song_start_clock and client_write_clock > state.song_start_clock and 1178 | (not state.song_end_clock or 1179 | client_write_clock - n_samples < state.song_end_clock)): 1180 | 1181 | pos = client_write_clock - n_samples 1182 | update_audio(pos, n_samples, in_data, user.is_monitored) 1183 | 1184 | if state.bpr and state.bpm and state.repeats: 1185 | for i in range(state.repeats): 1186 | repeat_pos = pos + repeat_length_samples()*(i+1) 1187 | if repeat_pos + n_samples < server_clock: 1188 | update_audio(repeat_pos, n_samples, in_data, False) 1189 | 1190 | # Why subtract n_samples above and below? Because the future is to the 1191 | # right. So when a client asks for n samples at time t, what they 1192 | # actually want is "the time interval ending at t", i.e. [t-n, t). Since 1193 | # the latest possible time they can ask for is "now", this means that 1194 | # the latest possible time interval they can get is "the recent past" 1195 | # instead of "the near future". 1196 | # This doesn't matter to the clients if they all always use the same value of 1197 | # n_samples, but it matters if n_samples changes, and it matters for 1198 | # the server's zeroing. 
1199 | 1200 | # Since Opus is stateful, we cannot receive or send audio out-of-order; if 1201 | # a client tries to do that, we force them to reconnect. 1202 | if user.last_seen_read_clock is not None: 1203 | if client_read_clock - n_samples != user.last_seen_read_clock: 1204 | raise ValueError( 1205 | f'Client read clock desync (' 1206 | f'{client_read_clock - n_samples} - ' 1207 | f'{user.last_seen_read_clock} = ' 1208 | f'{client_read_clock - n_samples - user.last_seen_read_clock})') 1209 | user.last_seen_read_clock = client_read_clock 1210 | 1211 | n_people = [-1] 1212 | if query_params.get("loopback", None) == "true": 1213 | data = in_data 1214 | elif user.is_monitoring: 1215 | data = wrap_get(monitor_queue, client_read_clock - n_samples, n_samples) 1216 | else: 1217 | # Only play audio during songs. Mostly this is dealt with by 1218 | # only keeping input when a song is in progress, but the 1219 | # metronome, backing track, and round singing are also forms 1220 | # of input that check doesn't catch. 
1221 | if state.song_start_clock and ( 1222 | not state.song_end_clock or 1223 | client_read_clock - n_samples < state.song_end_clock): 1224 | data = wrap_get(audio_queue, client_read_clock - n_samples, 1225 | n_samples) 1226 | backing_data = wrap_get(backing_queue, client_read_clock - n_samples, 1227 | n_samples) 1228 | else: 1229 | data = np.zeros(n_samples, np.float32) 1230 | backing_data = np.zeros(n_samples, np.float32) 1231 | 1232 | n_people = wrap_get( 1233 | n_people_queue, client_read_clock - n_samples, n_samples) 1234 | 1235 | data = fix_volume(data, backing_data, n_people, user.backing_volume) 1236 | 1237 | x_audio_metadata = { 1238 | "server_clock": server_clock, 1239 | "server_sample_rate": SAMPLE_RATE, 1240 | "last_request_clock": saved_last_request_clock, 1241 | "client_read_clock": client_read_clock, 1242 | "client_write_clock": client_write_clock, 1243 | "song_end_clock": state.song_end_clock, 1244 | "song_start_clock": state.song_start_clock, 1245 | "n_samples": n_samples, 1246 | "n_connected_users": len(active_users()), 1247 | "queue_size": QUEUE_LENGTH / FRAME_SIZE, # in 128-sample frames 1248 | "events": get_events_to_send(), 1249 | "leader": state.leader, 1250 | "n_people_heard": int(n_people[0]), 1251 | } 1252 | 1253 | x_audio_metadata.update(user.to_send) 1254 | user.mark_sent() 1255 | 1256 | bin_summary = binary_user_summary(user_summary(requested_user_summary)) 1257 | if len(bin_summary) > 0: 1258 | data = np.append(bin_summary, data.view(dtype=np.uint8)) 1259 | return data, json.dumps(x_audio_metadata) 1260 | 1261 | if __name__ == "__main__": 1262 | print("Run server_wrapper.py or shm.py instead") 1263 | --------------------------------------------------------------------------------