7 |
8 | AudioClass *theAudio;
9 | const int mic_channel_num = 1;
10 | #define APP_LTE_APN "iot.truphone.com"
11 | #define APP_LTE_IP_TYPE (LTE_NET_IPTYPE_V4V6)
12 | #define APP_LTE_AUTH_TYPE (LTE_NET_AUTHTYPE_NONE)
13 | #define APP_LTE_RAT (LTE_NET_RAT_CATM)
14 |
15 | // host configuration
16 | char serverAddress[] = "";
17 | int port = 8001;
18 |
19 | LTE lteAccess;
20 | LTEUDP udp;
21 |
22 | static void audio_attention_cb(const ErrorAttentionParam *atprm) {
23 | puts("Attention!");
24 |
25 | if (atprm->error_code >= AS_ATTENTION_CODE_WARNING) {
26 |
27 | theAudio->startRecorder();
28 | }
29 | }
30 |
31 | void setup() {
32 | char apn[LTE_NET_APN_MAXLEN] = APP_LTE_APN;
33 | LTENetworkAuthType authtype = APP_LTE_AUTH_TYPE;
34 |
35 | Serial.begin(115200);
36 | while (!Serial)
37 | ;
38 |
39 | Serial.println("Starting LTE client setup.");
40 |
41 | Serial.println("=========== APN information ===========");
42 | Serial.print("Access Point Name : ");
43 | Serial.println(apn);
44 | Serial.print("Authentication Type: ");
45 | Serial.println((authtype == LTE_NET_AUTHTYPE_CHAP) ? "CHAP" : (authtype == LTE_NET_AUTHTYPE_NONE) ? "NONE" : "PAP");
46 |
47 | while (true) {
48 | if (lteAccess.begin() != LTE_SEARCHING) {
49 | Serial.println("Could not transition to LTE_SEARCHING.");
50 | Serial.println("Please check the status of the LTE board.");
51 | for (;;) {
52 | sleep(1);
53 | }
54 | }
55 |
56 | if (lteAccess.attach(APP_LTE_RAT,
57 | apn,
58 | "",
59 | "",
60 | authtype,
61 | APP_LTE_IP_TYPE)
62 | == LTE_READY) {
63 | Serial.println("attach succeeded.");
64 |
65 | break;
66 | }
67 | }
68 |
69 | if (!udp.begin(port)) {
70 | Serial.println("Failed to start UDP");
71 | while (true)
72 | ;
73 | }
74 |
75 | Serial.println("UDP started.");
76 |
77 | Serial.println("Init Audio Library");
78 | theAudio = AudioClass::getInstance();
79 | theAudio->begin(audio_attention_cb);
80 |
81 | Serial.println("Init Audio Recorder");
82 | theAudio->setRecorderMode(AS_SETRECDR_STS_INPUTDEVICE_MIC, 10, 200 * 1024);
83 |
84 | uint8_t channel = AS_CHANNEL_MONO;
85 | theAudio->initRecorder(AS_CODECTYPE_MP3, "/mnt/sd0/BIN", AS_SAMPLINGRATE_16000, channel);
86 |
87 | theAudio->startRecorder();
88 |
89 | Serial.println("Rec start!");
90 | }
91 |
92 | void loop() {
93 | static const size_t bufferSize = 4096;
94 | char buffer[bufferSize];
95 | uint32_t readSize;
96 | int err = theAudio->readFrames(buffer, bufferSize, &readSize);
97 |
98 |   if (readSize > 0) {
99 |     udp.beginPacket(serverAddress, port);
100 |     udp.write(buffer, readSize);
101 |     udp.endPacket();
102 |   }
103 | }
104 |
--------------------------------------------------------------------------------
/clients/web/.dockerignore:
--------------------------------------------------------------------------------
1 | # Next.js build output
2 | .next/
3 | out/
4 | build/
5 |
6 | # Node modules
7 | node_modules/
8 |
9 | # Logs
10 | *.log
11 | npm-debug.log*
12 | yarn-debug.log*
13 | yarn-error.log*
14 |
15 | # Runtime data
16 | pids/
17 | *.pid
18 | *.seed
19 | *.pid.lock
20 |
21 | # Directory for instrumented libs generated by jscoverage/JSCover
22 | lib-cov/
23 |
24 | # Coverage directory used by tools like istanbul
25 | coverage/
26 | *.lcov
27 |
28 | # nyc test coverage
29 | .nyc_output/
30 |
31 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
32 | .grunt/
33 |
34 | # Bower dependency directory (https://bower.io/)
35 | bower_components/
36 |
37 | # IDEs and editors
38 | .idea/
39 | *.swp
40 | *.swo
41 | .vscode/
42 | *.sublime-workspace
43 | *.sublime-project
44 | *.atom/
45 | *.iml
46 | *.eml
47 | *.esproj
48 | *.tmp
49 | *.tmp_proj
50 | *.tmproj
51 | *.tmproject
52 | *.tproject
53 | nbproject/
54 | *.komodoproject
55 | .kate-swp/
56 | *.swp
57 | *.swo
58 | *.swn
59 | *.sml
60 | .session
61 | *.log
62 |
63 | # OS generated files
64 | .DS_Store
65 | .DS_Store?
66 | ._*
67 | .Spotlight-V100
68 | .Trashes
69 | ehthumbs.db
70 | Thumbs.db
71 |
72 | # Testing
73 | __tests__/
74 | __mocks__/
75 |
76 | # Production
77 | .env.local
78 | .env.development.local
79 | .env.test.local
80 | .env.production.local
81 |
82 | # Misc
83 | *.gz
84 | *.zip
85 | *.rar
86 | *.tar
87 | *.tar.gz
88 | *.tgz
89 | *.bzip
90 | *.bzip2
91 | *.7z
92 | *.iso
93 | *.dmg
94 | *.img
95 | *.msi
96 | *.msp
97 | *.sms
98 | *.exe
99 | *.dll
100 | *.deb
101 | *.rpm
102 | *.qcow2
103 | *.vdi
104 | *.vmdk
105 | *.vhd
106 | *.vhdx
107 | *.bak
108 | *.sql
109 | *.psd
110 | *.ai
111 | *.sketch
112 | *.md
113 | README.md
114 | LICENSE
115 | .dockerignore
116 | Dockerfile
117 | *.pem
118 | *.key
119 | *.env
120 | *.cert
121 |
--------------------------------------------------------------------------------
/clients/web/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 | .yarn/install-state.gz
8 |
9 | # testing
10 | /coverage
11 |
12 | # next.js
13 | /.next/
14 | /out/
15 |
16 | # production
17 | /build
18 |
19 | # misc
20 | .DS_Store
21 | *.pem
22 |
23 | # debug
24 | npm-debug.log*
25 | yarn-debug.log*
26 | yarn-error.log*
27 |
28 | # local env files
29 | .env*.local
30 |
31 | # vercel
32 | .vercel
33 |
34 | # typescript
35 | *.tsbuildinfo
36 | next-env.d.ts
37 |
--------------------------------------------------------------------------------
/clients/web/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:18-alpine AS builder
2 |
3 | WORKDIR /app
4 |
5 | COPY package.json package-lock.json ./
6 |
7 | RUN npm install
8 |
9 | COPY . .
10 |
11 | RUN npm run build
12 |
13 | FROM node:18-alpine AS runner
14 |
15 | WORKDIR /app
16 |
17 | COPY --from=builder /app/next.config.mjs ./
18 | COPY --from=builder /app/public ./public
19 | COPY --from=builder /app/.next ./.next
20 | COPY --from=builder /app/node_modules ./node_modules
21 | COPY --from=builder /app/package.json ./package.json
22 | COPY --from=builder /app/server.js ./server.js
23 |
24 | EXPOSE 3000
25 |
26 | ENV NODE_ENV production
27 |
28 | # If no URL is set, assume the backend is running locally on the host
29 | ENV OWL_API_URL=http://host.docker.internal:8000
30 |
31 | # Start the app with the custom proxy server (server.js) when the backend is local; otherwise start the standard Next.js server
32 | CMD ["/bin/sh", "-c", "if [ \"$OWL_API_URL\" = 'http://host.docker.internal:8000' ]; then npm run startCustom; else npm start; fi"]
33 |
34 |
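The `CMD` above selects between the two start scripts based on `OWL_API_URL`. A minimal sketch of exercising both paths with `docker run`, assuming the `etown/owl-web:latest` image referenced in `docker-compose.yml` (the remote URL is a placeholder):

```bash
# Default OWL_API_URL (http://host.docker.internal:8000): the condition matches,
# so the custom proxy server (server.js) is started via `npm run startCustom`.
docker run -p 3000:3000 -e OWL_USER_CLIENT_TOKEN=change_me etown/owl-web:latest

# Any other OWL_API_URL: the condition fails, so the standard `npm start`
# (next start) path is used instead.
docker run -p 3000:3000 -e OWL_USER_CLIENT_TOKEN=change_me \
  -e OWL_API_URL=https://owl.example.com etown/owl-web:latest
```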
--------------------------------------------------------------------------------
/clients/web/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Getting Started
3 |
4 | First, set the client token environment variable:
5 |
6 | ```bash
7 | export OWL_USER_CLIENT_TOKEN=your-client-token
8 | ```
9 |
10 | Optionally, set the Google Maps API key if you want maps to work:
11 |
12 | ```bash
13 | export GOOGLE_MAPS_API_KEY=your-google-maps-token
14 | ```
15 |
16 | Then, install the dependencies:
17 |
18 | ```bash
19 | yarn install
20 | # or
21 | npm install
22 | # or
23 | pnpm install
24 | # or
25 | bun install
26 | ```
27 |
28 |
29 | Then, run the development server:
30 |
31 | ```bash
32 | npm run dev
33 | # or
34 | yarn dev
35 | # or
36 | pnpm dev
37 | # or
38 | bun dev
39 | ```
40 |
41 | Open [http://localhost:3000](http://localhost:3000) in your browser.
42 |
43 | You can capture audio via the local microphone or from devices over Web Bluetooth:
44 |
45 | [Owl Tutorial (video)](https://youtube.com/shorts/y4bqPLv-EHo "Owl Tutorial")
46 |
47 |
48 | IMPORTANT: Currently the web app just passes the client token for authentication; there is no way to log in, so do not expose this to the internet!
49 |
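To make the warning concrete: the catch-all API route (`src/app/api/[...all]/route.js`) attaches `OWL_USER_CLIENT_TOKEN` to every proxied backend request, and `/api/tokens` hands the token to the browser so the socket client can authenticate. A quick check, assuming the app is running on port 3000:

```bash
# Returns JSON with OWL_USER_CLIENT_TOKEN (and GOOGLE_MAPS_API_KEY) --
# anyone who can reach this port can read the token.
curl http://localhost:3000/api/tokens
```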
--------------------------------------------------------------------------------
/clients/web/jsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "paths": {
4 | "@/*": ["./src/*"]
5 | }
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/clients/web/next.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('next').NextConfig} */
2 | const nextConfig = {
3 | };
4 |
5 | export default nextConfig;
--------------------------------------------------------------------------------
/clients/web/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "web",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev",
7 | "build": "next build",
8 | "start": "next start",
9 | "startCustom": "NODE_ENV=production node server.js",
10 | "lint": "next lint"
11 | },
12 | "dependencies": {
13 | "http-proxy": "^1.18.1",
14 | "next": "14.1.0",
15 | "react": "^18",
16 | "react-dom": "^18",
17 | "react-icons": "^5.0.1",
18 | "socket.io-client": "^4.7.4",
19 | "uuid": "^9.0.1"
20 | },
21 | "devDependencies": {
22 | "autoprefixer": "^10.0.1",
23 | "postcss": "^8",
24 | "tailwindcss": "^3.3.0"
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/clients/web/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/clients/web/public/next.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/clients/web/public/vercel.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/clients/web/server.js:
--------------------------------------------------------------------------------
1 | const http = require('http');
2 | const next = require('next');
3 | const httpProxy = require('http-proxy');
4 |
5 | const port = process.env.PORT || 3000;
6 | const dev = process.env.NODE_ENV !== 'production';
7 | const app = next({ dev });
8 | const handle = app.getRequestHandler();
9 |
10 | const proxy = httpProxy.createProxyServer({});
11 |
12 | const backendBaseUrl = process.env.OWL_API_URL || 'http://127.0.0.1:8000';
13 |
14 | app.prepare().then(() => {
15 | const server = http.createServer((req, res) => {
16 | if (req.url.startsWith('/api/socket')) {
17 | req.url = req.url.replace('/api/socket', '')
18 | req.url = '/socket.io/' + req.url;
19 | proxy.web(req, res, {
20 | target: backendBaseUrl,
21 | ws: true,
22 | });
23 | } else {
24 | handle(req, res);
25 | }
26 | });
27 |
28 | server.on('upgrade', (req, socket, head) => {
29 | if (req.url.startsWith('/api/socket')) {
30 | req.url = req.url.replace('/api/socket', '')
31 | req.url = '/socket.io/' + req.url;
32 | proxy.ws(req, socket, head, {
33 | target: backendBaseUrl,
34 | });
35 | }
36 | });
37 |
38 | server.listen(port, (err) => {
39 | if (err) throw err;
40 | console.log(`> Ready on http://127.0.0.1:${port}`);
41 | });
42 | });
--------------------------------------------------------------------------------
/clients/web/src/app/api/[...all]/route.js:
--------------------------------------------------------------------------------
1 | async function fetchFromBackend(url, options) {
2 | const token = process.env.OWL_USER_CLIENT_TOKEN;
3 | const incomingUrl = new URL(url);
4 | const newPathname = incomingUrl.pathname.replace(/^\/api/, '');
5 |
6 | if (newPathname === '/tokens') {
7 | return new Response(JSON.stringify({
8 | OWL_USER_CLIENT_TOKEN: token,
9 | GOOGLE_MAPS_API_KEY: process.env.GOOGLE_MAPS_API_KEY
10 | }), {
11 | status: 200,
12 | headers: {
13 | 'Content-Type': 'application/json',
14 | },
15 | });
16 | }
17 |
18 | const backendBaseUrl = process.env.OWL_API_URL || 'http://127.0.0.1:8000';
19 | const backendUrl = new URL(newPathname, backendBaseUrl);
20 |
21 | const backendOptions = {
22 | ...options,
23 | headers: {
24 | 'Authorization': `Bearer ${token}`,
25 | 'Content-Type': 'application/json',
26 | ...options.headers,
27 | },
28 | };
29 |
30 | const backendResponse = await fetch(backendUrl.toString(), backendOptions);
31 | const data = await backendResponse.json();
32 |
33 | return new Response(JSON.stringify(data), {
34 | status: backendResponse.status,
35 | headers: {
36 | 'Content-Type': 'application/json',
37 | },
38 | });
39 | }
40 |
41 | export async function GET(request) {
42 | return fetchFromBackend(request.url, { method: 'GET' });
43 | }
44 |
45 | export async function POST(request) {
46 | const body = await request.json();
47 | return fetchFromBackend(request.url, {
48 | method: 'POST',
49 | body: JSON.stringify(body),
50 | });
51 | }
--------------------------------------------------------------------------------
/clients/web/src/app/components/CaptureComponent.js:
--------------------------------------------------------------------------------
1 | 'use client';
2 |
3 | import React, { useState } from 'react';
4 | import { useAudioRecorder } from '../hooks/useAudioRecorder';
5 | import { useLocationTracker } from '../hooks/useLocationTracker';
6 | import { useBluetoothAudioStreamer } from '../hooks/useBluetoothAudioStreamer';
7 | import { useSocket } from '../hooks/useSocket';
8 | import { FiBluetooth, FiMic, FiSquare } from 'react-icons/fi';
9 | import { v4 as uuidv4 } from 'uuid';
10 |
11 | const generateHexUUID = () => {
12 | const uuid = uuidv4();
13 | const hexFormatUUID = uuid.replace(/-/g, '').toLowerCase();
14 | return hexFormatUUID;
15 | };
16 |
17 | const CaptureComponent = () => {
18 | const socket = useSocket();
19 | const [isRecording, setIsRecording] = useState(false);
20 | const [isBluetoothActive, setIsBluetoothActive] = useState(false);
21 | const [captureUUID, setCaptureUUID] = useState('');
22 |
23 | const toggleBluetooth = () => {
24 | if (isBluetoothActive) {
25 | setIsBluetoothActive(false);
26 | setCaptureUUID('');
27 | socket.emit('finish_audio', captureUUID);
28 | } else {
29 | const newCaptureUUID = generateHexUUID();
30 | setCaptureUUID(newCaptureUUID);
31 | setIsBluetoothActive(true);
32 | }
33 | };
34 |
35 | const toggleRecording = () => {
36 | if (isRecording) {
37 | setIsRecording(false);
38 | setCaptureUUID('');
39 | socket.emit('finish_audio', captureUUID);
40 | } else {
41 | const newCaptureUUID = generateHexUUID();
42 | setCaptureUUID(newCaptureUUID);
43 | setIsRecording(true);
44 | }
45 | };
46 |
47 | useBluetoothAudioStreamer(isBluetoothActive, captureUUID);
48 |
49 | useAudioRecorder(isRecording, captureUUID);
50 |
51 | useLocationTracker(isRecording || isBluetoothActive, captureUUID);
52 |
53 | return (
54 |
55 |
61 |
67 |
68 | );
69 | };
70 |
71 | export default CaptureComponent;
--------------------------------------------------------------------------------
/clients/web/src/app/components/CountUpTimer.js:
--------------------------------------------------------------------------------
1 | 'use client';
2 |
3 | import React, { useState, useEffect } from 'react';
4 |
5 | const CountUpTimer = ({ startTime }) => {
6 | const [elapsedTime, setElapsedTime] = useState(0);
7 |
8 | useEffect(() => {
9 | const utcStartTime = new Date(`${startTime}Z`); // Append 'Z' to indicate UTC
10 |
11 | const updateElapsedTime = () => {
12 | const now = new Date();
13 | const newElapsedTime = now - utcStartTime;
14 | setElapsedTime(newElapsedTime);
15 | };
16 |
17 | const timerId = setInterval(updateElapsedTime, 1000);
18 |
19 | updateElapsedTime();
20 |
21 | return () => clearInterval(timerId);
22 | }, [startTime]);
23 |
24 | const formatElapsedTime = (timeInMilliseconds) => {
25 | const totalSeconds = Math.floor(timeInMilliseconds / 1000);
26 | const hours = Math.floor(totalSeconds / 3600);
27 | const minutes = Math.floor((totalSeconds % 3600) / 60);
28 | const seconds = totalSeconds % 60;
29 |
30 | return [hours, minutes, seconds]
31 | .map(val => val < 10 ? `0${val}` : val)
32 | .join(':');
33 | };
34 |
35 | return (
36 |
37 | {formatElapsedTime(elapsedTime)}
38 |
39 | );
40 | };
41 |
42 |
43 | export default CountUpTimer;
--------------------------------------------------------------------------------
/clients/web/src/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/clients/web/src/app/favicon.ico
--------------------------------------------------------------------------------
/clients/web/src/app/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | :root {
6 | --foreground-rgb: 0, 0, 0;
7 | --background-start-rgb: 214, 219, 220;
8 | --background-end-rgb: 255, 255, 255;
9 | }
10 |
11 | @media (prefers-color-scheme: dark) {
12 | :root {
13 | --foreground-rgb: 255, 255, 255;
14 | --background-start-rgb: 0, 0, 0;
15 | --background-end-rgb: 0, 0, 0;
16 | }
17 | }
18 |
19 | body {
20 | color: rgb(var(--foreground-rgb));
21 | background: linear-gradient(
22 | to bottom,
23 | transparent,
24 | rgb(var(--background-end-rgb))
25 | )
26 | rgb(var(--background-start-rgb));
27 | }
28 |
29 | @layer utilities {
30 | .text-balance {
31 | text-wrap: balance;
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/clients/web/src/app/hooks/useAudioRecorder.js:
--------------------------------------------------------------------------------
1 | 'use client';
2 | import { useState, useEffect } from 'react';
3 | import { useSocket } from './useSocket';
4 |
5 | export const useAudioRecorder = (isRecording, captureUUID) => {
6 | const socket = useSocket();
7 | const [audioContext, setAudioContext] = useState(null);
8 | const [mediaStream, setMediaStream] = useState(null);
9 | const [scriptProcessor, setScriptProcessor] = useState(null);
10 |
11 | useEffect(() => {
12 | const startRecording = async () => {
13 | const sampleRate = 16000;
14 | const audioCtx = new (window.AudioContext || window.webkitAudioContext)({
15 | sampleRate: sampleRate,
16 | });
17 | const processor = audioCtx.createScriptProcessor(256, 1, 1);
18 | setAudioContext(audioCtx);
19 | setScriptProcessor(processor);
20 |
21 | try {
22 | const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
23 | setMediaStream(stream);
24 | const source = audioCtx.createMediaStreamSource(stream);
25 | source.connect(processor);
26 | processor.connect(audioCtx.destination);
27 |
28 | processor.onaudioprocess = (e) => {
29 | if (!isRecording) return;
30 |
31 | const inputData = e.inputBuffer.getChannelData(0);
32 | const buffer = new Int16Array(inputData.length);
33 | for (let i = 0; i < inputData.length; i++) {
34 | buffer[i] = inputData[i] * 0x7FFF; // Convert float32 to int16
35 | }
36 | socket.emit('audio_data', buffer.buffer, "web", captureUUID, 'wav');
37 | };
38 | } catch (error) {
39 | console.error("Error accessing the microphone: ", error);
40 | }
41 | };
42 |
43 | if (isRecording && captureUUID) {
44 | startRecording();
45 | } else {
46 | scriptProcessor?.disconnect();
47 | audioContext?.close();
48 | mediaStream?.getTracks().forEach(track => track.stop());
49 | }
50 |
51 | return () => {
52 | scriptProcessor?.disconnect();
53 | audioContext?.close();
54 | mediaStream?.getTracks().forEach(track => track.stop());
55 | };
56 | }, [isRecording, captureUUID]);
57 |
58 | };
59 |
--------------------------------------------------------------------------------
/clients/web/src/app/hooks/useBluetoothAudioStreamer.js:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from 'react';
2 | import { useSocket } from './useSocket';
3 | import { FrameSequencer } from '../utils/frameSequencer';
4 |
5 | export const useBluetoothAudioStreamer = (isActive, captureUUID) => {
6 | const socket = useSocket();
7 | const [device, setDevice] = useState(null);
8 |
9 | const disconnectDevice = async () => {
10 | if (device && device.gatt.connected) {
11 | console.log("Disconnecting from Bluetooth device.");
12 | await device.gatt.disconnect();
13 | setDevice(null);
14 | }
15 | };
16 |
17 | useEffect(() => {
18 | if (!isActive) {
19 | disconnectDevice();
20 | return;
21 | }
22 |
23 | let frameSequencer = new FrameSequencer();
24 |
25 | async function connectToBLEDevice() {
26 | try {
27 | const device = await navigator.bluetooth.requestDevice({
28 | filters: [{services: ["03d5d5c4-a86c-11ee-9d89-8f2089a49e7e"]}]
29 | });
30 | setDevice(device);
31 | const server = await device.gatt.connect();
32 | const service = await server.getPrimaryService("03d5d5c4-a86c-11ee-9d89-8f2089a49e7e");
33 | const characteristic = await service.getCharacteristic("b189a505-a86c-11ee-a5fb-8f2089a49e7e");
34 |
35 | characteristic.addEventListener('characteristicvaluechanged', (event) => {
36 | let value = event.target.value;
37 | let frame = frameSequencer.add(value);
38 | if (frame) {
39 | socket.emit('audio_data', new Uint8Array(frame), "web", captureUUID, 'aac');
40 | }
41 | });
42 |
43 | await characteristic.startNotifications();
44 | } catch (error) {
45 | console.error("Bluetooth Audio Streaming Error: ", error);
46 | }
47 | }
48 |
49 | connectToBLEDevice();
50 |
51 | return () => {
52 | disconnectDevice();
53 | };
54 | }, [isActive, captureUUID, socket]);
55 |
56 | useEffect(() => {
57 | return () => {
58 | disconnectDevice();
59 | };
60 | }, []);
61 | };
62 |
--------------------------------------------------------------------------------
/clients/web/src/app/hooks/useLocationTracker.js:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from 'react';
2 |
3 | export const useLocationTracker = (isActive, captureUUID, updateInterval = 10000) => {
4 | useEffect(() => {
5 | let intervalId;
6 |
7 | const postLocation = async (latitude, longitude) => {
8 | try {
9 | const response = await fetch('/api/capture/location', {
10 | method: 'POST',
11 | headers: {
12 | 'Content-Type': 'application/json',
13 | },
14 | body: JSON.stringify({
15 | capture_uuid: captureUUID,
16 | latitude,
17 | longitude,
18 | }),
19 | });
20 | const data = await response.json();
21 | console.log('Location posted:', data);
22 | } catch (error) {
23 | console.error('Error posting location:', error);
24 | }
25 | };
26 |
27 | const updateLocation = () => {
28 | if (!navigator.geolocation) {
29 | console.error("Geolocation is not supported by this browser.");
30 | return;
31 | }
32 |
33 | navigator.geolocation.getCurrentPosition(
34 | (position) => {
35 | const { latitude, longitude } = position.coords;
36 | console.log(`Capture UUID: ${captureUUID}, Latitude: ${latitude}, Longitude: ${longitude}`);
37 | postLocation(latitude, longitude);
38 | },
39 | (error) => {
40 | console.error("Error getting location: ", error);
41 | }
42 | );
43 | };
44 |
45 | if (isActive) {
46 | updateLocation();
47 | intervalId = setInterval(updateLocation, updateInterval);
48 | }
49 |
50 | return () => {
51 | if (intervalId) clearInterval(intervalId);
52 | };
53 | }, [isActive, captureUUID, updateInterval]);
54 |
55 | };
--------------------------------------------------------------------------------
/clients/web/src/app/hooks/useSocket.js:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from 'react';
2 | import { initSocket, getSocket, disconnectSocket } from '../socket';
3 |
4 | export const useSocket = () => {
5 | const [socket, setSocket] = useState(null);
6 |
7 | useEffect(() => {
8 | const initializeSocket = async () => {
9 | try {
10 | const response = await fetch(`/api/tokens`, {
11 | cache: 'no-store'
12 | });
13 | if (!response.ok) {
14 | throw new Error('Failed to fetch tokens');
15 | }
16 | const data = await response.json();
17 | const socketIo = initSocket(data.OWL_USER_CLIENT_TOKEN);
18 | setSocket(socketIo);
19 | } catch (error) {
20 | console.error(error);
21 | }
22 | };
23 | initializeSocket();
24 |
25 | }, []);
26 |
27 | return socket;
28 | };
--------------------------------------------------------------------------------
/clients/web/src/app/layout.js:
--------------------------------------------------------------------------------
1 |
2 | import { Inter } from "next/font/google";
3 | import "./globals.css";
4 | import CaptureComponent from "./components/CaptureComponent";
5 |
6 | const inter = Inter({ subsets: ["latin"] });
7 |
8 | export const metadata = {
9 | title: "Owl",
10 | description: "Owl",
11 | };
12 |
13 | export default function RootLayout({ children }) {
14 |
15 | return (
16 |
17 |
18 |
19 | {children}
20 |
21 |
22 | );
23 | }
24 |
--------------------------------------------------------------------------------
/clients/web/src/app/socket.js:
--------------------------------------------------------------------------------
1 | 'use client';
2 | import io from 'socket.io-client';
3 |
4 | let socket;
5 |
6 | export const initSocket = (token) => {
7 | if (!socket) {
8 | const dev = process.env.NODE_ENV !== 'production';
9 | const backendBaseUrl = dev ? 'http://127.0.0.1:8000' : '/';
10 | let options = {
11 | extraHeaders: {
12 | Authorization: `Bearer ${token}`
13 | }
14 | }
15 | if (!dev) {
16 | options.path = '/api/socket';
17 | }
18 | socket = io(backendBaseUrl, options);
19 | console.log('Connecting to socket server');
20 | }
21 |
22 | return socket;
23 | };
24 |
25 | export const getSocket = () => {
26 | if (!socket) {
27 |     throw new Error('Socket not initialized. Call initSocket(token) first.');
28 | }
29 | return socket;
30 | };
31 |
32 | export const disconnectSocket = () => {
33 | if (socket) {
34 | socket.disconnect();
35 | socket = null;
36 | }
37 | };
38 |
--------------------------------------------------------------------------------
/clients/web/src/app/utils/frameSequencer.js:
--------------------------------------------------------------------------------
1 | export class FrameSequencer {
2 | constructor() {
3 | this.packets = [];
4 | this.expectedNumPackets = null;
5 | this.expectedIntraframeSeqno = null;
6 | }
7 |
8 | add(data) {
9 | const interframeSeqno = data.getUint8(0) & 0x0F;
10 | const numPackets = data.getUint8(1) >> 4;
11 | const intraframeSeqno = data.getUint8(1) & 0x0F;
12 |
13 | console.log(`Received packet: Interframe: ${interframeSeqno}, Num: ${numPackets}, Intraframe: ${intraframeSeqno}`);
14 |
15 | if (this.expectedIntraframeSeqno === null || this.expectedNumPackets === null) {
16 | this.expectedIntraframeSeqno = intraframeSeqno;
17 | this.expectedNumPackets = numPackets;
18 | }
19 |
20 | if (intraframeSeqno !== this.expectedIntraframeSeqno || numPackets !== this.expectedNumPackets) {
21 | console.log(`Packet out of sequence. Resetting...`);
22 | this.resetState();
23 | return null;
24 | }
25 |
26 | this.packets.push(data.buffer.slice(2)); // Add the current packet (minus header)
27 | if (intraframeSeqno === numPackets - 1) {
28 | const completeFrame = this.concatenateBuffers(this.packets);
29 | this.resetState();
30 | return completeFrame;
31 | }
32 |
33 | // Prepare for the next packet
34 | this.expectedIntraframeSeqno = (this.expectedIntraframeSeqno + 1) % 16;
35 | return null;
36 | }
37 |
38 | resetState() {
39 | this.packets = [];
40 | this.expectedNumPackets = null;
41 | this.expectedIntraframeSeqno = null;
42 | }
43 |
44 | extractSequenceNumbers(dataBuffer) {
45 | const data = new DataView(dataBuffer);
46 | const interframeSeqno = data.getUint8(0) & 0x0F;
47 | const numPackets = data.getUint8(1) >> 4;
48 | const intraframeSeqno = data.getUint8(1) & 0x0F;
49 | return [interframeSeqno, numPackets, intraframeSeqno];
50 | }
51 |
52 | concatenateBuffers(arrayBuffers) {
53 | let totalLength = arrayBuffers.reduce((acc, value) => acc + value.byteLength, 0);
54 | let result = new Uint8Array(totalLength);
55 | let length = 0;
56 | for (let arrayBuffer of arrayBuffers) {
57 | result.set(new Uint8Array(arrayBuffer), length);
58 | length += arrayBuffer.byteLength;
59 | }
60 | return result.buffer;
61 | }
62 | }
--------------------------------------------------------------------------------
/clients/web/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 | content: [
4 | "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
5 | "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
6 | "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
7 | ],
8 | theme: {
9 | extend: {
10 | backgroundImage: {
11 | "gradient-radial": "radial-gradient(var(--tw-gradient-stops))",
12 | "gradient-conic":
13 | "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))",
14 | },
15 | },
16 | },
17 | plugins: [],
18 | };
19 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/.gitignore:
--------------------------------------------------------------------------------
1 | .pio
2 | .vscode/.browse.c_cpp.db*
3 | .vscode/c_cpp_properties.json
4 | .vscode/launch.json
5 | .vscode/ipch
6 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | // See http://go.microsoft.com/fwlink/?LinkId=827846
3 | // for the documentation about the extensions.json format
4 | "recommendations": [
5 | "platformio.platformio-ide"
6 | ],
7 | "unwantedRecommendations": [
8 | "ms-vscode.cpptools-extension-pack"
9 | ]
10 | }
11 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/.gitignore:
--------------------------------------------------------------------------------
1 | .pio
2 | .vscode/.browse.c_cpp.db*
3 | .vscode/c_cpp_properties.json
4 | .vscode/launch.json
5 | .vscode/ipch
6 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | // See http://go.microsoft.com/fwlink/?LinkId=827846
3 | // for the documentation about the extensions.json format
4 | "recommendations": [
5 | "platformio.platformio-ide"
6 | ],
7 | "unwantedRecommendations": [
8 | "ms-vscode.cpptools-extension-pack"
9 | ]
10 | }
11 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "cmake.configureOnOpen": false
3 | }
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/include/README:
--------------------------------------------------------------------------------
1 |
2 | This directory is intended for project header files.
3 |
4 | A header file is a file containing C declarations and macro definitions
5 | to be shared between several project source files. You request the use of a
6 | header file in your project source file (C, C++, etc) located in `src` folder
7 | by including it, with the C preprocessing directive `#include'.
8 |
9 | ```src/main.c
10 |
11 | #include "header.h"
12 |
13 | int main (void)
14 | {
15 | ...
16 | }
17 | ```
18 |
19 | Including a header file produces the same results as copying the header file
20 | into each source file that needs it. Such copying would be time-consuming
21 | and error-prone. With a header file, the related declarations appear
22 | in only one place. If they need to be changed, they can be changed in one
23 | place, and programs that include the header file will automatically use the
24 | new version when next recompiled. The header file eliminates the labor of
25 | finding and changing all the copies as well as the risk that a failure to
26 | find one copy will result in inconsistencies within a program.
27 |
28 | In C, the usual convention is to give header files names that end with `.h'.
29 | It is most portable to use only letters, digits, dashes, and underscores in
30 | header file names, and at most one dot.
31 |
32 | Read more about using header files in official GCC documentation:
33 |
34 | * Include Syntax
35 | * Include Operation
36 | * Once-Only Headers
37 | * Computed Includes
38 |
39 | https://gcc.gnu.org/onlinedocs/cpp/Header-Files.html
40 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/README:
--------------------------------------------------------------------------------
1 |
2 | This directory is intended for project specific (private) libraries.
3 | PlatformIO will compile them to static libraries and link into executable file.
4 |
5 | The source code of each library should be placed in its own separate directory
6 | ("lib/your_library_name/[here are source files]").
7 |
8 | For example, see a structure of the following two libraries `Foo` and `Bar`:
9 |
10 | |--lib
11 | | |
12 | | |--Bar
13 | | | |--docs
14 | | | |--examples
15 | | | |--src
16 | | | |- Bar.c
17 | | | |- Bar.h
18 | | | |- library.json (optional, custom build options, etc) https://docs.platformio.org/page/librarymanager/config.html
19 | | |
20 | | |--Foo
21 | | | |- Foo.c
22 | | | |- Foo.h
23 | | |
24 | | |- README --> THIS FILE
25 | |
26 | |- platformio.ini
27 | |--src
28 | |- main.c
29 |
30 | and a contents of `src/main.c`:
31 | ```
32 | #include <Foo.h>
33 | #include <Bar.h>
34 |
35 | int main (void)
36 | {
37 | ...
38 | }
39 |
40 | ```
41 |
42 | PlatformIO Library Dependency Finder will find automatically dependent
43 | libraries scanning project source files.
44 |
45 | More information about PlatformIO Library Dependency Finder
46 | - https://docs.platformio.org/page/librarymanager/ldf.html
47 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | idf_component_register(INCLUDE_DIRS "include")
2 | get_filename_component(BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} NAME)
3 | add_prebuilt_library(${BASE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/lib/${CONFIG_IDF_TARGET}/libesp_audio_codec.a"
4 | PRIV_REQUIRES ${BASE_DIR})
5 | target_link_libraries(${COMPONENT_LIB} INTERFACE "-L ${CMAKE_CURRENT_SOURCE_DIR}/lib/${CONFIG_IDF_TARGET}")
6 | target_link_libraries(${COMPONENT_LIB} INTERFACE esp_audio_codec)
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/LICENSE:
--------------------------------------------------------------------------------
1 | ESPRESSIF MIT License
2 |
3 | Copyright (c) 2023-2026
4 |
5 | Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
6 | it is free of charge, to any person obtaining a copy of this software and associated
7 | documentation files (the "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished
10 | to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all copies or
13 | substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/idf_component.yml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | idf:
3 | version: '>=4.4'
4 | description: Espressif audio encoder and decoder
5 | issues: https://github.com/espressif/esp-adf/issues
6 | repository: https://github.com/espressif/esp-adf-libs.git
7 | url: https://github.com/espressif/esp-adf-libs/tree/master/esp_audio_codec
8 | version: 1.0.1
9 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/include/esp_audio_codec_version.h:
--------------------------------------------------------------------------------
1 | /*
2 | * ESPRESSIF MIT License
3 | *
4 | * Copyright (c) 2023-2026
5 | *
6 | * Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
7 | * it is free of charge, to any person obtaining a copy of this software and associated
8 | * documentation files (the "Software"), to deal in the Software without restriction, including
9 | * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
11 | * to do so, subject to the following conditions:
12 | *
13 | * The above copyright notice and this permission notice shall be included in all copies or
14 | * substantial portions of the Software.
15 | *
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | *
23 | */
24 |
25 | #ifndef ESP_AUDIO_VERSION_H
26 | #define ESP_AUDIO_VERSION_H
27 |
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 |
32 | /**
33 | * Features:
34 | * - Support encoder: AAC-LC, AMR-NB, AMR-WB, ADPCM, G711a, G711u, OPUS, PCM
35 | * - Support encoding bit per sample: 16 bit
36 | * - Support register encoder for certain audio type
37 | * - Support create multiple encoder handles to encode multi-stream
38 | *
39 | * To be implemented:
40 | * - To support decoder process
41 | *
42 | * Release Notes:
43 | * v1.0.0:
44 | * - Add AAC-LC, AMR-NB, AMR-WB, ADPCM, G711a, G711u, OPUS, PCM encoding support
45 | * - Add a common encoder interface to register encoder for certain audio type
46 | * - Support create multiple encoder handles to encode multi-stream
47 | */
48 |
49 | /**
50 | * @brief Get audio codec version string
51 | */
52 | const char *esp_audio_codec_get_version();
53 |
54 | #ifdef __cplusplus
55 | }
56 | #endif
57 |
58 | #endif
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32/libesp_audio_codec.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32/libesp_audio_codec.a
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32c3/libesp_audio_codec.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32c3/libesp_audio_codec.a
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32s2/libesp_audio_codec.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32s2/libesp_audio_codec.a
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32s3/libesp_audio_codec.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/clients/xiao-esp32s3-sense/firmware/lib/espressif_esp_audio_codec_1.0.1/lib/esp32s3/libesp_audio_codec.a
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/platformio.ini:
--------------------------------------------------------------------------------
1 | ; PlatformIO Project Configuration File
2 | ;
3 | ; Build options: build flags, source filter
4 | ; Upload options: custom upload port, speed and extra flags
5 | ; Library options: dependencies, extra library storages
6 | ; Advanced options: extra scripting
7 | ;
8 | ; Please visit documentation for the other options and examples
9 | ; https://docs.platformio.org/page/projectconf.html
10 |
11 | [env:seeed_xiao_esp32s3]
12 | platform = espressif32
13 | board = seeed_xiao_esp32s3
14 | framework = arduino
15 | build_flags = -Llib/espressif_esp_audio_codec_1.0.1/lib/esp32s3 -lesp_audio_codec
16 |
--------------------------------------------------------------------------------
/clients/xiao-esp32s3-sense/firmware/test/README:
--------------------------------------------------------------------------------
1 |
2 | This directory is intended for PlatformIO Test Runner and project tests.
3 |
4 | Unit Testing is a software testing method by which individual units of
5 | source code, sets of one or more MCU program modules together with associated
6 | control data, usage procedures, and operating procedures, are tested to
7 | determine whether they are fit for use. Unit testing finds problems early
8 | in the development cycle.
9 |
10 | More information about PlatformIO Unit Testing:
11 | - https://docs.platformio.org/en/latest/advanced/unit-testing/index.html
12 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.8'
2 |
3 | services:
4 | backend:
5 | image: etown/owl:latest
6 | environment:
7 | - OWL_ASYNC_WHISPER_HF_TOKEN=${OWL_ASYNC_WHISPER_HF_TOKEN}
8 | - OWL_USER_CLIENT_TOKEN=${OWL_USER_CLIENT_TOKEN:-change_me}
9 | - OWL_LLM_API_BASE_URL=${OWL_LLM_API_BASE_URL:-http://host.docker.internal:11434}
10 | - OMP_NUM_THREADS=1
11 |
12 | shm_size: 3g
13 | ports:
14 | - "8000:8000"
15 | volumes:
16 | - .:/app
17 |
18 | web:
19 | image: etown/owl-web:latest
20 | environment:
21 | - GOOGLE_MAPS_API_KEY=${GOOGLE_MAPS_API_KEY}
22 | - OWL_USER_CLIENT_TOKEN=${OWL_USER_CLIENT_TOKEN:-change_me}
23 | ports:
24 | - "3000:3000"
25 | depends_on:
26 | - backend
27 |
--------------------------------------------------------------------------------
/docs/development_boards.md:
--------------------------------------------------------------------------------
1 | ### Getting started with development boards
2 |
3 | Designing an AI wearable involves balancing compute power, sensor capabilities, and connectivity to achieve the desired battery life and form factor. Fortunately, there are many development boards available that make it easy to get started and begin field testing. Below are just a few boards we've experimented with for inspiration.
4 |
5 |
6 | | Development Board | Microphone | Camera | Bluetooth | LTE | WiFi | Chip | Notes |
7 | |-----------------------------------------------------------------------------------------------------------|:----------:|:------:|:---------:|:---:|:----:|----------|--------------------------------------------|
8 | | [Seeeduino XIAO ESP32S3](https://wiki.seeedstudio.com/xiao_esp32s3_getting_started/) | ✓ | ✓ | ✓ | - | ✓ | ESP32-S3 | Compact, easy to get started |
9 | | [Sony Spresense](https://developer.sony.com/develop/spresense) | ✓ | ✓ | ✓ | ✓ | ✓ | CXD5602 | High-quality camera sensor, GPS. Connectivity requires extension boards, and microphone not included. |
10 | | [LilyGo T-SIMCAM ESP32-S3](https://www.lilygo.cc/products/t-simcam) | ✓ | ✓ | ✓ | ✓ | ✓ | ESP32-S3 | Optional 4G LTE via mPCIe |
11 | | [ESP32-S3-EYE](https://www.espressif.com/en/products/devkits/esp-eye/overview.) | ✓ | ✓ | ✓ | - | ✓ | ESP32-S3 | AI development board with LCD display |
12 | | [ESP32-S3-Korvo](https://www.espressif.com/en/products/devkits/esp32-s3-korvo-1) | ✓ | - | ✓ | - | - | ESP32-S3 | Designed for voice processing; has microphone array |
13 | | [Adafruit Feather Sense](https://www.adafruit.com/product/4516) | ✓ | - | ✓ | - | - | nRF52840 | Low power |
14 | | [Arduino Nano 33 BLE Sense](https://store-usa.arduino.cc/products/arduino-nano-33-ble-sense) | ✓ | - | ✓ | - | - | nRF52840 | Low power |
15 | | [Nordic Thingy:52](https://www.nordicsemi.com/Products/Development-hardware/Nordic-Thingy-52) | ✓ | - | ✓ | - | - | nRF52832 | All in one development kit including battery |
16 |
--------------------------------------------------------------------------------
/docs/docker_setup.md:
--------------------------------------------------------------------------------
1 | # Owl - Always-on Wearable AI Setup Guide
2 |
3 | ## Prerequisites
4 |
5 | Before you begin, ensure you have the following installed on your system:
6 |
7 | - Docker
8 | - Docker Compose
9 |
10 | ## Model Agreements
11 |
12 | Owl uses PyAnnote for diarization. Please visit Hugging Face and accept the terms for the following models:
13 | - [PyAnnote Segmentation Model](https://huggingface.co/pyannote/segmentation)
14 | - [PyAnnote Speaker Diarization Model](https://huggingface.co/pyannote/speaker-diarization)
15 |
16 | ## Setup Instructions
17 |
18 | 1. **Environment Variables**
19 |
20 | Set your Hugging Face token as an environment variable:
21 | ```
22 | export OWL_ASYNC_WHISPER_HF_TOKEN=
23 | ```
24 |
25 | 2. **Clone Repository**
26 |
27 | Clone the Owl repository from GitHub:
28 | ```
29 | git clone https://github.com/OwlAIProject/Owl.git
30 | cd Owl
31 | ```
32 |
33 | 3. **Launch Containers**
34 |
35 | Launch the API and web containers:
36 | ```
37 | docker compose up
38 | ```
39 |
40 | You can now access the web interface at `http://localhost:3000`. Start testing captures with a microphone or Bluetooth devices. You can also build the iOS app and test captures via the Apple Watch or Bluetooth devices through your iPhone.
41 |
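`docker-compose.yml` also reads a few optional variables besides the Hugging Face token; a minimal sketch of a complete launch with placeholder values:

```bash
export OWL_ASYNC_WHISPER_HF_TOKEN=your-huggingface-token   # for PyAnnote diarization
export OWL_USER_CLIENT_TOKEN=pick-a-secret-token           # defaults to change_me if unset
export GOOGLE_MAPS_API_KEY=your-google-maps-key            # optional, enables maps in the web UI
docker compose up
```
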
42 | ## Using Commercial Models
43 |
44 | If you prefer using commercial models for transcription and summarization, set up the following environment variables instead of the PyAnnote setup:
45 |
46 | - For Deepgram:
47 | ```
48 | export OWL_STREAMING_TRANSCRIPTION_PROVIDER=deepgram
49 | export OWL_ASYNC_TRANSCRIPTION_PROVIDER=deepgram
50 | export OWL_DEEPGRAM_API_KEY=
51 | ```
52 |
53 | - For GPT-4 Turbo:
54 | ```
55 | export OWL_LLM_MODEL=gpt-4-turbo-preview
56 | export OWL_LLM_API_BASE_URL=https://api.openai.com/v1
57 | export OWL_LLM_API_KEY=
58 | ```
59 |
60 | **Note for Mac Users:**
61 |
62 | If you're using Docker on a Mac, you may need to adjust the Docker settings to allocate more RAM to ensure optimal performance, especially when running local models. Docker's default settings might not provide sufficient RAM.
63 |
64 | Additionally, running local models directly on Docker for Mac might result in slower performance compared to native environments. This is due to the overhead associated with Docker's virtualization on macOS.
65 |
66 | For optimal performance on a Mac, consider following the specific instructions provided [here](./macos_and_linux_setup.md).
67 |
68 | [<< Back to Home](../README.md)
69 |
--------------------------------------------------------------------------------
/docs/images/apple_watch/complication_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/complication_1.jpg
--------------------------------------------------------------------------------
/docs/images/apple_watch/complication_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/complication_2.jpg
--------------------------------------------------------------------------------
/docs/images/apple_watch/complication_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/complication_3.jpg
--------------------------------------------------------------------------------
/docs/images/apple_watch/complication_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/complication_4.jpg
--------------------------------------------------------------------------------
/docs/images/apple_watch/complication_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/complication_5.jpg
--------------------------------------------------------------------------------
/docs/images/apple_watch/content_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/content_view.png
--------------------------------------------------------------------------------
/docs/images/apple_watch/settings_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/apple_watch/settings_view.png
--------------------------------------------------------------------------------
/docs/images/bee/bee.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/bee/bee.png
--------------------------------------------------------------------------------
/docs/images/capture_storage/captures_today.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/capture_storage/captures_today.png
--------------------------------------------------------------------------------
/docs/images/capture_storage/conversations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/capture_storage/conversations.png
--------------------------------------------------------------------------------
/docs/images/featured/apple_watch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/featured/apple_watch.jpg
--------------------------------------------------------------------------------
/docs/images/featured/devices.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/featured/devices.jpg
--------------------------------------------------------------------------------
/docs/images/featured/ios_conversation_example_ces.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/featured/ios_conversation_example_ces.png
--------------------------------------------------------------------------------
/docs/images/featured/ios_conversation_example_home_depot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/featured/ios_conversation_example_home_depot.png
--------------------------------------------------------------------------------
/docs/images/featured/ios_conversations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/featured/ios_conversations.png
--------------------------------------------------------------------------------
/docs/images/featured/pendant_wearable.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/featured/pendant_wearable.jpg
--------------------------------------------------------------------------------
/docs/images/windows/windows_env_vars_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/windows/windows_env_vars_1.png
--------------------------------------------------------------------------------
/docs/images/windows/windows_env_vars_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/windows/windows_env_vars_2.png
--------------------------------------------------------------------------------
/docs/images/windows/windows_env_vars_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/windows/windows_env_vars_3.png
--------------------------------------------------------------------------------
/docs/images/windows/windows_env_vars_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/windows/windows_env_vars_4.png
--------------------------------------------------------------------------------
/docs/images/xcode/developer_mode_iphone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/developer_mode_iphone.png
--------------------------------------------------------------------------------
/docs/images/xcode/developer_mode_watch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/developer_mode_watch.png
--------------------------------------------------------------------------------
/docs/images/xcode/xcode_app_constants.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/xcode_app_constants.png
--------------------------------------------------------------------------------
/docs/images/xcode/xcode_device.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/xcode_device.png
--------------------------------------------------------------------------------
/docs/images/xcode/xcode_issue_navigator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/xcode_issue_navigator.png
--------------------------------------------------------------------------------
/docs/images/xcode/xcode_scheme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/xcode_scheme.png
--------------------------------------------------------------------------------
/docs/images/xcode/xcode_signing_and_capabilities.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/xcode_signing_and_capabilities.png
--------------------------------------------------------------------------------
/docs/images/xcode/xcode_team_selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xcode/xcode_team_selection.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/antenna_installation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/antenna_installation.gif
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/battery_eemb_1200mah.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/battery_eemb_1200mah.jpg
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/daughterboard_installation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/daughterboard_installation.gif
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/ios_conversation_completed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/ios_conversation_completed.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/ios_conversation_details.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/ios_conversation_details.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/ios_conversation_in_progress.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/ios_conversation_in_progress.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/soldered_connectors.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/soldered_connectors.jpg
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_extensions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_extensions.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_platformio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_platformio.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_platformio_build_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_platformio_build_button.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_platformio_build_success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_platformio_build_success.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_platformio_serial_port_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_platformio_serial_port_button.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_platformio_upload_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_platformio_upload_button.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/vscode_platformio_upload_success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/vscode_platformio_upload_success.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/xiao_bottom_pads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/xiao_bottom_pads.png
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/xiao_esp32s3_sense_board.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/xiao_esp32s3_sense_board.jpg
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/xiao_pi_case_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/xiao_pi_case_1.jpg
--------------------------------------------------------------------------------
/docs/images/xiao_esp32s3_sense/xiao_pi_case_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/docs/images/xiao_esp32s3_sense/xiao_pi_case_2.jpg
--------------------------------------------------------------------------------
/docs/macos_and_linux_setup.md:
--------------------------------------------------------------------------------
1 | # Owl - Always-on Wearable AI Setup Guide
2 |
3 | [<< Home](../README.md)
4 |
5 | ## Prerequisites
6 |
7 | Before you begin, ensure you have the following installed on your system:
8 |
9 | - Python (version 3.11 or newer)
10 | - Node.js (version 18 or newer)
11 | - Poetry
12 | - FFmpeg
13 | - Ollama (optional, for local-only mode)
14 |
15 | ## Model Agreements
16 |
17 | Owl uses PyAnnote for diarization. Please visit Hugging Face and accept the terms for the following models:
18 | - [PyAnnote Segmentation Model](https://huggingface.co/pyannote/segmentation)
19 | - [PyAnnote Speaker Diarization Model](https://huggingface.co/pyannote/speaker-diarization)
20 |
21 | ## Setup Instructions
22 |
23 | 1. **Environment Variables**
24 |
25 | Set your Hugging Face token as an environment variable:
26 | ```
27 | export OWL_ASYNC_WHISPER_HF_TOKEN=
28 | ```
29 |
30 | 2. **Clone Repository**
31 |
32 | Clone the Owl repository from GitHub:
33 | ```
34 | git clone https://github.com/OwlAIProject/Owl.git
35 | cd Owl
36 | ```
37 |
38 | 3. **Install Dependencies**
39 |
40 | Install the required OS dependencies:
41 | ```
42 | # on Ubuntu or Debian
43 | sudo apt install portaudio19-dev python3-pyaudio
44 |
45 | # on macOS
46 | brew install portaudio
47 | ```
48 | Use Poetry to install the required dependencies:
49 | ```
50 | poetry install
51 | ```
52 | 4. **Activate Environment**
53 |
54 | Activate the virtual environment created by Poetry:
55 | ```
56 | poetry shell
57 | ```
58 | 5. **Start the Server**
59 |
60 | Launch the Owl server:
61 | ```
62 | owl serve --web
63 | ```
64 |
65 | You can now access the web interface at `http://localhost:3000`. Start testing captures with a microphone or Bluetooth devices. You can also build the iOS app and test captures via the Apple Watch or Bluetooth devices through your iPhone.
66 |
67 | ## Using Commercial Models
68 |
69 | If you prefer using commercial models for transcription and summarization, set up the following environment variables instead of the PyAnnote setup:
70 |
71 | - For Deepgram:
72 | ```
73 | export OWL_STREAMING_TRANSCRIPTION_PROVIDER=deepgram
74 | export OWL_ASYNC_TRANSCRIPTION_PROVIDER=deepgram
75 | export OWL_DEEPGRAM_API_KEY=
76 | ```
77 |
78 | - For GPT-4 Turbo:
79 | ```
80 | export OWL_LLM_MODEL=gpt-4-turbo-preview
81 | export OWL_LLM_API_BASE_URL=https://api.openai.com/v1
82 | export OWL_LLM_API_KEY=
83 | ```
84 |
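85 | Conversely, to run everything locally with Ollama (see Prerequisites), the LLM can be pointed at a local model. A minimal sketch, assuming the same `OWL_LLM_*` variables shown above also accept local models; the values mirror the defaults in `owl/sample_config.yaml`:
86 | ```
87 | export OWL_LLM_MODEL=ollama/mistral:instruct
88 | export OWL_LLM_API_BASE_URL=http://localhost:11434
89 | ```
90 |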
91 | [<< Home](../README.md)
92 |
--------------------------------------------------------------------------------
/docs/sony_spresense_setup.md:
--------------------------------------------------------------------------------
1 | # Owl - Always-on Wearable AI
2 |
3 | [<< Home](../README.md)
4 |
5 | ## Sony Spresense Board Setup and User Guide
6 |
7 | TODO
8 |
9 | [<< Home](../README.md)
10 |
--------------------------------------------------------------------------------
/docs/windows_setup.md:
--------------------------------------------------------------------------------
1 | # Owl - Always-on Wearable AI
2 |
3 | [<< Home](../README.md)
4 |
5 | ## Windows Setup
6 |
7 | To install the server, first clone the git repository to a directory on disk and then perform these steps:
8 |
9 | - [Anaconda](https://www.anaconda.com/download) is the recommended way to manage your Python environment. Install it first.
10 | - Open an Anaconda Command Prompt and create a new Python 3.11 environment named `owl` with this command: `conda create -n owl python=3.11`
11 | - Switch to the environment: `conda activate owl`
12 | - In the root of the source tree, where `requirements-windows.txt` is, install the required packages: `pip install -r requirements-windows.txt`
13 | - FFmpeg is used by the server to convert audio formats. [Install it](https://ffmpeg.org/download.html) and ensure it is in the path and can be run from the command line as `ffmpeg`.
14 | - Test that you can run the server by issuing this command from the root directory, which will print usage instructions: `python -m owl.core.cli --help`
15 |
16 | Once installed, you will also need to [configure the server](./server_configuration.md) (API tokens, etc.).
17 |
18 | To run the server:
19 |
20 | - Open up an Anaconda Command Prompt and switch to the `owl` environment, if you have not already done so: `conda activate owl`
21 | - Start the server with the `serve` command and make sure to specify the host as `0.0.0.0` so it is accessible remotely: `python -m owl.core.cli serve --host=0.0.0.0`. By default, the configuration in `owl/sample_config.yaml` is used, but this may be overridden with the `--config` option. It will likely be necessary to create a customized `config.yaml` based on the sample, in which case the command line would change to: `python -m owl.core.cli serve --host=0.0.0.0 --config=config.yaml`.
22 |
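23 | Putting the steps above together, a typical install-and-run session in an Anaconda Command Prompt looks roughly like this (run from the root of the source tree, assuming a customized `config.yaml` as described above):
24 | ```
25 | conda create -n owl python=3.11
26 | conda activate owl
27 | pip install -r requirements-windows.txt
28 | python -m owl.core.cli --help
29 | python -m owl.core.cli serve --host=0.0.0.0 --config=config.yaml
30 | ```
31 |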
32 | [<< Home](../README.md)
33 |
--------------------------------------------------------------------------------
/owl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/__init__.py
--------------------------------------------------------------------------------
/owl/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/core/__init__.py
--------------------------------------------------------------------------------
/owl/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .async_multiprocessing_queue import AsyncMultiprocessingQueue
2 | from .hexdump import hexdump
--------------------------------------------------------------------------------
/owl/core/utils/async_multiprocessing_queue.py:
--------------------------------------------------------------------------------
1 | #
2 | # async_multiprocessing_queue.py
3 | #
4 | # A wrapper around a multiprocessing.Queue that provides an async interface.
5 | #
6 |
7 | import asyncio
8 | from multiprocessing import Queue
9 | from queue import Empty, Full
10 |
11 |
12 | class AsyncMultiprocessingQueue:
13 | """
14 | Async wrapper for multiprocessing.Queue.
15 | """
16 |
17 | _sleep: float = 0
18 |
19 | def __init__(self, queue: Queue):
20 | """
21 | Instantiates an asynchronous interface to a multiprocessing.Queue.
22 |
23 | Parameters
24 | ----------
25 | queue: multiprocessing.Queue
26 | Underlying multiprocessing.Queue to wrap.
27 | """
28 | self._q = queue
29 |
30 | async def get(self):
31 | while True:
32 | try:
33 | return self._q.get_nowait()
34 | except Empty:
35 | await asyncio.sleep(self._sleep)
36 |
37 | async def put(self, item):
38 | while True:
39 | try:
40 | self._q.put_nowait(item)
41 | return None
42 | except Full:
43 | await asyncio.sleep(self._sleep)
44 |
45 | def task_done(self):
46 | self._q.task_done()
47 |
48 | def underlying_queue(self) -> Queue:
49 | return self._q
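50 |
51 | # Usage sketch (illustrative): wrap a queue shared with a worker process and await it from
52 | # asyncio code, e.g.:
53 | #
54 | #   queue = AsyncMultiprocessingQueue(Queue())
55 | #   await queue.put(item)
56 | #   item = await queue.get()
57 | #
58 | # Note that task_done() only works when the wrapped queue is a multiprocessing.JoinableQueue,
59 | # because a plain multiprocessing.Queue has no task_done() method.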
--------------------------------------------------------------------------------
/owl/core/utils/hexdump.py:
--------------------------------------------------------------------------------
1 | def hexdump(bytes, bytes_per_line = 16, offset_size = 2):
2 | """
3 | Prints a byte buffer as a human-readable hexadecimal dump.
4 |
5 | Parameters
6 | ----------
7 | bytes : bytes
8 | Buffer to dump.
9 | bytes_per_line : int
10 | How many bytes to print per line.
11 | offset_size : int
12 | Size of the location offset in the byte buffer in bytes. For example, if the buffer is less
13 | than 256 bytes, the offset need only be one byte long and can be set to 1. The offset is
14 | printed at the beginning of each line
15 | as a hexadecimal number with number of digits equal to twice offset_size.
16 | """
17 | offset_mask = int.from_bytes(bytes = [ 0xff ] * offset_size, byteorder = "big")
18 | offset_format = "%%0%dx" % (offset_size * 2)
19 | start = 0
20 | while start < len(bytes):
21 |         end = min(start + bytes_per_line, len(bytes))
22 | hex_output = " ".join([ ("%02x" % bytes[start + j]) for j in range(end - start) ])
23 | ascii_output = "".join([ ("%c" % bytes[start + j] if chr(bytes[start + j]).isprintable() else ".") for j in range(end - start) ])
24 | expected_hex_line_length = 3 * bytes_per_line
25 | hex_padding = " " * (expected_hex_line_length - len(hex_output))
26 | ascii_padding = " " * (bytes_per_line - len(ascii_output))
27 | offset = offset_format % (start & offset_mask)
28 | print("%s: %s%s [ %s%s ]" % (offset, hex_output, hex_padding, ascii_output, ascii_padding))
29 | start += bytes_per_line
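30 |
31 | # Example (illustrative):
32 | #   hexdump(b"hello", bytes_per_line=8, offset_size=1)
33 | # prints a single line along the lines of:
34 | #   00: 68 65 6c 6c 6f          [ hello    ]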
--------------------------------------------------------------------------------
/owl/core/utils/suppress_output.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import os
3 | import sys
4 |
5 |
6 | @contextlib.contextmanager
7 | def suppress_output():
8 | # Redirect stdout and stderr to /dev/null
9 | with open(os.devnull, 'w') as devnull:
10 | original_stdout, original_stderr = sys.stdout, sys.stderr
11 | sys.stdout, sys.stderr = devnull, devnull
12 | try:
13 | yield
14 | finally:
15 | # Restore stdout and stderr
16 | sys.stdout, sys.stderr = original_stdout, original_stderr
--------------------------------------------------------------------------------
/owl/database/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/database/__init__.py
--------------------------------------------------------------------------------
/owl/database/database.py:
--------------------------------------------------------------------------------
1 | from sqlmodel import SQLModel, create_engine
2 | from sqlalchemy.orm import sessionmaker, scoped_session
3 | from ..core.config import DatabaseConfiguration
4 | from alembic.config import Config
5 | from alembic import command
6 |
7 | class Database:
8 | def __init__(self, config: DatabaseConfiguration):
9 | self.engine = create_engine(
10 | config.url,
11 | pool_size=50,
12 | max_overflow=100,
13 | echo=False,
14 | pool_timeout=30,
15 | pool_recycle=1800
16 | )
17 | self.session_factory = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
18 | self.SessionLocal = scoped_session(self.session_factory)
19 |
20 | def init_db(self):
21 | alembic_cfg = Config("./alembic.ini")
22 | alembic_cfg.set_main_option('sqlalchemy.url', str(self.engine.url))
23 | command.upgrade(alembic_cfg, "head")
24 |
25 | def get_db(self):
26 | db = self.SessionLocal()
27 | try:
28 | yield db
29 | finally:
30 | db.close()
31 | self.SessionLocal.remove()
--------------------------------------------------------------------------------
/owl/devices/__init__.py:
--------------------------------------------------------------------------------
1 | from .device_type import DeviceType
--------------------------------------------------------------------------------
/owl/devices/device_type.py:
--------------------------------------------------------------------------------
1 | from enum import Enum, EnumMeta
2 |
3 | class DeviceTypeMeta(EnumMeta):
4 | def __contains__(cls, item):
5 | # This allows us to test e.g. ("apple_watch" in DeviceType)
6 | try:
7 | cls(item)
8 | except ValueError:
9 | return False
10 | return True
11 |
12 | class DeviceType(Enum, metaclass=DeviceTypeMeta):
13 | """
14 | Short-form unique identifiers for supported capture devices. Must be consistent with all client
15 | software. Names must be usable in filepaths.
16 | """
17 | UNKNOWN = "unknown_device"
18 | IPHONE = "iphone"
19 | APPLE_WATCH = "apple_watch"
20 | XIAO_ESP32S3_SENSE = "xiao_esp32s3_sense"
21 | SONY_SPRESENSE = "spresense"
22 | WEB = "web"
23 | ANDROID = "android"
--------------------------------------------------------------------------------
/owl/files/__init__.py:
--------------------------------------------------------------------------------
1 | from .capture_directory import CaptureDirectory
2 | from .wav_file import append_to_wav_file
3 | from .aac_frame_sequencer import AACFrameSequencer
--------------------------------------------------------------------------------
/owl/files/aac_frame_sequencer.py:
--------------------------------------------------------------------------------
1 | #
2 | # aac_frame_sequencer.py
3 | #
4 | # Looks for and extracts complete frames from an ADTS AAC stream.
5 | #
6 | # Useful resources:
7 | # - https://wiki.multimedia.cx/index.php/ADTS
8 | # - https://android.googlesource.com/platform/frameworks/av/+/jb-dev/media/libstagefright/codecs/aacdec/get_adts_header.cpp
9 | #
10 |
11 | from typing import Tuple
12 |
13 |
14 | class AACFrameSequencer:
15 | def __init__(self):
16 | self._buffer = bytes()
17 |
18 | def get_next_frames(self, received_bytes: bytes) -> bytes:
19 | self._buffer += received_bytes
20 | output_frames = bytes()
21 | while True:
22 | found_header, advance_to_idx = self._find_next_header_candidate()
23 | self._buffer = self._buffer[advance_to_idx:]
24 | if not found_header:
25 | break
26 | frame_length = self._get_frame_length()
27 | if frame_length > len(self._buffer):
28 | break
29 | output_frames += self._buffer[0:frame_length]
30 | self._buffer = self._buffer[frame_length:]
31 | return output_frames
32 |
33 | def _find_next_header_candidate(self) -> Tuple[bool, int]:
34 | for i in range(len(self._buffer)):
35 | # Search for the 12 sync bits (FF Fx) followed by enough bytes to decode header
36 | if self._buffer[i] == 0xff:
37 | # Check if header is present in subsequent bytes, otherwise we have to stop at the
38 | # first 0xff for now
39 | bytes_remaining = len(self._buffer) - i
40 | if bytes_remaining < 7:
41 | return (False, i) # not sure yet but safe to discard preceding bytes
42 | if self._buffer[i + 1] & 0xf0 == 0xf0:
43 | # Maybe! Need to verify some more information
44 | layer = (self._buffer[i + 1] >> 1) & 3
45 | mp4_sampling_frequency_index = (self._buffer[i + 2] >> 2) & 0xf
46 | if layer == 0 and mp4_sampling_frequency_index == 8:
47 | # Layer 0 and 16KHz sampling -> looks correct
48 | return (True, i) # found it
49 | return (False, i + 2) # invalid header, skip past these false sync bits
50 |
51 | # Not found, safe to discard everything
52 | return (False, len(self._buffer))
53 |
54 | def _get_frame_length(self):
55 | assert len(self._buffer) >= 7
56 | return ((self._buffer[3] & 0x03) << 11) | (self._buffer[4] << 3) | ((self._buffer[5] >> 5) & 0x07)
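57 |
58 | # Usage sketch (illustrative): feed received bytes in as they arrive and forward only the
59 | # complete ADTS frames accumulated so far, e.g.:
60 | #
61 | #   sequencer = AACFrameSequencer()
62 | #   frames = sequencer.get_next_frames(chunk)  # may be empty until a full frame has arrived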
--------------------------------------------------------------------------------
/owl/files/realtime_audio_converter.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | class RealtimeAudioConverter:
4 | def __init__(self, ffmpeg_command, chunk_size=512):
5 | self._ffmpeg_command = ffmpeg_command
6 | self._chunk_size = chunk_size
7 | self._process = None
8 | self._stdin = None
9 | self._stdout = None
10 |
11 | async def start(self):
12 | """Start the ffmpeg process with asyncio subprocess and prepare non-blocking streams."""
13 | self._process = await asyncio.create_subprocess_exec(
14 | *self._ffmpeg_command,
15 | stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.DEVNULL)
16 | self._stdin = self._process.stdin
17 | self._stdout = self._process.stdout
18 |
19 | async def feed_input_chunk(self, input_chunk):
20 | """Feed an audio data chunk to ffmpeg's stdin asynchronously."""
21 | if self._stdin:
22 | self._stdin.write(input_chunk)
23 | await self._stdin.drain()
24 |
25 | async def close_input(self):
26 | """Close ffmpeg's stdin to signal that no more data will be sent."""
27 | if self._stdin:
28 | self._stdin.close()
29 | await self._stdin.wait_closed()
30 |
31 | async def read_output_chunk(self):
32 | """Asynchronously read and return a chunk of converted audio from ffmpeg's stdout."""
33 | return await self._stdout.read(self._chunk_size)
34 |
35 | async def cleanup(self):
36 | """Wait for the ffmpeg process to exit and perform cleanup."""
37 | await self._process.wait()
38 |
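39 | # Usage sketch (illustrative; the ffmpeg arguments are only an example):
40 | #
41 | #   converter = RealtimeAudioConverter(["ffmpeg", "-i", "pipe:0", "-f", "wav", "pipe:1"])
42 | #   await converter.start()
43 | #   await converter.feed_input_chunk(chunk)      # repeat as encoded audio arrives
44 | #   data = await converter.read_output_chunk()   # repeat until b"" (EOF) is returned
45 | #   await converter.close_input()
46 | #   await converter.cleanup()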
--------------------------------------------------------------------------------
/owl/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/models/__init__.py
--------------------------------------------------------------------------------
/owl/models/bing.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from pydantic import BaseModel, HttpUrl
3 |
4 | # Bing Search API Response Models
5 |
6 | class RichFactItem(BaseModel):
7 | text: str
8 |
9 | class RichFact(BaseModel):
10 | label: Optional[RichFactItem] = None
11 | items: List[RichFactItem] = []
12 | hint: Optional[RichFactItem] = None
13 |
14 | class WebPage(BaseModel):
15 | id: HttpUrl
16 | name: str
17 | url: HttpUrl
18 | isFamilyFriendly: bool
19 | displayUrl: HttpUrl
20 | snippet: str
21 | dateLastCrawled: str
22 | language: str
23 | isNavigational: bool
24 | richFacts: Optional[List[RichFact]] = None
25 |
26 | class WebPages(BaseModel):
27 | webSearchUrl: HttpUrl
28 | totalEstimatedMatches: int
29 | value: List[WebPage]
30 |
31 | class BingSearchResponse(BaseModel):
32 | _type: str
33 | queryContext: dict
34 | webPages: WebPages
--------------------------------------------------------------------------------
/owl/models/datetime_serialization.py:
--------------------------------------------------------------------------------
1 | #
2 | # datetime_serialization.py
3 | #
4 | # Serialization of datetimes into strings in a standardized way for the server and display clients.
5 | # This is only intended for serializing database objects and not e.g., the capture directory, which
6 | # uses a different format.
7 | #
8 | # TODO:
9 | # -----
10 | # - We should probably standardize on %Y%m%d-%H%M%S.%f, like the capture files, instead.
11 | #
12 |
13 | from datetime import datetime
14 |
15 |
16 | def datetime_string(timestamp: datetime) -> str:
17 | return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
--------------------------------------------------------------------------------
/owl/prompts/__init__.py:
--------------------------------------------------------------------------------
1 | from .summarization import summarization_system_message
2 | from .summarization import short_summarization_system_message
3 | from .suggestion import suggest_links_system_message
--------------------------------------------------------------------------------
/owl/prompts/suggestion.py:
--------------------------------------------------------------------------------
1 | from ..core.config import Configuration
2 |
3 | def suggest_links_system_message(config: Configuration) -> str:
4 | return f"""
5 | You are the world's most advanced AI assistant. You are given the transcript of an interaction. One
6 | of the participants is your client. Their name is {config.user.name}. Your task is to generate a
7 | rich search query based on the summary of the interaction. You want to optimize the search query to
8 | get the most interesting and relevant links for {config.user.name}. IMPORTANT: Try and make your
9 | search query about a single subject that is most relevant to the interaction. Make it as specific
10 | as possible and only pick one subject. Don't include {config.user.name}'s name in the query; just
11 | output the query and nothing else. VERY IMPORTANT: You must just output the search engine query
12 | without any prefix and nothing else!""".replace("\n", " ")
--------------------------------------------------------------------------------
/owl/prompts/summarization.py:
--------------------------------------------------------------------------------
1 | from ..core.config import Configuration
2 |
3 | def summarization_system_message(config: Configuration) -> str:
4 | return f"""
5 | You are the world's most advanced AI assistant. You are given the transcript of an interaction. One
6 | of the participants is your client. Their name is {config.user.name}. The transcript includes
7 | speaker ids, but unfortunately sometimes we don't know the specific person name and sometimes they
8 | can be mislabeled. Do your best to infer the participants based on the context, but never refer
9 | to the speaker ids in the summary because they alone are not useful. Your job is to return a short
10 | summary of the interaction on behalf of {config.user.name} so they can remember what was
11 | happening. This is for {config.user.name}'s memories so please include anything that might be
12 | useful but also make it narrative so that it's helpful for creating a cherished memory. Format your
13 | summary with the following sections: Summary, Atmosphere, Key Takeaways (bullet points)""".replace("\n", " ")
14 |
15 | def short_summarization_system_message(config: Configuration) -> str:
16 | return f"""
17 | You are the world's most advanced AI assistant. You are given the transcript of an interaction. One
18 | of the participants is your client. Their name is {config.user.name}. The transcript includes
19 | speaker ids, but unfortunately sometimes we don't know the specific person name and sometimes they
20 | can be mislabeled. Do your best to infer the participants based on the context, but never refer
21 | to the speaker ids in the summary because they alone are not useful. Your job is to return a one
22 | sentence summary of the interaction on behalf of {config.user.name}. It should capture the
23 | overall significance of the interaction but not exceed one sentence.""".replace("\n", " ")
24 |
--------------------------------------------------------------------------------
/owl/sample_config.yaml:
--------------------------------------------------------------------------------
1 | #
2 | # sample_config.yaml
3 | #
4 | # It is recommended you copy this up one level to the root of the source tree, from where the server
5 | # will be run, and rename to config.yaml. Change settings as needed and specify
6 | # --config=config.yaml when running the server.
7 | #
8 |
9 | # Configure this with your name and create a secure token (which must also be entered into the
10 | # e.g. iOS client app)
11 | user:
12 | name: "Bob"
13 | client_token: your_own_secret_token
14 |
15 | # Choose an LLM. To use OpenAI models, 'api_key' must be valid and 'api_base_url' must remain blank.
16 | llm:
17 | # Local mistral
18 | model: ollama/mistral:instruct
19 | api_base_url: http://localhost:11434
20 | api_key:
21 |
22 | # OpenAI
23 | #
24 | # model: gpt-4-1106-preview
25 | # api_base_url:
26 | # api_key: your_llm_api_key_if_needed
27 |
28 |
29 | # Which provider to use for final transcription of captures.
30 | async_transcription:
31 | provider: "whisper"
32 | #provider: "deepgram"
33 |
34 | # Which provider to use for streaming transcription.
35 | streaming_transcription:
36 | provider: "whisper"
37 | # provider: "deepgram"
38 |
39 | # Deepgram configuration if using Deepgram for transcription.
40 | deepgram:
41 | api_key: ""
42 | model: "nova-2"
43 | language: "en-US"
44 |
45 | # Whisper model configuration if using Whisper for transcription.
46 | async_whisper:
47 | host: "127.0.0.1"
48 | port: 8010
49 | hf_token: your_hugging_face_token
50 | device: cpu
51 | compute_type: int8
52 | batch_size: 16
53 | model: tiny
54 | verification_threshold: 0.1
55 | verification_model_source: speechbrain/spkrec-ecapa-voxceleb
56 | verification_model_savedir: pretrained_models/spkrec-ecapa-voxceleb
57 |
58 | streaming_whisper:
59 | host: "127.0.0.1"
60 | port: 8009
61 | model: "small"
62 | language: "en"
63 | silero_sensitivity: 0.4
64 | webrtc_sensitivity: 2
65 | post_speech_silence_duration: 0.5
66 |
67 | captures:
68 | capture_dir: captures
69 |
70 | vad:
71 | vad_model_savedir: pretrained_models/vad
72 |
73 | database:
74 | url: "sqlite:///./db.sqlite3"
75 |
76 | conversation_endpointing:
77 | timeout_seconds: 300
78 | min_utterances: 2
79 |
80 | notification:
81 | apn_team_id: ""
82 |
83 | # Enable for LTE-M boards
84 | udp:
85 | enabled: false
86 | host: '0.0.0.0'
87 | port: 8001
88 |
89 | # To enable web search
90 | # bing:
91 | # subscription_key: your_bing_subscription_service_key
--------------------------------------------------------------------------------
/owl/server/__init__.py:
--------------------------------------------------------------------------------
1 | from .app_state import AppState
2 | from .main import create_server_app
--------------------------------------------------------------------------------
/owl/server/app_state.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # required for AppState annotation in AppState.get()
2 | from dataclasses import dataclass, field
3 | from typing import Dict
4 | from fastapi import FastAPI, HTTPException, Request, Depends, Header
5 | from typing import Optional
6 | from ..core.config import Configuration
7 | from ..services import CaptureService, ConversationService, LLMService, NotificationService, BingSearchService
8 | from .streaming_capture_handler import StreamingCaptureHandler
9 | from ..database.database import Database
10 | from ..services import ConversationDetectionService
11 | from queue import Queue
12 |
13 | @dataclass
14 | class AppState:
15 | """
16 | Server application state.
17 | """
18 |
19 | config: Configuration
20 |
21 | database: Database
22 | capture_service: CaptureService
23 | conversation_service: ConversationService
24 | llm_service: LLMService
25 | notification_service: NotificationService
26 | bing_search_service: BingSearchService
27 |
28 | capture_handlers: Dict[str, StreamingCaptureHandler] = field(default_factory=lambda: {})
29 | conversation_detection_service_by_id: Dict[str, ConversationDetectionService] = field(default_factory=lambda: {})
30 |
31 | task_queue = Queue()
32 |
33 | @staticmethod
34 | def get(from_obj: FastAPI | Request) -> AppState:
35 | if isinstance(from_obj, FastAPI):
36 | return from_obj.state._app_state
37 | elif isinstance(from_obj, Request):
38 | return from_obj.app.state._app_state
39 | else:
40 | raise TypeError("`from_obj` must be of type `FastAPI` or `Request`")
41 |
42 | @staticmethod
43 | def get_db(request: Request):
44 | app_state: AppState = AppState.get(request)
45 | return next(app_state.database.get_db())
46 |
47 | @staticmethod
48 | async def _parse_and_verify_token(authorization: str, expected_token: str):
49 | if not authorization:
50 | raise HTTPException(status_code=401, detail="Authorization header missing")
51 |
52 | parts = authorization.split()
53 | if len(parts) != 2 or parts[0].lower() != 'bearer':
54 | raise HTTPException(status_code=401, detail="Invalid token type")
55 |
56 | token = parts[1]
57 | if token != expected_token:
58 | raise HTTPException(status_code=403, detail="Invalid or expired token")
59 |
60 | @staticmethod
61 | async def authenticate_request(request: Request, authorization: Optional[str] = Header(None)):
62 | app_state: AppState = AppState.get(request)
63 | await AppState._parse_and_verify_token(authorization, app_state.config.user.client_token)
64 | return app_state
65 |
66 | @staticmethod
67 | async def authenticate_socket(environ: dict):
68 | headers = {k.decode('utf-8').lower(): v.decode('utf-8') for k, v in environ.get('asgi.scope', {}).get('headers', [])}
69 | authorization = headers.get('authorization')
70 | app_state: AppState = AppState.get(environ['asgi.scope']['app'])
71 | await AppState._parse_and_verify_token(authorization, app_state.config.user.client_token)
72 | return app_state
--------------------------------------------------------------------------------
/owl/server/capture_socket.py:
--------------------------------------------------------------------------------
1 | #
2 | # capture_socket.py
3 | #
4 | # Socket handlers for streaming audio capture.
5 | #
6 | # Using namespace objects to implement socketio event handlers:
7 | # https://python-socketio.readthedocs.io/en/latest/server.html#class-based-namespaces
8 | #
9 | import asyncio
10 | import os
11 | import logging
12 | from fastapi import FastAPI
13 | import socketio
14 |
15 | from .streaming_capture_handler import StreamingCaptureHandler
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 | class CaptureSocketApp(socketio.AsyncNamespace):
20 | def __init__(self, app_state):
21 | super().__init__(namespace="*")
22 | self._app_state = app_state
23 | self._sio = socketio.AsyncServer(async_mode='asgi', cors_allowed_origins='*')
24 | self._app = socketio.ASGIApp(self._sio)
25 | self._sio.register_namespace(self)
26 | self._processing_task = None
27 |
28 | def mount_to(self, app: FastAPI, at_path: str):
29 | app.mount(path=at_path, app=self._app)
30 |
31 | async def on_connect(self, path, sid, environ):
32 | logger.info(f'Connected: {sid}')
33 | try:
34 | await self._app_state.authenticate_socket(environ)
35 | except ValueError as e:
36 | logger.error(f"Authentication failed for {sid}: {e}")
37 | await self._sio.disconnect(sid)
38 | return False
39 |
40 | async def on_disconnect(self, path, sid, *args):
41 | logger.info(f'Disconnected: {sid}')
42 |
43 | async def on_audio_data(self, path, sid, binary_data, device_name, capture_uuid, file_extension="aac", *args):
44 | if capture_uuid not in self._app_state.capture_handlers:
45 | self._app_state.capture_handlers[capture_uuid] = StreamingCaptureHandler(
46 | self._app_state, device_name, capture_uuid, file_extension
47 | )
48 |
49 | capture_handler = self._app_state.capture_handlers[capture_uuid]
50 |
51 | await capture_handler.handle_audio_data(binary_data)
52 |
53 | async def on_finish_audio(self, path, sid, capture_uuid, *args):
54 | logger.info(f"Client signalled end of audio stream for {capture_uuid}")
55 | if capture_uuid not in self._app_state.capture_handlers:
56 | logger.error(f"Capture session not found: {capture_uuid}")
57 | return
58 | capture_handler = self._app_state.capture_handlers[capture_uuid]
59 | capture_handler.finish_capture_session()
60 |
61 | async def emit_message(self, event, message):
62 | print(f"emit_message message: {event} {message}")
63 | await self._sio.emit(event, message)
64 |
--------------------------------------------------------------------------------
/owl/server/task.py:
--------------------------------------------------------------------------------
1 | #
2 | # task.py
3 | #
4 | # Abstract base class for a background server task. These are held in a queue in the AppState
5 | # object.
6 | #
7 |
8 | from __future__ import annotations
9 | from abc import ABC, abstractmethod
10 | from typing import TYPE_CHECKING
11 |
12 | if TYPE_CHECKING: # see: https://stackoverflow.com/questions/39740632/python-type-hinting-without-cyclic-imports
13 | from .app_state import AppState
14 |
15 |
16 | class Task(ABC):
17 | @abstractmethod
18 | async def run(self, app_state: AppState):
19 | pass
--------------------------------------------------------------------------------
/owl/server/udp_capture_socket.py:
--------------------------------------------------------------------------------
1 | #
2 | # udp_capture_socket.py
3 | #
4 | # UDP socket handlers for streaming audio capture.
5 | #
6 | # UDP is necessary for some LTE-M boards, such as the Spresense, where bandwidth is limited.
7 | #
8 |
9 | import asyncio
10 | import uuid
11 | from .app_state import AppState
12 | from .streaming_capture_handler import StreamingCaptureHandler
13 | import logging
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 | class UDPCaptureSocketApp(asyncio.DatagramProtocol):
18 | def __init__(self, app_state: AppState, timeout_seconds=5):
19 | self._app_state = app_state
20 | self._transport = None
21 | self._capture_uuid = None
22 | self._timeout_seconds = timeout_seconds
23 | self._timeout_handle = None
24 |
25 | def connection_made(self, transport: asyncio.DatagramTransport) -> None:
26 | self._transport = transport
27 |         # Create a new capture session with an id generated by the server since we don't have a UDP protocol for this
28 | self._capture_uuid = uuid.uuid1().hex
29 | self._app_state.capture_handlers[self._capture_uuid] = StreamingCaptureHandler(
30 | self._app_state, "spresense", self._capture_uuid, "mp3"
31 | )
32 |
33 | def datagram_received(self, data: bytes, addr):
34 | if self._timeout_handle:
35 | self._timeout_handle.cancel()
36 | self._timeout_handle = asyncio.get_running_loop().call_later(
37 | self._timeout_seconds, self.connection_timed_out)
38 |
39 | asyncio.create_task(self.send_info_to_client(data))
40 |
41 | async def send_info_to_client(self, data: bytes):
42 | await self._app_state.capture_handlers[self._capture_uuid].handle_audio_data(data)
43 |
44 | def connection_timed_out(self):
45 | if self._capture_uuid not in self._app_state.capture_handlers:
46 | logger.error(f"Capture session not found: {self._capture_uuid}")
47 | return
48 | capture_handler = self._app_state.capture_handlers[self._capture_uuid]
49 | capture_handler.finish_capture_session()
50 |
--------------------------------------------------------------------------------
/owl/services/__init__.py:
--------------------------------------------------------------------------------
1 | from .capture.capture_service import CaptureService
2 | from .conversation.conversation_service import ConversationService
3 | from .endpointing.chunking.conversation_detection_service import ConversationDetectionService
4 | from .notification.notification_service import NotificationService
5 | from .llm.llm_service import LLMService
6 | from .web_search.bing_search_service import BingSearchService
--------------------------------------------------------------------------------
/owl/services/capture/capture_service.py:
--------------------------------------------------------------------------------
1 | #
2 | # capture_service.py
3 | #
4 | # Manages capture file and capture segment (conversation) references. These are stored in the
5 | # database so that state can always be recovered.
6 | #
7 |
8 | from datetime import datetime
9 | import logging
10 | import os
11 |
12 | from ...database.database import Database
13 | from ...core.config import Configuration
14 | from ...devices import DeviceType
15 | from ...models.schemas import Capture
16 | from ...database.crud import create_capture_file_ref, get_capture_file_ref
17 | from ...files import CaptureDirectory
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 | class CaptureService:
22 | def __init__(self, config: Configuration, database: Database):
23 | self._config = config
24 | self._database = database
25 |
26 | def create_capture_file(self, capture_uuid: str, format: str, start_time: datetime, device_type: DeviceType | str) -> Capture:
27 | with next(self._database.get_db()) as db:
28 | # This method is only for creating new captures
29 | existing_capture_file_ref = get_capture_file_ref(db=db, capture_uuid=capture_uuid)
30 | assert existing_capture_file_ref is None
31 |
32 | # Parse device type
33 | assert isinstance(device_type, DeviceType) or isinstance(device_type, str)
34 | device: DeviceType = None
35 | if isinstance(device_type, str):
36 | device = DeviceType(device_type) if device_type in DeviceType else DeviceType.UNKNOWN
37 | else:
38 | device = device_type
39 |
40 | # Create and enter into database
41 | new_capture_file = Capture(
42 | capture_uuid=capture_uuid,
43 | filepath=CaptureDirectory(config=self._config).get_capture_filepath(capture_uuid=capture_uuid, format=format, start_time=start_time, device_type=device),
44 | device_type=device.value,
45 | start_time=start_time
46 | )
47 | saved_capture_file = create_capture_file_ref(db, new_capture_file)
48 |
49 | return saved_capture_file
50 |
51 | def get_capture_file(self, capture_uuid: str) -> Capture | None:
52 | with next(self._database.get_db()) as db:
53 | return get_capture_file_ref(db=db, capture_uuid=capture_uuid)
--------------------------------------------------------------------------------
/owl/services/conversation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/conversation/__init__.py
--------------------------------------------------------------------------------
/owl/services/conversation/transcript_summarizer.py:
--------------------------------------------------------------------------------
1 | from ...models.schemas import Transcription
2 | from ...services.llm.llm_service import LLMService
3 | from ...core.config import Configuration
4 | from ...prompts import summarization_system_message, short_summarization_system_message, suggest_links_system_message
5 |
6 | class TranscriptionSummarizer:
7 | def __init__(self, config: Configuration):
8 | self._config = config
9 | self._llm_service = LLMService(config.llm)
10 |
11 | async def summarize(self, transcription: Transcription) -> str:
12 | system_message = summarization_system_message(config=self._config)
13 |
14 | utterances = [f"{utterance.speaker}: {utterance.text}" for utterance in transcription.utterances]
15 | user_message = "Transcript:\n" + "\n".join(utterances)
16 |
17 | response = await self._llm_service.async_llm_completion(
18 | messages=[
19 | {"content": system_message, "role": "system"},
20 | {"content": user_message, "role": "user"}
21 | ]
22 | )
23 |
24 | return response.choices[0].message.content
25 |
26 | async def short_summarize(self, transcription: Transcription) -> str:
27 | system_message = short_summarization_system_message(config=self._config)
28 |
29 | utterances = [f"{utterance.speaker}: {utterance.text}" for utterance in transcription.utterances]
30 | user_message = "Transcript:\n" + "\n".join(utterances)
31 |
32 | response = await self._llm_service.async_llm_completion(
33 | messages=[
34 | {"content": system_message, "role": "system"},
35 | {"content": user_message, "role": "user"}
36 | ]
37 | )
38 |
39 | return response.choices[0].message.content
40 |
41 | async def get_query_from_summary(self, summary: str) -> str:
42 | system_message = suggest_links_system_message(config=self._config)
43 |
44 | response = await self._llm_service.async_llm_completion(
45 | messages=[
46 | {"content": system_message, "role": "system"},
47 | {"content": summary, "role": "user"}
48 | ]
49 | )
50 |
51 | return response.choices[0].message.content
--------------------------------------------------------------------------------
/owl/services/endpointing/streaming/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/endpointing/streaming/__init__.py
--------------------------------------------------------------------------------
/owl/services/endpointing/streaming/abstract_streaming_endpointing_service.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | # Temporary; to be replaced with VAD
4 | class AbstractStreamingEndpointingService(ABC):
5 |
6 | @abstractmethod
7 | async def utterance_detected(self):
8 | pass
9 |
10 |
--------------------------------------------------------------------------------
/owl/services/endpointing/streaming/streaming_endpointing_service.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from .abstract_streaming_endpointing_service import AbstractStreamingEndpointingService
3 |
4 | class StreamingEndpointingService(AbstractStreamingEndpointingService):
5 | def __init__(self, timeout_seconds: int, min_utterances: int, endpoint_callback=None):
6 | self.timeout_seconds = timeout_seconds
7 | self.min_utterances = min_utterances
8 | self.endpoint_callback = endpoint_callback
9 | self._utterance_count = 0
10 | self._last_utterance_time = None
11 | self._endpoint_called = False
12 | self._timeout_task = asyncio.create_task(self._check_timeout())
13 |
14 | async def utterance_detected(self):
15 | current_time = asyncio.get_event_loop().time()
16 | self._last_utterance_time = current_time
17 | self._utterance_count += 1
18 |
19 | async def _check_timeout(self):
20 | while True:
21 | await asyncio.sleep(1)
22 | if self._last_utterance_time is None:
23 | continue
24 |
25 | current_time = asyncio.get_event_loop().time()
26 | time_elapsed_since_last = current_time - self._last_utterance_time
27 |
28 | if (time_elapsed_since_last >= self.timeout_seconds and
29 | self._utterance_count >= self.min_utterances and
30 | not self._endpoint_called):
31 | if self.endpoint_callback:
32 | await self.endpoint_callback()
33 | self._reset()
34 |
35 | def _reset(self):
36 | self._utterance_count = 0
37 | self._last_utterance_time = None
38 | self._endpoint_called = False
39 | self._timeout_task.cancel()
40 | self._timeout_task = asyncio.create_task(self._check_timeout())
41 |
42 | def stop(self):
43 | if self._timeout_task:
44 | self._timeout_task.cancel()
45 |
--------------------------------------------------------------------------------
/owl/services/llm/llm_service.py:
--------------------------------------------------------------------------------
1 | #
2 | # llm_service.py
3 | #
4 | # LLM class: LLM abstraction layer. Performs LLM requests using a particular local or remote model.
5 | #
6 | from litellm import completion, acompletion
7 | from ...core.config import LLMConfiguration
8 | import logging
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 | class LLMService:
13 | def __init__(self, config: LLMConfiguration):
14 | self._config = config
15 | self._model = config.model
16 |
17 | def llm_completion(self, messages, stream=False):
18 | logger.info(f"LLM completion request for model {self._model}...")
19 | llm_params = {
20 | "model": self._model,
21 | "messages": messages,
22 | "stream": stream
23 | }
24 |
25 | if self._config.api_base_url:
26 | llm_params["api_base"] = self._config.api_base_url
27 | if self._config.api_key:
28 | llm_params["api_key"] = self._config.api_key
29 |
30 | return completion(**llm_params)
31 |
32 | async def async_llm_completion(self, messages):
33 | logger.info(f"LLM completion request for model {self._model}...")
34 | llm_params = {
35 | "model": self._model,
36 | "messages": messages
37 | }
38 |
39 | if self._config.api_base_url:
40 | llm_params["api_base"] = self._config.api_base_url
41 | if self._config.api_key:
42 | llm_params["api_key"] = self._config.api_key
43 |
44 | return await acompletion(**llm_params)
45 |
46 |
--------------------------------------------------------------------------------
/owl/services/notification/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/notification/__init__.py
--------------------------------------------------------------------------------
/owl/services/notification/notification_service.py:
--------------------------------------------------------------------------------
1 | from litellm import completion, acompletion
2 | from ...core.config import NotificationConfiguration
3 | import logging
4 |
5 | logger = logging.getLogger(__name__)
6 |
7 | class NotificationService:
8 | def __init__(self, config: NotificationConfiguration):
9 | self._config = config
10 | self.socket_app = None
11 |
12 | async def send_notification(self, title, body, type, payload=None):
13 | logger.info(f"Sending notification: {title} {body} {type} {payload}")
14 | if self.socket_app:
15 | await self.socket_app.emit_message(type, payload)
16 |
17 | async def emit_message(self, type: str, payload=None):
18 | await self.socket_app.emit_message(type, payload)
--------------------------------------------------------------------------------
/owl/services/stt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/stt/__init__.py
--------------------------------------------------------------------------------
/owl/services/stt/asynchronous/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/stt/asynchronous/__init__.py
--------------------------------------------------------------------------------
/owl/services/stt/asynchronous/abstract_async_transcription_service.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from ....models.schemas import Transcription
3 |
4 | class AbstractAsyncTranscriptionService(ABC):
5 |
6 | @abstractmethod
7 | async def transcribe_audio(self, main_audio_filepath, voice_sample_filepath=None, speaker_name=None) -> Transcription:
8 | pass
9 |
10 |
--------------------------------------------------------------------------------
/owl/services/stt/asynchronous/async_transcription_service_factory.py:
--------------------------------------------------------------------------------
1 | from .async_whisper_transcription_service import AsyncWhisperTranscriptionService
2 | from .async_deepgram_transcription_service import AsyncDeepgramTranscriptionService
3 | import logging
4 |
5 | logger = logging.getLogger(__name__)
6 |
7 | class AsyncTranscriptionServiceFactory:
8 | _instances = {}
9 |
10 | @staticmethod
11 | def get_service(config):
12 | service_type = config.async_transcription.provider
13 | if service_type not in AsyncTranscriptionServiceFactory._instances:
14 | logger.info(f"Creating new {service_type} asynchronous transcription service")
15 | if service_type == "whisper":
16 | AsyncTranscriptionServiceFactory._instances[service_type] = AsyncWhisperTranscriptionService(config.async_whisper)
17 | elif service_type == "deepgram":
18 | AsyncTranscriptionServiceFactory._instances[service_type] = AsyncDeepgramTranscriptionService(config.deepgram)
19 | else:
20 | raise ValueError(f"Unknown transcription service type: {service_type}")
21 |
22 | return AsyncTranscriptionServiceFactory._instances[service_type]
23 |
24 |
--------------------------------------------------------------------------------
/owl/services/stt/asynchronous/async_whisper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/stt/asynchronous/async_whisper/__init__.py
--------------------------------------------------------------------------------
/owl/services/stt/asynchronous/async_whisper_transcription_service.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from .abstract_async_transcription_service import AbstractAsyncTranscriptionService
3 | from ....models.schemas import Transcription, Utterance, Word
4 | from .async_whisper.async_whisper_transcription_server import TranscriptionResponse
5 | import logging
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 | class AsyncWhisperTranscriptionService(AbstractAsyncTranscriptionService):
10 | def __init__(self, config):
11 | self._config = config
12 | self.http_client = httpx.AsyncClient(timeout=None)
13 |
14 | async def transcribe_audio(self, main_audio_filepath, voice_sample_filepath=None, speaker_name=None):
15 | payload = {
16 | "main_audio_file_path": main_audio_filepath,
17 | "speaker_name": speaker_name,
18 | "voice_sample_filepath": voice_sample_filepath
19 | }
20 |
21 | url = f"http://{self._config.host}:{self._config.port}/transcribe/"
22 |
23 | try:
24 | logger.info(f"Sending request to local async whisper server at {url}...")
25 | response = await self.http_client.post(url, json=payload)
26 | response.raise_for_status()
27 | response_string = response.text
28 | logger.info(f"Received response from local async whisper server: {response_string}")
29 | transcript_response = TranscriptionResponse.model_validate_json(response_string)
30 | utterances = []
31 | logger.info(f"Transcription response: {transcript_response}")
32 | for whisper_utterance in transcript_response.utterances:
33 | utterance = Utterance(
34 | start=whisper_utterance.start,
35 | end=whisper_utterance.end,
36 | text=whisper_utterance.text,
37 | speaker=whisper_utterance.speaker,
38 | )
39 |
40 | utterance.words = [
41 | Word(
42 | word=whisper_word.word,
43 | start=whisper_word.start,
44 | end=whisper_word.end,
45 | score=whisper_word.score,
46 | speaker=whisper_word.speaker,
47 | ) for whisper_word in whisper_utterance.words
48 | ]
49 | utterances.append(utterance)
50 |
51 | transcript = Transcription(utterances=utterances)
52 | transcript.model = "whisper"
53 | logger.info(f"Transcription response: {transcript}")
54 | return transcript
55 |
56 | except httpx.HTTPStatusError as e:
57 | logger.error(f"Error response {e.response.status_code} while requesting {e.request.url!r}.")
58 | except httpx.RequestError as e:
59 | logger.error(f"An error occurred while requesting {e.request.url!r}.")
60 | except Exception as e:
61 | logger.error(f"An unexpected error occurred while requesting {url}: {e}")
--------------------------------------------------------------------------------
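
For reference, the JSON body that transcribe_audio above posts to the local whisper server's /transcribe/ endpoint has the shape below; the paths and speaker name are placeholders, and both optional fields may be null. The response text is then validated against TranscriptionResponse from async_whisper_transcription_server.

    payload = {
        "main_audio_file_path": "/path/to/capture.wav",   # recording to transcribe
        "speaker_name": "Bob",                            # optional, may be None
        "voice_sample_filepath": "/path/to/speaker.m4a",  # optional, may be None
    }
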
/owl/services/stt/streaming/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/stt/streaming/__init__.py
--------------------------------------------------------------------------------
/owl/services/stt/streaming/abstract_streaming_transcription_service.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | class AbstractStreamingTranscriptionService(ABC):
4 |
5 | @abstractmethod
6 | async def send_audio(self, audio_chunk):
7 | pass
8 |
9 | @abstractmethod
10 | def set_callback(self, callback):
11 | pass
12 |
13 | @abstractmethod
14 | def set_stream_format(self, stream_format):
15 | pass
16 |
--------------------------------------------------------------------------------
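
A sketch of a minimal concrete implementation of the abstract interface above. This logging-only service is illustrative and not part of the repository, and the way the callback is invoked is an assumption.

    import logging

    from owl.services.stt.streaming.abstract_streaming_transcription_service import AbstractStreamingTranscriptionService

    logger = logging.getLogger(__name__)

    class LoggingStreamingTranscriptionService(AbstractStreamingTranscriptionService):
        """Illustrative no-op service: logs chunk sizes instead of transcribing."""

        def __init__(self):
            self._callback = None
            self._stream_format = None

        async def send_audio(self, audio_chunk):
            logger.info(f"Received {len(audio_chunk)} bytes of audio ({self._stream_format})")
            if self._callback is not None:
                # A real service would pass transcribed utterances to the callback.
                self._callback("")

        def set_callback(self, callback):
            self._callback = callback

        def set_stream_format(self, stream_format):
            self._stream_format = stream_format
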
/owl/services/stt/streaming/streaming_transcription_service_factory.py:
--------------------------------------------------------------------------------
1 | from .streaming_deepgram_transcription_service import StreamingDeepgramTranscriptionService
2 | from .streaming_whisper_transcription_service import StreamingWhisperTranscriptionService
3 |
4 | import logging
5 |
6 | logger = logging.getLogger(__name__)
7 |
8 | class StreamingTranscriptionServiceFactory:
9 | _instances = {}
10 |
11 | @staticmethod
12 | def get_service(config, stream_format=None):
13 | service_type = config.streaming_transcription.provider
14 |
15 | if service_type not in StreamingTranscriptionServiceFactory._instances:
16 | logger.info(f"Creating new {service_type} streaming transcription service")
17 | if service_type == "deepgram":
18 |                 # Deepgram streaming services are intentionally not cached; create a new instance on every call
19 | return StreamingDeepgramTranscriptionService(config.deepgram, stream_format=stream_format)
20 | elif service_type == "whisper":
21 | StreamingTranscriptionServiceFactory._instances[service_type] = StreamingWhisperTranscriptionService(config.streaming_whisper, stream_format=stream_format)
22 | return StreamingTranscriptionServiceFactory._instances[service_type]
23 | else:
24 | raise ValueError(f"Unknown transcription service type: {service_type}")
25 |
26 | return StreamingTranscriptionServiceFactory._instances[service_type]
27 |
--------------------------------------------------------------------------------
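
A short sketch of obtaining a streaming service from the factory above and feeding it audio. The config parameter is the project's configuration object (loaded elsewhere), and the stream_format value and callback signature are illustrative assumptions. Note the asymmetry in the factory: deepgram services are created fresh on every call, while whisper services are cached.

    from owl.services.stt.streaming.streaming_transcription_service_factory import StreamingTranscriptionServiceFactory

    async def stream_audio(config, chunks):
        # For "deepgram" the factory returns a fresh service per call; for "whisper"
        # it returns a cached singleton.
        service = StreamingTranscriptionServiceFactory.get_service(config, stream_format="pcm_16000")

        def on_transcript(utterance):  # callback signature is an assumption
            print("Transcript:", utterance)

        service.set_callback(on_transcript)
        for chunk in chunks:
            await service.send_audio(chunk)
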
/owl/services/stt/streaming/streaming_whisper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/stt/streaming/streaming_whisper/__init__.py
--------------------------------------------------------------------------------
/owl/services/vad/time_segment.py:
--------------------------------------------------------------------------------
1 | #
2 | # time_segment.py
3 | #
4 | # Data structure for defining a segment of time where the units can be represented as integers
5 | # (samples, milliseconds). Used by VAD and conversation endpointing to represent voiced segments and
6 | # conversations, respectively.
7 | #
8 |
9 | from dataclasses import dataclass
10 |
11 |
12 | @dataclass
13 | class TimeSegment:
14 | """
15 | A segment of time, [start,end), in an audio waveform. Units may be milliseconds or sample
16 | indices.
17 | """
18 | start: int
19 | end: int
--------------------------------------------------------------------------------
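
A small usage example for TimeSegment. With the half-open [start, end) convention, duration is simply end - start; the 16 kHz sample rate used for the conversion below is illustrative.

    from owl.services.vad.time_segment import TimeSegment

    # A voiced segment expressed in sample indices at 16 kHz.
    segment = TimeSegment(start=16000, end=48000)

    duration_samples = segment.end - segment.start  # 32000 samples
    duration_ms = duration_samples * 1000 // 16000  # 2000 ms at 16 kHz

    # The same span expressed in milliseconds.
    segment_ms = TimeSegment(start=1000, end=3000)
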
/owl/services/web_search/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/owl/services/web_search/__init__.py
--------------------------------------------------------------------------------
/owl/services/web_search/bing_search_service.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from ...models.bing import BingSearchResponse
3 | from ...core.config import BingConfiguration
4 |
5 | class BingSearchService:
6 | def __init__(self, config: BingConfiguration):
7 | self._config = config
8 | self._base_url = "https://api.bing.microsoft.com/v7.0/search"
9 | self._headers = {"Ocp-Apim-Subscription-Key": self._config.subscription_key}
10 |
11 | async def search(self, query: str) -> BingSearchResponse:
12 | async with httpx.AsyncClient() as client:
13 | params = {"q": query}
14 | response = await client.get(self._base_url, headers=self._headers, params=params)
15 | response.raise_for_status()
16 | return BingSearchResponse(**response.json())
--------------------------------------------------------------------------------
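
A minimal sketch of calling the Bing search service above from async code. A SimpleNamespace stands in for BingConfiguration since only subscription_key is read; the key and query are placeholders, and a valid subscription key is required or raise_for_status will raise.

    import asyncio
    from types import SimpleNamespace

    from owl.services.web_search.bing_search_service import BingSearchService

    async def main():
        # Stand-in for the project's BingConfiguration.
        config = SimpleNamespace(subscription_key="YOUR_BING_SUBSCRIPTION_KEY")
        service = BingSearchService(config)
        response = await service.search("autonomous AI wearables")  # placeholder query
        print(response)

    if __name__ == "__main__":
        asyncio.run(main())
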
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "owl"
3 | version = "0.1.0"
4 | description = ""
5 | authors = ["Bart Trzynadlowski ", "Ethan Sutin "]
6 | readme = "README.md"
7 |
8 | [tool.poetry.dependencies]
9 | python = "^3.11"
10 | pydub = "^0.25.1"
11 | speechbrain = "^0.5.16"
12 | pandas = "^2.1.4"
13 | transformers = "^4.36.2"
14 | nltk = "^3.8.1"
15 | huggingface-hub = ">=0.13"
16 | tokenizers = ">=0.13,<0.16"
17 | onnxruntime = ">=1.14,<2"
18 | torch = ">=2"
19 | torchaudio = ">=2"
20 | pyannote-audio = "3.1.1"
21 | pydantic = "^2.5.3"
22 | click = "^8.1.7"
23 | torchvision = "^0.16.2"
24 | litellm = "^1.17.4"
25 | fastapi = "^0.109.0"
26 | uvicorn = "^0.25.0"
27 | asyncio = "^3.4.3"
28 | python-socketio = "^5.11.0"
29 | pydantic-yaml = "^1.2.0"
30 | sqlmodel = "^0.0.14"
31 | deepgram-sdk = "^3.1.0"
32 | python-multipart = "^0.0.6"
33 | webrtcvad = "^2.0.10"
34 | pyaudio = "^0.2.14"
35 | halo = "^0.0.31"
36 | alembic = "^1.13.1"
37 | faster-whisper = "^1.0.0"
38 | whisperx = {git = "https://github.com/m-bain/whisperx.git"}
39 |
40 | [tool.poetry.group.dev.dependencies]
41 | pytest = "^7.4.4"
42 |
43 | [tool.poetry.scripts]
44 | owl = "owl.core.cli:cli"
45 |
46 | [build-system]
47 | requires = ["poetry-core"]
48 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/requirements-windows.txt:
--------------------------------------------------------------------------------
1 | #
2 | # requirements-windows.txt
3 | #
4 | # pip requirements file for Windows. Unfortunately, poetry cannot properly resolve Windows-specific
5 | # dependencies, so a separate pip-based installation procedure must be used on Windows.
6 | #
7 | # Installation Guide: Anaconda
8 | # ----------------------------
9 | #
10 | # Anaconda is highly recommended for managing your Python installation and environment. Get it
11 | # here: https://www.anaconda.com/download
12 | #
13 | # 1. In an Anaconda shell, create an environment (e.g., "ai") with Python 3.11.
14 | #
15 | # conda create -n ai python=3.11
16 | #
17 | # 2. Activate it (do this each time you want to run the server, too).
18 | #
19 | # conda activate ai
20 | #
21 | # 3. Install required packages.
22 | #
23 | # pip install -r requirements-windows.txt
24 | #
25 | # Installation Guide: venv
26 | # ------------------------
27 | # 1. Ensure Python 3.11 is available (this is up to you).
28 | #
29 | # 2. Create an environment (we'll call it "ai_env" here).
30 | #
31 | # python -m venv ai_env
32 | #
33 | # 3. Activate the environment (do this each time you want to run the server).
34 | #
35 | # ai_env\scripts\activate
36 | #
37 | # 4. Install required packages.
38 | #
39 | # pip install -r requirements-windows.txt
40 | #
41 | # Running the Server
42 | # ------------------
43 | #
44 | # Run e.g.:
45 | #
46 | # python -m owl.core.cli --host=0.0.0.0 --config=config.yaml
47 | #
48 | pydub~=0.25.1
49 | speechbrain~=0.5.16
50 | whisperx @ git+https://github.com/m-bain/whisperx.git
51 | pandas~=2.1.4
52 | transformers~=4.36.2
53 | nltk~=3.8.1
54 | ctranslate2>=4.0,<5
55 | huggingface-hub >=0.13
56 | tokenizers >=0.13,<0.16
57 | onnxruntime >=1.14,<2
58 | faster-whisper @ git+https://github.com/SYSTRAN/faster-whisper.git@v1.0.0
59 | pyannote-audio==3.1.1
60 | pydantic~=2.5.3
61 | click~=8.1.7
62 | litellm~=1.17.4
63 | fastapi~=0.109.0
64 | uvicorn~=0.25.0
65 | asyncio~=3.4.3
66 | python-socketio~=5.11.0
67 | pydantic-yaml~=1.2.0
68 | sqlmodel~=0.0.14
69 | ray~=2.9.1
70 | deepgram-sdk~=3.1.0
71 | python-multipart~=0.0.6
72 | pvporcupine~=3.0.2
73 | webrtcvad~=2.0.10
74 | PyAudio~=0.2.14
75 | halo~=0.0.31
76 | alembic~=1.13.1
77 |
78 |
79 | # PyTorch CUDA 12.1. For a different CUDA version, change the index URL accordingly. Use the PyTorch
80 | # install tool if you aren't sure: https://pytorch.org/get-started/locally/
81 | --extra-index-url https://download.pytorch.org/whl/cu121
82 | torch >=2
83 | torchaudio >=2
84 | torchvision ~= 0.16.2
--------------------------------------------------------------------------------
/tests/data/audio/test_session.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/tests/data/audio/test_session.wav
--------------------------------------------------------------------------------
/tests/data/audio/test_speaker.m4a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OwlAIProject/Owl/91922655c3cbf0e5f3282c22806b5ac0cf30063e/tests/data/audio/test_speaker.m4a
--------------------------------------------------------------------------------
/tests/test_whisper_transcription_service.py:
--------------------------------------------------------------------------------
1 | import os
2 | from owl.services.transcription.whisper_transcription_service import transcribe_audio
3 |
4 | def test_transcribe_audio():
5 | test_dir = os.path.dirname(os.path.abspath(__file__))
6 |
7 | main_audio_filepath = os.path.join(test_dir, "data/audio/test_session.wav")
8 | speaker_verification_audio_path = os.path.join(test_dir, "data/audio/test_speaker.m4a")
9 |
10 | transcription_result = transcribe_audio(main_audio_filepath, speaker_verification_audio_path, "Bob")
11 |
12 | assert transcription_result is not None
13 |
14 | expected_transcriptions = [
15 | "Just testing now.",
16 | "Just testing now.",
17 | "That's all I'm doing."
18 | ]
19 | for idx, utterance in enumerate(transcription_result.utterances):
20 | expected_text = expected_transcriptions[idx]
21 | assert utterance.speaker == "Bob", f"Failed assertion at utterance {idx + 1}: Speaker mismatch."
22 | assert utterance.text.lstrip() == expected_text, f"Failed assertion at utterance {idx + 1}: Text mismatch."
23 |
24 |
--------------------------------------------------------------------------------