├── .gitignore ├── .swiftpm └── xcode │ ├── package.xcworkspace │ └── contents.xcworkspacedata │ └── xcshareddata │ └── xcschemes │ └── SakuraKit.xcscheme ├── LICENSE ├── Package.resolved ├── Package.swift ├── README.md ├── Sources └── SakuraKit │ ├── Events │ ├── ClientEvent.swift │ ├── ConversationItemCreateEvent.swift │ ├── ConversationItemDeleteEvent.swift │ ├── ConversationItemTruncateEvent.swift │ ├── EventType.swift │ ├── InputAudioBufferAppendEvent.swift │ ├── InputAudioBufferClearEvent.swift │ ├── InputAudioBufferCommitEvent.swift │ ├── ResponseCancelEvent.swift │ ├── ResponseCreateEvent.swift │ └── SessionUpdateEvent.swift │ └── Play │ ├── PlayAI.swift │ ├── PlayAIError.swift │ ├── PlayAIWebSocketAuthResponse.swift │ ├── PlayNote.swift │ └── PlayNoteID.swift └── Tests └── SakuraKitTests.swift /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | ## User settings 6 | xcuserdata/ 7 | 8 | ## Obj-C/Swift specific 9 | *.hmap 10 | 11 | ## App packaging 12 | *.ipa 13 | *.dSYM.zip 14 | *.dSYM 15 | 16 | ## Playgrounds 17 | timeline.xctimeline 18 | playground.xcworkspace 19 | 20 | # Swift Package Manager 21 | # 22 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 23 | # Packages/ 24 | # Package.pins 25 | # Package.resolved 26 | # *.xcodeproj 27 | # 28 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata 29 | # hence it is not needed unless you have added a package configuration file to your project 30 | # .swiftpm 31 | 32 | .build/ 33 | 34 | # CocoaPods 35 | # 36 | # We recommend against adding the Pods directory to your .gitignore. 
However 37 | # you should judge for yourself, the pros and cons are mentioned at: 38 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 39 | # 40 | # Pods/ 41 | # 42 | # Add this line if you want to avoid checking in source code from the Xcode workspace 43 | # *.xcworkspace 44 | 45 | # Carthage 46 | # 47 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 48 | # Carthage/Checkouts 49 | 50 | Carthage/Build/ 51 | 52 | # fastlane 53 | # 54 | # It is recommended to not store the screenshots in the git repo. 55 | # Instead, use fastlane to re-generate the screenshots whenever they are needed. 56 | # For more information about the recommended setup visit: 57 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 58 | 59 | fastlane/report.xml 60 | fastlane/Preview.html 61 | fastlane/screenshots/**/*.png 62 | fastlane/test_output 63 | -------------------------------------------------------------------------------- /.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/SakuraKit.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 9 | 10 | 16 | 22 | 23 | 24 | 25 | 26 | 32 | 33 | 35 | 41 | 42 | 43 | 44 | 45 | 55 | 57 | 63 | 64 | 65 | 66 | 72 | 74 | 80 | 81 | 82 | 83 | 85 | 86 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Rudrank Riyam 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "originHash" : "c8f24694ad7a02b92fd1bb40d2624fef259b0e4b78934c36c965a03b30d4f13a", 3 | "pins" : [ 4 | { 5 | "identity" : "swift-docc-plugin", 6 | "kind" : "remoteSourceControl", 7 | "location" : "https://github.com/apple/swift-docc-plugin", 8 | "state" : { 9 | "revision" : "85e4bb4e1cd62cec64a4b8e769dcefdf0c5b9d64", 10 | "version" : "1.4.3" 11 | } 12 | }, 13 | { 14 | "identity" : "swift-docc-symbolkit", 15 | "kind" : "remoteSourceControl", 16 | "location" : "https://github.com/swiftlang/swift-docc-symbolkit", 17 | "state" : { 18 | "revision" : "b45d1f2ed151d057b54504d653e0da5552844e34", 19 | "version" : "1.0.0" 20 | } 21 | } 22 | ], 23 | "version" : 3 24 | } 25 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // 
swift-tools-version: 6.0 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "SakuraKit", 8 | platforms: [ 9 | .iOS(.v16), 10 | .macOS(.v14), 11 | .tvOS(.v16), 12 | .watchOS(.v9), 13 | .visionOS(.v1) 14 | ], 15 | products: [ 16 | .library( 17 | name: "SakuraKit", 18 | targets: ["SakuraKit"]) 19 | ], 20 | dependencies: [ 21 | .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.0.0") 22 | ], 23 | targets: [ 24 | .target( 25 | name: "SakuraKit", 26 | dependencies: []), 27 | .testTarget( 28 | name: "SakuraKitTests", 29 | dependencies: ["SakuraKit"] 30 | ) 31 | ] 32 | ) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SakuraKit: Swift SDK for Prototyping AI Speech Generation 2 | 3 | SakuraKit is a Swift SDK designed for quickly prototyping speech-to-speech or text-to-speech features using different APIs, so you can build low-latency, multimodal experiences with ease. 4 | 5 | This SDK is named after the cherry blossoms (Sakura) to enjoy in Shibuya next year. 🌸 6 | 7 | ## Support 8 | 9 | Love this project? Check out my books to explore more of AI and iOS development: 10 | - [Exploring AI for iOS Development](https://academy.rudrank.com/product/ai) 11 | - [Exploring AI-Assisted Coding for iOS Development](https://academy.rudrank.com/product/ai-assisted-coding) 12 | 13 | Your support helps to keep this project growing! 
14 | 15 | ## Installation 16 | 17 | To get started with SakuraKit, add it to your Swift project using Swift Package Manager (SPM): 18 | 19 | ```swift 20 | dependencies: [ 21 | .package(url: "https://github.com/rryam/SakuraKit", from: "0.1.0") 22 | ] 23 | ``` 24 | 25 | Then, import it into your project: 26 | 27 | ```swift 28 | import SakuraKit 29 | ``` 30 | 31 | ## Getting Started 32 | 33 | ### Prerequisites 34 | - Play.ht API Key and User ID: Required for text-to-speech functionality. 35 | 36 | ## Basic Usage 37 | 38 | ### Play.ht Text-to-Speech 39 | 40 | Initialize the Play.ht client with your API key and user ID, as shown in the example below. 41 | 42 | ### Creating a PlayNote 43 | 44 | Here is a quick example to get you started: 45 | 46 | ```swift 47 | import SakuraKit 48 | 49 | // Initialize the Play.ht client 50 | let playAI = PlayAI(apiKey: "your_playht_api_key", userId: "your_user_id") 51 | 52 | // Create a PlayNote for generating audio from a PDF: 53 | let request = PlayNoteRequest( 54 | sourceFileUrl: sourceURL, 55 | synthesisStyle: .podcast, 56 | voice1: .angelo, 57 | voice2: .nia 58 | ) 59 | 60 | let response = try await playAI.createPlayNote(request) 61 | ``` 62 | 63 | Available synthesis styles include: 64 | - Podcast conversations 65 | - Executive briefings 66 | - Children's stories 67 | - Debates 68 | 69 | ## Contributing 70 | 71 | I welcome contributions! Feel free to open issues or submit pull requests to help improve SakuraKit. 72 | 73 | ## License 74 | 75 | SakuraKit is licensed under the MIT License. See LICENSE for more details. 76 | -------------------------------------------------------------------------------- /Sources/SakuraKit/Events/ClientEvent.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ClientEvent.swift 3 | // SakuraKit 4 | // 5 | // Created by Rudrank Riyam on 10/9/24. 6 | // 7 | 8 | import Foundation 9 | 10 | protocol ClientEvent: Encodable { 11 | var event_id: String? 
{ get }
  /// The kind of event, expressed as one of the `EventType` cases.
  var type: EventType { get }
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ConversationItemCreateEvent.swift:
--------------------------------------------------------------------------------
//
//  ConversationItemCreateEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.conversationItemCreate`.
///
/// NOTE(review): property names are snake_case, presumably so the synthesized
/// `Encodable` output matches the wire format of the target API — confirm.
struct ConversationItemCreateEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.conversationItemCreate` (raw value `"conversation.item.create"`).
  let type: EventType = .conversationItemCreate
  /// Optional identifier of another item; presumably the item after which the
  /// new one is inserted — TODO confirm against the API reference.
  let previous_item_id: String?
  /// The item payload carried by this event.
  let item: ConversationItem
}

/// The encodable payload carried in `ConversationItemCreateEvent.item`.
struct ConversationItem: Encodable {
  let id: String?
  let type: String
  let status: String?
  let role: String
  /// The parts that make up this item.
  let content: [ContentPart]
}

/// One element of `ConversationItem.content`.
///
/// Both `text` and `audio` are optional.
/// NOTE(review): `audio` is presumably base64-encoded audio — confirm.
struct ContentPart: Encodable {
  let type: String
  let text: String?
  let audio: String?
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ConversationItemDeleteEvent.swift:
--------------------------------------------------------------------------------
//
//  ConversationItemDeleteEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.conversationItemDelete`.
struct ConversationItemDeleteEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.conversationItemDelete` (raw value `"conversation.item.delete"`).
  let type: EventType = .conversationItemDelete
  /// Identifier of the item this event refers to.
  let item_id: String
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ConversationItemTruncateEvent.swift:
--------------------------------------------------------------------------------
//
//  ConversationItemTruncateEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.conversationItemTruncate`.
struct ConversationItemTruncateEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.conversationItemTruncate` (raw value `"conversation.item.truncate"`).
  let type: EventType = .conversationItemTruncate
  /// Identifier of the item this event refers to.
  let item_id: String
  /// Index into the item's content — presumably the part to truncate; TODO confirm.
  let content_index: Int
  /// A position in milliseconds — presumably where the audio is cut; TODO confirm.
  let audio_end_ms: Int
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/EventType.swift:
--------------------------------------------------------------------------------
//
//  EventType.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//

/// The kinds of client events declared in this module.
///
/// Each case's raw value is the string produced when the case is encoded.
enum EventType: String, Encodable {
  case sessionUpdate = "session.update"
  case inputAudioBufferAppend = "input_audio_buffer.append"
  case inputAudioBufferCommit = "input_audio_buffer.commit"
  case inputAudioBufferClear = "input_audio_buffer.clear"
  case conversationItemCreate = "conversation.item.create"
  case conversationItemTruncate = "conversation.item.truncate"
  case conversationItemDelete = "conversation.item.delete"
  case responseCreate = "response.create"
  case responseCancel = "response.cancel"
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/InputAudioBufferAppendEvent.swift:
--------------------------------------------------------------------------------
//
//  InputAudioBufferAppendEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.inputAudioBufferAppend`.
struct InputAudioBufferAppendEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.inputAudioBufferAppend` (raw value `"input_audio_buffer.append"`).
  let type: EventType = .inputAudioBufferAppend
  /// The audio payload as a string.
  /// NOTE(review): presumably base64-encoded audio bytes — confirm.
  let audio: String
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/InputAudioBufferClearEvent.swift:
--------------------------------------------------------------------------------
//
//  InputAudioBufferClearEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.inputAudioBufferClear`; it carries no payload.
struct InputAudioBufferClearEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.inputAudioBufferClear` (raw value `"input_audio_buffer.clear"`).
  let type: EventType = .inputAudioBufferClear
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/InputAudioBufferCommitEvent.swift:
--------------------------------------------------------------------------------
//
//  InputAudioBufferCommitEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.inputAudioBufferCommit`; it carries no payload.
struct InputAudioBufferCommitEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.inputAudioBufferCommit` (raw value `"input_audio_buffer.commit"`).
  let type: EventType = .inputAudioBufferCommit
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ResponseCancelEvent.swift:
--------------------------------------------------------------------------------
//
//  ResponseCancelEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.responseCancel`; it carries no payload.
struct ResponseCancelEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.responseCancel` (raw value `"response.cancel"`).
  let type: EventType = .responseCancel
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ResponseCreateEvent.swift:
--------------------------------------------------------------------------------
//
//  ResponseCreateEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//


/// A client event whose `type` is fixed to `.responseCreate`.
struct ResponseCreateEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.responseCreate` (raw value `"response.create"`).
  let type: EventType = .responseCreate
  /// The configuration carried by this event.
  let response: ResponseConfiguration
}

/// The encodable configuration carried in `ResponseCreateEvent.response`.
///
/// Every field is optional except `temperature`, which is always encoded.
public struct ResponseConfiguration: Encodable {
  let modalities: [String]?
  let instructions: String?
  let voice: String?
  let output_audio_format: String?
  let tools: [Tool]?
  let tool_choice: String?
  let temperature: Double
  let max_output_tokens: Int?

  /// Creates a response configuration.
  ///
  /// `modalities` and `instructions` must be passed explicitly (they may be `nil`);
  /// every other parameter defaults to `nil`, except `temperature`, which defaults to `1.0`.
  public init(modalities: [String]?, instructions: String?, voice: String? = nil, output_audio_format: String? = nil, tools: [Tool]? = nil, tool_choice: String? = nil, temperature: Double = 1.0, max_output_tokens: Int? = nil) {
    self.modalities = modalities
    self.instructions = instructions
    self.voice = voice
    self.output_audio_format = output_audio_format
    self.tools = tools
    self.tool_choice = tool_choice
    self.temperature = temperature
    self.max_output_tokens = max_output_tokens
  }
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/SessionUpdateEvent.swift:
--------------------------------------------------------------------------------
//
//  SessionUpdateEvent.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//

import Foundation

/// A client event whose `type` is fixed to `.sessionUpdate`.
struct SessionUpdateEvent: ClientEvent {
  /// Optional identifier for this event.
  let event_id: String?
  /// Always `.sessionUpdate` (raw value `"session.update"`).
  let type: EventType = .sessionUpdate
  /// The session configuration carried by this event.
  let session: SessionConfiguration
}

/// The encodable configuration carried in `SessionUpdateEvent.session`.
///
/// Every field is optional.
struct SessionConfiguration: Encodable {
  let modalities: [String]?
  let instructions: String?
  let voice: String?
  let input_audio_format: String?
  let output_audio_format: String?
  let input_audio_transcription: InputAudioTranscription?
  let turn_detection: TurnDetection?
  let tools: [Tool]?
  let tool_choice: String?
  let temperature: Double?
  let max_output_tokens: Int?
}

/// Encodable settings used as `SessionConfiguration.input_audio_transcription`.
struct InputAudioTranscription: Encodable {
  let enabled: Bool
  let model: String
}

/// Encodable settings used as `SessionConfiguration.turn_detection`.
///
/// NOTE(review): the `_ms` suffixes suggest millisecond units — confirm against the API reference.
struct TurnDetection: Encodable {
  let type: String
  let threshold: Double
  let prefix_padding_ms: Int
  let silence_duration_ms: Int
}

/// An encodable tool description, used in the `tools` arrays of
/// `SessionConfiguration` and `ResponseConfiguration`.
public struct Tool: Encodable {
  let type: String
  let name: String
  let description: String
  let parameters: ToolParameters

  /// Creates a tool description.
  ///
  /// A public initializer is required because the implicit memberwise initializer of a
  /// public struct is only `internal`; without it, code outside the module cannot build a
  /// value for the `tools:` argument of `ResponseConfiguration.init`.
  ///
  /// - Parameters:
  ///   - type: The tool type string.
  ///   - name: The tool name.
  ///   - description: A description of the tool.
  ///   - parameters: The parameter description of the tool.
  public init(type: String, name: String, description: String, parameters: ToolParameters) {
    self.type = type
    self.name = name
    self.description = description
    self.parameters = parameters
  }
}

/// The encodable parameter description of a `Tool`.
public struct ToolParameters: Encodable {
  let type: String
  let properties: [String: ToolParameterProperty]
  let required: [String]

  /// Creates a parameter description.
  ///
  /// - Parameters:
  ///   - type: The type string of the parameter container.
  ///   - properties: The properties, keyed by name.
  ///   - required: A list of names; presumably the keys of `properties` that are mandatory — confirm.
  public init(type: String, properties: [String: ToolParameterProperty], required: [String]) {
    self.type = type
    self.properties = properties
    self.required = required
  }
}

/// A single entry of `ToolParameters.properties`.
public struct ToolParameterProperty: Encodable {
  let type: String

  /// Creates a property description with the given type string.
  public init(type: String) {
    self.type = type
  }
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayAI.swift:
--------------------------------------------------------------------------------
//
//  PlayAI.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/15/24.
//

import Foundation

/// An actor that manages authentication and WebSocket connections for the Play.ht API.
///
/// `PlayAI` provides an interface for authenticating with the Play.ht API, obtaining
/// a WebSocket URL for real-time communication, and sending TTS commands. It encapsulates
/// the API key and user ID, ensuring secure and thread-safe access to these credentials.
///
/// - Important: This class is designed as an actor to ensure thread-safe access to its properties and methods.
public actor PlayAI {

  /// The API key used for authentication with Play.ht services.
  private let apiKey: String

  /// The user ID associated with the Play.ht account.
  private let userId: String

  /// The URL for the WebSocket authentication endpoint.
  private let authEndpoint = URL(string: "https://api.play.ht/api/v3/websocket-auth")!

  /// Initializes a new instance of `PlayAI`.
29 | /// 30 | /// - Parameters: 31 | /// - apiKey: The API key for authenticating with Play.ht services. 32 | /// - userId: The user ID associated with the Play.ht account. 33 | public init(apiKey: String, userId: String) { 34 | self.apiKey = apiKey 35 | self.userId = userId 36 | } 37 | 38 | /// Authenticates with the Play.ht API and retrieves a WebSocket URL. 39 | /// 40 | /// This method sends an authenticated POST request to the Play.ht WebSocket 41 | /// authentication endpoint using the stored API key and user ID. It then parses 42 | /// the response to extract the WebSocket URL for establishing a real-time connection. 43 | /// 44 | /// - Returns: A string containing the authenticated WebSocket URL for establishing a connection. 45 | /// - Throws: An error if the authentication request fails or if the response cannot be parsed. 46 | private func authenticateAndFetchWebSocketURL() async throws -> String { 47 | var request = URLRequest(url: authEndpoint) 48 | request.httpMethod = "POST" 49 | request.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") 50 | request.addValue(userId, forHTTPHeaderField: "X-User-Id") 51 | request.addValue("application/json", forHTTPHeaderField: "Content-Type") 52 | 53 | let (data, response) = try await URLSession.shared.data(for: request) 54 | 55 | // Add logging 56 | print("🌐 WebSocket Auth Response:") 57 | print("📝 Status Code: \((response as? HTTPURLResponse)?.statusCode ?? -1)") 58 | if let responseString = String(data: data, encoding: .utf8) { 59 | print("📦 Response Data: \(responseString)") 60 | } 61 | 62 | guard let httpResponse = response as? 
HTTPURLResponse, 63 | (200...299).contains(httpResponse.statusCode) else { 64 | throw PlayAIError.authenticationFailed 65 | } 66 | 67 | let decoder = JSONDecoder() 68 | let authResponse = try decoder.decode(PlayAIWebSocketAuthResponse.self, from: data) 69 | 70 | return authResponse.websocketURL 71 | } 72 | 73 | /// Sends a Text-to-Speech (TTS) command to the Play.ht WebSocket API. 74 | /// 75 | /// This method establishes a WebSocket connection using the provided URL, 76 | /// constructs a JSON message containing TTS parameters, and sends it through 77 | /// the WebSocket connection. 78 | /// 79 | /// - Parameters: 80 | /// - url: The WebSocket URL to connect to. 81 | /// - text: The text to be converted to speech. 82 | /// - voice: The voice ID or URL to use for synthesis. 83 | /// - outputFormat: The desired audio format (default is "mp3"). 84 | /// - quality: The quality of the audio ("draft", "standard", or "premium"). 85 | /// - temperature: Controls the randomness of the generated speech (0.0 to 1.0). 86 | /// - speed: The speed of the generated speech (0.5 to 2.0). 87 | /// - requestId: A unique identifier for the request (optional). 88 | /// 89 | /// - Throws: An error if the WebSocket connection fails, JSON encoding fails, or if there's an issue sending the message. 90 | private func sendTTSCommand( 91 | to url: URL, 92 | text: String, 93 | voice: String, 94 | outputFormat: String = "mp3", 95 | quality: String? = nil, 96 | temperature: Double? = nil, 97 | speed: Double? = nil, 98 | requestId: String? 
= nil 99 | ) async throws { 100 | let session = URLSession(configuration: .default) 101 | let webSocketTask = session.webSocketTask(with: url) 102 | 103 | try await webSocketTask.resume() 104 | 105 | let ttsCommand: [String: Any] = [ 106 | "text": text, 107 | "voice": voice, 108 | "output_format": outputFormat, 109 | "quality": quality, 110 | "temperature": temperature, 111 | "speed": speed, 112 | "request_id": requestId 113 | ].compactMapValues { $0 } 114 | 115 | let jsonData = try JSONSerialization.data(withJSONObject: ttsCommand) 116 | let jsonString = String(data: jsonData, encoding: .utf8)! 117 | 118 | try await webSocketTask.send(.string(jsonString)) 119 | } 120 | /// Receives and processes messages from the WebSocket connection using an async stream. 121 | /// 122 | /// This method creates an async stream of messages from the WebSocket, handling both audio data 123 | /// and end-of-stream messages. It collects audio chunks and provides the complete audio data 124 | /// when the stream is finished. 125 | /// 126 | /// - Parameters: 127 | /// - webSocketTask: The URLSessionWebSocketTask instance representing the active WebSocket connection. 128 | /// 129 | /// - Returns: An AsyncThrowingStream that yields audio data when it's complete. 130 | /// 131 | /// - Throws: An error if there's an issue receiving or processing messages. 132 | private func receiveMessages(from webSocketTask: URLSessionWebSocketTask) -> AsyncThrowingStream { 133 | AsyncThrowingStream { continuation in 134 | Task { 135 | var audioChunks: [Data] = [] 136 | 137 | do { 138 | while true { 139 | let message = try await webSocketTask.receive() 140 | switch message { 141 | case .string(let text): 142 | if let data = text.data(using: .utf8), 143 | let json = try? JSONSerialization.jsonObject(with: data) as? 
[String: Any], 144 | json.keys.contains("request_id") { 145 | // End of audio stream 146 | let audioData = audioChunks.reduce(Data(), +) 147 | continuation.yield(audioData) 148 | audioChunks.removeAll() 149 | } else { 150 | print("Received unexpected text message: \(text)") 151 | } 152 | case .data(let data): 153 | // Received binary audio data 154 | audioChunks.append(data) 155 | @unknown default: 156 | print("Received unknown message type") 157 | } 158 | } 159 | } catch { 160 | continuation.finish(throwing: error) 161 | } 162 | 163 | continuation.finish() 164 | } 165 | } 166 | } 167 | 168 | /// Handles WebSocket errors and connection closures. 169 | /// 170 | /// This method sets up error and closure handlers for the WebSocket connection. 171 | /// 172 | /// - Parameter webSocketTask: The URLSessionWebSocketTask instance to monitor. 173 | private func handleWebSocketEvents(for webSocketTask: URLSessionWebSocketTask) { 174 | webSocketTask.observe(\.state) { task, _ in 175 | if task.state == .completed { 176 | if let error = task.error { 177 | print("WebSocket Error: \(error.localizedDescription)") 178 | } else { 179 | print("WebSocket connection closed") 180 | } 181 | // Implement reconnection logic if needed 182 | } 183 | } 184 | } 185 | 186 | /// Processes the audio stream from the WebSocket connection. 187 | /// 188 | /// This method demonstrates how to use the `receiveMessages` function with an async stream. 189 | /// 190 | /// - Parameter webSocketTask: The URLSessionWebSocketTask instance representing the active WebSocket connection. 191 | /// 192 | /// - Throws: An error if there's an issue processing the audio stream. 
193 | private func processAudioStream(from webSocketTask: URLSessionWebSocketTask) async throws { 194 | let audioStream = receiveMessages(from: webSocketTask) 195 | 196 | for try await audioData in audioStream { 197 | // Here you can handle the complete audio data 198 | // For example, you might want to play it, save it, or process it further 199 | print("Received complete audio data of size: \(audioData.count) bytes") 200 | 201 | // Example: Save the audio data to a file 202 | let documentsPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] 203 | let audioFileURL = documentsPath.appendingPathComponent("audio_\(Date().timeIntervalSince1970).mp3") 204 | try audioData.write(to: audioFileURL) 205 | print("Audio saved to: \(audioFileURL.path)") 206 | } 207 | } 208 | 209 | /// Creates a new PlayNote using a source file URL. 210 | /// 211 | /// This method sends a request to create a new PlayNote with the specified configuration. 212 | /// The generation process is asynchronous, and you'll need to poll the status using 213 | /// `getPlayNote(id:)` to check when it's complete. 214 | /// 215 | /// - Parameter request: The PlayNote request configuration. 216 | /// - Returns: A PlayNoteResponse containing the creation status and details. 217 | /// - Throws: A PlayAIError if the request fails or returns an invalid response. 218 | public func createPlayNote(_ request: PlayNoteRequest) async throws -> PlayNoteResponse { 219 | let endpoint = URL(string: "https://api.play.ai/api/v1/playnotes")! 
220 | var urlRequest = URLRequest(url: endpoint) 221 | urlRequest.httpMethod = "POST" 222 | urlRequest.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") 223 | urlRequest.addValue(userId, forHTTPHeaderField: "X-USER-ID") 224 | 225 | // Create form data 226 | let boundary = UUID().uuidString 227 | var formData = Data() 228 | 229 | urlRequest.addValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type") 230 | 231 | // Helper function to append form field 232 | func appendFormField(named name: String, value: String) { 233 | formData.append("--\(boundary)\r\n") 234 | formData.append("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n") 235 | formData.append("\(value)\r\n") 236 | } 237 | 238 | // Add fields 239 | appendFormField(named: "sourceFileUrl", value: request.sourceFileUrl.absoluteString) 240 | appendFormField(named: "synthesisStyle", value: request.synthesisStyle.rawValue) 241 | appendFormField(named: "voice1", value: request.voice1.id) 242 | appendFormField(named: "voice1Name", value: request.voice1.name) 243 | appendFormField(named: "voice1Gender", value: request.voice1.gender) 244 | 245 | // Add voice2 if present 246 | if let voice2 = request.voice2 { 247 | appendFormField(named: "voice2", value: voice2.id) 248 | appendFormField(named: "voice2Name", value: voice2.name) 249 | appendFormField(named: "voice2Gender", value: voice2.gender) 250 | } 251 | 252 | // Add final boundary 253 | formData.append("--\(boundary)--\r\n") 254 | 255 | urlRequest.httpBody = formData 256 | 257 | let (data, response) = try await URLSession.shared.data(for: urlRequest) 258 | 259 | // Add logging 260 | print("🎵 Create PlayNote Response:") 261 | print("📝 Status Code: \((response as? HTTPURLResponse)?.statusCode ?? -1)") 262 | if let responseString = String(data: data, encoding: .utf8) { 263 | print("📦 Response Data: \(responseString)") 264 | } 265 | 266 | guard let httpResponse = response as? 
HTTPURLResponse else { 267 | throw PlayAIError.invalidResponse 268 | } 269 | 270 | if httpResponse.statusCode == 403 { 271 | throw PlayAIError.activeGenerationExists 272 | } 273 | 274 | guard (200...299).contains(httpResponse.statusCode) else { 275 | if let errorResponse = try? JSONDecoder().decode(ErrorResponse.self, from: data) { 276 | throw PlayAIError.serverError(message: errorResponse.errorMessage) 277 | } 278 | throw PlayAIError.serverError(message: "Unknown error occurred") 279 | } 280 | 281 | return try JSONDecoder.playDateDecoder.decode(PlayNoteResponse.self, from: data) 282 | } 283 | 284 | /// Gets the status and details of a PlayNote. 285 | /// 286 | /// - Parameter id: The PlayNoteID to retrieve. 287 | /// - Returns: A PlayNoteResponse containing the current status and details. 288 | /// - Throws: A PlayAIError if the request fails or returns an invalid response. 289 | public func getPlayNote(id: PlayNoteID) async throws -> PlayNoteResponse { 290 | let endpoint = URL(string: "https://api.play.ai/api/v1/playnotes/\(id.rawValue)")! 291 | var request = URLRequest(url: endpoint) 292 | request.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") 293 | request.addValue(userId, forHTTPHeaderField: "X-USER-ID") 294 | 295 | let (data, response) = try await URLSession.shared.data(for: request) 296 | 297 | // Add logging 298 | print("🔍 Get PlayNote Response:") 299 | print("📝 Status Code: \((response as? HTTPURLResponse)?.statusCode ?? -1)") 300 | if let responseString = String(data: data, encoding: .utf8) { 301 | print("📦 Response Data: \(responseString)") 302 | } 303 | 304 | guard let httpResponse = response as? HTTPURLResponse, 305 | (200...299).contains(httpResponse.statusCode) else { 306 | throw PlayAIError.invalidResponse 307 | } 308 | 309 | return try JSONDecoder.playDateDecoder.decode(PlayNoteResponse.self, from: data) 310 | } 311 | 312 | /// Creates a PlayNote and polls for its completion status. 
313 | /// 314 | /// This method creates a new PlayNote and continuously monitors its status until completion 315 | /// or failure. It polls the status every 60 seconds and provides updates through the 316 | /// statusHandler closure. 317 | /// 318 | /// - Parameters: 319 | /// - request: The PlayNote request configuration. 320 | /// - statusHandler: An optional closure to receive status updates during the polling process. 321 | /// 322 | /// - Returns: The final PlayNoteResponse containing the completed PlayNote details. 323 | /// - Throws: A PlayAIError if the request fails or polling encounters an error. 324 | public func createAndAwaitPlayNote( 325 | _ request: PlayNoteRequest, 326 | statusHandler: ((String) -> Void)? = nil 327 | ) async throws -> PlayNoteResponse { 328 | // Create the initial PlayNote 329 | let initialResponse = try await createPlayNote(request) 330 | let playNoteId = initialResponse.id 331 | 332 | // Poll for completion 333 | while true { 334 | do { 335 | let response = try await getPlayNote(id: PlayNoteID(playNoteId)) 336 | 337 | switch response.status { 338 | case .completed: 339 | statusHandler?("PlayNote generation complete!") 340 | return response 341 | 342 | case .generating: 343 | statusHandler?("PlayNote is still generating...") 344 | try await Task.sleep(for: .seconds(60)) 345 | 346 | case .failed: 347 | statusHandler?("PlayNote generation failed.") 348 | throw PlayAIError.generationFailed 349 | 350 | case .none: 351 | statusHandler?("PlayNote status unknown.") 352 | } 353 | } catch { 354 | statusHandler?("Error polling for PlayNote status: \(error.localizedDescription)") 355 | throw error 356 | } 357 | } 358 | } 359 | } 360 | 361 | private struct ErrorResponse: Decodable { 362 | let errorMessage: String 363 | let errorId: String 364 | } 365 | 366 | private extension Data { 367 | mutating func append(_ string: String) { 368 | if let data = string.data(using: .utf8) { 369 | append(data) 370 | } 371 | } 372 | } 373 | 374 | private 
extension JSONDecoder {
    /// A JSON decoder configured to handle Play API date formats.
    ///
    /// Dates are decoded from ISO 8601 internet date-time strings. Strings with
    /// fractional seconds are tried first; strings without them are accepted as a
    /// fallback, because a formatter configured with `.withFractionalSeconds`
    /// rejects timestamps that carry no fraction.
    static var playDateDecoder: JSONDecoder {
        let decoder = JSONDecoder()

        decoder.dateDecodingStrategy = .custom { decoder in
            let container = try decoder.singleValueContainer()
            let dateStr = try container.decode(String.self)

            // The formatters are created inside the closure instead of being captured,
            // so the closure does not capture a non-Sendable value.
            let fractionalFormatter = ISO8601DateFormatter()
            fractionalFormatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds]
            if let date = fractionalFormatter.date(from: dateStr) {
                return date
            }

            let wholeSecondFormatter = ISO8601DateFormatter()
            wholeSecondFormatter.formatOptions = [.withInternetDateTime]
            if let date = wholeSecondFormatter.date(from: dateStr) {
                return date
            }

            throw DecodingError.dataCorruptedError(
                in: container,
                debugDescription: "Invalid date format: \(dateStr)"
            )
        }
        return decoder
    }
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayAIError.swift:
--------------------------------------------------------------------------------
//
//  PlayAIError.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/15/24.
//

import Foundation

/// An enumeration of errors that can occur during Play.ht API operations.
public enum PlayAIError: Error {
    /// Indicates that the authentication request failed.
    case authenticationFailed
    /// Indicates that the source file URL is invalid.
    case invalidSourceFileURL
    /// Indicates that the user already has an active generation.
    case activeGenerationExists
    /// Indicates that the response from the server was invalid.
    case invalidResponse
    /// Indicates that the server returned an error message.
    case serverError(message: String)
    /// Indicates that the PlayNote generation process failed.
    case generationFailed
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayAIWebSocketAuthResponse.swift:
--------------------------------------------------------------------------------
//
//  PlayAIWebSocketAuthResponse.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/15/24.
//

import Foundation

/// A structure representing the response from the WebSocket authentication endpoint.
public struct PlayAIWebSocketAuthResponse: Codable {
    /// The WebSocket URL to be used for establishing a connection.
    // NOTE(review): this property is not `public`, so it can only be read inside the
    // module even though the type itself is public — confirm that this is intended.
    let websocketURL: String

    /// Maps `websocketURL` to the `websocket_url` key of the JSON payload.
    enum CodingKeys: String, CodingKey {
        case websocketURL = "websocket_url"
    }
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayNote.swift:
--------------------------------------------------------------------------------
import Foundation

/// A structure representing a PlayNote synthesis request.
public struct PlayNoteRequest: Sendable {
    /// The URL to the source file.
    public let sourceFileUrl: URL
    /// The synthesis style of the PlayNote.
    public let synthesisStyle: PlayNoteSynthesisStyle
    /// The first voice configuration.
    public let voice1: PlayNoteVoice
    /// The optional second voice configuration.
    public let voice2: PlayNoteVoice?

    /// Creates a new PlayNote request.
    /// - Parameters:
    ///   - sourceFileUrl: The URL to the source file.
    ///   - synthesisStyle: The synthesis style to use.
    ///   - voice1: The first voice configuration.
    ///   - voice2: The optional second voice configuration.
public init(
        sourceFileUrl: URL,
        synthesisStyle: PlayNoteSynthesisStyle,
        voice1: PlayNoteVoice,
        voice2: PlayNoteVoice? = nil
    ) {
        self.sourceFileUrl = sourceFileUrl
        self.synthesisStyle = synthesisStyle
        self.voice1 = voice1
        self.voice2 = voice2
    }
}

/// The Play.ai voices that can be selected for a PlayNote, with their metadata.
///
/// A `basil` voice (British, Yorkshire) is not exposed yet because the UUID of its
/// manifest is still unknown.
public enum PlayNoteVoice: Sendable {
    case angelo, arsenio, cillian, timo, dexter, miles, briggs
    case deedee, nia, inara, constanza, gideon, casper, mitch, ava

    /// The S3 URL of the manifest that identifies the voice.
    public var id: String {
        switch self {
        case .angelo:
            return "s3://voice-cloning-zero-shot/baf1ef41-36b6-428c-9bdf-50ba54682bd8/original/manifest.json"
        case .arsenio:
            return "s3://voice-cloning-zero-shot/65977f5e-a22a-4b36-861b-ecede19bdd65/original/manifest.json"
        case .cillian:
            return "s3://voice-cloning-zero-shot/1591b954-8760-41a9-bc58-9176a68c5726/original/manifest.json"
        case .timo:
            return "s3://voice-cloning-zero-shot/677a4ae3-252f-476e-85ce-eeed68e85951/original/manifest.json"
        case .dexter:
            return "s3://voice-cloning-zero-shot/b27bc13e-996f-4841-b584-4d35801aea98/original/manifest.json"
        case .miles:
            return "s3://voice-cloning-zero-shot/29dd9a52-bd32-4a6e-bff1-bbb98dcc286a/original/manifest.json"
        case .briggs:
            return "s3://voice-cloning-zero-shot/71cdb799-1e03-41c6-8a05-f7cd55134b0b/original/manifest.json"
        case .deedee:
            return "s3://voice-cloning-zero-shot/e040bd1b-f190-4bdb-83f0-75ef85b18f84/original/manifest.json"
        case .nia:
            return "s3://voice-cloning-zero-shot/831bd330-85c6-4333-b2b4-10c476ea3491/original/manifest.json"
        case .inara:
            return "s3://voice-cloning-zero-shot/adb83b67-8d75-48ff-ad4d-a0840d231ef1/original/manifest.json"
        case .constanza:
            return "s3://voice-cloning-zero-shot/b0aca4d7-1738-4848-a80b-307ac44a7298/original/manifest.json"
        case .gideon:
            return "s3://voice-cloning-zero-shot/5a3a1168-7793-4b2c-8f90-aff2b5232131/original/manifest.json"
        case .casper:
            return "s3://voice-cloning-zero-shot/1bbc6986-fadf-4bd8-98aa-b86fed0476e9/original/manifest.json"
        case .mitch:
            return "s3://voice-cloning-zero-shot/c14e50f2-c5e3-47d1-8c45-fa4b67803d19/original/manifest.json"
        case .ava:
            return "s3://voice-cloning-zero-shot/50381567-ff7b-46d2-bfdc-a9584a85e08d/original/manifest.json"
        }
    }

    /// The human-readable name of the voice.
    public var name: String {
        switch self {
        case .angelo:
            return "Angelo"
        case .arsenio:
            return "Arsenio"
        case .cillian:
            return "Cillian"
        case .timo:
            return "Timo"
        case .dexter:
            return "Dexter"
        case .miles:
            return "Miles"
        case .briggs:
            return "Briggs"
        case .deedee:
            return "Deedee"
        case .nia:
            return "Nia"
        case .inara:
            return "Inara"
        case .constanza:
            return "Constanza"
        case .gideon:
            return "Gideon"
        case .casper:
            return "Casper"
        case .mitch:
            return "Mitch"
        case .ava:
            return "Ava"
        }
    }

    /// The gender of the voice, either `"female"` or `"male"`.
    public var gender: String {
        switch self {
        case .deedee, .nia, .inara, .constanza, .ava:
            return "female"
        case .angelo, .arsenio, .cillian, .timo, .dexter, .miles, .briggs, .gideon, .casper, .mitch:
            return "male"
        }
    }

    /// The accent the voice speaks with.
    public var accent: String {
        switch self {
        case .cillian:
            return "Irish"
        case .briggs:
            return "US Southern (Oklahoma)"
        case .constanza:
            return "US Latin American"
        case .gideon:
            return "British"
        case .mitch, .ava:
            return "Australian"
        case .arsenio, .miles, .deedee, .inara:
            return "US African American"
        case .angelo, .timo, .dexter, .nia, .casper:
            return "US"
        }
    }
}

/// The synthesis styles a PlayNote can be generated in.
///
/// The raw value of each case is the string used for that style.
public enum PlayNoteSynthesisStyle: String, Decodable, Sendable {
    /// A podcast-style conversation.
    case podcast
    /// An executive briefing style.
    case executiveBriefing = "executive-briefing"
    /// A children's story style.
    case childrensStory = "childrens-story"
    /// A debate style.
    case debate
}

/// A structure representing a PlayNote response.
public struct PlayNoteResponse: Decodable, Sendable {
    /// The generation state of a PlayNote.
    public enum PlayNoteStatus: String, Decodable, Sendable {
        case generating
        case completed
        case failed
    }

    /// The unique ID for the PlayNote.
    public let id: String
    /// The owner's ID.
    public let ownerId: String
    /// The name of the PlayNote.
    public let name: String
    /// The source file URL.
    public let sourceFileUrl: String?
    /// The generated audio URL.
    public let audioUrl: String?
    /// The synthesis style used.
    public let synthesisStyle: PlayNoteSynthesisStyle
    /// The status of the generation.
    public let status: PlayNoteStatus?
    /// The duration in seconds.
    public let duration: Double?
    /// When the PlayNote was requested.
    public let requestedAt: Date
    /// When the PlayNote was created.
    public let createdAt: Date?
}

--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayNoteID.swift:
--------------------------------------------------------------------------------
/// An object that represents a unique identifier for a Play.ht note.
///
/// Use `PlayNoteID` to identify and reference specific Play.ht notes within your app.
/// This identifier is unique across all Play.ht notes and can be used to fetch, track,
/// or manage individual notes.
///
/// You can create a `PlayNoteID` in several ways:
/// ```swift
/// // Using string literal
/// let id1: PlayNoteID = "note_123456"
///
/// // Using initializer
/// let id2 = PlayNoteID("note_123456")
///
/// // Using raw value initializer
/// let id3 = PlayNoteID(rawValue: "note_123456")
/// ```
public struct PlayNoteID: Equatable, Hashable, Sendable, RawRepresentable, ExpressibleByStringLiteral {

    // Literal types required by the string-literal protocol hierarchy.
    public typealias StringLiteralType = String
    public typealias ExtendedGraphemeClusterLiteralType = String
    public typealias UnicodeScalarLiteralType = String

    /// The string that backs the Play.ht note identifier.
    public let rawValue: String

    /// Creates a new instance with the specified raw value.
    ///
    /// - Parameter rawValue: The raw string value to use for the new instance.
    public init(rawValue: String) {
        self.rawValue = rawValue
    }

    /// Creates a Play.ht note identifier with a string.
    ///
    /// - Parameter rawValue: The string value representing the note identifier.
    public init(_ rawValue: String) {
        self.init(rawValue: rawValue)
    }

    /// Creates an instance initialized to the given string value.
    ///
    /// - Parameter value: The string value to use for the new instance.
    public init(stringLiteral value: String) {
        self.init(rawValue: value)
    }
}

extension PlayNoteID: Codable {

    /// Creates a new instance by decoding a single string value from the given decoder.
    ///
    /// - Parameter decoder: The decoder to read data from.
    /// - Throws: An error if reading from the decoder fails.
    public init(from decoder: Decoder) throws {
        let identifier = try decoder.singleValueContainer().decode(String.self)
        self.init(rawValue: identifier)
    }

    /// Encodes the raw string as a single value into the given encoder.
    ///
    /// - Parameter encoder: The encoder to write data to.
    /// - Throws: An error if encoding fails.
    public func encode(to encoder: Encoder) throws {
        var singleValue = encoder.singleValueContainer()
        try singleValue.encode(rawValue)
    }
}

extension PlayNoteID: CustomStringConvertible {

    /// A textual representation of the Play.ht note identifier.
    ///
    /// This property returns the raw string value of the identifier.
    public var description: String {
        rawValue
    }
}

--------------------------------------------------------------------------------
/Tests/SakuraKitTests.swift:
--------------------------------------------------------------------------------
//
//  SakuraKitTests.swift
//  SakuraKit
//
//  Created by Rudrank Riyam on 10/9/24.
//

import Foundation

// 🌸

--------------------------------------------------------------------------------