├── .gitignore
├── .swiftpm
└── xcode
│ ├── package.xcworkspace
│ └── contents.xcworkspacedata
│ └── xcshareddata
│ └── xcschemes
│ └── SakuraKit.xcscheme
├── LICENSE
├── Package.resolved
├── Package.swift
├── README.md
├── Sources
└── SakuraKit
│ ├── Events
│ ├── ClientEvent.swift
│ ├── ConversationItemCreateEvent.swift
│ ├── ConversationItemDeleteEvent.swift
│ ├── ConversationItemTruncateEvent.swift
│ ├── EventType.swift
│ ├── InputAudioBufferAppendEvent.swift
│ ├── InputAudioBufferClearEvent.swift
│ ├── InputAudioBufferCommitEvent.swift
│ ├── ResponseCancelEvent.swift
│ ├── ResponseCreateEvent.swift
│ └── SessionUpdateEvent.swift
│ └── Play
│ ├── PlayAI.swift
│ ├── PlayAIError.swift
│ ├── PlayAIWebSocketAuthResponse.swift
│ ├── PlayNote.swift
│ └── PlayNoteID.swift
└── Tests
└── SakuraKitTests.swift
/.gitignore:
--------------------------------------------------------------------------------
1 | # Xcode
2 | #
3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
4 |
5 | ## User settings
6 | xcuserdata/
7 |
8 | ## Obj-C/Swift specific
9 | *.hmap
10 |
11 | ## App packaging
12 | *.ipa
13 | *.dSYM.zip
14 | *.dSYM
15 |
16 | ## Playgrounds
17 | timeline.xctimeline
18 | playground.xcworkspace
19 |
20 | # Swift Package Manager
21 | #
22 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
23 | # Packages/
24 | # Package.pins
25 | # Package.resolved
26 | # *.xcodeproj
27 | #
28 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
29 | # hence it is not needed unless you have added a package configuration file to your project
30 | # .swiftpm
31 |
32 | .build/
33 |
34 | # CocoaPods
35 | #
36 | # We recommend against adding the Pods directory to your .gitignore. However
37 | # you should judge for yourself, the pros and cons are mentioned at:
38 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
39 | #
40 | # Pods/
41 | #
42 | # Add this line if you want to avoid checking in source code from the Xcode workspace
43 | # *.xcworkspace
44 |
45 | # Carthage
46 | #
47 | # Add this line if you want to avoid checking in source code from Carthage dependencies.
48 | # Carthage/Checkouts
49 |
50 | Carthage/Build/
51 |
52 | # fastlane
53 | #
54 | # It is recommended to not store the screenshots in the git repo.
55 | # Instead, use fastlane to re-generate the screenshots whenever they are needed.
56 | # For more information about the recommended setup visit:
57 | # https://docs.fastlane.tools/best-practices/source-control/#source-control
58 |
59 | fastlane/report.xml
60 | fastlane/Preview.html
61 | fastlane/screenshots/**/*.png
62 | fastlane/test_output
63 |
--------------------------------------------------------------------------------
/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/SakuraKit.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
9 |
10 |
16 |
22 |
23 |
24 |
25 |
26 |
32 |
33 |
35 |
41 |
42 |
43 |
44 |
45 |
55 |
57 |
63 |
64 |
65 |
66 |
72 |
74 |
80 |
81 |
82 |
83 |
85 |
86 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Rudrank Riyam
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Package.resolved:
--------------------------------------------------------------------------------
1 | {
2 | "originHash" : "c8f24694ad7a02b92fd1bb40d2624fef259b0e4b78934c36c965a03b30d4f13a",
3 | "pins" : [
4 | {
5 | "identity" : "swift-docc-plugin",
6 | "kind" : "remoteSourceControl",
7 | "location" : "https://github.com/apple/swift-docc-plugin",
8 | "state" : {
9 | "revision" : "85e4bb4e1cd62cec64a4b8e769dcefdf0c5b9d64",
10 | "version" : "1.4.3"
11 | }
12 | },
13 | {
14 | "identity" : "swift-docc-symbolkit",
15 | "kind" : "remoteSourceControl",
16 | "location" : "https://github.com/swiftlang/swift-docc-symbolkit",
17 | "state" : {
18 | "revision" : "b45d1f2ed151d057b54504d653e0da5552844e34",
19 | "version" : "1.0.0"
20 | }
21 | }
22 | ],
23 | "version" : 3
24 | }
25 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version: 6.0
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
/// Manifest for the SakuraKit package: one library product backed by one
/// source target, plus a test target that depends on it.
let package = Package(
    name: "SakuraKit",
    platforms: [.iOS(.v16), .macOS(.v14), .tvOS(.v16), .watchOS(.v9), .visionOS(.v1)],
    products: [
        .library(name: "SakuraKit", targets: ["SakuraKit"])
    ],
    dependencies: [
        // The only package dependency declared by this manifest.
        .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.0.0")
    ],
    targets: [
        .target(name: "SakuraKit", dependencies: []),
        .testTarget(name: "SakuraKitTests", dependencies: ["SakuraKit"])
    ]
)
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SakuraKit: Swift SDK for Prototyping AI Speech Generation
2 |
3 | SakuraKit is a Swift SDK designed for quickly prototyping speech-to-speech or text-to-speech using different APIs, so you can build low-latency, multimodal experiences with ease.
4 |
5 | This SDK is named after the cherry blossoms (Sakura) that I hope to enjoy in Shibuya next year. 🌸
6 |
7 | ## Support
8 |
9 | Love this project? Check out my books to explore more of AI and iOS development:
10 | - [Exploring AI for iOS Development](https://academy.rudrank.com/product/ai)
11 | - [Exploring AI-Assisted Coding for iOS Development](https://academy.rudrank.com/product/ai-assisted-coding)
12 |
13 | Your support helps to keep this project growing!
14 |
15 | ## Installation
16 |
17 | To get started with SakuraKit, add it to your Swift project using Swift Package Manager (SPM):
18 |
19 | ```swift
20 | dependencies: [
21 | .package(url: "https://github.com/rryam/SakuraKit", from: "0.1.0")
22 | ]
23 | ```
24 |
25 | Then, import it into your project:
26 |
27 | ```swift
28 | import SakuraKit
29 | ```
30 |
31 | ## Getting Started
32 |
33 | ### Prerequisites
34 | - Play.ht API Key and User ID: Required for text-to-speech functionality.
35 |
36 | ## Basic Usage
37 |
38 | ### Play.ht Text-to-Speech
39 |
40 | Initialize the Play.ht client:
41 |
42 | ### Quick Example
43 |
44 | Here is a quick example to get you started:
45 |
46 | ```swift
47 | import SakuraKit
48 |
49 | // Initialize the SakuraKit client
50 | let playAI = PlayAI(apiKey: "your_playht_api_key", userId: "your_user_id")
51 |
52 | // Create a PlayNote for generating audio from PDF:
53 | let request = PlayNoteRequest(
54 | sourceFileUrl: sourceURL,
55 | synthesisStyle: .podcast,
56 | voice1: .angelo,
57 | voice2: .nia
58 | )
59 |
60 | let response = try await playAI.createPlayNote(request)
61 | ```
62 |
63 | Available synthesis styles include:
64 | - Podcast conversations
65 | - Executive briefings
66 | - Children's stories
67 | - Debates
68 |
69 | ## Contributing
70 |
71 | I welcome contributions! Feel free to open issues or submit pull requests to help improve SakuraKit.
72 |
73 | ## License
74 |
75 | SakuraKit is licensed under the MIT License. See LICENSE for more details.
76 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ClientEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ClientEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 | import Foundation
9 |
/// An encodable event sent by the client.
///
/// Every conforming event exposes a `type` discriminator and an optional
/// event identifier.
protocol ClientEvent: Encodable {
    /// Discriminator that identifies which kind of event this is.
    var type: EventType { get }

    /// Optional identifier for this event; `nil` when none is assigned.
    var event_id: String? { get }
}
14 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ConversationItemCreateEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ConversationItemCreateEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `conversation.item.create` carrying the item to add.
struct ConversationItemCreateEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.conversationItemCreate`.
    let type = EventType.conversationItemCreate

    /// Optional identifier of a previous item.
    /// NOTE(review): presumably the item after which the new one is inserted — confirm against the API.
    let previous_item_id: String?

    /// The conversation item payload.
    let item: ConversationItem
}
15 |
/// Encodable payload describing a single conversation item.
struct ConversationItem: Encodable {
    /// Optional item identifier.
    let id: String?

    /// Item kind, passed through as a raw string.
    let type: String

    /// Optional status string.
    let status: String?

    /// Role string attached to the item.
    let role: String

    /// The content parts that make up the item.
    let content: [ContentPart]
}
23 |
/// One piece of a conversation item's content.
///
/// `text` and `audio` are both optional; a `nil` value is omitted from the
/// encoded output by the synthesized `Encodable` conformance.
struct ContentPart: Encodable {
    /// Content kind, passed through as a raw string.
    let type: String

    /// Optional textual content.
    let text: String?

    /// Optional audio content carried as a string.
    let audio: String?
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ConversationItemDeleteEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ConversationItemDeleteEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `conversation.item.delete`.
struct ConversationItemDeleteEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.conversationItemDelete`.
    let type = EventType.conversationItemDelete

    /// Identifier of the item the event refers to.
    let item_id: String
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ConversationItemTruncateEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ConversationItemTruncateEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `conversation.item.truncate`.
struct ConversationItemTruncateEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.conversationItemTruncate`.
    let type = EventType.conversationItemTruncate

    /// Identifier of the item the event refers to.
    let item_id: String

    /// Index of the content part within the item.
    let content_index: Int

    /// Audio end position, in milliseconds (per the `_ms` suffix).
    let audio_end_ms: Int
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/EventType.swift:
--------------------------------------------------------------------------------
1 | //
2 | // EventType.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
/// Discriminators for client events; each raw value is the string that is
/// encoded for the event's `type` field.
enum EventType: String, Encodable {
    // Session
    case sessionUpdate = "session.update"

    // Input audio buffer
    case inputAudioBufferAppend = "input_audio_buffer.append"
    case inputAudioBufferCommit = "input_audio_buffer.commit"
    case inputAudioBufferClear = "input_audio_buffer.clear"

    // Conversation items
    case conversationItemCreate = "conversation.item.create"
    case conversationItemTruncate = "conversation.item.truncate"
    case conversationItemDelete = "conversation.item.delete"

    // Responses
    case responseCreate = "response.create"
    case responseCancel = "response.cancel"
}
19 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/InputAudioBufferAppendEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // InputAudioBufferAppendEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `input_audio_buffer.append`.
struct InputAudioBufferAppendEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.inputAudioBufferAppend`.
    let type = EventType.inputAudioBufferAppend

    /// Audio payload carried as a string.
    /// NOTE(review): presumably base64-encoded audio bytes — confirm against the API.
    let audio: String
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/InputAudioBufferClearEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // InputAudioBufferClearEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `input_audio_buffer.clear`; carries no payload.
struct InputAudioBufferClearEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.inputAudioBufferClear`.
    let type = EventType.inputAudioBufferClear
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/InputAudioBufferCommitEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // InputAudioBufferCommitEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `input_audio_buffer.commit`; carries no payload.
struct InputAudioBufferCommitEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.inputAudioBufferCommit`.
    let type = EventType.inputAudioBufferCommit
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ResponseCancelEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ResponseCancelEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `response.cancel`; carries no payload.
struct ResponseCancelEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.responseCancel`.
    let type = EventType.responseCancel
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/ResponseCreateEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ResponseCreateEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 |
/// Client event of type `response.create` carrying a response configuration.
struct ResponseCreateEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.responseCreate`.
    let type = EventType.responseCreate

    /// Configuration for the requested response.
    let response: ResponseConfiguration
}
14 |
/// Encodable settings attached to a `response.create` event.
///
/// Optional fields that are `nil` are omitted from the encoded output.
public struct ResponseConfiguration: Encodable {
    /// Optional list of modality names.
    let modalities: [String]?
    /// Optional instructions text.
    let instructions: String?
    /// Optional voice name.
    let voice: String?
    /// Optional output audio format name.
    let output_audio_format: String?
    /// Optional tool definitions.
    let tools: [Tool]?
    /// Optional tool-choice value.
    let tool_choice: String?
    /// Temperature value; always encoded because it is non-optional.
    let temperature: Double
    /// Optional cap on output tokens.
    let max_output_tokens: Int?

    /// Creates a response configuration.
    ///
    /// - Parameters:
    ///   - modalities: Modality names, or `nil`.
    ///   - instructions: Instructions text, or `nil`.
    ///   - voice: Voice name. Defaults to `nil`.
    ///   - output_audio_format: Output audio format name. Defaults to `nil`.
    ///   - tools: Tool definitions. Defaults to `nil`.
    ///   - tool_choice: Tool-choice value. Defaults to `nil`.
    ///   - temperature: Temperature value. Defaults to `1.0`.
    ///   - max_output_tokens: Cap on output tokens. Defaults to `nil`.
    public init(
        modalities: [String]?,
        instructions: String?,
        voice: String? = nil,
        output_audio_format: String? = nil,
        tools: [Tool]? = nil,
        tool_choice: String? = nil,
        temperature: Double = 1.0,
        max_output_tokens: Int? = nil
    ) {
        self.modalities = modalities
        self.instructions = instructions
        self.voice = voice
        self.output_audio_format = output_audio_format
        self.tools = tools
        self.tool_choice = tool_choice
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
    }
}
36 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Events/SessionUpdateEvent.swift:
--------------------------------------------------------------------------------
1 | //
2 | // SessionUpdateEvent.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 | import Foundation
9 |
/// Client event of type `session.update` carrying a session configuration.
struct SessionUpdateEvent: ClientEvent {
    /// Optional identifier for this event.
    let event_id: String?

    /// Fixed discriminator; always `.sessionUpdate`.
    let type = EventType.sessionUpdate

    /// The session settings to send.
    let session: SessionConfiguration
}
15 |
/// Encodable session settings; every field is optional and omitted from the
/// encoded output when `nil`.
struct SessionConfiguration: Encodable {
    /// Optional list of modality names.
    let modalities: [String]?
    /// Optional instructions text.
    let instructions: String?
    /// Optional voice name.
    let voice: String?
    /// Optional input audio format name.
    let input_audio_format: String?
    /// Optional output audio format name.
    let output_audio_format: String?
    /// Optional input transcription settings.
    let input_audio_transcription: InputAudioTranscription?
    /// Optional turn-detection settings.
    let turn_detection: TurnDetection?
    /// Optional tool definitions.
    let tools: [Tool]?
    /// Optional tool-choice value.
    let tool_choice: String?
    /// Optional temperature value.
    let temperature: Double?
    /// Optional cap on output tokens.
    let max_output_tokens: Int?
}
29 |
/// Encodable settings for input audio transcription.
struct InputAudioTranscription: Encodable {
    /// Whether transcription is turned on.
    let enabled: Bool

    /// Name of the transcription model.
    let model: String
}
34 |
/// Encodable turn-detection settings.
struct TurnDetection: Encodable {
    /// Detection kind, passed through as a raw string.
    let type: String

    /// Detection threshold.
    let threshold: Double

    /// Prefix padding, in milliseconds (per the `_ms` suffix).
    let prefix_padding_ms: Int

    /// Silence duration, in milliseconds (per the `_ms` suffix).
    let silence_duration_ms: Int
}
41 |
/// Encodable description of a tool that can be attached to a session or
/// response configuration.
public struct Tool: Encodable {
    /// Tool kind, passed through as a raw string.
    let type: String
    /// Tool name.
    let name: String
    /// Human-readable description of the tool.
    let description: String
    /// Description of the tool's parameters.
    let parameters: ToolParameters

    /// Creates a tool description.
    ///
    /// Declared explicitly because the implicit memberwise initializer of a
    /// public struct is only `internal`, which would leave the type
    /// impossible to construct from outside the module.
    ///
    /// - Parameters:
    ///   - type: Tool kind.
    ///   - name: Tool name.
    ///   - description: Human-readable description of the tool.
    ///   - parameters: Description of the tool's parameters.
    public init(type: String, name: String, description: String, parameters: ToolParameters) {
        self.type = type
        self.name = name
        self.description = description
        self.parameters = parameters
    }
}

/// Encodable description of the parameters a `Tool` accepts.
public struct ToolParameters: Encodable {
    /// Kind of the parameter container, passed through as a raw string.
    let type: String
    /// Parameter definitions keyed by parameter name.
    let properties: [String: ToolParameterProperty]
    /// Names listed as required.
    let required: [String]

    /// Creates a parameter description.
    ///
    /// - Parameters:
    ///   - type: Kind of the parameter container.
    ///   - properties: Parameter definitions keyed by parameter name.
    ///   - required: Names listed as required.
    public init(type: String, properties: [String: ToolParameterProperty], required: [String]) {
        self.type = type
        self.properties = properties
        self.required = required
    }
}

/// Encodable description of a single tool parameter.
public struct ToolParameterProperty: Encodable {
    /// Parameter kind, passed through as a raw string.
    let type: String

    /// Creates a parameter property.
    ///
    /// - Parameter type: Parameter kind.
    public init(type: String) {
        self.type = type
    }
}
58 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayAI.swift:
--------------------------------------------------------------------------------
1 | //
2 | // PlayAI.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/15/24.
6 | //
7 |
8 | import Foundation
9 |
/// An actor that manages authentication, WebSocket text-to-speech helpers, and
/// PlayNote requests for the Play.ht / Play.ai HTTP APIs.
///
/// `PlayAI` stores the API key and user ID privately and attaches them to every
/// request it builds. Its public surface is the PlayNote API
/// (`createPlayNote(_:)`, `getPlayNote(id:)`, `createAndAwaitPlayNote(_:pollInterval:statusHandler:)`);
/// the WebSocket helpers are private building blocks.
///
/// - Important: This type is an actor, so access to its state and methods is serialized.
public actor PlayAI {

    /// The API key sent as a bearer token with every request.
    private let apiKey: String

    /// The user ID sent in the user-ID header with every request.
    private let userId: String

    /// The URL for the WebSocket authentication endpoint.
    private let authEndpoint = URL(string: "https://api.play.ht/api/v3/websocket-auth")!

    /// Initializes a new instance of `PlayAI`.
    ///
    /// - Parameters:
    ///   - apiKey: The API key for authenticating with Play.ht services.
    ///   - userId: The user ID associated with the Play.ht account.
    public init(apiKey: String, userId: String) {
        self.apiKey = apiKey
        self.userId = userId
    }

    /// Prints a summary of an HTTP response, in debug builds only.
    ///
    /// Response bodies can carry credentials (the WebSocket auth response
    /// contains an authenticated URL), so nothing is printed in release builds.
    ///
    /// - Parameters:
    ///   - title: Headline printed before the details.
    ///   - data: The raw response body.
    ///   - response: The URL response whose status code is reported (`-1` if it is not an HTTP response).
    private static func logResponse(_ title: String, data: Data, response: URLResponse) {
        #if DEBUG
        print(title)
        print("📝 Status Code: \((response as? HTTPURLResponse)?.statusCode ?? -1)")
        if let responseString = String(data: data, encoding: .utf8) {
            print("📦 Response Data: \(responseString)")
        }
        #endif
    }

    /// Authenticates with the Play.ht API and retrieves a WebSocket URL.
    ///
    /// Sends a POST request to the WebSocket authentication endpoint with the
    /// stored API key and user ID, then decodes the response body as
    /// `PlayAIWebSocketAuthResponse`.
    ///
    /// - Returns: The `websocketURL` value from the decoded response.
    /// - Throws: `PlayAIError.authenticationFailed` for a non-2xx or non-HTTP response,
    ///   or the underlying networking/decoding error.
    private func authenticateAndFetchWebSocketURL() async throws -> String {
        var request = URLRequest(url: authEndpoint)
        request.httpMethod = "POST"
        request.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.addValue(userId, forHTTPHeaderField: "X-User-Id")
        request.addValue("application/json", forHTTPHeaderField: "Content-Type")

        let (data, response) = try await URLSession.shared.data(for: request)
        Self.logResponse("🌐 WebSocket Auth Response:", data: data, response: response)

        guard let httpResponse = response as? HTTPURLResponse,
              (200...299).contains(httpResponse.statusCode) else {
            throw PlayAIError.authenticationFailed
        }

        let authResponse = try JSONDecoder().decode(PlayAIWebSocketAuthResponse.self, from: data)
        return authResponse.websocketURL
    }

    /// Sends a Text-to-Speech (TTS) command to the Play.ht WebSocket API.
    ///
    /// Opens a WebSocket task for `url`, encodes the TTS parameters as a JSON
    /// text message (parameters that are `nil` are left out), and sends it.
    ///
    /// - Parameters:
    ///   - url: The WebSocket URL to connect to.
    ///   - text: The text to be converted to speech.
    ///   - voice: The voice ID or URL to use for synthesis.
    ///   - outputFormat: The desired audio format (default is "mp3").
    ///   - quality: The quality of the audio, or `nil` to omit it.
    ///   - temperature: The generation temperature, or `nil` to omit it.
    ///   - speed: The speech speed, or `nil` to omit it.
    ///   - requestId: A unique identifier for the request, or `nil` to omit it.
    ///
    /// - Returns: The resumed WebSocket task, so the caller can receive the audio
    ///   that the server sends back on the same connection. May be ignored.
    /// - Throws: An error if JSON encoding fails or the message cannot be sent.
    @discardableResult
    private func sendTTSCommand(
        to url: URL,
        text: String,
        voice: String,
        outputFormat: String = "mp3",
        quality: String? = nil,
        temperature: Double? = nil,
        speed: Double? = nil,
        requestId: String? = nil
    ) async throws -> URLSessionWebSocketTask {
        // Typed payload: optionals that are nil are skipped by the synthesized
        // encoder, which avoids hand-filtering a heterogeneous dictionary.
        struct TTSCommand: Encodable {
            let text: String
            let voice: String
            let outputFormat: String
            let quality: String?
            let temperature: Double?
            let speed: Double?
            let requestId: String?

            enum CodingKeys: String, CodingKey {
                case text, voice, quality, temperature, speed
                case outputFormat = "output_format"
                case requestId = "request_id"
            }
        }

        let command = TTSCommand(
            text: text,
            voice: voice,
            outputFormat: outputFormat,
            quality: quality,
            temperature: temperature,
            speed: speed,
            requestId: requestId
        )
        let payload = String(decoding: try JSONEncoder().encode(command), as: UTF8.self)

        // Use the shared session: a session created per call is never
        // invalidated and therefore never released.
        let webSocketTask = URLSession.shared.webSocketTask(with: url)
        webSocketTask.resume()

        do {
            try await webSocketTask.send(.string(payload))
        } catch {
            // Do not leave a half-open connection behind when sending fails.
            webSocketTask.cancel(with: .goingAway, reason: nil)
            throw error
        }
        return webSocketTask
    }

    /// Receives and processes messages from the WebSocket connection using an async stream.
    ///
    /// Binary messages are accumulated into a buffer. A text message whose JSON
    /// object contains a `request_id` key is treated as the end of an audio
    /// stream: the accumulated buffer is yielded and then cleared. Other text
    /// messages are printed and ignored. The stream finishes with the error
    /// thrown by `receive()` when the connection fails or closes.
    ///
    /// - Parameter webSocketTask: The active WebSocket task to read from.
    /// - Returns: A stream that yields one `Data` value per completed audio stream.
    private func receiveMessages(
        from webSocketTask: URLSessionWebSocketTask
    ) -> AsyncThrowingStream<Data, Error> {
        AsyncThrowingStream { continuation in
            let receiveTask = Task {
                var audioBuffer = Data()

                do {
                    while true {
                        let message = try await webSocketTask.receive()
                        switch message {
                        case .string(let text):
                            if let json = try? JSONSerialization.jsonObject(with: Data(text.utf8)) as? [String: Any],
                               json.keys.contains("request_id") {
                                // End of audio stream
                                continuation.yield(audioBuffer)
                                audioBuffer.removeAll()
                            } else {
                                print("Received unexpected text message: \(text)")
                            }
                        case .data(let data):
                            // Received binary audio data
                            audioBuffer.append(data)
                        @unknown default:
                            print("Received unknown message type")
                        }
                    }
                } catch {
                    continuation.finish(throwing: error)
                }
            }

            // Stop the receive loop when the consumer stops iterating.
            continuation.onTermination = { _ in
                receiveTask.cancel()
            }
        }
    }

    /// Observes the state of a WebSocket task and prints when it completes.
    ///
    /// - Parameter webSocketTask: The WebSocket task to monitor.
    /// - Returns: The key-value observation token. The caller must keep it alive
    ///   for as long as events should be reported; the observation stops when
    ///   the token is released.
    private func handleWebSocketEvents(for webSocketTask: URLSessionWebSocketTask) -> NSKeyValueObservation {
        webSocketTask.observe(\.state) { task, _ in
            if task.state == .completed {
                if let error = task.error {
                    print("WebSocket Error: \(error.localizedDescription)")
                } else {
                    print("WebSocket connection closed")
                }
                // Implement reconnection logic if needed
            }
        }
    }

    /// Processes the audio stream from the WebSocket connection.
    ///
    /// Iterates the stream returned by `receiveMessages(from:)` and writes each
    /// completed audio buffer to a timestamped `.mp3` file in the user's
    /// documents directory.
    ///
    /// - Parameter webSocketTask: The active WebSocket task to read from.
    /// - Throws: An error if receiving fails, the documents directory cannot be
    ///   located, or a file cannot be written.
    private func processAudioStream(from webSocketTask: URLSessionWebSocketTask) async throws {
        guard let documentsPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first else {
            throw CocoaError(.fileNoSuchFile)
        }

        for try await audioData in receiveMessages(from: webSocketTask) {
            print("Received complete audio data of size: \(audioData.count) bytes")

            let audioFileURL = documentsPath.appendingPathComponent("audio_\(Date().timeIntervalSince1970).mp3")
            try audioData.write(to: audioFileURL)
            print("Audio saved to: \(audioFileURL.path)")
        }
    }

    /// Creates a new PlayNote using a source file URL.
    ///
    /// Sends a multipart form POST with the request's source URL, synthesis
    /// style, and voice fields. Generation is asynchronous; poll
    /// `getPlayNote(id:)` to find out when it completes.
    ///
    /// - Parameter request: The PlayNote request configuration.
    /// - Returns: A PlayNoteResponse containing the creation status and details.
    /// - Throws: `PlayAIError.invalidResponse` for a non-HTTP response,
    ///   `PlayAIError.activeGenerationExists` for status 403,
    ///   `PlayAIError.serverError` for any other non-2xx status,
    ///   or the underlying networking/decoding error.
    public func createPlayNote(_ request: PlayNoteRequest) async throws -> PlayNoteResponse {
        let endpoint = URL(string: "https://api.play.ai/api/v1/playnotes")!
        var urlRequest = URLRequest(url: endpoint)
        urlRequest.httpMethod = "POST"
        urlRequest.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        urlRequest.addValue(userId, forHTTPHeaderField: "X-USER-ID")

        // Create form data
        let boundary = UUID().uuidString
        var formData = Data()

        urlRequest.addValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")

        // Appends one text field as a multipart part.
        func appendFormField(named name: String, value: String) {
            formData.append("--\(boundary)\r\n")
            formData.append("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n")
            formData.append("\(value)\r\n")
        }

        appendFormField(named: "sourceFileUrl", value: request.sourceFileUrl.absoluteString)
        appendFormField(named: "synthesisStyle", value: request.synthesisStyle.rawValue)
        appendFormField(named: "voice1", value: request.voice1.id)
        appendFormField(named: "voice1Name", value: request.voice1.name)
        appendFormField(named: "voice1Gender", value: request.voice1.gender)

        // The second voice is optional.
        if let voice2 = request.voice2 {
            appendFormField(named: "voice2", value: voice2.id)
            appendFormField(named: "voice2Name", value: voice2.name)
            appendFormField(named: "voice2Gender", value: voice2.gender)
        }

        // Closing boundary
        formData.append("--\(boundary)--\r\n")

        urlRequest.httpBody = formData

        let (data, response) = try await URLSession.shared.data(for: urlRequest)
        Self.logResponse("🎵 Create PlayNote Response:", data: data, response: response)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw PlayAIError.invalidResponse
        }

        if httpResponse.statusCode == 403 {
            throw PlayAIError.activeGenerationExists
        }

        guard (200...299).contains(httpResponse.statusCode) else {
            if let errorResponse = try? JSONDecoder().decode(ErrorResponse.self, from: data) {
                throw PlayAIError.serverError(message: errorResponse.errorMessage)
            }
            throw PlayAIError.serverError(message: "Unknown error occurred")
        }

        return try JSONDecoder.playDateDecoder.decode(PlayNoteResponse.self, from: data)
    }

    /// Gets the status and details of a PlayNote.
    ///
    /// - Parameter id: The PlayNoteID to retrieve.
    /// - Returns: A PlayNoteResponse containing the current status and details.
    /// - Throws: `URLError(.badURL)` if the identifier cannot form a valid URL,
    ///   `PlayAIError.invalidResponse` for a non-2xx or non-HTTP response,
    ///   or the underlying networking/decoding error.
    public func getPlayNote(id: PlayNoteID) async throws -> PlayNoteResponse {
        // The identifier comes from the caller, so a malformed value must
        // surface as an error rather than crash on a force unwrap.
        guard let endpoint = URL(string: "https://api.play.ai/api/v1/playnotes/\(id.rawValue)") else {
            throw URLError(.badURL)
        }
        var request = URLRequest(url: endpoint)
        request.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.addValue(userId, forHTTPHeaderField: "X-USER-ID")

        let (data, response) = try await URLSession.shared.data(for: request)
        Self.logResponse("🔍 Get PlayNote Response:", data: data, response: response)

        guard let httpResponse = response as? HTTPURLResponse,
              (200...299).contains(httpResponse.statusCode) else {
            throw PlayAIError.invalidResponse
        }

        return try JSONDecoder.playDateDecoder.decode(PlayNoteResponse.self, from: data)
    }

    /// Creates a PlayNote and polls for its completion status.
    ///
    /// Creates a new PlayNote, then repeatedly fetches its status until it is
    /// completed or failed, waiting `pollInterval` between fetches and reporting
    /// progress through `statusHandler`.
    ///
    /// - Parameters:
    ///   - request: The PlayNote request configuration.
    ///   - pollInterval: How long to wait between status checks. Defaults to 60 seconds.
    ///   - statusHandler: An optional closure to receive status updates during the polling process.
    ///
    /// - Returns: The final PlayNoteResponse containing the completed PlayNote details.
    /// - Throws: `PlayAIError.generationFailed` if generation fails, or any error
    ///   thrown while creating, polling, or sleeping (including cancellation).
    public func createAndAwaitPlayNote(
        _ request: PlayNoteRequest,
        pollInterval: Duration = .seconds(60),
        statusHandler: ((String) -> Void)? = nil
    ) async throws -> PlayNoteResponse {
        // Create the initial PlayNote
        let initialResponse = try await createPlayNote(request)
        let playNoteId = initialResponse.id

        // Poll for completion
        while true {
            do {
                let response = try await getPlayNote(id: PlayNoteID(playNoteId))

                switch response.status {
                case .completed:
                    statusHandler?("PlayNote generation complete!")
                    return response

                case .failed:
                    statusHandler?("PlayNote generation failed.")
                    throw PlayAIError.generationFailed

                case .generating:
                    statusHandler?("PlayNote is still generating...")

                case .none:
                    statusHandler?("PlayNote status unknown.")
                }

                // Wait before every retry, including the unknown-status case;
                // otherwise an unknown status would poll in a tight loop.
                try await Task.sleep(for: pollInterval)
            } catch {
                statusHandler?("Error polling for PlayNote status: \(error.localizedDescription)")
                throw error
            }
        }
    }
}
360 |
/// A decodable error payload made of a message and an error identifier.
private struct ErrorResponse: Decodable {
    /// The text decoded from the `errorMessage` key.
    let errorMessage: String

    /// The identifier decoded from the `errorId` key.
    let errorId: String
}
365 |
private extension Data {
    /// Appends the UTF-8 bytes of `string` to the end of the receiver.
    ///
    /// - Parameter string: The text to encode as UTF-8 and append.
    mutating func append(_ string: String) {
        let encoded = Data(string.utf8)
        append(encoded)
    }
}
373 |
private extension JSONDecoder {
    /// A JSON decoder configured to handle Play API date formats.
    ///
    /// Dates are decoded from ISO 8601 internet date-time strings. A string with
    /// fractional seconds (`2024-10-15T12:34:56.789Z`) is tried first; a string without
    /// them (`2024-10-15T12:34:56Z`) is accepted as a fallback. Any other string fails
    /// with `DecodingError.dataCorrupted`.
    static var playDateDecoder: JSONDecoder {
        let decoder = JSONDecoder()

        decoder.dateDecodingStrategy = .custom { valueDecoder in
            let container = try valueDecoder.singleValueContainer()
            let dateStr = try container.decode(String.self)

            // The formatter is created inside the closure instead of being captured, so the
            // closure does not hold a reference to a formatter shared between decode calls.
            let formatter = ISO8601DateFormatter()

            formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds]
            if let date = formatter.date(from: dateStr) {
                return date
            }

            // Fallback: the same timestamp layout without a fractional-seconds component.
            formatter.formatOptions = [.withInternetDateTime]
            if let date = formatter.date(from: dateStr) {
                return date
            }

            throw DecodingError.dataCorruptedError(
                in: container,
                debugDescription: "Invalid date format: \(dateStr)"
            )
        }

        return decoder
    }
}
404 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayAIError.swift:
--------------------------------------------------------------------------------
1 | //
2 | // PlayAIError.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/15/24.
6 | //
7 |
8 | import Foundation
9 |
/// An enumeration of errors that can occur during Play.ht API operations.
///
/// The type also conforms to `LocalizedError`, so `localizedDescription` yields a readable
/// sentence (including the server-provided text for `serverError`) instead of a generic
/// "operation couldn't be completed" message.
public enum PlayAIError: Error {
    /// Indicates that the authentication request failed.
    case authenticationFailed
    /// Indicates that the source file URL is invalid.
    case invalidSourceFileURL
    /// Indicates that the user already has an active generation.
    case activeGenerationExists
    /// Indicates that the response from the server was invalid.
    case invalidResponse
    /// Indicates that the server returned an error message.
    case serverError(message: String)
    /// Indicates that the PlayNote generation process failed.
    case generationFailed
}

// MARK: - LocalizedError

extension PlayAIError: LocalizedError {
    /// A human-readable description of the error, surfaced through `localizedDescription`.
    public var errorDescription: String? {
        switch self {
        case .authenticationFailed:
            return "Authentication request failed."
        case .invalidSourceFileURL:
            return "The source file URL is invalid."
        case .activeGenerationExists:
            return "An active generation already exists."
        case .invalidResponse:
            return "The server returned an invalid response."
        case .serverError(let message):
            // Keep the server-provided text so callers can show or log it.
            return "Server error: \(message)"
        case .generationFailed:
            return "PlayNote generation failed."
        }
    }
}
25 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayAIWebSocketAuthResponse.swift:
--------------------------------------------------------------------------------
1 | //
2 | // PlayAIWebSocketAuthResponse.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/15/24.
6 | //
7 |
8 | import Foundation
9 |
/// The payload returned by the WebSocket authentication endpoint.
public struct PlayAIWebSocketAuthResponse: Codable {
    /// Maps the Swift property name onto the snake_case key used in the JSON payload.
    enum CodingKeys: String, CodingKey {
        case websocketURL = "websocket_url"
    }

    /// The WebSocket URL to connect to, stored under the `websocket_url` key.
    let websocketURL: String
}
19 |
--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayNote.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 |
/// The input describing a PlayNote to synthesize.
public struct PlayNoteRequest: Sendable {
    /// The URL to the source file.
    public let sourceFileUrl: URL

    /// The synthesis style of the PlayNote.
    public let synthesisStyle: PlayNoteSynthesisStyle

    /// The first voice configuration.
    public let voice1: PlayNoteVoice

    /// The second voice configuration, or `nil` when only one voice is used.
    public let voice2: PlayNoteVoice?

    /// Creates a new PlayNote request.
    ///
    /// - Parameters:
    ///   - sourceFileUrl: The URL to the source file.
    ///   - synthesisStyle: The synthesis style to use.
    ///   - voice1: The first voice configuration.
    ///   - voice2: The optional second voice configuration. Defaults to `nil`.
    public init(
        sourceFileUrl: URL,
        synthesisStyle: PlayNoteSynthesisStyle,
        voice1: PlayNoteVoice,
        voice2: PlayNoteVoice? = nil
    ) {
        self.voice1 = voice1
        self.voice2 = voice2
        self.synthesisStyle = synthesisStyle
        self.sourceFileUrl = sourceFileUrl
    }
}
32 |
/// An enumeration of available Play.ai voices with their configurations.
///
/// The type is `CaseIterable`, so `PlayNoteVoice.allCases` can be used to list every voice.
public enum PlayNoteVoice: Sendable, CaseIterable {
    case angelo, arsenio, cillian, timo, dexter, miles, briggs
    case deedee, nia, inara, constanza, gideon, casper, mitch, ava
    // TODO: add `basil` (display name "Basil", accent "British (Yorkshire)") once the
    // correct manifest UUID is known.

    /// The S3 URL for the voice manifest
    public var id: String {
        "s3://voice-cloning-zero-shot/\(manifestUUID)/original/manifest.json"
    }

    /// The UUID path component that distinguishes this voice's manifest URL.
    private var manifestUUID: String {
        switch self {
        case .angelo: return "baf1ef41-36b6-428c-9bdf-50ba54682bd8"
        case .arsenio: return "65977f5e-a22a-4b36-861b-ecede19bdd65"
        case .cillian: return "1591b954-8760-41a9-bc58-9176a68c5726"
        case .timo: return "677a4ae3-252f-476e-85ce-eeed68e85951"
        case .dexter: return "b27bc13e-996f-4841-b584-4d35801aea98"
        case .miles: return "29dd9a52-bd32-4a6e-bff1-bbb98dcc286a"
        case .briggs: return "71cdb799-1e03-41c6-8a05-f7cd55134b0b"
        case .deedee: return "e040bd1b-f190-4bdb-83f0-75ef85b18f84"
        case .nia: return "831bd330-85c6-4333-b2b4-10c476ea3491"
        case .inara: return "adb83b67-8d75-48ff-ad4d-a0840d231ef1"
        case .constanza: return "b0aca4d7-1738-4848-a80b-307ac44a7298"
        case .gideon: return "5a3a1168-7793-4b2c-8f90-aff2b5232131"
        case .casper: return "1bbc6986-fadf-4bd8-98aa-b86fed0476e9"
        case .mitch: return "c14e50f2-c5e3-47d1-8c45-fa4b67803d19"
        case .ava: return "50381567-ff7b-46d2-bfdc-a9584a85e08d"
        }
    }

    /// The display name of the voice
    public var name: String {
        switch self {
        case .angelo: return "Angelo"
        case .arsenio: return "Arsenio"
        case .cillian: return "Cillian"
        case .timo: return "Timo"
        case .dexter: return "Dexter"
        case .miles: return "Miles"
        case .briggs: return "Briggs"
        case .deedee: return "Deedee"
        case .nia: return "Nia"
        case .inara: return "Inara"
        case .constanza: return "Constanza"
        case .gideon: return "Gideon"
        case .casper: return "Casper"
        case .mitch: return "Mitch"
        case .ava: return "Ava"
        }
    }

    /// The gender of the voice
    public var gender: String {
        // Every case is listed explicitly (no `default`) so that adding a voice is a
        // compile error here until its gender is decided.
        switch self {
        case .deedee, .nia, .inara, .constanza, .ava:
            return "female"
        case .angelo, .arsenio, .cillian, .timo, .dexter, .miles, .briggs,
             .gideon, .casper, .mitch:
            return "male"
        }
    }

    /// The accent of the voice
    public var accent: String {
        switch self {
        case .angelo, .timo, .dexter, .nia, .casper:
            return "US"
        case .arsenio, .miles, .deedee, .inara:
            return "US African American"
        case .cillian:
            return "Irish"
        case .briggs:
            return "US Southern (Oklahoma)"
        case .constanza:
            return "US Latin American"
        case .gideon:
            return "British"
        case .mitch, .ava:
            return "Australian"
        }
    }
}
130 |
/// The synthesis styles a PlayNote can be generated in.
///
/// Each raw value is the string used for the style when it is decoded.
public enum PlayNoteSynthesisStyle: String, Decodable, Sendable {
    /// A podcast-style conversation (raw value `podcast`).
    case podcast
    /// An executive briefing style (raw value `executive-briefing`).
    case executiveBriefing = "executive-briefing"
    /// A children's story style (raw value `childrens-story`).
    case childrensStory = "childrens-story"
    /// A debate style (raw value `debate`).
    case debate
}
142 |
/// The decoded representation of a PlayNote returned by the service.
public struct PlayNoteResponse: Decodable, Sendable {
    /// The current status of the PlayNote.
    public enum PlayNoteStatus: String, Decodable, Sendable {
        case generating
        case completed
        case failed
    }

    // MARK: Identity

    /// The unique ID for the PlayNote.
    public let id: String
    /// The owner's ID.
    public let ownerId: String
    /// The name of the PlayNote.
    public let name: String

    // MARK: Content

    /// The source file URL, when present in the payload.
    public let sourceFileUrl: String?
    /// The generated audio URL, when present in the payload.
    public let audioUrl: String?
    /// The synthesis style used.
    public let synthesisStyle: PlayNoteSynthesisStyle

    // MARK: Progress

    /// The status of the generation, or `nil` when the payload carries none.
    public let status: PlayNoteStatus?
    /// The duration in seconds.
    public let duration: Double?

    // MARK: Timestamps

    /// When the PlayNote was requested.
    public let requestedAt: Date
    /// When the PlayNote was created.
    public let createdAt: Date?
}
--------------------------------------------------------------------------------
/Sources/SakuraKit/Play/PlayNoteID.swift:
--------------------------------------------------------------------------------
/// A type-safe wrapper around the string that identifies a Play.ht note.
///
/// Use `PlayNoteID` wherever a note has to be referenced, for example when fetching,
/// tracking, or managing an individual note, instead of passing a bare `String` around.
///
/// An identifier can be created in three equivalent ways:
/// ```swift
/// // From a string literal
/// let id1: PlayNoteID = "note_123456"
///
/// // From the unlabeled initializer
/// let id2 = PlayNoteID("note_123456")
///
/// // From the raw-value initializer
/// let id3 = PlayNoteID(rawValue: "note_123456")
/// ```
public struct PlayNoteID: Equatable, Hashable, Sendable, RawRepresentable, ExpressibleByStringLiteral {

    // Literal types required by `ExpressibleByStringLiteral` and the protocols it refines.
    public typealias StringLiteralType = String
    public typealias ExtendedGraphemeClusterLiteralType = String
    public typealias UnicodeScalarLiteralType = String

    /// The underlying string of the identifier.
    public let rawValue: String

    /// Creates an identifier from its raw string value.
    ///
    /// - Parameter rawValue: The raw string value to use for the new instance.
    public init(rawValue: String) {
        self.rawValue = rawValue
    }

    /// Creates an identifier from a string without an argument label.
    ///
    /// - Parameter rawValue: The string value representing the note identifier.
    public init(_ rawValue: String) {
        self.init(rawValue: rawValue)
    }

    /// Creates an identifier from a string literal.
    ///
    /// - Parameter value: The string value to use for the new instance.
    public init(stringLiteral value: String) {
        self.init(rawValue: value)
    }
}
49 |
// MARK: - Codable

extension PlayNoteID: Codable {

    /// Decodes an identifier from a single string value.
    ///
    /// - Parameter decoder: The decoder to read data from.
    /// - Throws: An error if the decoder holds no single value or the value is not a string.
    public init(from decoder: Decoder) throws {
        let singleValue = try decoder.singleValueContainer()
        self.init(rawValue: try singleValue.decode(String.self))
    }

    /// Encodes the identifier as a single string value.
    ///
    /// - Parameter encoder: The encoder to write data to.
    /// - Throws: An error if encoding fails.
    public func encode(to encoder: Encoder) throws {
        var singleValue = encoder.singleValueContainer()
        try singleValue.encode(rawValue)
    }
}
70 |
// MARK: - CustomStringConvertible

extension PlayNoteID: CustomStringConvertible {

    /// The identifier's raw string, used as its textual representation.
    public var description: String { rawValue }
}
--------------------------------------------------------------------------------
/Tests/SakuraKitTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // SakuraKitTests.swift
3 | // SakuraKit
4 | //
5 | // Created by Rudrank Riyam on 10/9/24.
6 | //
7 |
8 | import Foundation
9 |
10 | // 🌸
11 |
--------------------------------------------------------------------------------