(repeating: 0, count: Int(algorithm.digestLength))
22 | CCHmacFinal(context, &hmac)
23 |
24 | return Data(hmac).base64URLEncodedString()
25 | }
26 |
/// Hash functions that can back an HMAC computation.
enum Algorithm {
    case MD5, SHA1, SHA224, SHA256, SHA384, SHA512

    /// The CommonCrypto HMAC algorithm identifier matching this case.
    var HMACAlgorithm: CCHmacAlgorithm {
        let identifier: Int
        switch self {
        case .MD5:
            identifier = kCCHmacAlgMD5
        case .SHA1:
            identifier = kCCHmacAlgSHA1
        case .SHA224:
            identifier = kCCHmacAlgSHA224
        case .SHA256:
            identifier = kCCHmacAlgSHA256
        case .SHA384:
            identifier = kCCHmacAlgSHA384
        case .SHA512:
            identifier = kCCHmacAlgSHA512
        }
        return CCHmacAlgorithm(identifier)
    }

    /// The value of the `CC_*_DIGEST_LENGTH` constant matching this case.
    var digestLength: Int {
        let length: Int32
        switch self {
        case .MD5:
            length = CC_MD5_DIGEST_LENGTH
        case .SHA1:
            length = CC_SHA1_DIGEST_LENGTH
        case .SHA224:
            length = CC_SHA224_DIGEST_LENGTH
        case .SHA256:
            length = CC_SHA256_DIGEST_LENGTH
        case .SHA384:
            length = CC_SHA384_DIGEST_LENGTH
        case .SHA512:
            length = CC_SHA512_DIGEST_LENGTH
        }
        return Int(length)
    }
}
56 | }
57 |
58 |
59 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/jwt/Extensions/String.swift:
--------------------------------------------------------------------------------
1 | import Foundation
/// Base64url helpers for `Data`, producing and consuming a base64 variant
/// that is safe to transmit in URL query parameters.
extension Data {

    /// Creates data from a base64url string by converting it back to
    /// standard base64 and decoding that.
    ///
    /// - Parameter string: A base64url encoded string
    init?(base64URLEncoded string: String) {
        let standardBase64 = string.toggleBase64URLSafe(on: false)
        self.init(base64Encoded: standardBase64)
    }

    /// Encodes the data as base64 and converts the result to base64url.
    ///
    /// - Returns: A base64url string that can be passed as a URL query parameter
    func base64URLEncodedString() -> String {
        let standardBase64 = base64EncodedString()
        return standardBase64.toggleBase64URLSafe(on: true)
    }

}

extension String {

    /// The standard base64 encoding of this string's UTF-8 bytes.
    var base64String: String? {
        guard let utf8Data = data(using: .utf8) else { return nil }
        return utf8Data.base64EncodedString()
    }

    /// The base64url encoding of this string's UTF-8 bytes.
    var base64UrlString: String? {
        guard let utf8Data = data(using: .utf8) else { return nil }
        return utf8Data.base64URLEncodedString()
    }

    /// Converts between base64 and base64url representations.
    ///
    /// - Parameter on: `true` to convert base64 into base64url, `false` to convert base64url back into base64
    /// - Returns: if `on`, a base64url string; otherwise a base64 string padded with `=`
    func toggleBase64URLSafe(on: Bool) -> String {
        if on {
            // base64 -> base64url: swap the two URL-hostile characters and drop padding
            return replacingOccurrences(of: "/", with: "_")
                .replacingOccurrences(of: "+", with: "-")
                .replacingOccurrences(of: "=", with: "")
        }

        // base64url -> base64: restore the original alphabet
        var base64 = replacingOccurrences(of: "_", with: "/")
            .replacingOccurrences(of: "-", with: "+")
        // Pad with `=` up to the next multiple of four characters
        let remainder = base64.count % 4
        if remainder != 0 {
            base64.append(String(repeating: "=", count: 4 - remainder))
        }
        return base64
    }

}
57 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/jwt/JWT.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 |
/// A JSON Web Token whose signature is an HMAC computed by `Cryptor`.
///
/// The token is assembled from `header` and `payload` on demand via `token`.
public class JWT {

    /// Creates an empty token.
    ///
    /// - Parameters:
    ///   - alg: Signing algorithm; also written to the `alg` header field.
    ///   - secret: Key passed to the HMAC when `token` is computed.
    public init(alg: JWT.Algorithm = .HS256, secret: String = "") {
        self.alg = alg
        self.secret = secret
        self.header["alg"] = alg.rawValue
        self.header["typ"] = "JWT"
    }

    /// Supported signing algorithms; the raw value is the `alg` header string.
    public enum Algorithm: String {
        case HS256
        case HS384
        case HS512

        /// The `Cryptor` hash algorithm that backs this signing algorithm.
        var forCryptor: Cryptor.Algorithm {
            switch self {
            case .HS256: return .SHA256
            case .HS384: return .SHA384
            case .HS512: return .SHA512
            }
        }
    }
    var alg: Algorithm

    public var header: [String: String] = [:]
    public var payload: [String: Any] = [:]
    public var secret: String

    public var subject: String? { return payload["sub"] as? String }
    public var identifier: String? { return payload["jti"] as? String }
    public var issuer: String? { return payload["iss"] as? String }

    /// The `nbf` claim as a date, or `nil` when absent or not numeric.
    public var notValidBefore: Date? { return date(forClaim: "nbf") }
    /// The `iat` claim as a date, or `nil` when absent or not numeric.
    public var issuedAt: Date? { return date(forClaim: "iat") }
    /// The `exp` claim as a date, or `nil` when absent or not numeric.
    public var expiresAt: Date? { return date(forClaim: "exp") }

    /// `true` when `expiresAt` is now or in the past; `nil` when there is no usable `exp` claim.
    public var isExpired: Bool? {
        guard let expireDate = expiresAt else { return nil }
        return expireDate <= Date()
    }

    /// The signed compact token `header.payload.signature`, or `nil` when
    /// serialization or signing fails (the reason is printed).
    public var token: String? {
        do {
            let headerString = try JSONSerialization.data(withJSONObject: header, options: []).base64URLEncodedString()
            let payloadString = try JSONSerialization.data(withJSONObject: payload, options: []).base64URLEncodedString()

            let rawSign = "\(headerString).\(payloadString)"

            guard let sign = Cryptor.hmac(string: rawSign, algorithm: alg.forCryptor, key: secret) else {
                print("JWT: Can't compute sign.")
                return nil
            }
            return "\(rawSign).\(sign)"
        } catch {
            print(error.localizedDescription)
            return nil
        }
    }

    /// Reads a seconds-since-1970 claim from `payload`.
    ///
    /// A claim stored as a native `Int` (for example
    /// `payload["exp"] = Int(Date().timeIntervalSince1970)`) does not cast to
    /// `TimeInterval` with `as?`, so both representations are handled here.
    private func date(forClaim key: String) -> Date? {
        switch payload[key] {
        case let seconds as TimeInterval:
            return Date(timeIntervalSince1970: seconds)
        case let seconds as Int:
            return Date(timeIntervalSince1970: TimeInterval(seconds))
        default:
            return nil
        }
    }
}
77 |
78 | //MARK: - Token decoding
extension JWT {
    /// Decodes the header and payload of a compact token.
    ///
    /// The signature segment is not verified here, and the resulting instance
    /// has an empty `secret`. Returns `nil` (after printing the reason) when the
    /// token does not have exactly three dot-separated segments, when a segment
    /// is not valid base64url JSON, or when the `alg` header is missing or not
    /// one of `Algorithm`'s cases.
    ///
    /// - Parameter token: A token in `header.payload.signature` form.
    public convenience init?(token: String) {
        // Keep empty segments: dropping them would let inputs such as "a..b.c"
        // (three separators) pass the three-segment check below.
        let elements = token
            .split(separator: ".", omittingEmptySubsequences: false)
            .map { String($0) }
        guard elements.count == 3 else {
            print("JWT: Wrong format!")
            return nil
        }

        guard let headerData = Data(base64URLEncoded: elements[0]),
              let payloadData = Data(base64URLEncoded: elements[1]) else {
            print("JWT: Wrong format!")
            print("Failed to parse header/payload.")
            return nil
        }

        do {
            guard let header = try JSONSerialization.jsonObject(with: headerData, options: []) as? [String: String],
                  let payload = try JSONSerialization.jsonObject(with: payloadData, options: []) as? [String: Any]
            else {
                print("JWT: Failed to parse header/payload.")
                return nil
            }

            guard let algString = header["alg"] else {
                print("JWT: Can't define algorithm.")
                return nil
            }
            guard let alg = Algorithm(rawValue: algString) else {
                print("JWT: Alghoritm doesn't support.")
                return nil
            }

            self.init(alg: alg, secret: "")
            self.header = header
            self.payload = payload

        } catch {
            print(error.localizedDescription)
            return nil
        }
    }
}
122 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/pubmed/PubmedAPIWrapper.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/11/2.
6 | //
7 | import AsyncHTTPClient
8 | import Foundation
9 | import SwiftyJSON
10 | import NIOPosix
11 |
/// Client for the NCBI E-utilities `esearch` endpoint.
struct PubmedAPIWrapper {
    /// Searches PubMed and returns one `PubmedPage` per id in the response's `idlist`.
    ///
    /// - Parameter query: Sent as the `term` query parameter.
    /// - Returns: The matching pages, or an empty array when the HTTP status is not 200.
    /// - Throws: Errors from the HTTP request, body collection, or JSON parsing.
    func search(query: String) async throws -> [PubmedPage] {
        let httpClient = HTTPClient(eventLoopGroupProvider: .shared(ThreadManager.thread))
        defer {
            // it's important to shutdown the httpClient after all requests are done, even if one failed. See: https://github.com/swift-server/async-http-client
            try? httpClient.syncShutdown()
        }

        var components = URLComponents(string: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi")!
        components.queryItems = [
            URLQueryItem(name: "db", value: "pubmed"),
            URLQueryItem(name: "retmode", value: "json"),
            URLQueryItem(name: "term", value: query),
            URLQueryItem(name: "retmax", value: "5"),
            URLQueryItem(name: "usehistory", value: "y"),
        ]
        let endpoint = components.url!.absoluteString
        print(endpoint)
        var request = HTTPClientRequest(url: endpoint)
        request.method = .GET

        let response = try await httpClient.execute(request, timeout: .seconds(30))
        guard response.status == .ok else {
            // handle remote error
            print("http code is not 200.")
            return []
        }

        let body = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
        let searchResult = try JSON(data: Data(body.utf8))["esearchresult"]
        let webenv = searchResult["webenv"].stringValue
        return searchResult["idlist"].arrayValue.map { uid in
            PubmedPage(uid: uid.stringValue, webenv: webenv)
        }
    }

    /// Runs `search(query:)` and fetches every page's content, one page at a time.
    ///
    /// - Returns: One `Document` per page, with empty metadata.
    func load(query: String) async throws -> [Document] {
        var documents: [Document] = []
        for page in try await search(query: query) {
            let text = try await page.content()
            documents.append(Document(page_content: text, metadata: [:]))
        }
        return documents
    }
}
64 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/pubmed/PubmedPage.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/11/3.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import SWXMLHash
11 | import NIOPosix
12 |
/// A single PubMed search hit that can fetch its own abstract.
struct PubmedPage {
    let uid: String
    let webenv: String

    /// Fetches the record from the E-utilities `efetch` endpoint and returns its abstract text.
    ///
    /// The abstract is read from `MedlineCitation/Article`, falling back to
    /// `PubmedBookArticle/BookDocument` when the former is absent.
    ///
    /// - Returns: All `AbstractText` elements joined by newlines; an empty string
    ///   when there are none or when the HTTP status is not 200.
    /// - Throws: Errors from the HTTP request or body collection.
    func content() async throws -> String {
        let eventLoopGroup = ThreadManager.thread
        let httpClient = HTTPClient(eventLoopGroupProvider: .shared(eventLoopGroup))
        defer {
            // it's important to shutdown the httpClient after all requests are done, even if one failed. See: https://github.com/swift-server/async-http-client
            try? httpClient.syncShutdown()
        }

        let baseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        var components = URLComponents(string: baseURL)!
        components.queryItems = [
            URLQueryItem(name: "db", value: "pubmed"),
            URLQueryItem(name: "retmode", value: "xml"),
            URLQueryItem(name: "id", value: self.uid),
            URLQueryItem(name: "webenv", value: self.webenv),
        ]
        print(components.url!.absoluteString)
        var request = HTTPClientRequest(url: components.url!.absoluteString)
        request.method = .GET

        let response = try await httpClient.execute(request, timeout: .seconds(30))
        guard response.status == .ok else {
            // handle remote error
            print("http code is not 200.")
            return ""
        }

        let str = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
        let xml = XMLHash.parse(Data(str.utf8))
        var article = xml["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"]["Article"]
        if article.element == nil {
            article = xml["PubmedArticleSet"]["PubmedBookArticle"]["BookDocument"]
        }

        // compactMap drops nodes without an element instead of force-unwrapping
        // them; an empty list naturally joins to "".
        return article["Abstract"]["AbstractText"].all
            .compactMap { $0.element?.text }
            .joined(separator: "\n")
    }
}
61 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/report/ReportKey.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/9/13.
6 | //
7 |
8 | import Foundation
9 |
/// String constants shared by the reporting code.
/// NOTE(review): presumably used as report types and metadata values — confirm against callers.
struct ReportKey {
    // Step markers
    static let STEP_START_KEY: String = "start"
    static let STEP_END_KEY: String = "end"
    static let STEP_ERROR_KEY: String = "error"

    // String forms of boolean values
    static let TRUE: String = "true"
    static let FALSE: String = "false"
}
18 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/report/TraceManager.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/9/11.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import NIOPosix
11 |
/// A single trace report; encoded as JSON when sent by `TraceManager`.
struct Report: Codable {
    /// Optional name of the reporting application.
    let appDisplayName: String?
    /// Identifier of this report.
    let reportId: String
    /// Kind of report.
    let type: String
    /// Human-readable message.
    let message: String
    /// Additional string key/value pairs.
    let metadata: [String: String]
    /// Timestamp carried with the report.
    let createAt: Date
}
20 |
/// Sends `Report` values to the trace collection endpoint.
struct TraceManager {
    // var reports: [Report] = []
    /// Endpoint that receives reports via HTTP POST.
    /// NOTE(review): hard-coded private-network address — confirm whether this should be configurable.
    static let REPORT_URL = "http://192.168.31.60:8083/rest/agent"
    static var shared: TraceManager = TraceManager()

    /// Forwards the report to `sendServer(report:)`.
    mutating func insertReport(report: Report) async {
        // reports.append(report)
        // TODO: end or error - start time, remove start entry from memory
        await sendServer(report: report)
    }

    /// POSTs the JSON-encoded report to `REPORT_URL`.
    ///
    /// Failures are printed and otherwise ignored; this method never throws.
    func sendServer(report: Report) async {
        // TODO: Http keep alive
        let eventLoopGroup = ThreadManager.thread

        let httpClient = HTTPClient(eventLoopGroupProvider: .shared(eventLoopGroup))
        defer {
            // it's important to shutdown the httpClient after all requests are done, even if one failed. See: https://github.com/swift-server/async-http-client
            try? httpClient.syncShutdown()
        }
        do {
            var request = HTTPClientRequest(url: TraceManager.REPORT_URL)
            request.method = .POST
            request.headers.add(name: "Content-Type", value: "application/json")
            // Plain `try`: an encoding failure is reported by the catch below
            // instead of crashing the process.
            let requestBody = try JSONEncoder().encode(report)
            request.body = .bytes(requestBody)

            let response = try await httpClient.execute(request, timeout: .seconds(30))
            if response.status == .ok {
                // Read the body so the response is fully consumed; its content is unused.
                let _ = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
            } else {
                // handle remote error
                print("http code is not 200.")
            }
        } catch {
            // handle error
            print(error)
        }
    }
}
61 |
extension Bundle {
    /// The `CFBundleExecutable` entry of the bundle's info dictionary, when present and a string.
    var appDisplayName: String? {
        guard let info = infoDictionary else { return nil }
        return info["CFBundleExecutable"] as? String
    }
}
67 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/wikipedia/WikipediaAPIWrapper.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/11/2.
6 | //
7 | import AsyncHTTPClient
8 | import Foundation
9 | import SwiftyJSON
10 | import NIOPosix
11 |
/// Client for the English Wikipedia search API.
struct WikipediaAPIWrapper {
    /// Searches Wikipedia and returns one `WikipediaPage` per search result.
    ///
    /// - Parameter query: Sent as the `srsearch` query parameter.
    /// - Returns: Up to three pages (`srlimit=3`), or an empty array when the HTTP status is not 200.
    /// - Throws: Errors from the HTTP request, body collection, or JSON parsing.
    func search(query: String) async throws -> [WikipediaPage] {
        let eventLoopGroup = ThreadManager.thread
        let httpClient = HTTPClient(eventLoopGroupProvider: .shared(eventLoopGroup))
        defer {
            // it's important to shutdown the httpClient after all requests are done, even if one failed. See: https://github.com/swift-server/async-http-client
            try? httpClient.syncShutdown()
        }

        // HTTPS rather than plain HTTP, so the query is not sent in clear text.
        let baseURL = "https://en.wikipedia.org/w/api.php"
        var components = URLComponents(string: baseURL)!
        components.queryItems = [
            URLQueryItem(name: "srlimit", value: "3"),
            URLQueryItem(name: "list", value: "search"),
            URLQueryItem(name: "srsearch", value: query),
            URLQueryItem(name: "action", value: "query"),
            URLQueryItem(name: "format", value: "json"),
        ]
        var request = HTTPClientRequest(url: components.url!.absoluteString)
        request.method = .GET

        let response = try await httpClient.execute(request, timeout: .seconds(30))
        guard response.status == .ok else {
            // handle remote error
            print("http code is not 200.")
            return []
        }

        let str = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
        let json = try JSON(data: Data(str.utf8))
        return json["query"]["search"].arrayValue.map { wiki in
            WikipediaPage(title: wiki["title"].stringValue, pageid: wiki["pageid"].intValue)
        }
    }

    /// Runs `search(query:)` and fetches every page's content, one page at a time.
    ///
    /// - Returns: One `Document` per page, with empty metadata.
    func load(query: String) async throws -> [Document] {
        let pages = try await self.search(query: query)
        var docs: [Document] = []
        for page in pages {
            let content = try await page.content()
            docs.append(Document(page_content: content, metadata: [:]))
        }
        return docs
    }
}
63 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/wikipedia/WikipediaPage.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/11/3.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import SwiftyJSON
11 | import NIOPosix
12 |
/// A single Wikipedia search hit that can fetch its own extract.
struct WikipediaPage {
    let title: String
    let pageid: Int

    /// Fetches the page by title and returns the `extract` stored under `pageid` in the response.
    ///
    /// - Returns: The extract, or an empty string when it is missing or the HTTP status is not 200.
    /// - Throws: Errors from the HTTP request, body collection, or JSON parsing.
    func content() async throws -> String {
        let eventLoopGroup = ThreadManager.thread
        let httpClient = HTTPClient(eventLoopGroupProvider: .shared(eventLoopGroup))
        defer {
            // it's important to shutdown the httpClient after all requests are done, even if one failed. See: https://github.com/swift-server/async-http-client
            try? httpClient.syncShutdown()
        }

        // HTTPS rather than plain HTTP, so the request is not sent in clear text.
        let baseURL = "https://en.wikipedia.org/w/api.php"
        var components = URLComponents(string: baseURL)!
        components.queryItems = [
            URLQueryItem(name: "prop", value: "extracts|revisions"),
            URLQueryItem(name: "rvprop", value: "ids"),
            URLQueryItem(name: "titles", value: self.title),
            URLQueryItem(name: "action", value: "query"),
            URLQueryItem(name: "format", value: "json"),
        ]
        var request = HTTPClientRequest(url: components.url!.absoluteString)
        request.method = .GET

        let response = try await httpClient.execute(request, timeout: .seconds(30))
        guard response.status == .ok else {
            // handle remote error
            print("http code is not 200.")
            return ""
        }

        let str = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
        let json = try JSON(data: Data(str.utf8))
        return json["query"]["pages"]["\(self.pageid)"]["extract"].stringValue
    }
}
52 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/youtube/Transcript.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/6/30.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import NIOPosix
11 | import SWXMLHash
12 |
/// One transcript track of a video, identified by its download `url`.
struct Transcript {
    let http_client: HTTPClient
    let video_id: String
    let url: String
    let language: String
    let language_code: String
    let is_generated: Bool
    let translation_languages: [[String: String]]
    /// Lookup built from `translation_languages`: `language_code` -> `language`.
    var translation_languages_dict: [String: String]

    init(http_client: HTTPClient, video_id: String, url: String, language: String, language_code: String, is_generated: Bool, translation_languages: [[String : String]]) {
        self.http_client = http_client
        self.video_id = video_id
        self.url = url
        self.language = language
        self.language_code = language_code
        self.is_generated = is_generated
        self.translation_languages = translation_languages

        var languagesByCode: [String: String] = [:]
        for entry in translation_languages {
            // Skip entries without a language code instead of crashing on a force-unwrap.
            guard let code = entry["language_code"] else { continue }
            languagesByCode[code] = entry["language"]
        }
        self.translation_languages_dict = languagesByCode
    }

    /// Returns a transcript that requests a translation of this one.
    ///
    /// The new transcript's URL is this URL with `&tlang=<language_code>` appended.
    /// Its `language` is set to the code itself, it is marked as generated, and it
    /// carries no translation languages.
    func translate(language_code: String) -> Transcript {
        return Transcript(
            http_client: self.http_client,
            video_id: self.video_id,
            // Interpolation builds the same string without going through `String(format:)`.
            url: "\(self.url)&tlang=\(language_code)",
            language: language_code,
            language_code: language_code,
            is_generated: true,
            translation_languages: []
        )
    }

    /// Downloads the transcript and parses it with `_TranscriptParser`.
    ///
    /// - Returns: The parsed entries, or `nil` when the request fails or the
    ///   HTTP status is not 200 (the reason is printed).
    func fetch() async -> [[String: String]]? {
        do {
            var request = HTTPClientRequest(url: self.url)
            request.method = .GET
            request.headers.add(name: "Accept-Language", value: "en-US")

            let response = try await http_client.execute(request, timeout: .seconds(30))
            guard response.status == .ok else {
                // handle remote error
                print("get transcript http code is not 200.")
                return nil
            }
            let plain = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
            return _TranscriptParser().parse(plain_data: plain)
        } catch {
            // handle error
            print(error)
            return nil
        }
    }
}
78 |
79 |
/// Parses transcript XML of the form `<transcript><text start=".." dur="..">..</text>..</transcript>`.
struct _TranscriptParser {
    /// Converts every `<text>` node into a dictionary with `start`, `dur` and `text` keys.
    ///
    /// Nodes without an element or without a `start` attribute are skipped, and a
    /// missing `dur` attribute falls back to `"0.0"`, so incomplete input no longer
    /// crashes on a force-unwrap.
    ///
    /// - Parameter plain_data: The XML document as a string.
    /// - Returns: One dictionary per usable `<text>` node, in document order.
    func parse(plain_data: String) -> [[String: String]] {
        let textNodes = XMLHash.parse(plain_data)["transcript"]["text"].all
        return textNodes.compactMap { node -> [String: String]? in
            guard let element = node.element,
                  let start = element.attribute(by: "start")?.text else {
                return nil
            }
            let dur = element.attribute(by: "dur")?.text ?? "0.0"
            return [
                "start": start,
                "dur": dur,
                "text": element.text,
            ]
        }
    }
}
99 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/youtube/TranscriptListFetcher.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/6/30.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import NIOPosix
11 | import SwiftyJSON
12 |
/// Format string for a video's watch page; `%@` is replaced by the video id.
let WATCH_URL = "https://www.youtube.com/watch?v=%@"

/// Downloads a video's watch page and extracts its caption track list.
struct TranscriptListFetcher {
    let http_client: HTTPClient

    init(http_client: HTTPClient) {
        self.http_client = http_client
    }

    /// Fetches the watch page, extracts the captions JSON and hands it to `TranscriptList.build`.
    func fetch(video_id: String) async -> TranscriptList? {
        let html = await self._fetch_video_html(video_id: video_id)
        let captions_json = await self._extract_captions_json(html: html, video_id: video_id)
        return await TranscriptList.build(
            http_client: self.http_client,
            video_id: video_id,
            captions_json: captions_json
        )
    }

    /// Extracts the `playerCaptionsTracklistRenderer` object embedded in the watch page.
    ///
    /// This is a port of the Python reference implementation. The reference also
    /// distinguishes invalid video ids, reCAPTCHA pages, unavailable videos and
    /// disabled transcripts, and handles the consent-cookie flow; none of that is
    /// ported here, so all of those cases surface as `nil`.
    ///
    /// - Returns: The captions JSON, or `nil` when the page has no `"captions":`
    ///   marker or the text following it is not valid JSON.
    func _extract_captions_json(html: String, video_id: String) async -> JSON? {
        let splitted_html = html.components(separatedBy: "\"captions\":")
        // As in the reference (`len(splitted_html) <= 1`), only the absence of the
        // marker is an error; additional occurrences later in the page are tolerated.
        guard splitted_html.count > 1 else {
            return nil
        }
        let details = splitted_html[1].components(separatedBy: ",\"videoDetails")
        let rawJSON = details[0].replacingOccurrences(of: "\n", with: "")
        // `try?` instead of `try!`: malformed page content yields nil rather than a crash.
        guard let json = try? JSON(data: Data(rawJSON.utf8)) else {
            return nil
        }
        return json["playerCaptionsTracklistRenderer"]
    }

    /// Returns the watch page HTML for the video.
    func _fetch_video_html(video_id: String) async -> String {
        let html = await self._fetch_html(video_id: video_id)
        return html
    }

    /// Performs the GET request for the watch page.
    ///
    /// - Returns: The response body, or a short placeholder string when the
    ///   request fails or the HTTP status is not 200.
    func _fetch_html(video_id: String) async -> String {
        do {
            var request = HTTPClientRequest(url: String(format: WATCH_URL, video_id))
            request.method = .GET
            request.headers.add(name: "Accept-Language", value: "en-US")

            let response = try await http_client.execute(request, timeout: .seconds(30))
            if response.status == .ok {
                return String(buffer: try await response.body.collect(upTo: 1024 * 1024))
            } else {
                // handle remote error
                print("get list http code is not 200.\(response.body)")
                return "Bad requset."
            }
        } catch {
            // handle error
            print(error)
            return "Bad request."
        }
    }
}
104 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/youtube/YoutubeHackClient.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/6/29.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import NIOPosix
11 |
/// Entry points for reading transcripts and metadata of a video.
public struct YoutubeHackClient {

    /// Lists the transcripts of a video by delegating to `TranscriptListFetcher`.
    public static func list_transcripts(video_id: String, httpClient: HTTPClient) async -> TranscriptList? {
        let fetcher = TranscriptListFetcher(http_client: httpClient)
        return await fetcher.fetch(video_id: video_id)
    }

    /// Loads a video's metadata by delegating to `YoutubeInfoFetcher`.
    public static func info(video_id: String, httpClient: HTTPClient) async -> YoutubeInfo? {
        let fetcher = YoutubeInfoFetcher()
        return await fetcher.fetch(http_client: httpClient, video_id: video_id)
    }
}
22 |
--------------------------------------------------------------------------------
/Sources/LangChain/utilities/youtube/YoutubeInfo.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/7/4.
6 | //
7 |
8 | import AsyncHTTPClient
9 | import Foundation
10 | import NIOPosix
11 | import SwiftyJSON
12 |
/// Requests a video's metadata from the `youtubei/v1/player` endpoint.
struct YoutubeInfoFetcher {
    /// POSTs a `YoutubeInfoRequest` for the video and parses the response with `YoutubeInfoParse`.
    ///
    /// - Returns: The parsed info, or `nil` when encoding or the request fails or
    ///   the HTTP status is not 200 (the reason is printed).
    func fetch(http_client: HTTPClient, video_id: String) async -> YoutubeInfo? {
        // NOTE(review): the API key is embedded in the URL — confirm this is intended.
        let url = "https://www.youtube.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"

        let requestBody = YoutubeInfoRequest(
            videoId: video_id,
            context: YoutubeInfoRequestContext(
                client: YoutubeInfoRequestContextClient(clientName: "WEB", clientVersion: "2.20210721.00.00")
            )
        )
        do {
            var request = HTTPClientRequest(url: url)
            request.method = .POST
            request.headers.add(name: "Content-Type", value: "application/json")
            // Plain `try`: an encoding failure is reported by the catch below
            // instead of crashing the process.
            request.body = .bytes(try JSONEncoder().encode(requestBody))

            let response = try await http_client.execute(request, timeout: .seconds(30))
            guard response.status == .ok else {
                // handle remote error
                print("get video info http code is not 200.")
                return nil
            }
            let plain = String(buffer: try await response.body.collect(upTo: 1024 * 1024))
            return YoutubeInfoParse().parse(plain_data: plain)
        } catch {
            // handle error
            print(error)
            return nil
        }
    }

}
/// Innermost part of the request body: the client name and version.
struct YoutubeInfoRequestContextClient: Encodable {
    let clientName: String
    let clientVersion: String
}

/// Wraps the client description under the `client` key.
struct YoutubeInfoRequestContext: Encodable {
    let client: YoutubeInfoRequestContextClient
}

/// Body posted by `YoutubeInfoFetcher`: the video id plus the request context.
struct YoutubeInfoRequest: Encodable {
    let videoId: String
    let context: YoutubeInfoRequestContext
}
52 |
/// Metadata of a video as extracted by `YoutubeInfoParse`.
public struct YoutubeInfo {
    /// The video title.
    public let title: String
    /// The video's short description.
    public let description: String
    /// URL of the selected thumbnail.
    public let thumbnail: String
}
58 |
/// Extracts `YoutubeInfo` from a player response.
struct YoutubeInfoParse {
    /// Reads title, short description and a thumbnail URL from the `videoDetails` object.
    ///
    /// The fourth thumbnail is used when at least four exist, otherwise the first.
    /// Missing fields become empty strings. A body that is not valid JSON also
    /// yields an all-empty `YoutubeInfo` instead of crashing on `try!`.
    ///
    /// - Parameter plain_data: The response body as a string.
    func parse(plain_data: String) -> YoutubeInfo {
        let json = (try? JSON(data: Data(plain_data.utf8))) ?? JSON.null
        let detail = json["videoDetails"]
        let thumbnails = detail["thumbnail"]["thumbnails"]
        let thumbnailIndex = thumbnails.count >= 4 ? 3 : 0
        return YoutubeInfo(
            title: detail["title"].stringValue,
            description: detail["shortDescription"].stringValue,
            thumbnail: thumbnails[thumbnailIndex]["url"].stringValue
        )
    }
}
75 |
--------------------------------------------------------------------------------
/Sources/LangChain/vectorstores/SimilaritySearchKit.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/11/18.
6 | //
7 |
8 | import Foundation
9 |
10 | #if os(macOS) || os(iOS) || os(visionOS)
11 | import SimilaritySearchKit
12 | import CryptoKit
13 |
/// Adapts a LangChain `Embeddings` instance to `EmbeddingsProtocol`.
///
/// The tokenizer and model members are placeholders: `_T` returns empty
/// results and `_M` has no members.
private struct LangChainEmbeddingBridge: EmbeddingsProtocol {

    var tokenizer: _T?

    var model: _M?

    /// Empty placeholder model type.
    class _M {
    }

    /// Placeholder tokenizer that produces no tokens and an empty string.
    class _T: TokenizerProtocol {
        func tokenize(text: String) -> [String] {
            return []
        }

        func detokenize(tokens: [String]) -> String {
            return ""
        }
    }

    let embeddings: Embeddings

    /// Embeds the sentence with `embeddings.embedQuery`, printing a warning with
    /// the first 100 characters of the sentence when the result is empty.
    func encode(sentence: String) async -> [Float]? {
        let vector = await embeddings.embedQuery(text: sentence)
        if vector.isEmpty {
            print("⚠️\(sentence.prefix(100))")
        }
        return vector
    }
}
/// `VectorStore` backed by a `SimilarityIndex` that uses the dot-product metric.
public class SimilaritySearchKit: VectorStore {
    let vs: SimilarityIndex

    /// Creates the store.
    ///
    /// - Parameters:
    ///   - embeddings: Used to embed every text and query.
    ///   - autoLoad: When `true` on macOS 13 / iOS 16 or later, tries to load a
    ///     previously saved index; load failures are ignored.
    public init(embeddings: Embeddings, autoLoad: Bool = false) {
        let bridge = LangChainEmbeddingBridge(embeddings: embeddings)
        self.vs = SimilarityIndex(model: bridge, metric: DotProduct())
        if #available(macOS 13.0, iOS 16.0, *), autoLoad {
            _ = try? vs.loadIndex()
        }
    }

    /// Returns the top `k` index matches for the query as `MatchedModel` values.
    override func similaritySearch(query: String, k: Int) async -> [MatchedModel] {
        let hits = await vs.search(query, top: k)
        return hits.map { hit in
            MatchedModel(content: hit.text, similarity: hit.score, metadata: hit.metadata)
        }
    }

    /// Adds the text to the index, using the SHA-256 hex digest of the text as its id.
    override func addText(text: String, metadata: [String: String]) async {
        let id = sha256(str: text)
        await vs.addItem(id: id, text: text, metadata: metadata)
    }

    /// Saves the index; failures are ignored.
    @available(iOS 16.0, *)
    @available(macOS 13.0, *)
    public func writeToFile() {
        _ = try? vs.saveIndex()
    }

    /// Removes the item whose id is the given digest.
    override func removeText(sha256: String) async {
        vs.removeItem(id: sha256)
    }

    /// Returns the lowercase hex SHA-256 digest of the string's UTF-8 bytes.
    func sha256(str: String) -> String {
        let digest = SHA256.hash(data: Data(str.utf8))
        return digest.map { String(format: "%02x", $0) }.joined()
    }
}
90 | #endif
91 |
--------------------------------------------------------------------------------
/Sources/LangChain/vectorstores/Supabase.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/6/12.
6 | //
7 |
8 | import Foundation
9 | import Supabase
10 |
/// Parameters passed to the `match_documents` RPC.
struct SearchVectorParams: Codable {
    /// Embedding of the search query.
    let query_embedding: [Float]
    /// Number of matches requested.
    let match_count: Int
}
/// Row shape inserted into the `documents` table.
struct DocModel: Codable {
    /// The document text.
    let content: String?
    /// Embedding of `content`.
    let embedding: [Float]
    /// Additional string key/value pairs.
    let metadata: [String: String]
}
20 |
21 | /// `VectorStore` backed by the Supabase `documents` table and the `match_documents` RPC.
22 | public class Supabase: VectorStore {
23 |     let client: SupabaseClient
24 |     let embeddings: Embeddings
25 |
26 |     /// Builds the client from `SUPABASE_URL` / `SUPABASE_KEY` in `LC.loadEnv()`; traps with a message if they are unusable.
27 |     public init(embeddings: Embeddings) {
28 |         self.embeddings = embeddings
29 |         let env = LC.loadEnv()
30 |         guard let urlString = env["SUPABASE_URL"], let url = URL(string: urlString), let key = env["SUPABASE_KEY"] else {
31 |             fatalError("SUPABASE_URL (a valid URL) and SUPABASE_KEY must both be provided by LC.loadEnv()")
32 |         }
33 |         client = SupabaseClient(supabaseURL: url, supabaseKey: key)
34 |     }
35 |
36 |     /// Embeds `query`, calls `match_documents` with `match_count: k`, and returns the decoded rows (`[]` after logging on error).
37 |     public override func similaritySearch(query: String, k: Int) async -> [MatchedModel] {
38 |         let queryEmbedding = await embeddings.embedQuery(text: query)
39 |         let params = SearchVectorParams(query_embedding: queryEmbedding, match_count: k)
40 |         do {
41 |             // The rows returned by the RPC are decoded directly into `MatchedModel` values.
42 |             let response: [MatchedModel] = try await client.database.rpc(fn: "match_documents", params: params).execute().value
43 |             return response
44 |         } catch {
45 |             print("### RPC Error: \(error)")
46 |             return []
47 |         }
48 |     }
49 |
50 |     /// Embeds `text` and inserts it with `metadata` into `documents`; errors are logged, not thrown.
51 |     public override func addText(text: String, metadata: [String: String]) async {
52 |         let embedding = await embeddings.embedQuery(text: text)
53 |         let row = DocModel(content: text, embedding: embedding, metadata: metadata)
54 |         // `returning: .representation` asks for the inserted row back; `.single()` requests a single value.
55 |         let query = client.database.from("documents").insert(values: row, returning: .representation).single()
56 |         do {
57 |             // NOTE(review): the response is decoded as `String`; confirm that matches what the insert returns.
58 |             let _: String = try await query.execute().value
59 |         } catch {
60 |             print("### Insert Error: \(error)")
61 |         }
62 |     }
63 | }
63 |
--------------------------------------------------------------------------------
/Sources/LangChain/vectorstores/VectorStore.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by 顾艳华 on 2023/6/14.
6 | //
7 |
8 | import Foundation
9 |
10 | public struct MatchedModel: Codable { // One similarity-search hit returned by a `VectorStore`.
11 |     public let content: String?             // matched text, if the store returned one
12 |     public let similarity: Float            // score reported by the store for this hit
13 |     public let metadata: [String: String]   // metadata stored alongside the text
14 | }
15 | /// Base class for vector stores; the storage hooks below are empty defaults meant to be overridden.
16 | public class VectorStore {
17 |     /// Stores `text` together with `metadata`. Does nothing in the base class.
18 |     func addText(text: String, metadata: [String: String]) async {}
19 |
20 |     /// Removes the entry identified by `sha256`. Does nothing in the base class.
21 |     func removeText(sha256: String) async {}
22 |
23 |     /// Searches for `query` with result count `k`. Always returns `[]` in the base class.
24 |     func similaritySearch(query: String, k: Int) async -> [MatchedModel] { [] }
25 |
26 |     /// Adds every document, in order, through `addText(text:metadata:)`.
27 |     func add_documents(documents: [Document]) async {
28 |         for doc in documents {
29 |             await addText(text: doc.page_content, metadata: doc.metadata)
30 |         }
31 |     }
32 |
33 |     /// Removes every id, in order, through `removeText(sha256:)`.
34 |     func remove_documents(sha256s: [String]) async {
35 |         for digest in sha256s {
36 |             await removeText(sha256: digest)
37 |         }
38 |     }
39 |
40 |     // Python version of `add_documents`, kept for reference:
41 |     // def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
42 |     //     """Run more documents through the embeddings and add to the vectorstore.
43 |     //     Args:
44 |     //         documents (List[Document]): Documents to add to the vectorstore.
45 |     //     Returns: List[str]: List of IDs of the added texts.
46 |     //     """
47 |     //     # TODO: Handle the case where the user doesn't provide ids on the Collection
48 |     //     texts = [doc.page_content for doc in documents]
49 |     //     metadatas = [doc.metadata for doc in documents]
50 |     //     return self.add_texts(texts, metadatas, **kwargs)
51 | }
52 |
--------------------------------------------------------------------------------
/Sources/LangChain/vectorstores/supabase/supabase.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 | -- Create a table to store your documents (the Swift Supabase store inserts its rows into "documents")
4 | create table documents (
5 | id bigserial primary key,
6 | content text, -- corresponds to Document.page_content
7 | embedding vector(1536), -- 1536 works for OpenAI embeddings, change if needed (keep match_documents in sync)
8 | metadata jsonb -- the Swift side sends a [String: String] dictionary
9 | );
10 |
11 | -- Create a function to search for documents: returns the match_count rows with the smallest distance to query_embedding
12 | create function match_documents(query_embedding vector(1536), match_count int) -- dimension mirrors documents.embedding
13 | returns table(id bigint, content text, metadata jsonb, similarity float)
14 | language plpgsql
15 | as $$
16 | #variable_conflict use_column
17 | begin
18 | return query
19 | select
20 | id,
21 | content,
22 | metadata,
23 | 1 - (documents.embedding <=> query_embedding) as similarity -- similarity = 1 - distance (presumably pgvector cosine distance; confirm)
24 | from documents
25 | order by documents.embedding <=> query_embedding -- ascending distance, i.e. closest rows first
26 | limit match_count;
27 | end;
28 | $$
29 | ;
30 |
--------------------------------------------------------------------------------
/techstack.md:
--------------------------------------------------------------------------------
1 |
26 |
27 |
28 | # Tech Stack File
29 |  [buhe/langchain-swift](https://github.com/buhe/langchain-swift)
30 |
31 | |4
Tools used|12/14/23
Report generated|
32 | |------|------|
33 |
34 |
35 | ##
Languages (2)
36 |
37 |
38 |
39 |
40 | SQL
41 |
42 |
43 | |
44 |
45 |
46 |
47 |
48 | Swift
49 |
50 |
51 | |
52 |
53 |
54 |
55 |
56 | ##
DevOps (2)
57 |
58 |
59 |
60 |
61 | Git
62 |
63 |
64 | |
65 |
66 |
67 |
68 |
69 | GitHub Actions
70 |
71 |
72 | |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 | Generated via [Stack File](https://github.com/marketplace/stack-file)
81 |
--------------------------------------------------------------------------------
/techstack.yml:
--------------------------------------------------------------------------------
1 | repo_name: buhe/langchain-swift
2 | report_id: 64713165d26cff75e4a785647213e3da
3 | repo_type: Public
4 | timestamp: '2023-12-14T09:27:20+00:00'
5 | requested_by: buhe
6 | provider: github
7 | branch: main
8 | detected_tools_count: 4
9 | tools:
10 | - name: SQL
11 | description: It is a domain-specific language used in programming
12 | website_url: https://en.wikipedia.org/wiki/SQL
13 | open_source: true
14 | hosted_saas: false
15 | category: Languages & Frameworks
16 | sub_category: Languages
17 | image_url: https://img.stackshare.io/service/2271/default_068d33483bba6b81ee13fbd4dc7aab9780896a54.png
18 | detection_source: Sources/LangChain/vectorstores/supabase/supabase.sql
19 | last_updated_by: buhe
20 | last_updated_on: 2023-06-14 09:04:42.000000000 Z
21 | - name: Swift
22 | description: 'An innovative new programming language for Cocoa and Cocoa Touch. '
23 | website_url: https://developer.apple.com/swift/
24 | license: Apache-2.0
25 | open_source: true
26 | hosted_saas: false
27 | category: Languages & Frameworks
28 | sub_category: Languages
29 | image_url: https://img.stackshare.io/service/1009/tuHsaI2U.png
30 | detection_source: Repo Metadata
31 | - name: Git
32 | description: Fast, scalable, distributed revision control system
33 | website_url: http://git-scm.com/
34 | open_source: true
35 | hosted_saas: false
36 | category: Build, Test, Deploy
37 | sub_category: Version Control System
38 | image_url: https://img.stackshare.io/service/1046/git.png
39 | detection_source: Repo Metadata
40 | - name: GitHub Actions
41 | description: Automate your workflow from idea to production
42 | website_url: https://github.com/features/actions
43 | open_source: false
44 | hosted_saas: true
45 | category: Build, Test, Deploy
46 | sub_category: Continuous Integration
47 | image_url: https://img.stackshare.io/service/11563/actions.png
48 | detection_source: ".github/workflows/swift.yml"
49 | last_updated_by: buhe
50 | last_updated_on: 2023-11-18 07:17:21.000000000 Z
51 |
--------------------------------------------------------------------------------