├── .gitignore
├── .spi.yml
├── LICENSE
├── Package.resolved
├── Package.swift
├── README.md
├── Sources
    ├── HCBacktrace
    │   ├── Interface
    │   │   ├── Backtrace.swift
    │   │   ├── Block.swift
    │   │   └── Macros.swift
    │   └── Macros
    │   │   ├── AlwaysAssert.swift
    │   │   ├── Plugin.swift
    │   │   └── RecordCaller.swift
    ├── HCMatrixBench
    │   └── Entrypoint.swift
    ├── HCTestUtils
    │   ├── Assertions.swift
    │   ├── BackendTests.swift
    │   ├── Resources
    │   │   ├── conv2d.json.gz
    │   │   └── conv2d_test_gen.py
    │   └── Unsafe.swift
    ├── Honeycrisp
    │   ├── Backend.swift
    │   ├── BackendCPU.swift
    │   ├── BackendCoreML.swift
    │   ├── BackendMPS.swift
    │   ├── BackendWrapper.swift
    │   ├── BinaryOps.swift
    │   ├── BitwiseOps.swift
    │   ├── Broadcast.swift
    │   ├── Checkpoint.swift
    │   ├── Clamp.swift
    │   ├── Comparison.swift
    │   ├── ConcatSplit.swift
    │   ├── Conv.swift
    │   ├── Cumulative.swift
    │   ├── DataUtil.swift
    │   ├── Debug.swift
    │   ├── Elemwise.swift
    │   ├── FusedOps.swift
    │   ├── Honeycrisp.docc
    │   │   ├── abs_1.md
    │   │   ├── acos_1.md
    │   │   ├── add_1.md
    │   │   ├── all_1.md
    │   │   ├── argmax_1.md
    │   │   ├── argmin_1.md
    │   │   ├── argsort_1.md
    │   │   ├── article_0.md
    │   │   ├── article_1.md
    │   │   ├── article_2.md
    │   │   ├── asin_1.md
    │   │   ├── atan_1.md
    │   │   ├── backwardHandle_backward.md
    │   │   ├── backward_1.md
    │   │   ├── batchedMatmul_1.md
    │   │   ├── bools_1.md
    │   │   ├── broadcast_1.md
    │   │   ├── broadcast_2.md
    │   │   ├── cast_1.md
    │   │   ├── cast_2.md
    │   │   ├── ceil_1.md
    │   │   ├── checkNaN_1.md
    │   │   ├── checkpoint.md
    │   │   ├── chunk_1.md
    │   │   ├── clamp_1.md
    │   │   ├── conv1DKernelGrad_1.md
    │   │   ├── conv1DTranspose_1.md
    │   │   ├── conv1D_1.md
    │   │   ├── conv2DKernelGrad_1.md
    │   │   ├── conv2DTranspose_1.md
    │   │   ├── conv2D_1.md
    │   │   ├── copyToArray_1.md
    │   │   ├── cos_1.md
    │   │   ├── cumulativeProd_1.md
    │   │   ├── cumulativeSum_1.md
    │   │   ├── elemwise_1.md
    │   │   ├── erf_1.md
    │   │   ├── exp_1.md
    │   │   ├── expand_1.md
    │   │   ├── expand_2.md
    │   │   ├── flatten_1.md
    │   │   ├── floats_1.md
    │   │   ├── floor_1.md
    │   │   ├── gather_1.md
    │   │   ├── gelu_1.md
    │   │   ├── int64s_1.md
    │   │   ├── ints_1.md
    │   │   ├── item_1.md
    │   │   ├── logSoftmaxGrad_1.md
    │   │   ├── logSoftmax_1.md
    │   │   ├── log_1.md
    │   │   ├── matmul_1.md
    │   │   ├── maxPool2D_1.md
    │   │   ├── max_1.md
    │   │   ├── meanAndVariance_1.md
    │   │   ├── mean_1.md
    │   │   ├── min_1.md
    │   │   ├── move_1.md
    │   │   ├── mul_1.md
    │   │   ├── noGrad_1.md
    │   │   ├── normalize_1.md
    │   │   ├── onGrad_1.md
    │   │   ├── outer_1.md
    │   │   ├── pow_1.md
    │   │   ├── printing_1.md
    │   │   ├── printing_2.md
    │   │   ├── prod_1.md
    │   │   ├── qr_1.md
    │   │   ├── relu_1.md
    │   │   ├── repeating_1.md
    │   │   ├── reshape_1.md
    │   │   ├── reshape_2.md
    │   │   ├── round_1.md
    │   │   ├── rsqrt_1.md
    │   │   ├── saveForBackward_1.md
    │   │   ├── scatter_1.md
    │   │   ├── sigmoid_1.md
    │   │   ├── silu_1.md
    │   │   ├── sin_1.md
    │   │   ├── softmax_1.md
    │   │   ├── some_1.md
    │   │   ├── split_1.md
    │   │   ├── sqrt_1.md
    │   │   ├── squeeze_1.md
    │   │   ├── state_1.md
    │   │   ├── sum_1.md
    │   │   ├── svd_1.md
    │   │   ├── swap_1.md
    │   │   ├── t_1.md
    │   │   ├── tan_1.md
    │   │   ├── tanh_1.md
    │   │   ├── tril_1.md
    │   │   ├── triu_1.md
    │   │   ├── unsqueeze_1.md
    │   │   ├── variance_1.md
    │   │   ├── wait_1.md
    │   │   ├── when_1.md
    │   │   ├── when_2.md
    │   │   ├── when_3.md
    │   │   └── when_4.md
    │   ├── Index.swift
    │   ├── Linalg.swift
    │   ├── OneHot.swift
    │   ├── Optimizer.swift
    │   ├── PrimitiveExtensions.swift
    │   ├── Random.swift
    │   ├── ReduceRepeat.swift
    │   ├── Resources
    │   │   └── kernels.txt
    │   ├── ScatterGather.swift
    │   ├── Softmax.swift
    │   ├── Sort.swift
    │   ├── State.swift
    │   ├── Tensor.swift
    │   ├── TensorElement.swift
    │   ├── Trainable.swift
    │   └── When.swift
    └── HoneycrispMacros
    │   └── TracebackMacro.swift
└── Tests
    └── HoneycrispTests
        └── HoneycrispTests.swift


/.gitignore:
--------------------------------------------------------------------------------
1 | .build
2 | .vscode


--------------------------------------------------------------------------------
/.spi.yml:
--------------------------------------------------------------------------------
1 | version: 1
2 | builder:
3 |   configs:
4 |     - documentation_targets: [Honeycrisp, HCBacktrace]
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Alex Nichol
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Package.resolved:
--------------------------------------------------------------------------------
 1 | {
 2 |   "originHash" : "518e63f8f0f77c5835469c73ad89289852ef5e4de39e3fe29a884a61e740c6cf",
 3 |   "pins" : [
 4 |     {
 5 |       "identity" : "coreml-builder",
 6 |       "kind" : "remoteSourceControl",
 7 |       "location" : "https://github.com/unixpickle/coreml-builder.git",
 8 |       "state" : {
 9 |         "revision" : "87c1e9b9baedcd7cfedfa0bef6fc43843c96b9c3",
10 |         "version" : "0.2.0"
11 |       }
12 |     },
13 |     {
14 |       "identity" : "gzipswift",
15 |       "kind" : "remoteSourceControl",
16 |       "location" : "https://github.com/1024jp/GzipSwift",
17 |       "state" : {
18 |         "revision" : "731037f6cc2be2ec01562f6597c1d0aa3fe6fd05",
19 |         "version" : "6.0.1"
20 |       }
21 |     },
22 |     {
23 |       "identity" : "swift-argument-parser",
24 |       "kind" : "remoteSourceControl",
25 |       "location" : "https://github.com/apple/swift-argument-parser",
26 |       "state" : {
27 |         "revision" : "41982a3656a71c768319979febd796c6fd111d5c",
28 |         "version" : "1.5.0"
29 |       }
30 |     },
31 |     {
32 |       "identity" : "swift-docc-plugin",
33 |       "kind" : "remoteSourceControl",
34 |       "location" : "https://github.com/swiftlang/swift-docc-plugin",
35 |       "state" : {
36 |         "revision" : "85e4bb4e1cd62cec64a4b8e769dcefdf0c5b9d64",
37 |         "version" : "1.4.3"
38 |       }
39 |     },
40 |     {
41 |       "identity" : "swift-docc-symbolkit",
42 |       "kind" : "remoteSourceControl",
43 |       "location" : "https://github.com/swiftlang/swift-docc-symbolkit",
44 |       "state" : {
45 |         "revision" : "b45d1f2ed151d057b54504d653e0da5552844e34",
46 |         "version" : "1.0.0"
47 |       }
48 |     },
49 |     {
50 |       "identity" : "swift-protobuf",
51 |       "kind" : "remoteSourceControl",
52 |       "location" : "https://github.com/apple/swift-protobuf",
53 |       "state" : {
54 |         "revision" : "ebc7251dd5b37f627c93698e4374084d98409633",
55 |         "version" : "1.28.2"
56 |       }
57 |     },
58 |     {
59 |       "identity" : "swift-syntax",
60 |       "kind" : "remoteSourceControl",
61 |       "location" : "https://github.com/swiftlang/swift-syntax.git",
62 |       "state" : {
63 |         "revision" : "0687f71944021d616d34d922343dcef086855920",
64 |         "version" : "600.0.1"
65 |       }
66 |     }
67 |   ],
68 |   "version" : 3
69 | }
70 | 


--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
 1 | // swift-tools-version: 5.10
 2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
 3 | 
 4 | import CompilerPluginSupport
 5 | import PackageDescription
 6 | 
 7 | let package = Package(
 8 |   name: "Honeycrisp",
 9 |   platforms: [
10 |     .macOS(.v13),
11 |     .iOS(.v16),
12 |   ],
13 |   products: [
14 |     .library(
15 |       name: "HCBacktrace",
16 |       targets: ["HCBacktrace"]),
17 |     .library(
18 |       name: "Honeycrisp",
19 |       targets: ["Honeycrisp"]),
20 |     .library(
21 |       name: "HCTestUtils",
22 |       targets: ["HCTestUtils"]),
23 |   ],
24 |   dependencies: [
25 |     .package(
26 |       url: "https://github.com/swiftlang/swift-syntax.git", from: "600.0.0"),
27 |     .package(url: "https://github.com/1024jp/GzipSwift", "6.0.0"..<"6.1.0"),
28 |     .package(url: "https://github.com/unixpickle/coreml-builder.git", from: "0.2.0"),
29 |     .package(url: "https://github.com/swiftlang/swift-docc-plugin", from: "1.1.0"),
30 |   ],
31 |   targets: [
32 |     .macro(
33 |       name: "HCBacktraceMacros",
34 |       dependencies: [
35 |         .product(name: "SwiftSyntaxMacros", package: "swift-syntax"),
36 |         .product(name: "SwiftCompilerPlugin", package: "swift-syntax"),
37 |       ],
38 |       path: "Sources/HCBacktrace/Macros"),
39 |     .target(
40 |       name: "HCBacktrace",
41 |       dependencies: ["HCBacktraceMacros"],
42 |       path: "Sources/HCBacktrace/Interface"),
43 |     .target(
44 |       name: "Honeycrisp",
45 |       dependencies: [
46 |         .product(name: "CoreMLBuilder", package: "coreml-builder"),
47 |         "HCBacktrace",
48 |       ],
49 |       resources: [
50 |         .process("Resources")
51 |       ],
52 |       cSettings: [
53 |         .define("ACCELERATE_NEW_LAPACK")
54 |       ]),
55 |     .target(
56 |       name: "HCTestUtils",
57 |       dependencies: [
58 |         "Honeycrisp", .product(name: "Gzip", package: "GzipSwift"),
59 |       ],
60 |       resources: [
61 |         .process("Resources")
62 |       ]),
63 |     .testTarget(
64 |       name: "HoneycrispTests",
65 |       dependencies: ["Honeycrisp", "HCTestUtils"]),
66 |     .executableTarget(
67 |       name: "HCMatrixBench",
68 |       dependencies: ["Honeycrisp"]),
69 |   ],
70 |   swiftLanguageVersions: [.v5, .version("6")]
71 | )
72 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 🍎 Honeycrisp
  2 | 
  3 | [Documentation](https://swiftpackageindex.com/unixpickle/honeycrisp/main/documentation/honeycrisp) | [Examples](https://github.com/unixpickle/honeycrisp-examples) | [Package Index Page](https://swiftpackageindex.com/unixpickle/honeycrisp)
  4 | 
  5 | Automatic differentiation and neural networks, all in Swift for Apple Silicon.
  6 | 
  7 | # Examples
  8 | 
  9 | See [honeycrisp-examples](https://github.com/unixpickle/honeycrisp-examples) for in-depth usage examples.
 10 | 
 11 | ## Tensors and operations
 12 | 
 13 | We can create a tensor with a shape and data:
 14 | 
 15 | ```swift
 16 | // Create a 2x3 matrix:
 17 | //   1  2  3
 18 | //   4  5  6
 19 | let matrix = Tensor(data: [1, 2, 3, 4, 5, 6], shape: [2, 3])
 20 | ```
 21 | 
 22 | You can perform operations on tensors to get new tensors:
 23 | 
 24 | ```swift
 25 | let matrixPlus1 = matrix + 1
 26 | let sumOfColumns = matrix.sum(axis: 1)
 27 | ```
 28 | 
 29 | We can get data out of a tensor using `try await`:
 30 | 
 31 | ```swift
 32 | // Print a [Float] from the raw data of the matrix
 33 | print("data as floats:", try await matrix.floats())
 34 | ```
 35 | 
 36 | ## Swappable backends
 37 | 
 38 | We can run different parts of our computation in different backends:
 39 | 
 40 | ```swift
 41 | Backend.defaultBackend = try MPSBackend()  // Use the GPU by default
 42 | let cpuBackend = CPUBackend()
 43 | let x = Tensor(rand: [128, 128])  // Performed on GPU
 44 | let y = cpuBackend.use { x + 3 }  // Performed on CPU
 45 | let z = y - 3  // Performed on GPU
 46 | ```
 47 | 
 48 | ## Full training example
 49 | 
 50 | Here is a full example of training a dummy model on a simple objective.
 51 | 
 52 | First, we define a model with trainable parameters and sub-modules:
 53 | 
 54 | ```swift
 55 | class MyModel: Trainable {
 56 |   // A parameter which will be tracked automatically
 57 |   @Param var someParameter: Tensor
 58 | 
 59 |   // We can also give parameters custom names
 60 |   @Param(name: "customName") var otherParameter: Tensor
 61 | 
 62 |   // A sub-module whose parameters will also be tracked
 63 |   @Child var someLayer: Linear
 64 | 
 65 |   override init() {
 66 |     super.init()
 67 |     self.someParameter = Tensor(data: [1.0])
 68 |     self.otherParameter = Tensor(zeros: [7])
 69 |     self.someLayer = Linear(inCount: 3, outCount: 7)
 70 |   }
 71 | 
 72 |   func callAsFunction(_ input: Tensor) -> Tensor {
 73 |     // We can access properties like normal
 74 |     return someParameter * (someLayer(input) + otherParameter)
 75 |   }
 76 | }
 77 | ```
 78 | 
 79 | The training loop looks like this:
 80 | 
 81 | ```swift
 82 | @main
 83 | struct Main {
 84 |   static func main() async {
 85 |     do {
 86 |       let model = MyModel()
 87 |       let optimizer = Adam(model.parameters, lr: 0.1)
 88 | 
 89 |       // We will use the same input batch for all iterations.
 90 |       let batchSize = 8
 91 |       let input = Tensor(rand: [batchSize, 3])
 92 | 
 93 |       for i in 0..<10 {
 94 |         let output = model(input)
 95 |         let loss = output.pow(2).mean()
 96 |         loss.backward()
 97 |         optimizer.step()
 98 |         optimizer.clearGrads()
 99 |         print("step \(i): loss=\(try await loss.item())")
100 |       }
101 |     } catch {
102 |       print("FATAL ERROR: \(error)")
103 |     }
104 |   }
105 | }
106 | ```
107 | 


--------------------------------------------------------------------------------
/Sources/HCBacktrace/Interface/Backtrace.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | 
  3 | /// A location in compiled code, which represents part of a backtrace.
  4 | public struct CodeLocation: CustomStringConvertible, Sendable {
  5 |   public let function: StaticString
  6 |   public let file: StaticString
  7 |   public let line: UInt
  8 | 
  9 |   public init(function: StaticString, file: StaticString, line: UInt) {
 10 |     self.function = function
 11 |     self.file = file
 12 |     self.line = line
 13 |   }
 14 | 
 15 |   public var description: String {
 16 |     "\(function) at \(file):\(line)"
 17 |   }
 18 | }
 19 | 
 20 | /// An error with a recorded backtrace.
 21 | public struct TracedError: Error, CustomStringConvertible {
 22 |   public let wrapped: Error
 23 |   public let trace: [CodeLocation]
 24 | 
 25 |   public init(wrapped: Error, trace: [CodeLocation]) {
 26 |     self.wrapped = wrapped
 27 |     self.trace = trace
 28 |   }
 29 | 
 30 |   public var description: String {
 31 |     "Error at:\n\n\(formatCalls(trace))\n\nError: \(wrapped)"
 32 |   }
 33 | }
 34 | 
 35 | /// The interface for manually recording and accessing the currently recorded call stack.
 36 | ///
 37 | /// A `Task`-local backtrace is recorded by using ``Backtrace/record(_:function:file:line:)-1mlbd``
 38 | /// or similar methods.
 39 | ///
 40 | /// Typically, methods on `Backtrace` do not need to be called explicitly.
 41 | /// Rather, ``recordCaller()`` can be used on class methods to automatically record callers
 42 | /// and wrap errors with traces.
 43 | ///
 44 | /// Functions like ``tracedFatalError(_:function:file:line:)`` can handle printing stack traces
 45 | /// for fatal errors.
 46 | final public class Backtrace {
 47 |   /// The current, `Task`-local backtrace.
 48 |   ///
 49 |   /// Recorded calls are appended to the end of this list.
 50 |   @TaskLocal public static var current: [CodeLocation] = []
 51 | 
 52 |   internal static func wrapErrors<T>(_ fn: () throws -> T) rethrows -> T {
 53 |     do {
 54 |       return try fn()
 55 |     } catch {
 56 |       if (error as? TracedError) != nil {
 57 |         throw error
 58 |       } else {
 59 |         throw TracedError(wrapped: error, trace: current)
 60 |       }
 61 |     }
 62 |   }
 63 | 
 64 |   internal static func wrapErrors<T>(_ fn: () async throws -> T) async rethrows -> T {
 65 |     do {
 66 |       return try await fn()
 67 |     } catch {
 68 |       if (error as? TracedError) != nil {
 69 |         throw error
 70 |       } else {
 71 |         throw TracedError(wrapped: error, trace: current)
 72 |       }
 73 |     }
 74 |   }
 75 | 
 76 |   /// Record the provided function, file, and line, followed by the caller, in the backtrace
 77 |   /// during the provided block.
 78 |   public static func record<T>(
 79 |     function: StaticString,
 80 |     file: StaticString,
 81 |     line: UInt,
 82 |     _ fn: () throws -> T,
 83 |     function1: StaticString = #function,
 84 |     file1: StaticString = #filePath,
 85 |     line1: UInt = #line
 86 |   ) rethrows -> T {
 87 |     try $current.withValue(
 88 |       current + [
 89 |         CodeLocation(function: function, file: file, line: line),
 90 |         CodeLocation(function: function1, file: file1, line: line1),
 91 |       ]
 92 |     ) {
 93 |       try wrapErrors {
 94 |         try fn()
 95 |       }
 96 |     }
 97 |   }
 98 | 
 99 |   /// Record the provided function, file, and line, followed by the caller, in the backtrace
100 |   /// during the provided block.
101 |   public static func record<T>(
102 |     function: StaticString,
103 |     file: StaticString,
104 |     line: UInt,
105 |     _ fn: () async throws -> T,
106 |     function1: StaticString = #function,
107 |     file1: StaticString = #filePath,
108 |     line1: UInt = #line
109 |   ) async rethrows -> T {
110 |     try await $current.withValue(
111 |       current + [
112 |         CodeLocation(function: function, file: file, line: line),
113 |         CodeLocation(function: function1, file: file1, line: line1),
114 |       ]
115 |     ) {
116 |       try await wrapErrors {
117 |         try await fn()
118 |       }
119 |     }
120 |   }
121 | 
122 |   /// Record the caller in the backtrace during the provided block.
123 |   public static func record<T>(
124 |     _ fn: () throws -> T,
125 |     function: StaticString = #function, file: StaticString = #filePath, line: UInt = #line
126 |   ) rethrows -> T {
127 |     try $current.withValue(current + [CodeLocation(function: function, file: file, line: line)]) {
128 |       try wrapErrors {
129 |         try fn()
130 |       }
131 |     }
132 |   }
133 | 
134 |   /// Record the caller in the backtrace during the provided block.
135 |   public static func record<T>(
136 |     _ fn: () async throws -> T, function: StaticString = #function, file: StaticString = #filePath,
137 |     line: UInt = #line
138 |   ) async rethrows -> T {
139 |     try await $current.withValue(
140 |       current + [CodeLocation(function: function, file: file, line: line)]
141 |     ) {
142 |       try await wrapErrors {
143 |         try await fn()
144 |       }
145 |     }
146 |   }
147 | 
148 |   /// Override the `Task`-local backtrace within the block.
149 |   public static func override<T>(_ callers: [CodeLocation], _ fn: () async throws -> T)
150 |     async rethrows -> T
151 |   {
152 |     try await $current.withValue(callers) {
153 |       try await wrapErrors {
154 |         try await fn()
155 |       }
156 |     }
157 |   }
158 | 
159 |   /// Equivalent to ``Backtrace/current``.
160 |   public static func trace() -> [CodeLocation] {
161 |     current
162 |   }
163 | 
164 |   /// Format the current (or provided) backtrace as a string.
165 |   public static func format(_ trace: [CodeLocation]? = nil) -> String {
166 |     formatCalls(trace ?? current)
167 |   }
168 | }
169 | 
170 | private func formatCalls(_ locs: [CodeLocation]) -> String {
171 |   locs.enumerated().map { x in
172 |     let (i, loc) = (x.0, x.1)
173 |     return String(repeating: "  ", count: i) + "\(loc)"
174 |   }.joined(separator: "\n")
175 | }
176 | 
177 | /// If a condition is false, abort program execution and print the current backtrace with an
178 | /// optional error message.
179 | @inlinable
180 | @inline(__always)
181 | public func alwaysAssert(
182 |   _ condition: Bool, _ message: String? = nil, function: StaticString = #function,
183 |   file: StaticString = #filePath, line: UInt = #line
184 | ) {
185 |   if !condition {
186 |     Backtrace.record(
187 |       {
188 |         let msg =
189 |           if let message = message {
190 |             "\n\nTraceback:\n\n\(Backtrace.format())\n\nAssertion failure: \(message)"
191 |           } else {
192 |             "\n\nTraceback:\n\n\(Backtrace.format())\n\nAssertion failure"
193 |           }
194 |         fatalError(msg)
195 |       },
196 |       function: function,
197 |       file: file,
198 |       line: line
199 |     )
200 |   }
201 | }
202 | 
203 | /// Abort program execution and print the current backtrace with an optional error message.
204 | public func tracedFatalError(
205 |   _ message: String? = nil, function: StaticString = #function,
206 |   file: StaticString = #filePath, line: UInt = #line
207 | ) -> Never {
208 |   Backtrace.record(
209 |     {
210 |       let msg =
211 |         if let message = message {
212 |           "\n\nTraceback:\n\n\(Backtrace.format())\n\nFatal error: \(message)"
213 |         } else {
214 |           "\n\nTraceback:\n\n\(Backtrace.format())\n\n"
215 |         }
216 |       fatalError(msg)
217 |     }, function: function, file: file, line: line)
218 | 
219 |   // This will not be reached.
220 |   fatalError()
221 | }
222 | 


--------------------------------------------------------------------------------
/Sources/HCBacktrace/Interface/Block.swift:
--------------------------------------------------------------------------------
 1 | public struct TracedBlock<T>: Sendable {
 2 | 
 3 |   let fn: @Sendable () async throws -> T
 4 | 
 5 |   public init(_ fn: @escaping @Sendable () async throws -> T) {
 6 |     self.fn = fn
 7 |   }
 8 | 
 9 |   @recordCaller
10 |   private func _callAsFunction() async throws -> T {
11 |     try await fn()
12 |   }
13 | 
14 | }
15 | 


--------------------------------------------------------------------------------
/Sources/HCBacktrace/Interface/Macros.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// Make a new copy of the method which records the caller in the current ``Backtrace``.
 4 | ///
 5 | /// In general, a Swift function can only record its caller by adding extra arguments with
 6 | /// defaults like `#filePath`, `#line`, and `#function`. It would be quite tedious to manually add
 7 | /// these arguments to every method of a class.
 8 | ///
 9 | /// Instead, we can use a macro to make this easier. For example, we might start with this class:
10 | ///
11 | /// ```swift
12 | /// class Foo {
13 | ///   public func printNumbers(x: Int) {
14 | ///     for i in 0..<x {
15 | ///       print(i)
16 | ///     }
17 | ///   }
18 | /// }
19 | /// ```
20 | ///
21 | /// If we'd like to automatically record callers to our `printNumbers` method, we can wrap it
22 | /// with a `@recordCaller`. We will do this by renaming `printNumbers` to `_printNumbers`,
23 | /// because the `@recordCaller` macro cannot _replace_ our method, it can only create a new one
24 | /// (so it will do so by removing the underscore). We will also make this method `private`, since
25 | /// we do not want to expose the *original* (untracked) version of the method.
26 | ///
27 | /// ```swift
28 | /// class Foo {
29 | ///   @recordCaller
30 | ///   private func _printNumbers(x: Int) {
31 | ///     for i in 0..<x {
32 | ///       print(i)
33 | ///     }
34 | ///   }
35 | /// }
36 | /// ```
37 | ///
38 | /// This will expand to the equivalent code:
39 | ///
40 | /// ```swift
41 | /// class Foo {
42 | ///   private func _printNumbers(x: Int) {
43 | ///     for i in 0..<x {
44 | ///       print(i)
45 | ///     }
46 | ///   }
47 | ///
48 | ///   public func printNumbers(x: Int, function: StaticString = #function, file: StaticString = #filePath, line: UInt = #line) {
49 | ///     Backtrace.record(function: function, file: file, line: line) {
50 | ///       for i in 0..<x {
51 | ///         print(i)
52 | ///       }
53 | ///     }
54 | ///   }
55 | /// }
56 | /// ```
57 | @attached(peer, names: arbitrary)
58 | public macro recordCaller() = #externalMacro(module: "HCBacktraceMacros", type: "RecordCaller")
59 | 
60 | /// Assert a condition with a backtrace on failure.
61 | @freestanding(expression)
62 | public macro alwaysAssert(_ value: Bool, _ message: String? = nil) =
63 |   #externalMacro(module: "HCBacktraceMacros", type: "AlwaysAssert")
64 | 


--------------------------------------------------------------------------------
/Sources/HCBacktrace/Macros/AlwaysAssert.swift:
--------------------------------------------------------------------------------
 1 | import SwiftSyntax
 2 | import SwiftSyntaxBuilder
 3 | import SwiftSyntaxMacros
 4 | 
 5 | public enum AlwaysAssert: ExpressionMacro {
 6 |   public static func expansion(
 7 |     of node: some FreestandingMacroExpansionSyntax,
 8 |     in context: some MacroExpansionContext
 9 |   ) throws -> ExprSyntax {
10 |     if ![1, 2].contains(node.arguments.count) {
11 |       throw MacroError.message(
12 |         "alwaysAssert expects 1 or 2 arguments, but got \(node.arguments.count)")
13 |     }
14 |     let condition = node.arguments.first!.expression
15 |     let message = node.arguments.count == 2 ? node.arguments.last!.expression : ExprSyntax("nil")
16 |     if condition.description.trimmingCharacters(in: .whitespacesAndNewlines) == "false" {
17 |       return "alwaysAssert(false, \(message))"
18 |     }
19 |     return "(!(\(condition)) ? alwaysAssert(false, \(message)) : ())"
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/Sources/HCBacktrace/Macros/Plugin.swift:
--------------------------------------------------------------------------------
1 | import SwiftCompilerPlugin
2 | import SwiftSyntaxMacros
3 | 
4 | @main
5 | struct MyProjectMacros: CompilerPlugin {
6 |   var providingMacros: [Macro.Type] = [AlwaysAssert.self, RecordCaller.self]
7 | }
8 | 


--------------------------------------------------------------------------------
/Sources/HCBacktrace/Macros/RecordCaller.swift:
--------------------------------------------------------------------------------
  1 | import SwiftSyntax
  2 | import SwiftSyntaxMacros
  3 | 
  4 | public enum MacroError: Error {
  5 |   case message(String)
  6 | }
  7 | 
  8 | public struct RecordCaller: PeerMacro {
  9 |   public static func expansion<
 10 |     Context: MacroExpansionContext,
 11 |     Declaration: DeclSyntaxProtocol
 12 |   >(
 13 |     of node: AttributeSyntax,
 14 |     providingPeersOf declaration: Declaration,
 15 |     in context: Context
 16 |   ) throws -> [DeclSyntax] {
 17 | 
 18 |     guard var newNode = declaration.as(FunctionDeclSyntax.self) else {
 19 |       throw MacroError.message("@addAsync only works on functions")
 20 |     }
 21 | 
 22 |     let oldParams = Array(newNode.signature.parameterClause.parameters)
 23 | 
 24 |     let isAsync = newNode.signature.effectSpecifiers?.asyncSpecifier != nil
 25 |     let isThrows = newNode.signature.effectSpecifiers?.throwsClause?.throwsSpecifier != nil
 26 |     func maybeTryAndAwait(_ call: FunctionCallExprSyntax) -> ExprSyntax {
 27 |       var callExpr = ExprSyntax(call)
 28 |       if isAsync {
 29 |         callExpr = ExprSyntax(AwaitExprSyntax(expression: callExpr))
 30 |       }
 31 |       if isThrows {
 32 |         callExpr = ExprSyntax(TryExprSyntax(expression: callExpr))
 33 |       }
 34 |       return callExpr
 35 |     }
 36 | 
 37 |     let callInnerFunction = maybeTryAndAwait(
 38 |       FunctionCallExprSyntax(
 39 |         calledExpression: ExprSyntax("\(newNode.name)"),
 40 |         leftParen: TokenSyntax.leftParenToken(),
 41 |         arguments: LabeledExprListSyntax(
 42 |           oldParams.map { param in
 43 |             let noName = param.firstName.tokenKind == .wildcard
 44 |             let isInOut =
 45 |               if case .attributedType(let t) = param.type.as(TypeSyntaxEnum.self) {
 46 |                 t.specifiers.contains(where: { spec in
 47 |                   if let x = spec.as(SimpleTypeSpecifierSyntax.self)?.specifier {
 48 |                     x.tokenKind == .keyword(SwiftSyntax.Keyword.inout)
 49 |                   } else {
 50 |                     false
 51 |                   }
 52 |                 })
 53 |               } else {
 54 |                 false
 55 |               }
 56 |             let arg = DeclReferenceExprSyntax(baseName: param.secondName ?? param.firstName)
 57 |             return LabeledExprSyntax(
 58 |               label: noName ? nil : param.firstName, colon: noName ? nil : param.colon,
 59 |               expression: isInOut ? ExprSyntax(InOutExprSyntax(expression: arg)) : ExprSyntax(arg),
 60 |               trailingComma: param.trailingComma)
 61 |           }),
 62 |         rightParen: TokenSyntax.rightParenToken()
 63 |       ))
 64 | 
 65 |     var newParams = oldParams
 66 | 
 67 |     if var oldLastParam = newParams.popLast() {
 68 |       oldLastParam.trailingComma = .commaToken()
 69 |       newParams.append(oldLastParam)
 70 |     }
 71 | 
 72 |     // Add parameters that capture caller position.
 73 |     let argFunc = TokenSyntax.identifier("function")
 74 |     let argFile = TokenSyntax.identifier("file")
 75 |     let argLine = TokenSyntax.identifier("line")
 76 |     newParams.append(
 77 |       FunctionParameterSyntax(
 78 |         firstName: argFunc,
 79 |         type: TypeSyntax(IdentifierTypeSyntax(name: TokenSyntax.identifier("StaticString"))),
 80 |         defaultValue: InitializerClauseSyntax(value: ExprSyntax("#function")),
 81 |         trailingComma: TokenSyntax.commaToken())
 82 |     )
 83 |     newParams.append(
 84 |       FunctionParameterSyntax(
 85 |         firstName: argFile,
 86 |         type: TypeSyntax(IdentifierTypeSyntax(name: TokenSyntax.identifier("StaticString"))),
 87 |         defaultValue: InitializerClauseSyntax(value: ExprSyntax("#filePath")),
 88 |         trailingComma: TokenSyntax.commaToken())
 89 |     )
 90 |     newParams.append(
 91 |       FunctionParameterSyntax(
 92 |         firstName: argLine,
 93 |         type: TypeSyntax(IdentifierTypeSyntax(name: TokenSyntax.identifier("UInt"))),
 94 |         defaultValue: InitializerClauseSyntax(value: ExprSyntax("#line")))
 95 |     )
 96 | 
 97 |     var newSignature = newNode.signature
 98 |     var newParamClause = newNode.signature.parameterClause
 99 |     newParamClause.parameters = FunctionParameterListSyntax(newParams)
100 |     newSignature.parameterClause = newParamClause
101 |     newNode.signature = newSignature
102 | 
103 |     // Optionally remove leading _ from name.
104 |     let rawName = "\(newNode.name)"
105 |     let newName =
106 |       if let underscoreIndex = rawName.firstIndex(of: "_") {
107 |         String(rawName[rawName.index(after: underscoreIndex)...])
108 |       } else {
109 |         rawName
110 |       }
111 |     newNode.name = TokenSyntax(stringLiteral: newName)
112 | 
113 |     // Make method public if it is marked as private.
114 |     var newMods = [DeclModifierSyntax]()
115 |     for mod in newNode.modifiers {
116 |       if mod.name.tokenKind == .keyword(SwiftSyntax.Keyword.private) {
117 |         newMods.append(
118 |           DeclModifierSyntax(
119 |             name: TokenSyntax(
120 |               .keyword(SwiftSyntax.Keyword.public), leadingTrivia: mod.name.leadingTrivia,
121 |               trailingTrivia: mod.name.trailingTrivia, presence: .present),
122 |             trailingTrivia: mod.trailingTrivia))
123 |       } else {
124 |         newMods.append(mod)
125 |       }
126 |     }
127 |     newNode.modifiers = DeclModifierListSyntax(newMods)
128 | 
129 |     // Call the original function inside `Backtrace.record`.
130 |     let callExpr: ExprSyntax = maybeTryAndAwait(
131 |       FunctionCallExprSyntax(
132 |         calledExpression: ExprSyntax("Backtrace.record"),
133 |         leftParen: TokenSyntax.leftParenToken(),
134 |         arguments: LabeledExprListSyntax([
135 |           LabeledExprSyntax(
136 |             expression: ClosureExprSyntax(
137 |               statements: CodeBlockItemListSyntax([
138 |                 CodeBlockItemSyntax(item: .expr(callInnerFunction))
139 |               ])),
140 |             trailingComma: TokenSyntax.commaToken()),
141 |           LabeledExprSyntax(
142 |             label: TokenSyntax.identifier("function"), colon: TokenSyntax.colonToken(),
143 |             expression: DeclReferenceExprSyntax(baseName: argFunc),
144 |             trailingComma: TokenSyntax.commaToken()),
145 |           LabeledExprSyntax(
146 |             label: TokenSyntax.identifier("file"), colon: TokenSyntax.colonToken(),
147 |             expression: DeclReferenceExprSyntax(baseName: argFile),
148 |             trailingComma: TokenSyntax.commaToken()),
149 |           LabeledExprSyntax(
150 |             label: TokenSyntax.identifier("line"), colon: TokenSyntax.colonToken(),
151 |             expression: DeclReferenceExprSyntax(baseName: argLine)),
152 |         ]),
153 |         rightParen: TokenSyntax.rightParenToken()
154 |       ))
155 | 
156 |     let codeBlock = CodeBlockItemSyntax(item: .expr(callExpr))
157 | 
158 |     var newBody = newNode.body!
159 |     newBody.statements = CodeBlockItemListSyntax([codeBlock])
160 |     newNode.body = newBody
161 | 
162 |     // Don't accidentally use macro recursively.
163 |     newNode.attributes = AttributeListSyntax([])
164 | 
165 |     // We must add newlines before the new function, but we want to
166 |     // keep any docstrings.
167 |     newNode.leadingTrivia = [.newlines(2)] + newNode.leadingTrivia
168 | 
169 |     return [DeclSyntax(newNode)]
170 |   }
171 | }
172 | 


--------------------------------------------------------------------------------
/Sources/HCMatrixBench/Entrypoint.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import HCBacktrace
 3 | import Honeycrisp
 4 | 
 5 | @main
 6 | struct Main {
 7 |   static func main() async {
 8 |     do {
 9 |       // Backend.defaultBackend = CoreMLBackend(wrapping: try MPSBackend())
10 |       Backend.defaultBackend = try MPSBackend()
11 | 
12 |       for innerSize in [512, 1024, 2048] {
13 |         for outerSize in [512, 1024, 2048, 4096, 16384, 32768] {
14 |           let m1 = Tensor(rand: [outerSize, innerSize], dtype: .float16)
15 |           let m2 = Tensor(rand: [innerSize, innerSize], dtype: .float16)
16 |           func runMatmul() async throws {
17 |             let result = Backtrace.record { m1.t() &* m2 }
18 |             try await result.wait()
19 |           }
20 | 
21 |           try await runMatmul()
22 |           let t1 = DispatchTime.now()
23 |           try await runMatmul()
24 |           let t2 = DispatchTime.now()
25 |           let flops = innerSize * innerSize * outerSize * 2
26 |           let gflops = Float(flops) / Float(t2.uptimeNanoseconds - t1.uptimeNanoseconds)
27 |           print("size \(outerSize) x \(innerSize) x \(innerSize): \(gflops) GFLOP/s")
28 |         }
29 |       }
30 |     } catch {
31 |       print("fatal error: \(error)")
32 |     }
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/Sources/HCTestUtils/Assertions.swift:
--------------------------------------------------------------------------------
 1 | import Gzip
 2 | import XCTest
 3 | 
 4 | @testable import Honeycrisp
 5 | 
 6 | public func assertClose(
 7 |   _ x: Tensor, _ y: Tensor, _ msg: String? = nil, atol: Float = 1e-4, rtol: Float = 1e-4,
 8 |   file: StaticString = #filePath,
 9 |   line: UInt = #line
10 | ) async throws {
11 |   XCTAssertEqual(x.shape, y.shape)
12 |   var allGood = true
13 |   let xData = try await x.floats()
14 |   let yData = try await y.floats()
15 |   for (a, b) in zip(xData, yData) {
16 |     if a.isNaN != b.isNaN || (abs(a - b) > atol && (b == 0 || abs(a / b - 1) > rtol)) {
17 |       allGood = false
18 |     }
19 |   }
20 |   if let msg = msg {
21 |     XCTAssert(
22 |       allGood, "tensors \(xData) and \(yData) are not equal: \(msg)", file: file, line: line)
23 |   } else {
24 |     XCTAssert(allGood, "tensors \(xData) and \(yData) are not equal", file: file, line: line)
25 |   }
26 | }
27 | 
28 | public func assertClose(
29 |   _ x: Tensor, _ yData: [Float], _ msg: String? = nil, atol: Float = 1e-4, rtol: Float = 1e-4,
30 |   file: StaticString = #filePath,
31 |   line: UInt = #line
32 | ) async throws {
33 |   XCTAssertEqual(x.shape.product(), yData.count)
34 |   var allGood = true
35 |   let xData = try await x.floats()
36 |   for (a, b) in zip(xData, yData) {
37 |     if a.isNaN != b.isNaN || (abs(a - b) > atol && (b == 0 || abs(a / b - 1) > rtol)) {
38 |       allGood = false
39 |     }
40 |     if let msg = msg {
41 |       XCTAssert(
42 |         allGood, "tensors \(xData) and \(yData) are not equal: \(msg)", file: file, line: line)
43 |     } else {
44 |       XCTAssert(allGood, "tensors \(xData) and \(yData) are not equal", file: file, line: line)
45 |     }
46 |   }
47 | }
48 | 
49 | public func assertCloseToIdentity(
50 |   _ x: Tensor, _ msg: String? = nil, atol: Float = 1e-4, rtol: Float = 1e-4,
51 |   file: StaticString = #filePath,
52 |   line: UInt = #line
53 | ) async throws {
54 |   XCTAssert(x.shape.count >= 2)
55 |   XCTAssertEqual(x.shape[x.shape.count - 2], x.shape[x.shape.count - 1])
56 |   let eye = Tensor(identity: x.shape.last!, dtype: x.dtype).expand(as: x)
57 |   try await assertClose(x, eye, msg, atol: atol, rtol: rtol)
58 | }
59 | 
60 | public func assertDataEqual(
61 |   _ x: Tensor, _ y: [Float], _ msg: String? = nil, file: StaticString = #filePath,
62 |   line: UInt = #line
63 | ) async throws {
64 |   let data = try await x.floats()
65 |   if let msg = msg {
66 |     XCTAssertEqual(data, y, msg, file: file, line: line)
67 |   } else {
68 |     XCTAssertEqual(data, y, file: file, line: line)
69 |   }
70 | }
71 | 
72 | public func assertDataEqual64(
73 |   _ x: Tensor, _ y: [Int64], _ msg: String? = nil, file: StaticString = #filePath,
74 |   line: UInt = #line
75 | ) async throws {
76 |   let data = try await x.int64s()
77 |   if let msg = msg {
78 |     XCTAssertEqual(data, y, msg, file: file, line: line)
79 |   } else {
80 |     XCTAssertEqual(data, y, file: file, line: line)
81 |   }
82 | }
83 | 
84 | public func assertDataEqual(
85 |   _ x: Tensor, _ y: Tensor, file: StaticString = #filePath, line: UInt = #line
86 | ) async throws {
87 |   let data = try await x.floats()
88 |   let data1 = try await y.floats()
89 |   XCTAssertEqual(data, data1, file: file, line: line)
90 | }
91 | 


--------------------------------------------------------------------------------
/Sources/HCTestUtils/Resources/conv2d.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unixpickle/honeycrisp/0016cad22f163daf8b2cd0b46a1a25ca6a6b6e08/Sources/HCTestUtils/Resources/conv2d.json.gz


--------------------------------------------------------------------------------
/Sources/HCTestUtils/Resources/conv2d_test_gen.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Generates the JSON data for conv2d.json.gz
 3 | 
 4 | Will print the data to stdout; you must gzip it yourself.
 5 | """
 6 | 
 7 | import json
 8 | 
 9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | 
13 | items = []
14 | for image_size in [(12, 11)]:
15 |     for kernel_size in [(1, 1), (3, 2), (3, 3)]:
16 |         for in_channels in [3, 4]:
17 |             for out_channels in [3, 4]:
18 |                 for groups in [1, 2, 3]:
19 |                     if in_channels % groups or out_channels % groups:
20 |                         continue
21 |                     for strides in [(1, 1), (2, 3)]:
22 |                         for dilation in [(1, 1), (3, 4)]:
23 |                             for padding in [(0, 0), (2, 3)]:
24 |                                 kernel = torch.arange(
25 |                                     out_channels
26 |                                     * in_channels
27 |                                     // groups
28 |                                     * kernel_size[0]
29 |                                     * kernel_size[1]
30 |                                 ).reshape(out_channels, in_channels // groups, *kernel_size)
31 |                                 image = -torch.arange(
32 |                                     in_channels * image_size[0] * image_size[1]
33 |                                 ).view(1, in_channels, *image_size)
34 |                                 image_param = nn.Parameter(image.float())
35 |                                 kernel_param = nn.Parameter(kernel.float())
36 |                                 try:
37 |                                     out_tensor = F.conv2d(
38 |                                         image_param,
39 |                                         kernel_param,
40 |                                         stride=strides,
41 |                                         padding=padding,
42 |                                         dilation=dilation,
43 |                                         groups=groups,
44 |                                     )
45 |                                     out_tensor.backward(
46 |                                         (torch.arange(out_tensor.numel()) * 4)
47 |                                         .float()
48 |                                         .reshape(out_tensor.shape)
49 |                                     )
50 |                                     output = out_tensor.int().flatten().tolist()
51 |                                     image_grad = image_param.grad.int().flatten().tolist()
52 |                                     kernel_grad = kernel_param.grad.int().flatten().tolist()
53 |                                 except:
54 |                                     continue
55 |                                 items.append(
56 |                                     dict(
57 |                                         conv=dict(
58 |                                             kernelSize=(*kernel_size, out_channels),
59 |                                             imageSize=(*image_size, in_channels),
60 |                                             stride=strides,
61 |                                             dilation=dilation,
62 |                                             padding=padding,
63 |                                             groups=groups,
64 |                                         ),
65 |                                         outShape=out_tensor.shape,
66 |                                         output=output,
67 |                                         imageGrad=image_grad,
68 |                                         kernelGrad=kernel_grad,
69 |                                     )
70 |                                 )
71 | print(json.dumps(items))
72 | 


--------------------------------------------------------------------------------
/Sources/HCTestUtils/Unsafe.swift:
--------------------------------------------------------------------------------
 1 | import Honeycrisp
 2 | 
 3 | extension Tensor {
 4 |   public func onGradUnsafe(_ x: @escaping (Tensor) -> Void) -> Tensor {
 5 |     struct X: @unchecked Sendable {
 6 |       let x: (Tensor) -> Void
 7 |     }
 8 |     let cb = X(x: x)
 9 |     return self.onGrad { g in cb.x(g) }
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/BackendCoreML.swift:
--------------------------------------------------------------------------------
  1 | @preconcurrency import CoreML
  2 | import CoreMLBuilder
  3 | 
  4 | /// A ``Backend`` which replaces certain operations with CoreML programs,
  5 | /// possibly allowing the use of the Apple Neural Engine (ANE).
  6 | open class CoreMLBackend: BackendWrapper, DataAllocator, @unchecked Sendable {
  7 |   internal struct MatmulKey: Hashable {
  8 |     public let sizeA0: Int
  9 |     public let sizeA1: Int
 10 |     public let sizeB0: Int
 11 |     public let sizeB1: Int
 12 |     public let transA: Bool
 13 |     public let transB: Bool
 14 |     public let isFloat32: Bool
 15 |   }
 16 | 
 17 |   private final class ModelCache: @unchecked Sendable {
 18 |     private let computeUnits: MLComputeUnits
 19 |     private var matmuls: [MatmulKey: MLModel] = [:]
 20 |     private let lock = NSLock()
 21 | 
 22 |     public init(computeUnits: MLComputeUnits) {
 23 |       self.computeUnits = computeUnits
 24 |     }
 25 | 
 26 |     public func matmul(_ key: MatmulKey) async throws -> MLModel {
 27 |       if let model = lock.withLock({ matmuls[key] }) {
 28 |         return model
 29 |       }
 30 | 
 31 |       let matmul = Matmul(
 32 |         xShape: (Int64(key.sizeA0), Int64(key.sizeA1)),
 33 |         yShape: (Int64(key.sizeB0), Int64(key.sizeB1)),
 34 |         transposeX: key.transA,
 35 |         transposeY: key.transB,
 36 |         dtype: key.isFloat32 ? .float32 : .float16
 37 |       )
 38 |       let model = try await matmul.model(computeUnits: computeUnits)
 39 |       lock.withLock {
 40 |         matmuls[key] = model
 41 |       }
 42 |       return model
 43 |     }
 44 |   }
 45 | 
 46 |   private typealias Job = (
 47 |     @Sendable (ModelCache) async throws -> MLModel,
 48 |     @Sendable (Result<MLModel, Error>) -> Void
 49 |   )
 50 |   private let queue: Queue<Job> = Queue()
 51 |   public let computeUnits: MLComputeUnits
 52 | 
 53 |   deinit {
 54 |     queue.close()
 55 |   }
 56 | 
 57 |   public init(
 58 |     wrapping: Backend, threads: Int = 8, computeUnits: MLComputeUnits = .cpuAndNeuralEngine
 59 |   ) {
 60 |     self.computeUnits = computeUnits
 61 |     super.init(wrapping: wrapping)
 62 |     for i in 0..<threads {
 63 |       let thread = Thread { [queue = self.queue] in
 64 |         let cache = ModelCache(computeUnits: computeUnits)
 65 | 
 66 |         // I'm not sure why we need this wrapper type to make this compile
 67 |         // in Swift 6.0.2.
 68 |         struct ResultType: Sendable {
 69 |           let value: Result<MLModel, Error>
 70 |         }
 71 | 
 72 |         while let (buildModel, useModel) = queue.get() {
 73 |           let q1: Queue<ResultType> = .init()
 74 |           Task.detached {
 75 |             do {
 76 |               let model = try await buildModel(cache)
 77 |               q1.put(ResultType(value: .success(model)))
 78 |             } catch {
 79 |               q1.put(ResultType(value: .failure(error)))
 80 |             }
 81 |           }
 82 |           autoreleasepool {
 83 |             useModel(q1.get()!.value)
 84 |           }
 85 |         }
 86 |       }
 87 |       thread.name = "BackendCoreML\(i)"
 88 |       thread.start()
 89 |     }
 90 |   }
 91 | 
 92 |   private func runModel<T: Sendable>(
 93 |     buildModel: @escaping @Sendable (ModelCache) async throws -> MLModel,
 94 |     useModel: @escaping @Sendable (MLModel) throws -> T
 95 |   ) async throws -> T {
 96 |     try await withCheckedThrowingContinuation { continuation in
 97 |       queue.put(
 98 |         (
 99 |           buildModel,
100 |           { result in
101 |             switch result {
102 |             case .success(let model):
103 |               var result: Result<T, Error>?
104 |               do {
105 |                 result = Result.success(try useModel(model))
106 |               } catch {
107 |                 result = Result.failure(error)
108 |               }
109 |               let constResult = result!
110 |               continuation.resume(with: constResult)
111 |             case .failure(let error):
112 |               continuation.resume(throwing: error)
113 |             }
114 |           }
115 |         ))
116 |     }
117 |   }
118 | 
119 |   open func allocate(_ byteCount: Int) async throws -> Tensor.Data {
120 |     if let allocator = wrapped as? DataAllocator {
121 |       try await allocator.allocate(byteCount)
122 |     } else {
123 |       try CPUBackend.CPUData(byteCount: byteCount)
124 |     }
125 |   }
126 | 
127 |   override public func matmul(
128 |     a: Tensor.Data, transA: Bool, b: Tensor.Data, transB: Bool, transOut: Bool, rows: Int,
129 |     inner: Int, cols: Int, dtype: Tensor.DType
130 |   )
131 |     async throws
132 |     -> Tensor.Data
133 |   {
134 |     return try await batchedMatmul(
135 |       matrixCount: 1, a: a, transA: transA, b: b, transB: transB, transOut: transOut, rows: rows,
136 |       inner: inner, cols: cols, dtype: dtype)
137 |   }
138 | 
139 |   override public func batchedMatmul(
140 |     matrixCount: Int, a: Tensor.Data, transA: Bool, b: Tensor.Data, transB: Bool, transOut: Bool,
141 |     rows: Int, inner: Int, cols: Int, dtype: Tensor.DType
142 |   )
143 |     async throws
144 |     -> Tensor.Data
145 |   {
146 |     if dtype != .float32 && dtype != .float16 {
147 |       return try await wrapped.batchedMatmul(
148 |         matrixCount: matrixCount, a: a, transA: transA, b: b, transB: transB, transOut: transOut,
149 |         rows: rows, inner: inner, cols: cols, dtype: dtype)
150 |     } else if transOut {
151 |       return try await batchedMatmul(
152 |         matrixCount: matrixCount, a: b, transA: !transB, b: a, transB: !transA, transOut: false,
153 |         rows: cols, inner: inner,
154 |         cols: rows, dtype: dtype)
155 |     }
156 | 
157 |     let aCount = rows * inner
158 |     let bCount = inner * cols
159 |     let outCount = rows * cols
160 |     let buffer = try await allocate(matrixCount * outCount * dtype.byteSize)
161 | 
162 |     let matmulKey = MatmulKey(
163 |       sizeA0: (transA ? inner : rows), sizeA1: (transA ? rows : inner),
164 |       sizeB0: transB ? cols : inner, sizeB1: transB ? inner : cols, transA: transA,
165 |       transB: transB, isFloat32: dtype == .float32)
166 | 
167 |     try await a.onCPU { aBuf in
168 |       try await b.onCPU { bBuf in
169 |         try await buffer.mutateOnCPU { outBuf in
170 |           struct Ptrs: @unchecked Sendable {
171 |             let aBuf: UnsafeRawPointer
172 |             let bBuf: UnsafeRawPointer
173 |             let outBuf: UnsafeMutableRawPointer
174 |           }
175 |           let ptrs = Ptrs(aBuf: aBuf, bBuf: bBuf, outBuf: outBuf)
176 | 
177 |           // Build up a bunch of tasks that will all use the same buffers.
178 |           var tasks = [Task<(), Error>]()
179 |           for i in 0..<matrixCount {
180 |             let i = i
181 |             // Chunking over the batch dimension to multi-thread the ANE calls
182 |             // tends to improve performance.
183 |             let aChunk = transA ? matmulKey.sizeA0 : min(1024, matmulKey.sizeA0)
184 |             for aIdx in stride(from: 0, to: matmulKey.sizeA0, by: aChunk) {
185 |               let aSize = min(matmulKey.sizeA0 - aIdx, aChunk)
186 |               tasks.append(
187 |                 Task.detached { [self] in
188 |                   let mmKey = MatmulKey(
189 |                     sizeA0: aSize, sizeA1: (transA ? rows : inner),
190 |                     sizeB0: transB ? cols : inner, sizeB1: transB ? inner : cols, transA: transA,
191 |                     transB: transB, isFloat32: dtype == .float32)
192 |                   try await runModel(buildModel: { mc in try await mc.matmul(mmKey) }) { model in
193 |                     let x = try MLMultiArray(
194 |                       dataPointer: UnsafeMutableRawPointer(mutating: ptrs.aBuf).advanced(
195 |                         by: (i * aCount + aIdx * matmulKey.sizeA1) * dtype.byteSize),
196 |                       shape: mpsShape([aSize, matmulKey.sizeA1]),
197 |                       dataType: matmulKey.isFloat32 ? .float32 : .float16,
198 |                       strides: mpsShape([matmulKey.sizeA1, 1])
199 |                     )
200 |                     let y = try MLMultiArray(
201 |                       dataPointer: UnsafeMutableRawPointer(mutating: ptrs.bBuf).advanced(
202 |                         by: i * bCount * dtype.byteSize),
203 |                       shape: mpsShape([matmulKey.sizeB0, matmulKey.sizeB1]),
204 |                       dataType: x.dataType,
205 |                       strides: mpsShape([matmulKey.sizeB1, 1])
206 |                     )
207 |                     let featureProvider: MLFeatureProvider = try MLDictionaryFeatureProvider(
208 |                       dictionary: [
209 |                         "x": MLFeatureValue(multiArray: x),
210 |                         "y": MLFeatureValue(multiArray: y),
211 |                       ])
212 |                     let options = MLPredictionOptions()
213 |                     options.outputBackings = [
214 |                       "output": try MLMultiArray(
215 |                         dataPointer: ptrs.outBuf.advanced(
216 |                           by: (i * outCount + aIdx * cols) * dtype.byteSize),
217 |                         shape: mpsShape([transA ? rows : min(aChunk, rows - aIdx), cols]),
218 |                         dataType: x.dataType,
219 |                         strides: mpsShape([cols, 1]),
220 |                         deallocator: { [b = buffer] _ in _ = b }
221 |                       )
222 |                     ]
223 |                     try model.prediction(from: featureProvider, options: options)
224 |                   }
225 |                 }
226 |               )
227 |             }
228 |           }
229 | 
230 |           // Even if one task fails, another task might still be using the pointers
231 |           // and we need to wait for it before returning.
232 |           var firstError: Error? = nil
233 |           for task in tasks {
234 |             do {
235 |               let _ = try await task.value
236 |             } catch {
237 |               if firstError == nil {
238 |                 firstError = error
239 |               }
240 |             }
241 |           }
242 |           if let e = firstError {
243 |             throw e
244 |           }
245 |         }
246 |       }
247 |     }
248 | 
249 |     return buffer
250 |   }
251 | 
252 | }
253 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/BinaryOps.swift:
--------------------------------------------------------------------------------
 1 | /// A binary operator which can be performed on numeric ``Tensor``s.
 2 | public enum NumericBinaryOp: Sendable {
 3 |   case add
 4 |   case mul
 5 |   case sub
 6 |   case div
 7 |   case mod
 8 | 
 9 |   public func apply<T: NumericTensorElement>(_ a: T, _ b: T) -> T {
10 |     switch self {
11 |     case .add:
12 |       a + b
13 |     case .mul:
14 |       a * b
15 |     case .sub:
16 |       a - b
17 |     case .div:
18 |       a / b
19 |     case .mod:
20 |       T.modulus(a, b)
21 |     }
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/BitwiseOps.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | /// A value which can be used as a bit pattern for bitwise operators on a ``Tensor``.
  4 | public protocol TensorElementBitPattern: Sendable {
  5 |   init(_: Int)
  6 | }
  7 | 
  8 | extension UInt8: TensorElementBitPattern {
  9 | }
 10 | 
 11 | extension UInt16: TensorElementBitPattern {
 12 | }
 13 | 
 14 | extension UInt32: TensorElementBitPattern {
 15 | }
 16 | 
 17 | extension UInt64: TensorElementBitPattern {
 18 | }
 19 | 
 20 | extension TensorElementBitPattern {
 21 |   public var bitsForBitwiseOp: [UInt8] {
 22 |     var x = self
 23 |     return withUnsafeBytes(of: &x) { data in (0..<MemoryLayout<Self>.size).map { data[$0] } }
 24 |   }
 25 | 
 26 |   public init(fromBitwiseOpBits bits: [UInt8]) {
 27 |     var x: Self = Self(0)
 28 |     withUnsafeMutableBytes(of: &x) { out in
 29 |       for (i, x) in bits.enumerated() {
 30 |         out[i] = x
 31 |       }
 32 |     }
 33 |     self = x
 34 |   }
 35 | }
 36 | 
 37 | /// A bitwise operator which can be performed on a pair of ``Tensor``s.
 38 | public enum BitwiseOp: Sendable {
 39 |   case or
 40 |   case and
 41 |   case xor
 42 | 
 43 |   public func apply<T: TensorElementBitPattern>(_ a: T, _ b: T) -> T {
 44 |     T(
 45 |       fromBitwiseOpBits: zip(a.bitsForBitwiseOp, b.bitsForBitwiseOp).map { x, y in
 46 |         switch self {
 47 |         case .or:
 48 |           x | y
 49 |         case .and:
 50 |           x & y
 51 |         case .xor:
 52 |           x ^ y
 53 |         }
 54 |       })
 55 |   }
 56 | }
 57 | 
 58 | extension Tensor {
 59 | 
 60 |   @recordCaller
 61 |   internal static func _bitwise(lhs: Tensor, rhs: Tensor, op: BitwiseOp) -> Tensor {
 62 |     #alwaysAssert(
 63 |       lhs.dtype == rhs.dtype,
 64 |       "dtypes for bitwise operator do not match: \(lhs.dtype) and \(rhs.dtype)")
 65 | 
 66 |     let (newShape, (lhsStrides, rhsStrides)) = Tensor.lazyBroadcast(lhs, rhs)
 67 | 
 68 |     let backend = Backend.current
 69 |     let newData = createDataTask(lhs, rhs) { lhs, rhs in
 70 |       try await backend.bitwiseOp(
 71 |         BroadcastData(strides: lhsStrides, data: try await lhs.data),
 72 |         BroadcastData(strides: rhsStrides, data: try await rhs.data),
 73 |         op: op,
 74 |         dtype: lhs.dtype
 75 |       )
 76 |     }
 77 |     return Tensor(dataTask: newData, shape: newShape, dtype: lhs.dtype)
 78 |   }
 79 | 
 80 |   @recordCaller
 81 |   internal static func _bitwise<T: TensorElementBitPattern>(lhs: Tensor, rhs: T, op: BitwiseOp)
 82 |     -> Tensor
 83 |   {
 84 |     #alwaysAssert(
 85 |       rhs.bitsForBitwiseOp.count == lhs.dtype.byteSize,
 86 |       "dtype \(lhs.dtype) cannot be used with scalar type \(T.self) in bitwise operations because they are different sizes"
 87 |     )
 88 |     let backend = Backend.current
 89 |     let newData = createDataTask(lhs) { lhs in
 90 |       try await backend.bitwiseOp(
 91 |         try await lhs.data, rhs, op: op, count: lhs.shape.product(), dtype: lhs.dtype
 92 |       )
 93 |     }
 94 |     return Tensor(dataTask: newData, shape: lhs.shape, dtype: lhs.dtype)
 95 |   }
 96 | 
 97 |   @recordCaller
 98 |   internal static func _bitwise<T: TensorElementBitPattern>(lhs: T, rhs: Tensor, op: BitwiseOp)
 99 |     -> Tensor
100 |   {
101 |     #alwaysAssert(
102 |       lhs.bitsForBitwiseOp.count == rhs.dtype.byteSize,
103 |       "dtype \(rhs.dtype) cannot be used with scalar type \(T.self) in bitwise operations because they are different sizes"
104 |     )
105 |     let backend = Backend.current
106 |     let newData = createDataTask(rhs) { rhs in
107 |       try await backend.bitwiseOp(
108 |         try await rhs.data, lhs, op: op, count: rhs.shape.product(), dtype: rhs.dtype
109 |       )
110 |     }
111 |     return Tensor(dataTask: newData, shape: rhs.shape, dtype: rhs.dtype)
112 |   }
113 | 
114 |   /*
115 |   for op, name in [
116 |     ("^", "xor"),
117 |     ("|", "or"),
118 |     ("&", "and"),
119 |   ]:
120 |     print(
121 |         f"""
122 |   public static func {op} <T: TensorElementBitPattern>(lhs: Tensor, rhs: T) -> Tensor {{
123 |     bitwise(lhs: lhs, rhs: rhs, op: .{name})
124 |   }}
125 | 
126 |   public static func {op} <T: TensorElementBitPattern>(lhs: T, rhs: Tensor) -> Tensor {{
127 |     bitwise(lhs: lhs, rhs: rhs, op: .{name})
128 |   }}
129 | 
130 |   public static func {op} (lhs: Tensor, rhs: Tensor) -> Tensor {{
131 |     bitwise(lhs: lhs, rhs: rhs, op: .{name})
132 |   }}
133 |         """
134 |     )
135 |   */
136 | 
137 |   public static func ^ <T: TensorElementBitPattern>(lhs: Tensor, rhs: T) -> Tensor {
138 |     bitwise(lhs: lhs, rhs: rhs, op: .xor)
139 |   }
140 | 
141 |   public static func ^ <T: TensorElementBitPattern>(lhs: T, rhs: Tensor) -> Tensor {
142 |     bitwise(lhs: lhs, rhs: rhs, op: .xor)
143 |   }
144 | 
145 |   public static func ^ (lhs: Tensor, rhs: Tensor) -> Tensor {
146 |     bitwise(lhs: lhs, rhs: rhs, op: .xor)
147 |   }
148 | 
149 |   public static func | <T: TensorElementBitPattern>(lhs: Tensor, rhs: T) -> Tensor {
150 |     bitwise(lhs: lhs, rhs: rhs, op: .or)
151 |   }
152 | 
153 |   public static func | <T: TensorElementBitPattern>(lhs: T, rhs: Tensor) -> Tensor {
154 |     bitwise(lhs: lhs, rhs: rhs, op: .or)
155 |   }
156 | 
157 |   public static func | (lhs: Tensor, rhs: Tensor) -> Tensor {
158 |     bitwise(lhs: lhs, rhs: rhs, op: .or)
159 |   }
160 | 
161 |   public static func & <T: TensorElementBitPattern>(lhs: Tensor, rhs: T) -> Tensor {
162 |     bitwise(lhs: lhs, rhs: rhs, op: .and)
163 |   }
164 | 
165 |   public static func & <T: TensorElementBitPattern>(lhs: T, rhs: Tensor) -> Tensor {
166 |     bitwise(lhs: lhs, rhs: rhs, op: .and)
167 |   }
168 | 
169 |   public static func & (lhs: Tensor, rhs: Tensor) -> Tensor {
170 |     bitwise(lhs: lhs, rhs: rhs, op: .and)
171 |   }
172 | 
173 | }
174 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Broadcast.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | import Metal
  3 | 
  4 | /// A shape and corresponding strides, allowing a mapping from a tensor
  5 | /// to its underlying data.
  6 | ///
  7 | /// The strides must correspond to contiguous data, i.e. each stride that
  8 | /// is non-zero must be larger than the following strides.
  9 | public struct BroadcastStrides: Hashable, Equatable, Sendable {
 10 |   public let shape: [Int]
 11 |   public let strides: [Int]
 12 |   public let dataCount: Int
 13 |   public let isNoOp: Bool
 14 | 
 15 |   public init(shape: [Int], strides: [Int]) {
 16 |     assert(shape.count == strides.count)
 17 |     self.shape = shape
 18 |     self.strides = strides
 19 | 
 20 |     dataCount = zip(strides, shape).filter({ $0.0 != 0 }).map({ $0.1 }).product()
 21 |     isNoOp = zip(strides, shape).allSatisfy { stride, size in stride != 0 || size == 1 }
 22 |   }
 23 | 
 24 |   public init(contiguousForShape shape: [Int]) {
 25 |     self.init(shape: shape, strides: stridesForShape(shape))
 26 |   }
 27 | 
 28 |   public var dataShape: [Int] {
 29 |     return zip(shape, strides).map { size, stride in stride == 0 ? 1 : size }
 30 |   }
 31 | 
 32 |   /// Get the `RepeatDims` which could be applied to the raw array to get the
 33 |   /// broadcasted array.
 34 |   public func repeats() -> [RepeatDims] {
 35 |     var result = [RepeatDims]()
 36 |     for (i, stride) in strides.enumerated().reversed() {
 37 |       if stride == 0 && shape[i] != 1 {
 38 |         var newDims = RepeatDims(
 39 |           outerCount: zip(strides[..<i], shape[..<i]).filter({ $0.0 != 0 }).map({ $0.1 })
 40 |             .product(),
 41 |           repeatCount: shape[i],
 42 |           innerCount: shape[(i + 1)...].product()
 43 |         )
 44 |         if let last = result.last {
 45 |           if last.outerCount == newDims.outerCount
 46 |             && last.innerCount * last.repeatCount == newDims.innerCount
 47 |           {
 48 |             // Coalesce consecutive repetitions.
 49 |             // This is useful when broadcasting multiple consecutive dimensions,
 50 |             // or dimensions with size 1 between them.
 51 |             result.removeLast()
 52 |             newDims = RepeatDims(
 53 |               outerCount: newDims.outerCount,
 54 |               repeatCount: last.repeatCount * newDims.repeatCount,
 55 |               innerCount: last.innerCount
 56 |             )
 57 |           }
 58 |         }
 59 |         result.append(newDims)
 60 |       }
 61 |     }
 62 |     return result
 63 |   }
 64 | 
 65 |   /// Get the `ReduceDims` that we could apply to the virtual gradient array to
 66 |   /// get a gradient for the raw data array.
 67 |   public func repeatsInverse() -> [ReduceDims] {
 68 |     return repeats().reversed().map { $0.inverse() }
 69 |   }
 70 | 
 71 |   /// Translate an index in the virtual array to the data array.
 72 |   public func callAsFunction(_ i: Int) -> Int {
 73 |     assert(i >= 0 && i < shape.product())
 74 | 
 75 |     if isNoOp {
 76 |       return i
 77 |     }
 78 | 
 79 |     var curIdx = i
 80 |     var result = 0
 81 |     for (stride, size) in zip(strides, shape).reversed() {
 82 |       result += stride * (curIdx % size)
 83 |       curIdx /= size
 84 |     }
 85 | 
 86 |     assert(
 87 |       result >= 0 && result < dataCount,
 88 |       "shape=\(shape) strides=\(strides) index=\(i) result=\(result)")
 89 |     return result
 90 |   }
 91 | }
 92 | 
 93 | /// A structure annotating an actual instance of raw data with a
 94 | /// `BroadcastStrides` indicating how to interpret it as a virtual array.
 95 | ///
 96 | /// Note that this structure does not include the dtype of the underlying
 97 | /// data, which is crucial for actually performing operations on it.
 98 | public struct BroadcastData: Sendable {
 99 |   public let strides: BroadcastStrides
100 |   public let data: Tensor.Data
101 | 
102 |   /// Get the size of the data array, in elements (not bytes).
103 |   public var dataCount: Int { strides.dataCount }
104 | 
105 |   /// If true, the data is actually not broadcasted and the raw array is
106 |   /// equal to the virtual array.
107 |   public var isSimple: Bool { strides.isNoOp }
108 | 
109 |   /// Wrap an unbroadcasted tensor in a ``BroadcastData``.
110 |   public static func simple(data: Tensor.Data, shape: [Int]) -> BroadcastData {
111 |     Self(strides: BroadcastStrides(contiguousForShape: shape), data: data)
112 |   }
113 | }
114 | 
115 | extension Tensor {
116 | 
117 |   @recordCaller
118 |   private func _expand(as asTensor: Tensor) -> Tensor {
119 |     expand(shape: asTensor.shape)
120 |   }
121 | 
122 |   @recordCaller
123 |   private func _expand(shape newShape: [Int]) -> Tensor {
124 |     if self.shape == newShape {
125 |       return self
126 |     }
127 | 
128 |     let bcastStrides = expandStrides(shape: newShape)
129 |     let backend = Backend.current
130 |     let newData = createDataTask { t in
131 |       try await backend.broadcast(
132 |         BroadcastData(strides: bcastStrides, data: try await t.data),
133 |         dtype: t.dtype
134 |       )
135 |     }
136 | 
137 |     if !Tensor.isGradEnabled || !needsGrad {
138 |       return Tensor(dataTask: newData, shape: newShape, dtype: dtype)
139 |     } else {
140 |       let handle = saveForBackward()
141 |       return Tensor(dataTask: newData, shape: newShape, dtype: dtype) { grad in
142 |         handle.backward(backend) { grad.reduceBroadcast(bcastStrides, as: self) }
143 |       }
144 |     }
145 |   }
146 | 
147 |   @recordCaller
148 |   internal func _expandStrides(shape newShape: [Int]) -> BroadcastStrides {
149 |     #alwaysAssert(
150 |       newShape.count >= shape.count,
151 |       "cannot broadcast shape \(shape) to shorter shape \(newShape)"
152 |     )
153 | 
154 |     let extraAxes = newShape.count - shape.count
155 |     var strides = Array(repeating: 0, count: extraAxes)
156 |     for (i, (oldSize, oldStride)) in zip(shape, stridesForShape(shape)).enumerated() {
157 |       let newSize = newShape[i + extraAxes]
158 |       if newSize != oldSize {
159 |         #alwaysAssert(
160 |           oldSize == 1,
161 |           "axis \(i) cannot expand from size \(oldSize) to \(newSize): old shape \(shape), new shape \(newShape)"
162 |         )
163 |         strides.append(0)
164 |       } else {
165 |         strides.append(oldStride)
166 |       }
167 |     }
168 | 
169 |     return BroadcastStrides(shape: newShape, strides: strides)
170 |   }
171 | 
172 |   @recordCaller
173 |   private static func _broadcast(_ xs: [Tensor]) -> [Tensor] {
174 |     let shape = Tensor.broadcastShape(xs.map { $0.shape })
175 |     return xs.map { $0.expand(shape: shape) }
176 |   }
177 | 
178 |   @recordCaller
179 |   private static func _broadcast(_ x: Tensor, _ y: Tensor) -> (Tensor, Tensor) {
180 |     let results = broadcast([x, y])
181 |     return (results[0], results[1])
182 |   }
183 | 
184 |   @recordCaller
185 |   internal static func _lazyBroadcast(_ t: [Tensor]) -> ([Int], [BroadcastStrides]) {
186 |     let newShape = broadcastShape(t.map { $0.shape })
187 |     let results = t.map { $0.expandStrides(shape: newShape) }
188 |     return (newShape, results)
189 |   }
190 | 
191 |   @recordCaller
192 |   internal static func _lazyBroadcast(_ t1: Tensor, _ t2: Tensor) -> (
193 |     [Int], (BroadcastStrides, BroadcastStrides)
194 |   ) {
195 |     let newShape = broadcastShape([t1.shape, t2.shape])
196 |     let r1 = t1.expandStrides(shape: newShape)
197 |     let r2 = t2.expandStrides(shape: newShape)
198 |     return (newShape, (r1, r2))
199 |   }
200 | 
201 |   @recordCaller
202 |   internal func _reduceBroadcast(_ bcast: BroadcastStrides, as tensor: Tensor) -> Tensor {
203 |     return flatApplySums(bcast.repeatsInverse()).reshape(tensor.shape)
204 |   }
205 | 
206 |   @recordCaller
207 |   internal static func _broadcastShape(_ s: [[Int]]) -> [Int] {
208 |     if s.count == 1 {
209 |       return s[0]
210 |     } else if s.count == 2 {
211 |       var shape0 = s[0]
212 |       var shape1 = s[1]
213 |       while shape0.count < shape1.count {
214 |         shape0.insert(1, at: 0)
215 |       }
216 |       while shape1.count < shape0.count {
217 |         shape1.insert(1, at: 0)
218 |       }
219 |       return zip(shape0, shape1).map { x, y in
220 |         if x == y {
221 |           return x
222 |         } else if x == 0 || y == 0 {
223 |           tracedFatalError("shapes \(s[0]) and \(s[1]) do not support broadcasting")
224 |         } else if x == 1 {
225 |           return y
226 |         } else if y == 1 {
227 |           return x
228 |         } else {
229 |           tracedFatalError("shapes \(s[0]) and \(s[1]) do not support broadcasting")
230 |         }
231 |       }
232 |     } else {
233 |       return broadcastShape([broadcastShape([s[0], s[1]])] + Array(s[2...]))
234 |     }
235 |   }
236 | 
237 | }
238 | 
239 | func stridesForShape(_ shape: [Int]) -> [Int] {
240 |   var strides = [Int](repeating: 0, count: shape.count)
241 |   for i in 0..<shape.count {
242 |     strides[i] = shape[(i + 1)...].product()
243 |   }
244 |   return strides
245 | }
246 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Checkpoint.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import HCBacktrace
  3 | 
  4 | extension Tensor {
  5 |   @recordCaller
  6 |   private static func _checkpoint(
  7 |     enabled: Bool = true,
  8 |     saveRandomState: Bool = true,
  9 |     waitForData: Bool = true,
 10 |     _ args: [Tensor],
 11 |     _ fn: @escaping @Sendable ([Tensor]) -> [Tensor]
 12 |   ) -> [Tensor] {
 13 |     let backend = Backend.current
 14 |     let rng: RandomGenerator? =
 15 |       if saveRandomState {
 16 |         backend.defaultRandom()
 17 |       } else {
 18 |         nil
 19 |       }
 20 |     if !Tensor.isGradEnabled || enabled == false {
 21 |       return fn(args)
 22 |     }
 23 |     let saver = RandomStateGuard(rng)
 24 |     let capturedResults = Tensor.withGrad(enabled: false) {
 25 |       saver.saveOrRestoreRandom {
 26 |         fn(args.map { $0.noGrad() })
 27 |       }
 28 |     }
 29 | 
 30 |     // Avoid a case of prematurely backward'ing.
 31 |     if capturedResults.allSatisfy({ !$0.dtype.supportsGrad }) {
 32 |       return capturedResults
 33 |     }
 34 | 
 35 |     let inputHandles = args.map { $0.saveForBackward() }
 36 |     let state = CheckpointResultState(count: capturedResults.count, trace: Backtrace.current) {
 37 |       grads in
 38 | 
 39 |       var handlesAndParams = [(BackwardHandle, Trainable.Param<Tensor>)]()
 40 |       let newOutputs = Tensor.withGrad(enabled: true) {
 41 |         var newInputs = [Tensor]()
 42 |         for (x, handle) in zip(args, inputHandles) {
 43 |           if x.needsGrad {
 44 |             let p: Trainable.Param<Tensor> = .init()
 45 |             p.data = x.noGrad()
 46 |             handlesAndParams.append((handle, p))
 47 |             newInputs.append(p.data!.onGrad { g in backend.use { p.addGrad(g) } })
 48 |           } else {
 49 |             newInputs.append(x)
 50 |           }
 51 |         }
 52 |         return Tensor.asDependencies(
 53 |           grads.filter({ $0 != nil }).map({ $0! }), waitForData: waitForData
 54 |         ) {
 55 |           backend.use { saver.saveOrRestoreRandom { fn(newInputs) } }
 56 |         }
 57 |       }
 58 | 
 59 |       // We are already in a backward context here, so all of our backward() blocks
 60 |       // will be called after this closure returns, but they will still be called in
 61 |       // the order we might expect.
 62 | 
 63 |       for (output, upstreamGrad) in zip(newOutputs, grads) {
 64 |         if let grad = upstreamGrad {
 65 |           output.saveForBackward().backward(backend) { grad }
 66 |         }
 67 |       }
 68 | 
 69 |       // We can reference param.grad safely because these backward calls won't be
 70 |       // triggered until the above backward()'s and all of their downstream backwards()
 71 |       // have completed.
 72 |       for (inputHandle, param) in handlesAndParams {
 73 |         inputHandle.backward(backend) {
 74 |           param.grad ?? Tensor(zerosLike: param.data!)
 75 |         }
 76 |       }
 77 |     }
 78 | 
 79 |     let results = capturedResults.enumerated().map { (i, result) in
 80 |       if !result.dtype.supportsGrad {
 81 |         state.record(index: i, grad: nil)
 82 |         return result
 83 |       } else {
 84 |         let handle = CheckpointResultTensorTracker(state: state, index: i)
 85 |         return result.onGrad { g in handle.backward(g) }
 86 |       }
 87 |     }
 88 |     return results
 89 |   }
 90 | }
 91 | 
 92 | final class RandomStateGuard: Sendable {
 93 |   let rng: RandomGenerator?
 94 |   let lock = NSLock()
 95 |   nonisolated(unsafe) var state: Tensor? = nil
 96 | 
 97 |   init(_ rng: RandomGenerator?) {
 98 |     self.rng = rng
 99 |   }
100 | 
101 |   func saveOrRestoreRandom<T>(_ fn: () throws -> T) rethrows -> T {
102 |     guard let rng = rng else {
103 |       return try fn()
104 |     }
105 |     lock.lock()
106 |     if let state = state {
107 |       lock.unlock()
108 |       let oldState = rng.state
109 |       rng.state = state
110 |       defer { rng.state = oldState }
111 |       return try fn()
112 |     } else {
113 |       state = rng.state
114 |       lock.unlock()
115 |       return try fn()
116 |     }
117 |   }
118 | }
119 | 
120 | final class CheckpointResultState: Sendable {
121 |   // It will only be called once, and it is passed with sending.
122 |   nonisolated(unsafe) let callback: ([Tensor?]) -> Void
123 | 
124 |   let trace: [CodeLocation]
125 |   let lock = NSLock()
126 |   nonisolated(unsafe) var completed: [Bool]
127 |   nonisolated(unsafe) var grads: [Tensor?]
128 | 
129 |   #if compiler(>=6.0)
130 |     init(count: Int, trace: [CodeLocation], _ callback: sending @escaping ([Tensor?]) -> Void) {
131 |       completed = Array(repeating: false, count: count)
132 |       grads = Array(repeating: nil, count: count)
133 |       self.trace = trace
134 |       self.callback = callback
135 |     }
136 |   #else
137 |     init(count: Int, trace: [CodeLocation], _ callback: @escaping ([Tensor?]) -> Void) {
138 |       completed = Array(repeating: false, count: count)
139 |       grads = Array(repeating: nil, count: count)
140 |       self.trace = trace
141 |       self.callback = callback
142 |     }
143 |   #endif
144 | 
145 |   func record(index: Int, grad: Tensor?) {
146 |     let result: [Tensor?]? = lock.withLock {
147 |       if completed[index] {
148 |         // Allow extra call from deinit
149 |         #alwaysAssert(grad == nil, "second call must be from deinit")
150 |         return nil
151 |       }
152 | 
153 |       completed[index] = true
154 |       grads[index] = grad
155 |       if completed.allSatisfy({ $0 }) {
156 |         #alwaysAssert(
157 |           grad != nil,
158 |           "Checkpoint backward pass was delayed until one of the results was deinitialized.\n\n"
159 |             + "The offending tensor was returned at index \(index) from code location \(Backtrace.format(trace))"
160 |         )
161 |         return grads
162 |       } else {
163 |         return nil
164 |       }
165 |     }
166 |     if let result = result {
167 |       callback(result)
168 |     }
169 |   }
170 | }
171 | 
172 | final class CheckpointResultTensorTracker: Sendable {
173 |   let state: CheckpointResultState
174 |   let index: Int
175 | 
176 |   init(state: CheckpointResultState, index: Int) {
177 |     self.state = state
178 |     self.index = index
179 |   }
180 | 
181 |   func backward(_ grad: Tensor?) {
182 |     state.record(index: index, grad: grad)
183 |   }
184 | 
185 |   deinit {
186 |     state.record(index: index, grad: nil)
187 |   }
188 | }
189 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Clamp.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | extension Tensor {
  4 | 
  5 |   @recordCaller
  6 |   private func _clamp<T: NumericTensorElement>(min: T) -> Tensor {
  7 |     return clampInternal(T.self, minScalar: min)
  8 |   }
  9 | 
 10 |   @recordCaller
 11 |   private func _clamp<T: NumericTensorElement>(max: T) -> Tensor {
 12 |     return clampInternal(T.self, maxScalar: max)
 13 |   }
 14 | 
 15 |   @recordCaller
 16 |   private func _clamp<T: NumericTensorElement>(min: T, max: T) -> Tensor {
 17 |     return clampInternal(T.self, minScalar: min, maxScalar: max)
 18 |   }
 19 | 
 20 |   @recordCaller
 21 |   private func _clamp(min: Tensor) -> Tensor {
 22 |     return clampInternal(Float.self, minTensor: min)
 23 |   }
 24 | 
 25 |   @recordCaller
 26 |   private func _clamp(max: Tensor) -> Tensor {
 27 |     return clampInternal(Float.self, maxTensor: max)
 28 |   }
 29 | 
 30 |   @recordCaller
 31 |   private func _clamp(min: Tensor, max: Tensor) -> Tensor {
 32 |     return clampInternal(Float.self, minTensor: min, maxTensor: max)
 33 |   }
 34 | 
 35 |   @recordCaller
 36 |   private func _clamp<T: NumericTensorElement>(min: Tensor, max: T) -> Tensor {
 37 |     return clampInternal(T.self, minTensor: min, maxScalar: max)
 38 |   }
 39 | 
 40 |   @recordCaller
 41 |   private func _clamp<T: NumericTensorElement>(min: T, max: Tensor) -> Tensor {
 42 |     return clampInternal(T.self, minScalar: min, maxTensor: max)
 43 |   }
 44 | 
 45 |   @recordCaller
 46 |   internal func _clampInternal<T: NumericTensorElement>(
 47 |     _ t: T.Type, minScalar: T? = nil, minTensor: Tensor? = nil, maxScalar: T? = nil,
 48 |     maxTensor: Tensor? = nil
 49 |   ) -> Tensor {
 50 |     #alwaysAssert(dtype.isNumeric, "cannot use clamp() with dtype \(dtype)")
 51 |     let backend = Backend.current
 52 | 
 53 |     var bcastTensors = [self]
 54 |     if let mt = minTensor {
 55 |       bcastTensors.append(mt)
 56 |       #alwaysAssert(
 57 |         dtype == mt.dtype, "mismatched dtype of self (\(dtype)) and min tensor (\(mt.dtype))")
 58 |     }
 59 |     if let mt = maxTensor {
 60 |       bcastTensors.append(mt)
 61 |       #alwaysAssert(
 62 |         dtype == mt.dtype, "mismatched dtype of self (\(dtype)) and max tensor (\(mt.dtype))")
 63 |     }
 64 |     let (bcastShape, stridesImmutable) = Tensor.lazyBroadcast(bcastTensors)
 65 |     var strides = stridesImmutable
 66 | 
 67 |     let tStrides = strides.remove(at: 0)
 68 |     let minStrides: BroadcastStrides? = (minTensor == nil ? nil : strides.remove(at: 0))
 69 |     let maxStrides: BroadcastStrides? = (maxTensor == nil ? nil : strides.remove(at: 0))
 70 | 
 71 |     let t = self.noGrad()
 72 |     let minData = minTensor?.noGrad()
 73 |     let maxData = maxTensor?.noGrad()
 74 | 
 75 |     let newData: Task<Tensor.Data, Error> = Tensor.createDataTask {
 76 |       let minArg: Backend.TensorOrScalar<T>? =
 77 |         if let t = minData, let s = minStrides {
 78 |           .tensor(BroadcastData(strides: s, data: try await t.data))
 79 |         } else if let val = minScalar {
 80 |           .scalar(val, bcastShape)
 81 |         } else {
 82 |           nil
 83 |         }
 84 |       let maxArg: Backend.TensorOrScalar<T>? =
 85 |         if let t = maxData, let s = maxStrides {
 86 |           .tensor(BroadcastData(strides: s, data: try await t.data))
 87 |         } else if let val = maxScalar {
 88 |           .scalar(val, bcastShape)
 89 |         } else {
 90 |           nil
 91 |         }
 92 |       return try await backend.clamp(
 93 |         BroadcastData(strides: tStrides, data: try await t.data),
 94 |         T.self,
 95 |         min: minArg,
 96 |         max: maxArg,
 97 |         dtype: t.dtype
 98 |       )
 99 |     }
100 | 
101 |     if !needsGrad || !Tensor.isGradEnabled {
102 |       return Tensor(dataTask: newData, shape: bcastShape, dtype: dtype)
103 |     } else {
104 |       let handle = saveForBackward()
105 |       let minHandle = minTensor?.saveForBackward()
106 |       let maxHandle = maxTensor?.saveForBackward()
107 |       let rawResult = Tensor(dataTask: newData, shape: bcastShape, dtype: dtype)
108 |       return rawResult.onGrad { grad in
109 |         handle.backward(backend) {
110 |           let mask =
111 |             if let max = maxScalar {
112 |               // Fast paths when the gradient can be assigned to self
113 |               if let min = minScalar, min < max {
114 |                 rawResult == self
115 |               } else if minScalar == nil && minTensor == nil {
116 |                 rawResult == self
117 |               } else {
118 |                 (rawResult == self) & (rawResult != max)
119 |               }
120 |             } else if let max = maxTensor {
121 |               (rawResult == self) & (rawResult != max)
122 |             } else {
123 |               rawResult == self
124 |             }
125 |           return mask.when(isTrue: grad, isFalse: 0).reduceBroadcast(tStrides, as: self)
126 |         }
127 |         if let t = minTensor, let h = minHandle, let s = minStrides {
128 |           h.backward(backend) {
129 |             ((rawResult == t) & (rawResult != self)).when(isTrue: grad, isFalse: 0).reduceBroadcast(
130 |               s, as: t)
131 |           }
132 |         }
133 |         if let t = maxTensor, let h = maxHandle, let s = maxStrides {
134 |           h.backward(backend) {
135 |             (rawResult == t).when(isTrue: grad, isFalse: 0).reduceBroadcast(s, as: t)
136 |           }
137 |         }
138 |       }
139 |     }
140 |   }
141 | 
142 | }
143 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Comparison.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | /// A comparison operator which can be performed between ``Tensor``s.
  4 | public enum ComparisonOp: Sendable {
  5 |   case equal
  6 |   case notEqual
  7 |   case less
  8 |   case lessEqual
  9 |   case greater
 10 |   case greaterEqual
 11 | 
 12 |   public func apply<T: TensorElement>(_ a: T, _ b: T) -> Bool {
 13 |     switch self {
 14 |     case .equal:
 15 |       a == b
 16 |     case .notEqual:
 17 |       a != b
 18 |     case .less:
 19 |       a < b
 20 |     case .lessEqual:
 21 |       a <= b
 22 |     case .greater:
 23 |       a > b
 24 |     case .greaterEqual:
 25 |       a >= b
 26 |     }
 27 |   }
 28 | }
 29 | 
 30 | extension Tensor {
 31 |   @recordCaller
 32 |   internal static func _compare(lhs: Tensor, rhs: Tensor, op: ComparisonOp) -> Tensor {
 33 |     #alwaysAssert(
 34 |       lhs.dtype == rhs.dtype,
 35 |       "dtypes for comparison operator do not match: \(lhs.dtype) and \(rhs.dtype)")
 36 | 
 37 |     let (newShape, (lhsStrides, rhsStrides)) = Tensor.lazyBroadcast(lhs, rhs)
 38 | 
 39 |     let backend = Backend.current
 40 |     let newData = createDataTask(lhs, rhs) { lhs, rhs in
 41 |       try await backend.compare(
 42 |         BroadcastData(strides: lhsStrides, data: try await lhs.data),
 43 |         BroadcastData(strides: rhsStrides, data: try await rhs.data),
 44 |         op: op,
 45 |         dtype: lhs.dtype
 46 |       )
 47 |     }
 48 |     return Tensor(dataTask: newData, shape: newShape, dtype: .bool)
 49 |   }
 50 | 
 51 |   @recordCaller
 52 |   internal static func _compare<T: TensorElement>(lhs: Tensor, rhs: T, op: ComparisonOp) -> Tensor {
 53 |     let backend = Backend.current
 54 |     let newData = createDataTask(lhs) { lhs in
 55 |       try await backend.compare(
 56 |         try await lhs.data, rhs, op: op, count: lhs.shape.product(), dtype: lhs.dtype
 57 |       )
 58 |     }
 59 |     return Tensor(dataTask: newData, shape: lhs.shape, dtype: .bool)
 60 |   }
 61 | 
 62 |   @recordCaller
 63 |   internal static func _compare<T: TensorElement>(lhs: T, rhs: Tensor, op: ComparisonOp) -> Tensor {
 64 |     let backend = Backend.current
 65 |     let newData = createDataTask(rhs) { rhs in
 66 |       try await backend.compare(
 67 |         lhs, try await rhs.data, op: op, count: rhs.shape.product(), dtype: rhs.dtype
 68 |       )
 69 |     }
 70 |     return Tensor(dataTask: newData, shape: rhs.shape, dtype: .bool)
 71 |   }
 72 | 
 73 |   /*
 74 |   for op, name in [
 75 |     ("==", "equal"),
 76 |     ("!=", "notEqual"),
 77 |     ("<", "less"),
 78 |     (">", "greater"),
 79 |     ("<=", "lessEqual"),
 80 |     (">=", "greaterEqual"),
 81 |   ]:
 82 |     print(
 83 |         f"""
 84 |   public static func {op} <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {{
 85 |     compare(lhs: lhs, rhs: rhs, op: .{name})
 86 |   }}
 87 | 
 88 |   public static func {op} <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {{
 89 |     compare(lhs: lhs, rhs: rhs, op: .{name})
 90 |   }}
 91 | 
 92 |   public static func {op} (lhs: Tensor, rhs: Tensor) -> Tensor {{
 93 |     compare(lhs: lhs, rhs: rhs, op: .{name})
 94 |   }}
 95 |         """
 96 |     )
 97 |   */
 98 | 
 99 |   public static func == <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {
100 |     compare(lhs: lhs, rhs: rhs, op: .equal)
101 |   }
102 | 
103 |   public static func == <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {
104 |     compare(lhs: lhs, rhs: rhs, op: .equal)
105 |   }
106 | 
107 |   public static func == (lhs: Tensor, rhs: Tensor) -> Tensor {
108 |     compare(lhs: lhs, rhs: rhs, op: .equal)
109 |   }
110 | 
111 |   public static func != <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {
112 |     compare(lhs: lhs, rhs: rhs, op: .notEqual)
113 |   }
114 | 
115 |   public static func != <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {
116 |     compare(lhs: lhs, rhs: rhs, op: .notEqual)
117 |   }
118 | 
119 |   public static func != (lhs: Tensor, rhs: Tensor) -> Tensor {
120 |     compare(lhs: lhs, rhs: rhs, op: .notEqual)
121 |   }
122 | 
123 |   public static func < <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {
124 |     compare(lhs: lhs, rhs: rhs, op: .less)
125 |   }
126 | 
127 |   public static func < <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {
128 |     compare(lhs: lhs, rhs: rhs, op: .less)
129 |   }
130 | 
131 |   public static func < (lhs: Tensor, rhs: Tensor) -> Tensor {
132 |     compare(lhs: lhs, rhs: rhs, op: .less)
133 |   }
134 | 
135 |   public static func > <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {
136 |     compare(lhs: lhs, rhs: rhs, op: .greater)
137 |   }
138 | 
139 |   public static func > <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {
140 |     compare(lhs: lhs, rhs: rhs, op: .greater)
141 |   }
142 | 
143 |   public static func > (lhs: Tensor, rhs: Tensor) -> Tensor {
144 |     compare(lhs: lhs, rhs: rhs, op: .greater)
145 |   }
146 | 
147 |   public static func <= <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {
148 |     compare(lhs: lhs, rhs: rhs, op: .lessEqual)
149 |   }
150 | 
151 |   public static func <= <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {
152 |     compare(lhs: lhs, rhs: rhs, op: .lessEqual)
153 |   }
154 | 
155 |   public static func <= (lhs: Tensor, rhs: Tensor) -> Tensor {
156 |     compare(lhs: lhs, rhs: rhs, op: .lessEqual)
157 |   }
158 | 
159 |   public static func >= <T: TensorElement>(lhs: Tensor, rhs: T) -> Tensor {
160 |     compare(lhs: lhs, rhs: rhs, op: .greaterEqual)
161 |   }
162 | 
163 |   public static func >= <T: TensorElement>(lhs: T, rhs: Tensor) -> Tensor {
164 |     compare(lhs: lhs, rhs: rhs, op: .greaterEqual)
165 |   }
166 | 
167 |   public static func >= (lhs: Tensor, rhs: Tensor) -> Tensor {
168 |     compare(lhs: lhs, rhs: rhs, op: .greaterEqual)
169 |   }
170 | }
171 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/ConcatSplit.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | extension Tensor {
  4 |   /// Concatenate tensors along the given axis.
  5 |   ///
  6 |   /// All `Tensor`s must have the same ``Tensor/dtype`` and number of dimensions.
  7 |   /// The shape of the `Tensor`s must match except along the given axis.
  8 |   public convenience init(
  9 |     concat tensors: [Tensor],
 10 |     axis: Int = 0,
 11 |     function: StaticString = #function,
 12 |     file: StaticString = #filePath,
 13 |     line: UInt = #line
 14 |   ) {
 15 |     let backend = Backend.current
 16 | 
 17 |     func record<T>(
 18 |       fn: () -> T
 19 |     ) -> T {
 20 |       Backtrace.record(fn, function: function, file: file, line: line)
 21 |     }
 22 | 
 23 |     record { #alwaysAssert(tensors.count > 0, "cannot concatenate zero tensors") }
 24 | 
 25 |     let axis = record { tensors[0].positiveAxis(axis) }
 26 |     record {
 27 |       #alwaysAssert(
 28 |         axis >= 0 && axis < tensors[0].shape.count,
 29 |         "axis \(axis) out of bounds for shape \(tensors[0].shape)")
 30 | 
 31 |       for (i, t) in tensors.enumerated() {
 32 |         #alwaysAssert(
 33 |           t.dtype == tensors[0].dtype,
 34 |           "tensor at index \(i) has different dtype \(t.dtype) than tensor 0 \(tensors[0].dtype)")
 35 |         #alwaysAssert(
 36 |           t.shape.count == tensors[0].shape.count && t.shape[..<axis] == tensors[0].shape[..<axis]
 37 |             && t.shape[(axis + 1)...] == tensors[0].shape[(axis + 1)...],
 38 |           "tensor at index \(i) has shape \(t.shape) which is incompatible with shape at index 0 \(tensors[0].shape)"
 39 |         )
 40 |       }
 41 |     }
 42 | 
 43 |     let middleCounts: [Int] = tensors.map { $0.shape[axis] }
 44 |     let innerCounts: [Int] = tensors.map { $0.shape[axis...].product() }
 45 |     let outerCount = tensors[0].shape[..<axis].product()
 46 |     let dtype = tensors[0].dtype
 47 | 
 48 |     // Explicitly detach arguments instead of relying on createDataTask.
 49 |     let capturedTensors = tensors.map { $0.noGrad() }
 50 |     let newData = record {
 51 |       Tensor.createDataTask {
 52 |         var datas: [Tensor.Data] = []
 53 |         for tensor in capturedTensors {
 54 |           datas.append(try await tensor.data)
 55 |         }
 56 |         return try await backend.concat(
 57 |           datas, outerCount: outerCount, innerCounts: innerCounts, dtype: dtype)
 58 |       }
 59 |     }
 60 |     var newShape = tensors[0].shape
 61 |     newShape[axis] = middleCounts.sum()
 62 |     if tensors.map({ $0.needsGrad }).allSatisfy({ $0 == false }) {
 63 |       self.init(dataTask: newData, shape: newShape, dtype: dtype)
 64 |     } else {
 65 |       let handles = tensors.map { $0.saveForBackward() }
 66 |       self.init(dataTask: newData, shape: newShape, dtype: dtype) { grad in
 67 |         let pieces = backend.use { grad.split(axis: axis, counts: middleCounts) }
 68 |         for (handle, piece) in zip(handles, pieces) {
 69 |           handle.backward(backend) { piece }
 70 |         }
 71 |       }
 72 |     }
 73 |   }
 74 | 
 75 |   public convenience init(
 76 |     stack tensors: [Tensor],
 77 |     axis: Int = 0,
 78 |     function: StaticString = #function,
 79 |     file: StaticString = #filePath,
 80 |     line: UInt = #line
 81 |   ) {
 82 |     let args = Backtrace.record(function: function, file: file, line: line) {
 83 |       tensors.map { x in x.unsqueeze(axis: axis) }
 84 |     }
 85 |     self.init(concat: args, axis: axis)
 86 |   }
 87 | 
 88 |   @recordCaller
 89 |   private func _split(axis: Int, counts: [Int]) -> [Tensor] {
 90 |     let axis = positiveAxis(axis)
 91 |     #alwaysAssert(axis >= 0 && axis < shape.count, "axis \(axis) out of bounds for shape \(shape)")
 92 |     #alwaysAssert(
 93 |       shape[axis] == counts.sum(),
 94 |       "split counts \(counts) do not sum to axis \(axis) of shape \(shape)")
 95 |     var results: [Tensor] = []
 96 |     var start = 0
 97 |     for count in counts {
 98 |       results.append(self[FullRange(count: axis), start..<(start + count)])
 99 |       start += count
100 |     }
101 |     return results
102 |   }
103 | 
104 |   @recordCaller
105 |   private func _chunk(axis: Int, count: Int) -> [Tensor] {
106 |     let axis = positiveAxis(axis)
107 |     #alwaysAssert(
108 |       shape[axis] >= count,
109 |       "shape \(shape) incompatible with chunk of count \(count) on axis \(axis)")
110 |     #alwaysAssert(
111 |       shape[axis] % count == 0,
112 |       "shape \(shape) incompatible with chunk of count \(count) on axis \(axis)")
113 |     let sizes = [Int](repeating: shape[axis] / count, count: count)
114 |     return split(axis: axis, counts: sizes)
115 |   }
116 | }
117 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Cumulative.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import HCBacktrace
 3 | 
 4 | extension Tensor {
 5 | 
 6 |   @recordCaller
 7 |   private func _cumulativeSum(axis: Int, exclusive: Bool = false, reverse: Bool = false) -> Tensor {
 8 |     let axis = positiveAxis(axis)
 9 |     #alwaysAssert(axis >= 0 && axis < shape.count, "axis \(axis) out of bounds for shape \(shape)")
10 |     let backend = Backend.current
11 |     let newData = createDataTask { t in
12 |       try await backend.cumulativeSum(
13 |         try await t.data, dims: t.reduceDims(axis), exclusive: exclusive, reverse: reverse,
14 |         dtype: t.dtype)
15 |     }
16 |     if !Tensor.isGradEnabled || !needsGrad {
17 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype)
18 |     } else {
19 |       let handle = self.saveForBackward()
20 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype) { grad in
21 |         handle.backward(backend) {
22 |           grad.cumulativeSum(axis: axis, exclusive: exclusive, reverse: !reverse)
23 |         }
24 |       }
25 |     }
26 |   }
27 | 
28 |   @recordCaller
29 |   private func _cumulativeProd(axis: Int, exclusive: Bool = false, reverse: Bool = false) -> Tensor
30 |   {
31 |     #alwaysAssert(
32 |       !Tensor.isGradEnabled || !needsGrad,
33 |       "gradients are currently not supported for cumulative product")
34 |     let axis = positiveAxis(axis)
35 |     #alwaysAssert(axis >= 0 && axis < shape.count, "axis \(axis) out of bounds for shape \(shape)")
36 |     let backend = Backend.current
37 |     let newData = createDataTask { t in
38 |       try await backend.cumulativeProd(
39 |         try await t.data, dims: t.reduceDims(axis), exclusive: exclusive, reverse: reverse,
40 |         dtype: t.dtype)
41 |     }
42 |     return Tensor(dataTask: newData, shape: shape, dtype: dtype)
43 |   }
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/DataUtil.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | 
  3 | /// Iterate through a synchronous iterator in a background thread and push
  4 | /// the results to an asynchronous stream.
  5 | ///
  6 | /// Buffers `bufferSize` elements in memory.
  7 | public func loadDataInBackground<T, S: Sequence<Result<T, Error>>>(_ it: S, bufferSize: Int = 2)
  8 |   -> AsyncThrowingStream<T, Error>
  9 | where T: Sendable, S: Sendable {
 10 |   AsyncThrowingStream(bufferingPolicy: .bufferingOldest(bufferSize)) { continuation in
 11 |     let thread = Thread {
 12 |       var it = it.makeIterator()
 13 |       while true {
 14 |         var maybeX: Result<T, Error>?
 15 |         autoreleasepool {
 16 |           maybeX = it.next()
 17 |         }
 18 |         guard let x = maybeX else { break }
 19 |         if Thread.current.isCancelled {
 20 |           return
 21 |         }
 22 |         switch x {
 23 |         case .failure(let e):
 24 |           continuation.finish(throwing: e)
 25 |           return
 26 |         case .success(let x):
 27 |           var sent = false
 28 |           while !sent {
 29 |             switch continuation.yield(x) {
 30 |             case .dropped(_):
 31 |               Thread.sleep(forTimeInterval: 0.05)
 32 |             default:
 33 |               sent = true
 34 |             }
 35 |           }
 36 |         }
 37 |       }
 38 |       continuation.finish()
 39 |     }
 40 |     thread.name = "loadDataInBackground-Worker"
 41 |     thread.start()
 42 | 
 43 |     let t = SendableThread(thread: thread)
 44 |     continuation.onTermination = { _ in t.thread.cancel() }
 45 |   }
 46 | }
 47 | 
 48 | #if compiler(>=6.0)
 49 |   /// Like loadDataInBackground, but allows using non-sendable sequences.
 50 |   public func loadDataInBackgroundSending<T, S: Sequence<Result<T, Error>>>(
 51 |     _ it: sending S, bufferSize: Int = 2
 52 |   )
 53 |     -> AsyncThrowingStream<T, Error>
 54 |   where T: Sendable {
 55 |     let sendIt = SendS(it)
 56 |     return AsyncThrowingStream(bufferingPolicy: .bufferingOldest(bufferSize)) { continuation in
 57 |       let thread = Thread {
 58 |         var it = sendIt.s.makeIterator()
 59 |         while true {
 60 |           var maybeX: Result<T, Error>?
 61 |           autoreleasepool {
 62 |             maybeX = it.next()
 63 |           }
 64 |           guard let x = maybeX else { break }
 65 |           if Thread.current.isCancelled {
 66 |             return
 67 |           }
 68 |           switch x {
 69 |           case .failure(let e):
 70 |             continuation.finish(throwing: e)
 71 |             return
 72 |           case .success(let x):
 73 |             var sent = false
 74 |             while !sent {
 75 |               switch continuation.yield(x) {
 76 |               case .dropped(_):
 77 |                 Thread.sleep(forTimeInterval: 0.05)
 78 |               default:
 79 |                 sent = true
 80 |               }
 81 |             }
 82 |           }
 83 |         }
 84 |         continuation.finish()
 85 |       }
 86 |       thread.name = "loadDataInBackground-Worker"
 87 |       thread.start()
 88 | 
 89 |       let t = SendableThread(thread: thread)
 90 |       continuation.onTermination = { _ in t.thread.cancel() }
 91 |     }
 92 |   }
 93 | #else
 94 |   /// Like loadDataInBackground, but allows using non-sendable sequences.
 95 |   public func loadDataInBackgroundSending<T, S: Sequence<Result<T, Error>>>(
 96 |     _ it: S, bufferSize: Int = 2
 97 |   )
 98 |     -> AsyncThrowingStream<T, Error>
 99 |   where T: Sendable {
100 |     let sendIt = SendS(it)
101 |     return AsyncThrowingStream(bufferingPolicy: .bufferingOldest(bufferSize)) { continuation in
102 |       let thread = Thread {
103 |         var it = sendIt.s.makeIterator()
104 |         while true {
105 |           var maybeX: Result<T, Error>?
106 |           autoreleasepool {
107 |             maybeX = it.next()
108 |           }
109 |           guard let x = maybeX else { break }
110 |           if Thread.current.isCancelled {
111 |             return
112 |           }
113 |           switch x {
114 |           case .failure(let e):
115 |             continuation.finish(throwing: e)
116 |             return
117 |           case .success(let x):
118 |             var sent = false
119 |             while !sent {
120 |               switch continuation.yield(x) {
121 |               case .dropped(_):
122 |                 Thread.sleep(forTimeInterval: 0.05)
123 |               default:
124 |                 sent = true
125 |               }
126 |             }
127 |           }
128 |         }
129 |         continuation.finish()
130 |       }
131 |       thread.name = "loadDataInBackground-Worker"
132 |       thread.start()
133 | 
134 |       let t = SendableThread(thread: thread)
135 |       continuation.onTermination = { _ in t.thread.cancel() }
136 |     }
137 |   }
138 | #endif
139 | 
140 | private struct SendableThread: @unchecked Sendable {
141 |   let thread: Thread
142 | }
143 | 
144 | struct SendS<S>: @unchecked Sendable {
145 |   let s: S
146 |   init(_ s: S) {
147 |     self.s = s
148 |   }
149 | }
150 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Debug.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import HCBacktrace
 3 | 
 4 | extension Tensor {
 5 |   @recordCaller
 6 |   private func _printing(onForward: String? = nil, onGrad: String? = nil) -> Tensor {
 7 |     let forwardFn: (@Sendable (Tensor) async throws -> Void)? =
 8 |       if let onForward = onForward {
 9 |         { t in
10 |           let _ = try await t.data
11 |           print(onForward)
12 |         }
13 |       } else {
14 |         nil
15 |       }
16 |     let bwdFn: (@Sendable (Tensor) async throws -> Void)? =
17 |       if let onGrad = onGrad {
18 |         { t in
19 |           let _ = try await t.data
20 |           print(onGrad)
21 |         }
22 |       } else {
23 |         nil
24 |       }
25 |     return printing(onForward: forwardFn, onGrad: bwdFn)
26 |   }
27 | 
28 |   @recordCaller
29 |   private func _printing(
30 |     onForward: (@Sendable (Tensor) async throws -> Void)? = nil,
31 |     onGrad: (@Sendable (Tensor) async throws -> Void)? = nil
32 |   ) -> Tensor {
33 |     if onForward == nil && onGrad == nil {
34 |       return self
35 |     }
36 |     let task =
37 |       if let onForward = onForward {
38 |         createDataTask { t in
39 |           try await onForward(t)
40 |           return try await t.data
41 |         }
42 |       } else {
43 |         dataTask
44 |       }
45 |     if !needsGrad || !Tensor.isGradEnabled {
46 |       return Tensor(dataTask: task, shape: shape, dtype: dtype)
47 |     } else {
48 |       let handle = saveForBackward()
49 |       return Tensor(dataTask: task, shape: shape, dtype: dtype) { grad in
50 |         handle.backward(Backend.current) { grad.printing(onForward: onGrad) }
51 |       }
52 |     }
53 |   }
54 | 
55 |   @recordCaller
56 |   private func _checkNaN(onForward: String? = nil, onGrad: String? = nil) -> Tensor {
57 |     #alwaysAssert(dtype == .float32)
58 |     if onForward == nil && onGrad == nil {
59 |       return self
60 |     }
61 |     let task = createDataTask { t in
62 |       let floats = try await t.floats()
63 |       if let onForward = onForward, !floats.allSatisfy({ !$0.isNaN }) {
64 |         print("nan detected: \(onForward)")
65 |       }
66 |       return try await t.data
67 |     }
68 |     if !needsGrad || !Tensor.isGradEnabled {
69 |       return Tensor(dataTask: task, shape: shape, dtype: dtype)
70 |     } else {
71 |       let handle = saveForBackward()
72 |       return Tensor(dataTask: task, shape: shape, dtype: dtype) { grad in
73 |         handle.backward(Backend.current) { grad.checkNaN(onForward: onGrad) }
74 |       }
75 |     }
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Elemwise.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import HCBacktrace
  3 | 
  4 | /// A flag which determines the implementation behind ``Tensor/gelu(mode:function:file:line:)``.
  5 | ///
  6 | /// When the mode is approx, a tanh-based approximation is used.
  7 | public enum GeLUMode: Sendable {
  8 |   case approx
  9 |   case exact
 10 | }
 11 | 
 12 | /// An element-wise operation which can be applied to a numeric ``Tensor``.
 13 | public enum ElemwiseOp: Sendable {
 14 |   case sin
 15 |   case cos
 16 |   case minusSin
 17 |   case tan
 18 |   case tanGrad
 19 |   case atan
 20 |   case atanGrad
 21 |   case acos
 22 |   case acosGrad
 23 |   case asin
 24 |   case asinGrad
 25 |   case exp
 26 |   case log
 27 |   case recip
 28 |   case sigmoid
 29 |   case sigmoidGrad
 30 |   case relu
 31 |   case reluGrad
 32 |   case abs
 33 |   case absGrad
 34 |   case geluApprox
 35 |   case geluApproxGrad
 36 |   case geluExact
 37 |   case geluExactGrad
 38 |   case erf
 39 |   case erfGrad
 40 |   case floor
 41 |   case ceil
 42 |   case round
 43 | 
 44 |   public func apply<T: NumericTensorElement>(_ x: T) -> T {
 45 |     let f = x.toFloat()
 46 |     return switch self {
 47 |     case .sin:
 48 |       T(Foundation.sin(f))
 49 |     case .cos:
 50 |       T(Foundation.cos(f))
 51 |     case .minusSin:
 52 |       T(-Foundation.sin(f))
 53 |     case .tan:
 54 |       T(Foundation.tan(f))
 55 |     case .tanGrad:
 56 |       T(1 / pow(Foundation.cos(f), 2))
 57 |     case .atan:
 58 |       T(Foundation.atan(f))
 59 |     case .atanGrad:
 60 |       T(1 / (1 + pow(f, 2)))
 61 |     case .acos:
 62 |       T(Foundation.acos(f))
 63 |     case .acosGrad:
 64 |       T(-1 / Foundation.sqrt(1 - pow(f, 2)))
 65 |     case .asin:
 66 |       T(Foundation.asin(f))
 67 |     case .asinGrad:
 68 |       T(1 / Foundation.sqrt(1 - pow(f, 2)))
 69 |     case .exp:
 70 |       T(Foundation.exp(f))
 71 |     case .log:
 72 |       T(Foundation.log(f))
 73 |     case .recip:
 74 |       T(1 / f)
 75 |     case .sigmoid:
 76 |       T(safeSigmoid(f))
 77 |     case .sigmoidGrad:
 78 |       T(safeSigmoid(f) * safeSigmoid(-f))
 79 |     case .relu:
 80 |       x < T(0.0) ? T(0.0) : x
 81 |     case .reluGrad:
 82 |       x < T(0.0) ? T(0.0) : T(1.0)
 83 |     case .abs:
 84 |       T(f < 0 ? -f : f)
 85 |     case .absGrad:
 86 |       T(f < 0 ? -1.0 : 1.0)
 87 |     case .geluApprox:
 88 |       T(geluApproxImpl(f))
 89 |     case .geluApproxGrad:
 90 |       T(geluApproxGradImpl(f))
 91 |     case .geluExact:
 92 |       T(geluExactImpl(f))
 93 |     case .geluExactGrad:
 94 |       T(geluExactGradImpl(f))
 95 |     case .erf:
 96 |       T(fastErf(f))
 97 |     case .erfGrad:
 98 |       T(simpleErfGrad(f))
 99 |     case .floor:
100 |       T(f.rounded(.down))
101 |     case .ceil:
102 |       T(f.rounded(.up))
103 |     case .round:
104 |       T(f.rounded())
105 |     }
106 |   }
107 | }
108 | 
109 | private func geluApproxImpl(_ f: Float) -> Float {
110 |   0.5 * f * (1 + safeTanh(0.797884561 * (f + 0.044715 * pow(f, 3))))
111 | }
112 | 
113 | private func geluApproxGradImpl(_ f: Float) -> Float {
114 |   let tanhTerm = tanh(0.035677408145115 * pow(f, 3) + 0.797884561 * f)
115 |   return 0.5 * f * (1 - pow(tanhTerm, 2)) * (0.107032224435345 * pow(f, 2) + 0.797884561)
116 |     + 0.5 * tanhTerm + 0.5
117 | }
118 | 
119 | private func geluExactImpl(_ f: Float) -> Float {
120 |   return f * 0.5 * (1 + fastErf(f * 0.7071067811865475))
121 | }
122 | 
123 | private func geluExactGradImpl(_ f: Float) -> Float {
124 |   let c: Float = 0.7071067811865475
125 |   let term1 = 0.5 * (1 + fastErf(f * c))
126 |   let term2 = 0.5 * f * c * simpleErfGrad(f * c)
127 |   return term1 + term2
128 | }
129 | 
130 | private func simpleErfGrad(_ f: Float) -> Float {
131 |   abs(f) > 20 ? 0 : 1.1283791670955126 * exp(-f * f)
132 | }
133 | 
134 | private func fastErf(_ a: Float) -> Float {
135 |   // https://github.com/ml-explore/mlx/blob/0d5e7716ad0adadae215ece6eb70861a6a8b55a3/mlx/backend/common/ops.h#L47
136 |   var r: Float
137 |   var s: Float
138 |   var t: Float
139 |   var u: Float
140 |   t = abs(a)
141 |   s = a * a
142 |   func fma(_ x: Float, _ y: Float, _ z: Float) -> Float {
143 |     return x * y + z
144 |   }
145 |   if t > 0.927734375 {
146 |     // maximum error 0.99527 ulp
147 |     r = fma(
148 |       -1.72853470e-5, t, 3.83197126e-4)  // -0x1.220000p-16,0x1.91cfb2p-12
149 |     u = fma(
150 |       -3.88396438e-3, t, 2.42546219e-2)  // -0x1.fd1438p-9, 0x1.8d6342p-6
151 |     r = fma(r, s, u)
152 |     r = fma(r, t, -1.06777877e-1)  // -0x1.b55cb8p-4
153 |     r = fma(r, t, -6.34846687e-1)  // -0x1.450aa0p-1
154 |     r = fma(r, t, -1.28717512e-1)  // -0x1.079d0cp-3
155 |     r = fma(r, t, -t)
156 |     r = 1.0 - exp(r)
157 |     r = copysign(r, a)
158 |   } else {
159 |     // maximum error 0.98929 ulp
160 |     r = -5.96761703e-4  // -0x1.38e000p-11
161 |     r = fma(r, s, 4.99119423e-3)  //  0x1.471a58p-8
162 |     r = fma(r, s, -2.67681349e-2)  // -0x1.b691b2p-6
163 |     r = fma(r, s, 1.12819925e-1)  //  0x1.ce1c44p-4
164 |     r = fma(r, s, -3.76125336e-1)  // -0x1.812700p-2
165 |     r = fma(r, s, 1.28379166e-1)  //  0x1.06eba8p-3
166 |     r = fma(r, a, a)
167 |   }
168 |   return r
169 | }
170 | 
171 | private func safeSigmoid(_ x: Float) -> Float {
172 |   if x < -20 {
173 |     0
174 |   } else if x > 20 {
175 |     1
176 |   } else {
177 |     1 / (1 + exp(-x))
178 |   }
179 | }
180 | 
181 | private func safeTanh(_ x: Float) -> Float {
182 |   2 * safeSigmoid(2 * x) - 1
183 | }
184 | 
185 | extension Tensor {
186 |   @recordCaller
187 |   private func _elemwise(op: ElemwiseOp, grad gradOp: ElemwiseOp? = nil) -> Tensor {
188 |     let backend = Backend.current
189 |     let newData = createDataTask { t in
190 |       try await backend.elemwise(
191 |         try await t.data, op: op, scales: nil, count: t.shape.product(), dtype: t.dtype)
192 |     }
193 |     if needsGrad && Tensor.isGradEnabled {
194 |       guard let gradOp = gradOp else {
195 |         tracedFatalError("no gradient operation was specified")
196 |       }
197 |       let handle = self.saveForBackward()
198 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype) { grad in
199 |         handle.backward(backend) { self.noGrad().elemwiseGrad(op: gradOp, grad: grad) }
200 |       }
201 |     } else {
202 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype)
203 |     }
204 |   }
205 | 
206 |   @recordCaller
207 |   internal func _elemwiseGrad(op: ElemwiseOp, grad: Tensor) -> Tensor {
208 |     #alwaysAssert(!self.needsGrad && !grad.needsGrad, "second derivatives are not supported")
209 |     let backend = Backend.current
210 |     let newData = Tensor.createDataTask(self, grad) { t, grad in
211 |       try await backend.elemwise(
212 |         try await t.data, op: op, scales: try await grad.data, count: t.shape.product(),
213 |         dtype: t.dtype)
214 |     }
215 |     return Tensor(dataTask: newData, shape: shape, dtype: dtype)
216 |   }
217 | 
218 |   @recordCaller
219 |   private func _sin() -> Tensor {
220 |     self.elemwise(op: .sin, grad: .cos)
221 |   }
222 | 
223 |   @recordCaller
224 |   private func _cos() -> Tensor {
225 |     self.elemwise(op: .cos, grad: .minusSin)
226 |   }
227 | 
228 |   @recordCaller
229 |   private func _tan() -> Tensor {
230 |     self.elemwise(op: .tan, grad: .tanGrad)
231 |   }
232 | 
233 |   @recordCaller
234 |   private func _atan() -> Tensor {
235 |     self.elemwise(op: .atan, grad: .atanGrad)
236 |   }
237 | 
238 |   @recordCaller
239 |   private func _acos() -> Tensor {
240 |     self.elemwise(op: .acos, grad: .acosGrad)
241 |   }
242 | 
243 |   @recordCaller
244 |   private func _asin() -> Tensor {
245 |     self.elemwise(op: .asin, grad: .asinGrad)
246 |   }
247 | 
248 |   @recordCaller
249 |   private func _exp() -> Tensor {
250 |     self.elemwise(op: .exp, grad: .exp)
251 |   }
252 | 
253 |   @recordCaller
254 |   private func _log() -> Tensor {
255 |     self.elemwise(op: .log, grad: .recip)
256 |   }
257 | 
258 |   @recordCaller
259 |   private func _sigmoid() -> Tensor {
260 |     self.elemwise(op: .sigmoid, grad: .sigmoidGrad)
261 |   }
262 | 
263 |   @recordCaller
264 |   private func _relu() -> Tensor {
265 |     self.elemwise(op: .relu, grad: .reluGrad)
266 |   }
267 | 
268 |   @recordCaller
269 |   private func _abs() -> Tensor {
270 |     self.elemwise(op: .abs, grad: .absGrad)
271 |   }
272 | 
273 |   @recordCaller
274 |   private func _tanh() -> Tensor {
275 |     2 * (2 * self).sigmoid() - 1
276 |   }
277 | 
278 |   @recordCaller
279 |   private func _gelu(mode: GeLUMode = .approx) -> Tensor {
280 |     switch mode {
281 |     case .approx:
282 |       self.elemwise(op: .geluApprox, grad: .geluApproxGrad)
283 |     case .exact:
284 |       self.elemwise(op: .geluExact, grad: .geluExactGrad)
285 |     }
286 |   }
287 | 
288 |   @recordCaller
289 |   private func _erf() -> Tensor {
290 |     self.elemwise(op: .erf, grad: .erfGrad)
291 |   }
292 | 
293 |   @recordCaller
294 |   private func _silu() -> Tensor {
295 |     return self * self.sigmoid()
296 |   }
297 | 
298 |   @recordCaller
299 |   private func _sqrt() -> Tensor {
300 |     pow(0.5)
301 |   }
302 | 
303 |   @recordCaller
304 |   private func _rsqrt() -> Tensor {
305 |     pow(-0.5)
306 |   }
307 | 
308 |   @recordCaller
309 |   private func _floor() -> Tensor {
310 |     noGrad().elemwise(op: .floor)
311 |   }
312 | 
313 |   @recordCaller
314 |   private func _ceil() -> Tensor {
315 |     noGrad().elemwise(op: .ceil)
316 |   }
317 | 
318 |   @recordCaller
319 |   private func _round() -> Tensor {
320 |     noGrad().elemwise(op: .round)
321 |   }
322 | }
323 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/FusedOps.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | extension Tensor {
  4 |   @recordCaller
  5 |   private func _mul(_ coeff: Tensor, thenAdd bias: Tensor) -> Tensor {
  6 |     #alwaysAssert(dtype == coeff.dtype, "dtype \(dtype) does not match coefficients \(coeff.dtype)")
  7 |     #alwaysAssert(dtype == bias.dtype, "dtype \(dtype) does not match bias \(bias.dtype)")
  8 | 
  9 |     let (outputShape, allStrides) = Tensor.lazyBroadcast([self, coeff, bias])
 10 |     let tStrides = allStrides[0]
 11 |     let coeffStrides = allStrides[1]
 12 |     let biasStrides = allStrides[2]
 13 | 
 14 |     let backend = Backend.current
 15 |     let newData = Tensor.createDataTask(self, coeff, bias) { t, coeff, bias in
 16 |       try await backend.mulAdd(
 17 |         input: BroadcastData(strides: tStrides, data: try await t.data),
 18 |         coeff: BroadcastData(strides: coeffStrides, data: try await coeff.data),
 19 |         bias: BroadcastData(strides: biasStrides, data: try await bias.data),
 20 |         dtype: t.dtype
 21 |       )
 22 |     }
 23 |     if (needsGrad || coeff.needsGrad || bias.needsGrad) && Tensor.isGradEnabled {
 24 |       let handle = self.saveForBackward()
 25 |       let coeffHandle = coeff.saveForBackward()
 26 |       let biasHandle = bias.saveForBackward()
 27 |       return Tensor(dataTask: newData, shape: outputShape, dtype: dtype) { grad in
 28 |         handle.backward(backend) { (grad * coeff.noGrad()).reduceBroadcast(tStrides, as: self) }
 29 |         coeffHandle.backward(backend) {
 30 |           (grad * self.noGrad()).reduceBroadcast(coeffStrides, as: coeff)
 31 |         }
 32 |         biasHandle.backward(backend) { grad.reduceBroadcast(biasStrides, as: bias) }
 33 |       }
 34 |     } else {
 35 |       return Tensor(dataTask: newData, shape: outputShape, dtype: dtype)
 36 |     }
 37 |   }
 38 | 
 39 |   @recordCaller
 40 |   private func _add(_ bias: Tensor, thenMul coeff: Tensor) -> Tensor {
 41 |     #alwaysAssert(dtype == coeff.dtype, "dtype \(dtype) does not match coefficients \(coeff.dtype)")
 42 |     #alwaysAssert(dtype == bias.dtype, "dtype \(dtype) does not match bias \(bias.dtype)")
 43 | 
 44 |     let (outputShape, allStrides) = Tensor.lazyBroadcast([self, coeff, bias])
 45 |     let tStrides = allStrides[0]
 46 |     let coeffStrides = allStrides[1]
 47 |     let biasStrides = allStrides[2]
 48 | 
 49 |     let backend = Backend.current
 50 |     let newData = Tensor.createDataTask(self, coeff, bias) { t, coeff, bias in
 51 |       try await backend.addMul(
 52 |         input: BroadcastData(strides: tStrides, data: try await t.data),
 53 |         bias: BroadcastData(strides: biasStrides, data: try await bias.data),
 54 |         coeff: BroadcastData(strides: coeffStrides, data: try await coeff.data),
 55 |         dtype: t.dtype
 56 |       )
 57 |     }
 58 |     if (needsGrad || coeff.needsGrad || bias.needsGrad) && Tensor.isGradEnabled {
 59 |       let handle = self.saveForBackward()
 60 |       let coeffHandle = coeff.saveForBackward()
 61 |       let biasHandle = bias.saveForBackward()
 62 |       return Tensor(dataTask: newData, shape: outputShape, dtype: dtype) { grad in
 63 |         handle.backward(backend) { (grad * coeff.noGrad()).reduceBroadcast(tStrides, as: self) }
 64 |         coeffHandle.backward(backend) {
 65 |           (grad * (self.noGrad() + bias.noGrad())).reduceBroadcast(coeffStrides, as: coeff)
 66 |         }
 67 |         biasHandle.backward(backend) {
 68 |           (grad * coeff.noGrad()).reduceBroadcast(biasStrides, as: bias)
 69 |         }
 70 |       }
 71 |     } else {
 72 |       return Tensor(dataTask: newData, shape: outputShape, dtype: dtype)
 73 |     }
 74 |   }
 75 | 
 76 |   @recordCaller
 77 |   private func _normalize<T: NumericTensorElement>(axis: Int, eps: T) -> Tensor {
 78 |     #alwaysAssert(dtype.isFloat, "cannot apply normalize() to dtype \(dtype)")
 79 | 
 80 |     let backend = Backend.current
 81 |     let newData = createDataTask { t in
 82 |       try await backend.normalize(
 83 |         input: try await t.data,
 84 |         dims: t.reduceDims(axis),
 85 |         eps: eps,
 86 |         dtype: t.dtype
 87 |       )
 88 |     }
 89 |     if needsGrad && Tensor.isGradEnabled {
 90 |       let handle = self.saveForBackward()
 91 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype) { grad in
 92 |         handle.backward(backend) {
 93 |           self.noGrad().normalizeGrad(axis: axis, outGrad: grad, eps: eps)
 94 |         }
 95 |       }
 96 |     } else {
 97 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype)
 98 |     }
 99 |   }
100 | 
101 |   @recordCaller
102 |   private func _normalizeGrad<T: NumericTensorElement>(axis: Int, outGrad: Tensor, eps: T)
103 |     -> Tensor
104 |   {
105 |     #alwaysAssert(dtype.isFloat, "cannot apply normalizeGrad() to dtype \(dtype)")
106 |     #alwaysAssert(dtype == outGrad.dtype, "gradient dtype \(outGrad.dtype) does not match \(dtype)")
107 |     #alwaysAssert(shape == outGrad.shape, "gradient shape \(outGrad.shape) does not match \(shape)")
108 |     #alwaysAssert(
109 |       !Tensor.isGradEnabled || (!needsGrad && !outGrad.needsGrad),
110 |       "gradients of normalizeGrad() are not supported")
111 | 
112 |     let backend = Backend.current
113 |     let newData = Tensor.createDataTask(self, outGrad) { t, outGrad in
114 |       try await backend.normalizeGrad(
115 |         input: try await t.data,
116 |         outGrad: try await outGrad.data,
117 |         dims: t.reduceDims(axis),
118 |         eps: eps,
119 |         dtype: t.dtype
120 |       )
121 |     }
122 |     return Tensor(dataTask: newData, shape: shape, dtype: dtype)
123 |   }
124 | }
125 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/abs_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/abs(function:file:line:)``
2 | 
3 | Calculate the absolute value of each element in the ``Tensor``, returning a new tensor with the results.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/acos_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/acos(function:file:line:)``
2 | 
3 | Compute the inverse cosine of the input.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/add_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/add(_:thenMul:function:file:line:)``
2 | 
3 | Add the given tensor to the original tensor, then multiply the result by another tensor.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/all_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/all(axis:keepdims:function:file:line:)``
2 | 
3 | Perform a "logical and" reduction along an axis. Only applies to tensor of dtype `.bool`.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/argmax_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/argmax(axis:keepdims:function:file:line:)``
2 | 
3 | Return the indices of the maximum values along the specified axis of the `Tensor`. If `keepdims` is `true`, then keep the given axis with a value of `1`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/argmin_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/argmin(axis:keepdims:function:file:line:)``
2 | 
3 | Return the indices of the minimum values along the specified axis of the ``Tensor`. If `keepdims` is `true`, then keep the given axis with a value of `1`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/argsort_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/argsort(axis:descending:stable:function:file:line:)``
2 | 
3 | Return the indices to sort a tensor along the given axis.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/article_0.md:
--------------------------------------------------------------------------------
  1 | # Introduction to Tensors
  2 | 
  3 | An introduction to the ``Tensor`` object.
  4 | 
  5 | ## Overview
  6 | 
  7 | A `Tensor` is an array of primitive values such as numbers or booleans.
  8 | 
  9 | We can create an array of floating point values like so:
 10 | 
 11 | ```swift
 12 | let values = Tensor(data: [1.0, 2.0, 3.0, 4.0])
 13 | ```
 14 | 
 15 | Unlike an `Array<Float>` in Swift, we cannot modify the values inside of our `Tensor` in-place.
 16 | However, we can create new `Tensor`s using other operations:
 17 | 
 18 | ```swift
 19 | let valuesPlus1 = values + 1 // contains: [2.0, 3.0, 4.0, 5.0]
 20 | ```
 21 | 
 22 | What if we want to print the values inside of our new `Tensor`?
 23 | For this, we must use `try await` with a helper method like ``Tensor/floats(function:file:line:)``.
 24 | This is because the computations performed on `Tensor`s asynchronously, possibly on devices like GPUs.
 25 | This computation might even fail, in which case the result will not be available.
 26 | 
 27 | ```swift
 28 | do {
 29 |   print("contents of array:", try await valuesPlus1.floats())
 30 |   // Output: [2.0, 3.0, 4.0, 5.0]
 31 | } catch {
 32 |   // ...
 33 | }
 34 | ```
 35 | 
 36 | ### Data types
 37 | 
 38 | In Swift, we can use generics to define arrays of arbitrary types, such as `[Int64]`, `[Bool]`, `[Float]`, etc.
 39 | The data that can be stored in a `Tensor` is restricted to a few primitive types, which are enumerated
 40 | in the ``Tensor/DType`` enum.
 41 | 
 42 | We can access the type of data stored in a `Tensor` using the ``Tensor/dtype`` attribute.
 43 | We can also pass `dtype` arguments to various constructors of `Tensors`.
 44 | 
 45 | ```swift
 46 | let x = Tensor(data: [1, 2], dtype: .float32)
 47 | print(x.dtype) // Output: [1.0, 2.0]
 48 | ```
 49 | 
 50 | You can also cast the elements of a `Tensor` using the ``Tensor/cast(_:function:file:line:)`` method.
 51 | 
 52 | ### Shapes
 53 | 
 54 | It is often useful to define multi-dimensional arrays.
 55 | One way to think of these is as arrays of arrays (or arrays of arrays of arrays, etc.).
 56 | For example, you could create a two-dimensional `Array` in Swift like
 57 | 
 58 | ```swift
 59 | let x: [[Float]] = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
 60 | ```
 61 | 
 62 | When we create multi-dimensional `Tensor`s, we don't explicitly represent them as arrays of arrays.
 63 | Instead, we talk about "data" and its "shape".
 64 | In the above example, the data might be `[1.0, 2.0, 3.0, 4.0]`, and the shape could be `[2, 3]`.
 65 | We can think of the shape as the sizes of the recursively nested arrays; so the outer array is of size `2`, and the inner array is of size `3`.
 66 | 
 67 | To create a `Tensor` corresponding to our Swift array above, we can do
 68 | 
 69 | ```swift
 70 | let x = Tensor(data: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape: [2, 3])
 71 | ```
 72 | 
 73 | ### Indexing
 74 | 
 75 | Suppose we have created a 2-dimensional `Tensor` like so
 76 | 
 77 | ```swift
 78 | let x = Tensor(data: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape: [2, 3])
 79 | ```
 80 | 
 81 | We can think of this as a matrix with two rows and three columns.
 82 | Let's say we want to access the second row as its own `Tensor`. We can do
 83 | 
 84 | ```swift
 85 | let secondRow = x[1] // Tensor of shape [3] with data [4.0, 5.0, 6.0]
 86 | ```
 87 | 
 88 | We can also use multiple indices, separated by commas. For example, we can do the following
 89 | 
 90 | ```swift
 91 | let y = x[0, 1] // Tensor of shape [] with data [2.0]
 92 | let z = x[1, 1] // Tensor of shape [] with data [5.0]
 93 | let w = x[1, 2] // Tensor of shape [] with data [6.0]
 94 | ```
 95 | 
 96 | If the index is an integer, then it will select a specific element along a dimension.
 97 | What if we want to select more than one item?
 98 | For this, we can use a range:
 99 | 
100 | ```swift
101 | let topRightPair = x[0, 1..<3] // Tensor of shape [2] and data [2.0, 3.0]
102 | let rightColumn = x[0...1, 2] // Tensor of shape [2] and data [3.0, 6.0]
103 | ```
104 | 
105 | To select the entire dimension, we can use the ellipsis `...`:
106 | 
107 | ```swift
108 | let rightColumn = x[..., 2] // Tensor of shape [2] and data [3.0, 6.0]
109 | ```
110 | 
111 | We can also use negative indexing, which produces an index beginning from the end rather than the beginning:
112 | 
113 | ```swift
114 | let rightColumn = x[..., -1] // Tensor of shape [2] and data [3.0, 6.0]
115 | ```
116 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/article_1.md:
--------------------------------------------------------------------------------
 1 | # Automatic Differentiation
 2 | 
 3 | An overview of how to compute gradients of functions with respect to inputs using ``Tensor``s.
 4 | 
 5 | ## Overview
 6 | 
 7 | It is essential to be able to compute derivatives of functions when training neural networks.
 8 | With a `Tensor`, we can leverage reverse-mode automatic differentiation to compute derivatives automatically.
 9 | 
10 | For example, let's compute the gradient of `sqrt(sum_i x_i^2)` with respect to an array of inputs `x_i`:
11 | 
12 | ```swift
13 | let x = Tensor(data: [1.0, 2.0, 3.0])
14 | 
15 | // Create a version of `x` that will store its gradients in `grad`
16 | var grad: Tensor?
17 | let differentiableX = x.onGrad { g in grad = g }
18 | 
19 | // Compute the final value we want to differentiate
20 | let norm = differentiableX.pow(2).sum().sqrt()
21 | 
22 | // Perform reverse-mode automatic differentiation to compute gradients.
23 | norm.backward()
24 | 
25 | print(try await grad!.floats())
26 | // Output: [0.26726124, 0.5345225, 0.8017837]
27 | ```
28 | 
29 | When a `Tensor` is created from data, it is typically not differentiable.
30 | Instead, it will be treated as a constant in the computation graph.
31 | To tell if a `Tensor` is a constant, check the ``Tensor/needsGrad`` property.
32 | 
33 | When we call ``Tensor/onGrad(_:function:file:line:)``, we create a new `Tensor` that requires gradients, and is therefore not a constant.
34 | Furthermore, whenever we perform an operation on a `Tensor` that requires gradients, the resulting `Tensor` will also require gradients. When the backward pass is triggered by a `backward()` call on some downstream result `Tensor`, all non-constant `Tensor`s that were used in the computation will receive callbacks with gradients.
35 | 
36 | ### Pitfall: don't create multiple computation graphs!
37 | 
38 | The computation graph is tracked by leveraging Swift reference counting.
39 | This can cause errors when a `Tensor` is used in two different computation graphs, and only one
40 | graph is differentiated with a `backward()` call.
41 | For example, this code is incorrect:
42 | 
43 | ```swift
44 | let x = Tensor(data: [1.0, 2.0, 3.0]).onGrad { _ in
45 |   print("this won't end up being called")
46 | }
47 | let y = x + 2
48 | let z = x * 3
49 | 
50 | // Incorrect: backward through graph that used x, when a different graph is
51 | // still around that also used x.
52 | // As a result, `x` will not receive a gradient since it is still waiting for
53 | // a backward pass through `z`.
54 | y.sum().backward()
55 | 
56 | print(try await z.floats())
57 | 
58 | // After the scope exits and `z` is released, we will see an assertion failure:
59 | //
60 | //     Assertion failure: backward pass was incompleted due to an unused reference.
61 | //
62 | //     Traceback of reference creation:
63 | // 
64 | //     *(_:_:) at /Users/alex/code/github.com/unixpickle/honeycrisp/Sources/Honeycrisp/Tensor.swift:685
65 | ```
66 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/article_2.md:
--------------------------------------------------------------------------------
 1 | # Random Number Generation
 2 | 
 3 | How to seed, save, and use random number generators.
 4 | 
 5 | ## Overview
 6 | 
 7 | Randomness is often useful in machine learning workloads.
 8 | 
 9 | Random ``Tensor`` objects can be constructed with a few convenience initializers. For example:
10 | 
11 | ```swift
12 | // Generate 12 uniformly random floating-point values in the range [0, 1).
13 | let x = Tensor(rand: [12])
14 | 
15 | // Generate a 3x3 matrix of samples from the Normal distribution.
16 | let y = Tensor(randn: [3, 3])
17 | 
18 | // Generate a list of 32 integers randomly sampled from [0, 16).
19 | let z = Tensor(randInt: [32], in: 0..<16)
20 | ```
21 | 
22 | All of the above initializers take an optional `generator` argument which is an instance of ``RandomGenerator``. If unspecified, then the generator returned by ``Backend/defaultRandom()`` is used. You can create your own random generator using ``Backend/createRandom()``, and it can be manipulated using ``RandomGenerator/state`` and ``RandomGenerator/seed(_:)``.
23 | 
24 | ```swift
25 | let rng = Backend.current.createRandom()
26 | 
27 | rng.seed(1337) // Optionally seed the generator
28 | let state = rng.state // Get the current RNG state (as a Tensor)
29 | 
30 | let sampled = Tensor(rand: [3, 3], generator: rng)
31 | 
32 | rng.state = state // Restore the state to a previously saved state
33 | let sampled1 = Tensor(rand: [3, 3], generator: rng)
34 | 
35 | // `sampled1` should equal `sampled`
36 | 
37 | rng.seed(1337) // Re-seed with the same value as before
38 | let sampled2 = Tensor(rand: [3, 3], generator: rng)
39 | // `sampled2` should equal `sampled` and `sampled1`
40 | ```
41 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/asin_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/asin(function:file:line:)``
2 | 
3 | Compute the inverse sine of the input.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/atan_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/atan(function:file:line:)``
2 | 
3 | Compute the inverse tangent of the input.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/backwardHandle_backward.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/BackwardHandle/backward(_:_:function:file:line:)``
 2 | 
 3 | Enqueue an operation to contribute to the gradient of the underlying ``Tensor`` that this handle was created for.
 4 | 
 5 | ## Overview
 6 | 
 7 | When the gradient implementation is called, it is done so with the provided backend accessible as ``Backend/current``. This is done to avoid scenarios where a backward implementation mistakenly fails to use the correct ``Backend`` in the backward pass, by forcing the implementor to _think_ about what ``Backend`` should be used.
 8 | 
 9 | When the gradient is computed, it may trigger further gradients to be computed for inputs of the handle's underlying Tensor. If multiple downstream operations use the underlying `Tensor`, then the results may be accumulated until the last gradient is computed for the underlying `Tensor`.
10 | 
11 | When this is called, the provided backward implementation may not be immediately run, but may be added to the back of a queue. However, it is guaranteed (under unexceptional circumstances) that this function will be called during this same backward pass.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/backward_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/backward(_:function:file:line:)``
2 | 
3 | Begin backpropagation to compute gradients of inputs with respect to this ``Tensor``. Optionally, pass the corresponding output gradient for this tensor.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/batchedMatmul_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/batchedMatmul(a:transA:b:transB:transOut:function:file:line:)``
2 | 
3 | Perform a batched matrix multiplication of two tensors, with options to transpose the input and output tensors.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/bools_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/bools(function:file:line:)``
2 | 
3 | Convert this tensor to an array of boolean values.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/broadcast_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/broadcast(_:_:function:file:line:)``
2 | 
3 | Broadcast the pair of ``Tensor`` values, allowing for operations between tensors of different shapes.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/broadcast_2.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/broadcast(_:function:file:line:)``
2 | 
3 | Broadcast an arbitrary number of ``Tensor`` values to all have the same, compatible shape.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/cast_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/cast(_:function:file:line:)``
2 | 
3 | Cast the ``Tensor`` to a specified data type.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/cast_2.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/cast(as:function:file:line:)``
2 | 
3 | Cast the `Tensor` to the same data type as the provided argument.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/ceil_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/ceil(function:file:line:)``
2 | 
3 | Round the floating-point input upwards.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/checkNaN_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/checkNaN(onForward:onGrad:function:file:line:)``
2 | 
3 | Check the tensor for NaN (Not a Number) values during the forward and/or backward passes, and print the provided strings when NaNs are detected.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/checkpoint.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/checkpoint(enabled:saveRandomState:waitForData:_:_:function:file:line:)``
 2 | 
 3 | Apply a differentiable function without storing intermediate results for the backward pass; during the backward pass, call the function again with gradients enabled to backpropagate through it.
 4 | 
 5 | ## Overview
 6 | 
 7 | This is useful for saving memory when a computation graph is deep.
 8 | 
 9 | Set `saveRandomState` to `false` to avoid saving and restoring the state of the current backend's default random number generator for the second call to the function. The `saveRandomState` behavior is not thread-safe, since other threads might mutate or depend on the random state at the same time as the checkpointed function.
10 | 
11 | If `waitForData` is `false`, then gradients may be in the process of being computed asynchronously on-device while the forward pass is recomputed. This might defeat the purpose of using checkpointing as a means to save memory.
12 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/chunk_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/chunk(axis:count:function:file:line:)``
2 | 
3 | Divide the ``Tensor`` into chunks along the specified axis, distributing the elements into the specified number of segments.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/clamp_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/clamp(min:max:function:file:line:)``
2 | 
3 | Clamp the values of the ``Tensor`` to the specified minimum and maximum bounds.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/conv1DKernelGrad_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/conv1DKernelGrad(_:image:outGrad:function:file:line:)``
2 | 
3 | Compute the gradient of the 1-dimensional convolution with respect to the kernel.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/conv1DTranspose_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/conv1DTranspose(_:image:kernel:function:file:line:)``
2 | 
3 | Perform a 1-dimensional transposed convolution on the given `image` using the specified `kernel`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/conv1D_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/conv1D(_:image:kernel:function:file:line:)``
2 | 
3 | Perform a one-dimensional convolution on the given `image` using the specified `kernel`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/conv2DKernelGrad_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/conv2DKernelGrad(_:image:outGrad:function:file:line:)``
2 | 
3 | Compute the gradient of a 2D convolution operation with respect to the kernel, using the provided input image and output gradient.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/conv2DTranspose_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/conv2DTranspose(_:image:kernel:function:file:line:)``
2 | 
3 | Apply a 2D transpose convolution operation to the given `image` using the specified `kernel`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/conv2D_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/conv2D(_:image:kernel:function:file:line:)``
2 | 
3 | Perform a 2D convolution on the given image using the specified kernel.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/copyToArray_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/copyToArray(_:function:file:line:)``
2 | 
3 | Copy the contents of the ``Tensor`` to the specified array, allowing for easy manipulation and access of tensor data in a standard array format.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/cos_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/cos(function:file:line:)``
2 | 
3 | Calculate the cosine of each element in the ``Tensor``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/cumulativeProd_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/cumulativeProd(axis:exclusive:reverse:function:file:line:)``
 2 | 
 3 | Compute a cumulative product along an axis.
 4 | 
 5 | ## Overview
 6 | 
 7 | If `reverse` is true, accumulate from the right instead of from the left.
 8 | 
 9 | If `exclusive` is true, then the product is shifted by 1, and the value `1.0` is inserted inserted in the first result's place.
10 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/cumulativeSum_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/cumulativeSum(axis:exclusive:reverse:function:file:line:)``
 2 | 
 3 | Compute a cumulative sum along an axis.
 4 | 
 5 | ## Overview
 6 | 
 7 | If `reverse` is true, sum from the right instead of from the left.
 8 | 
 9 | If `exclusive` is true, then the sum is shifted by 1.
10 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/elemwise_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/elemwise(op:grad:function:file:line:)``
2 | 
3 | Apply the given element-wise operation to the ``Tensor``, with the corresponding operation that defines the gradient of the operation.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/erf_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/erf(function:file:line:)``
2 | 
3 | Compute the Gauss error function.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/exp_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/exp(function:file:line:)``
2 | 
3 | Compute the element-wise exponential of the ``Tensor``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/expand_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/expand(as:function:file:line:)``
2 | 
3 | Expand the dimensions of the ``Tensor`` to match the shape of the provided argument. See ``Tensor/expand(shape:function:file:line:)``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/expand_2.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/expand(shape:function:file:line:)``
2 | 
3 | Expand the dimensions of the `Tensor` to the specified shape.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/flatten_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/flatten(startAxis:endAxis:function:file:line:)``
2 | 
3 | Flatten the ``Tensor`` by collapsing the specified range of axes into a single axis, allowing for flexible reshaping of multi-dimensional data. The `endAxis` is an inclusive index.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/floats_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/floats(function:file:line:)``
2 | 
3 | Extract the values of the ``Tensor`` (as if it were flattened) into an array of floats.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/floor_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/floor(function:file:line:)``
2 | 
3 | Round the floating-point input downwards.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/gather_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/gather(axis:indices:indicesAreUnique:function:file:line:)``
2 | 
3 | Gather values along the specified axis using the provided indices. The `indicesAreUnique` parameter determines whether the indices used for gathering are unique or can have duplicates.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/gelu_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/gelu(mode:function:file:line:)``
 2 | 
 3 | Applies the Gaussian Error Linear Unit (GELU) activation function to each element of the ``Tensor``.
 4 | 
 5 | ## Overview
 6 | 
 7 | Typically, GELU is implemented via the expensive expression
 8 | 
 9 |     x * 0.5 * (1 + erf(x/sqrt(2)))
10 | 
11 | However, when `mode` is `.approx` (the default), then the approximation
12 |   
13 |     0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
14 | 
15 | is used instead.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/int64s_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/int64s(function:file:line:)``
2 | 
3 | Extract the values of the ``Tensor`` (as if it were flattened) into an array of 64-bit integers.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/ints_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/ints(function:file:line:)``
2 | 
3 | Extract the values of the ``Tensor`` (as if it were flattened) into an array of `Int`s.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/item_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/item(function:file:line:)``
2 | 
3 | Return the single element of the `Tensor` as a scalar value.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/logSoftmaxGrad_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/logSoftmaxGrad(inputs:grads:axis:function:file:line:)``
2 | 
3 | Computes the gradients of the log softmax function with respect to the inputs along the specified axis using the provided gradients.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/logSoftmax_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/logSoftmax(axis:function:file:line:)``
2 | 
3 | Compute the logarithm of the softmax of the ``Tensor`` along the specified axis, providing a numerically stable way to perform this operation.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/log_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/log(function:file:line:)``
2 | 
3 | Compute the natural logarithm of each element in the ``Tensor``, returning a new ``Tensor`` containing the results.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/matmul_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/matmul(a:transA:b:transB:transOut:aGradBackend:bGradBackend:function:file:line:)``
2 | 
3 | Perform matrix multiplication of two tensors, with options to specify transposition for each tensor and the output tensor, while also allowing for gradient computation for backpropagation through specified backends.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/maxPool2D_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/maxPool2D(width:height:channelsLast:function:file:line:)``
2 | 
3 | Perform 2D max pooling on the ``Tensor`` using the specified width and height, with the data formatted in the channels-last or channels-first order.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/max_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/max(axis:keepdims:function:file:line:)``
2 | 
3 | Return the maximum value of the ``Tensor`` along the specified axis, optionally retaining the reduced dimension based on the `keepdims` parameter.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/meanAndVariance_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/meanAndVariance(axis:keepdims:function:file:line:)``
2 | 
3 | Calculate the mean and variance of the ``Tensor`` along the specified axis, with an option to retain the reduced dimensions.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/mean_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/mean(axis:keepdims:function:file:line:)``
2 | 
3 | Compute the mean of the elements in the ``Tensor`` along the specified axis, with an option to keep the dimensions of the result.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/min_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/min(axis:keepdims:function:file:line:)``
2 | 
3 | Compute the minimum value of the elements in the ``Tensor`` along the specified axis, with an option to retain the reduced dimensions.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/move_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/move(axis:to:function:file:line:)``
2 | 
3 | Move an axis at a given index to a different index in the shape of the resulting ``Tensor``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/mul_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/mul(_:thenAdd:function:file:line:)``
2 | 
3 | Multiply the current ``Tensor`` by another ``Tensor`` and then add a specified value to the result.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/noGrad_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/noGrad()``
2 | 
3 | Prevent the resulting ``Tensor`` from propagating gradients to this Tensor.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/normalize_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/normalize(axis:eps:function:file:line:)``
2 | 
3 | Normalize the ``Tensor`` along the given axis.
4 | 
5 | ## Overview
6 | 
7 | The variance is computed without bias correction. Adds a scalar `epsilon` to the variance before taking a square root for numerical stability.
8 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/onGrad_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/onGrad(_:function:file:line:)``
2 | 
3 | Create a new tensor with the same data as this one, but with a provided callback to implement the backward pass.
4 | 
5 | If the current tensor already includes a backward callback, it is not preserved for the returned tensor.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/outer_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/outer(_:_:function:file:line:)``
2 | 
3 | Compute the outer product of two one-dimensional ``Tensor``s.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/pow_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/pow(_:function:file:line:)``
2 | 
3 | Raise the elements of the ``Tensor`` to the power of the specified scalar exponent.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/printing_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/printing(onForward:onGrad:function:file:line:)-46ckn``
2 | 
3 | Print the values of the `Tensor` during forward and backward passes.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/printing_2.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/printing(onForward:onGrad:function:file:line:)-5lqwk``
2 | 
3 | Print the values of the `Tensor` during forward and backward passes.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/prod_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/prod(axis:keepdims:function:file:line:)``
2 | 
3 | Compute the product of the elements in the ``Tensor`` along the specified axis, with an option to retain the axis of reduction with a value of `1`.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/qr_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/qrDecomposition(full:function:file:line:)``
 2 | 
 3 | Compute the QR decomposition of the matrix, such that `q &* r` approximates the original matrix.
 4 | 
 5 | If the input matrix is m-by-n, then the returned matrices will have the shapes:
 6 | 
 7 |  * `q`: if `full` is true, then m-by-m; otherwise, if m > n, then m-by-n
 8 |  * `r`: if `full` is true, then m-by-n; otherwise, if m > n, then n-by-n.
 9 | 
10 | If `full` is true, then redundant, arbitrary directions may be present in `q`, accompanied by zeros in `r`.
11 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/relu_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/relu(function:file:line:)``
2 | 
3 | Apply the Rectified Linear Unit (ReLU) activation function to the ``Tensor``, replacing all negative values with zero while leaving positive values unchanged.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/repeating_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/repeating(axis:count:function:file:line:)``
2 | 
3 | Repeat the ``Tensor`` along the specified axis for the given number of times.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/reshape_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/reshape(_:function:file:line:)``
2 | 
3 | Reshape the ``Tensor`` to the specified dimensions, allowing for a change in shape while maintaining the same total number of elements.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/reshape_2.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/reshape(as:function:file:line:)``
2 | 
3 | Change the shape of the ``Tensor`` to the shape of the provided tensor.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/round_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/round(function:file:line:)``
2 | 
3 | Round the floating-point input elementwise.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/rsqrt_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/rsqrt(function:file:line:)``
2 | 
3 | Compute the element-wise reciprocal square root of the ``Tensor``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/saveForBackward_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/saveForBackward(function:file:line:)``
2 | 
3 | Create a ``Tensor/BackwardHandle`` to this tensor to be used during the backward pass.
4 | 
5 | This is typically only necessary when implementing a custom operation's backward pass. Otherwise, it may be sufficient to directly call ``Tensor/backward(_:function:file:line:)``.
6 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/scatter_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/scatter(axis:count:indices:indicesAreUnique:function:file:line:)``
2 | 
3 | Scatter values from the source ``Tensor`` along the specified axis at the given indices. The caller must specify the new size of the given axis, and no indices should exceed this size. If the indices are unique (per slice of indices), then you may pass `indicesAreUnique: true` and possibly achieve a faster computation.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/sigmoid_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/sigmoid(function:file:line:)``
2 | 
3 | Compute the sigmoid activation function for each element in the ``Tensor``, mapping values to the range (0, 1).


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/silu_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/silu(function:file:line:)``
2 | 
3 | Computes the sigmoid linear unit (SiLU) activation function for the ``Tensor``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/sin_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/sin(function:file:line:)``
2 | 
3 | Calculate the element-wise sine of the elements in the ``Tensor``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/softmax_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/softmax(axis:function:file:line:)``
2 | 
3 | Computes the softmax function along the specified axis of the ``Tensor``, normalizing the input values to produce a probability distribution.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/some_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/some(axis:keepdims:function:file:line:)``
2 | 
3 | Perform a "logical or" reduction along an axis. Only applies to tensor of dtype `.bool`.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/split_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/split(axis:counts:function:file:line:)``
2 | 
3 | Split the ``Tensor`` along the specified axis into multiple sub-tensors based on the provided counts for each segment.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/sqrt_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/sqrt(function:file:line:)``
2 | 
3 | Return the element-wise square root of the ``Tensor``, producing a new tensor with the same shape.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/squeeze_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/squeeze(axis:function:file:line:)``
2 | 
3 | Remove the specified axis from the ``Tensor`` and assert that the current size of this axis was `1`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/state_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/state(function:file:line:)``
2 | 
3 | Extract the contents of the ``Tensor`` into an object which can be serialized to bytes.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/sum_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/sum(axis:keepdims:function:file:line:)``
2 | 
3 | Compute the sum of the elements in the ``Tensor`` along the specified axis, with an option to retain the axis of reduction with a value of `1`.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/svd_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/svd(full:function:file:line:)``
 2 | 
 3 | Compute the singular value decomposition of the matrix.
 4 | 
 5 | If the input matrix is m-by-n, then the returned matrices will have the shapes:
 6 | 
 7 |  * `u`: if `full` is true, then m-by-m; otherwise, m-by-min(m,n).
 8 |  * `s`: a 1-D array of length min(m,n).
 9 |  * `vt`: if `full` is true, then n-by-n; otherwise, n-by-min(m,n).
10 | 
11 | If `full` is true, then redundant, arbitrary dimensions might be present in `u` and `vt` when the input matrix is rectangular.
12 | 
13 | If `full` is false, then `u &* Tensor.diagonal(s) &* vt` should approximate the input matrix.
14 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/swap_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/swap(axis:with:function:file:line:)``
2 | 
3 | Swap the specified axes of the ``Tensor``, allowing for the reordering of dimensions.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/t_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/t(function:file:line:)``
2 | 
3 | Transpose the ``Tensor``, swapping its final two dimensions.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/tan_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/tan(function:file:line:)``
2 | 
3 | Returns the tangent of each element in the ``Tensor``, applying the tan function element-wise.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/tanh_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/tanh(function:file:line:)``
2 | 
3 | Returns the hyperbolic tangent of each element in the ``Tensor``, applying the tanh function element-wise.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/tril_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/tril(offset:function:file:line:)``
 2 | 
 3 | Return the lower-triangular part of the ``Tensor``, with all elements above (but not on) the diagonal set to zero.
 4 | 
 5 | ## Overview
 6 | 
 7 | This can be useful for masking out elements of a tensor.
 8 | 
 9 | The `offset` parameter can be specified to move the diagonal to the right (positive) or left (negative). Positive offsets result in fewer entries being set to zero.
10 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/triu_1.md:
--------------------------------------------------------------------------------
 1 | # ``Tensor/triu(offset:function:file:line:)``
 2 | 
 3 | Return the upper-triangular part of the ``Tensor``, with all elements below (but not on) the diagonal set to zero.
 4 | 
 5 | ## Overview
 6 | 
 7 | This can be useful for masking out elements of a tensor.
 8 | 
 9 | The `offset` parameter can be specified to move the diagonal to the right (positive) or left (negative). Positive offsets result in more entries being set to zero.
10 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/unsqueeze_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/unsqueeze(axis:function:file:line:)``
2 | 
3 | Insert a new dimension of size one into the ``Tensor`` at the specified axis.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/variance_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/variance(axis:keepdims:function:file:line:)``
2 | 
3 | Compute the variance of the elements in the ``Tensor`` along the specified axis, optionally retaining the reduced dimension.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/wait_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/wait(function:file:line:)``
2 | 
3 | Wait for this tensor's data to be fully computed.
4 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/when_1.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/when(isTrue:isFalse:dtype:function:file:line:)``
2 | 
3 | Returns a new ``Tensor`` containing `isTrue` where `self` is `true`, or `isFalse` otherwise. The `dtype` of `self` must be ``Tensor/DType/bool``.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/when_2.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/when(isTrue:isFalse:function:file:line:)-262cy``
2 | 
3 | Returns a new ``Tensor`` containing `isTrue` where `self` is `true`, or `isFalse` otherwise.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/when_3.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/when(isTrue:isFalse:function:file:line:)-2hnqn``
2 | 
3 | Returns a new ``Tensor`` containing `isTrue` where `self` is `true`, or `isFalse` otherwise.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Honeycrisp.docc/when_4.md:
--------------------------------------------------------------------------------
1 | # ``Tensor/when(isTrue:isFalse:function:file:line:)-8s8k``
2 | 
3 | Returns a new ``Tensor`` containing `isTrue` where `self` is `true`, or `isFalse` otherwise.


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Linalg.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | extension Tensor {
  4 | 
  5 |   /// Create an identity matrix of the given size.
  6 |   public convenience init(
  7 |     identity count: Int, dtype: DType = .float32, function: StaticString = #function,
  8 |     file: StaticString = #filePath, line: UInt = #line
  9 |   ) {
 10 |     let result = Backtrace.record(function: function, file: file, line: line) {
 11 |       #alwaysAssert(count >= 0, "identity size must not be negative, but got \(count)")
 12 |       let idxs = Tensor(data: 0..<count)
 13 |       return (idxs == idxs[..., NewAxis()]).cast(dtype)
 14 |     }
 15 |     self.init(dataTask: result.dataTask, shape: [count, count], dtype: dtype)
 16 |   }
 17 | 
 18 |   @recordCaller
 19 |   private static func _diagonal(_ diagonal: Tensor, offset: Int = 0) -> Tensor {
 20 |     #alwaysAssert(
 21 |       diagonal.shape.count == 1, "diagonal must be 1-D, but got shape \(diagonal.shape)")
 22 |     let matrixSize = diagonal.shape[0] + (offset < 0 ? -offset : offset)
 23 |     var indices = Tensor(data: 0..<diagonal.shape[0], dtype: .int64) * (matrixSize + 1)
 24 |     if offset > 0 {
 25 |       indices = indices + offset
 26 |     } else if offset < 0 {
 27 |       indices = indices - matrixSize * offset
 28 |     }
 29 |     return diagonal.scatter(
 30 |       axis: 0, count: matrixSize * matrixSize, indices: indices, indicesAreUnique: true
 31 |     ).reshape([matrixSize, matrixSize])
 32 |   }
 33 | 
 34 |   @recordCaller
 35 |   private static func _outer(_ a: Tensor, _ b: Tensor) -> Tensor {
 36 |     #alwaysAssert(
 37 |       a.shape.count == 1 && b.shape.count == 1,
 38 |       "invalid shapes for outer product: \(a.shape), \(b.shape)")
 39 |     #alwaysAssert(a.dtype == b.dtype, "dtype mismatch for outer product: \(a.dtype), \(b.dtype)")
 40 |     return matmul(
 41 |       a: a.reshape([a.shape[0], 1]), transA: false, b: b.reshape([1, b.shape[0]]), transB: false,
 42 |       transOut: false)
 43 |   }
 44 | 
 45 |   @recordCaller
 46 |   private static func _matmul(
 47 |     a: Tensor, transA: Bool, b: Tensor, transB: Bool, transOut: Bool, aGradBackend: Backend? = nil,
 48 |     bGradBackend: Backend? = nil
 49 |   )
 50 |     -> Tensor
 51 |   {
 52 |     #alwaysAssert(
 53 |       a.shape.count == 2 && b.shape.count == 2,
 54 |       "invalid shapes for matmul: \(a.shape), \(b.shape)")
 55 |     #alwaysAssert(a.dtype == b.dtype, "mismatched dtypes for matmul: \(a.dtype) and \(b.dtype)")
 56 |     let aShape = transA ? [a.shape[1], a.shape[0]] : a.shape
 57 |     let bShape = transB ? [b.shape[1], b.shape[0]] : b.shape
 58 |     #alwaysAssert(
 59 |       aShape[1] == bShape[0], "shape mismatch for matmul (with transposes): \(aShape), \(bShape)")
 60 |     let outShape = transOut ? [bShape[1], aShape[0]] : [aShape[0], bShape[1]]
 61 |     let backend = Backend.current
 62 |     let newData = createDataTask(a, b) { a, b in
 63 |       try await backend.matmul(
 64 |         a: try await a.data, transA: transA, b: try await b.data, transB: transB,
 65 |         transOut: transOut, rows: aShape[0],
 66 |         inner: aShape[1], cols: bShape[1], dtype: a.dtype)
 67 |     }
 68 |     if !Tensor.isGradEnabled || (!a.needsGrad && !b.needsGrad) {
 69 |       return Tensor(dataTask: newData, shape: outShape, dtype: a.dtype)
 70 |     } else {
 71 |       let handleA = a.saveForBackward()
 72 |       let handleB = b.saveForBackward()
 73 |       return Tensor(dataTask: newData, shape: outShape, dtype: a.dtype) { grad in
 74 |         handleA.backward(aGradBackend ?? backend) {
 75 |           matmul(a: grad, transA: transOut, b: b.noGrad(), transB: !transB, transOut: transA)
 76 |         }
 77 |         handleB.backward(bGradBackend ?? backend) {
 78 |           matmul(a: a.noGrad(), transA: !transA, b: grad, transB: transOut, transOut: transB)
 79 |         }
 80 |       }
 81 |     }
 82 |   }
 83 | 
 84 |   @recordCaller
 85 |   private static func _batchedMatmul(
 86 |     a: Tensor, transA: Bool, b: Tensor, transB: Bool, transOut: Bool
 87 |   )
 88 |     -> Tensor
 89 |   {
 90 |     #alwaysAssert(
 91 |       a.shape.count > 2 && b.shape.count > 2
 92 |         && a.shape[..<(a.shape.count - 2)] == b.shape[..<(b.shape.count - 2)],
 93 |       "invalid shapes for batched matmul: \(a.shape), \(b.shape)")
 94 |     #alwaysAssert(
 95 |       a.dtype == b.dtype, "mismatched dtypes for batched matmul: \(a.dtype) and \(b.dtype)")
 96 |     let batchShape: [Int] = Array(a.shape[..<(a.shape.count - 2)])
 97 |     let d0 = a.shape.count - 2
 98 |     let d1 = a.shape.count - 1
 99 |     let aShape = transA ? [a.shape[d1], a.shape[d0]] : [a.shape[d0], a.shape[d1]]
100 |     let bShape = transB ? [b.shape[d1], b.shape[d0]] : [b.shape[d0], b.shape[d1]]
101 |     #alwaysAssert(
102 |       aShape[1] == bShape[0],
103 |       "shape mismatch for batched matmul: \(a.shape) (trans=\(transA)), \(b.shape) (trans=\(transB))"
104 |     )
105 |     let outShape = batchShape + (transOut ? [bShape[1], aShape[0]] : [aShape[0], bShape[1]])
106 |     let backend = Backend.current
107 |     let newData = createDataTask(a, b) { a, b in
108 |       return try await backend.batchedMatmul(
109 |         matrixCount: batchShape.product(), a: try await a.data, transA: transA, b: try await b.data,
110 |         transB: transB, transOut: transOut, rows: aShape[0], inner: aShape[1], cols: bShape[1],
111 |         dtype: a.dtype)
112 |     }
113 |     if !Tensor.isGradEnabled || (!a.needsGrad && !b.needsGrad) {
114 |       return Tensor(dataTask: newData, shape: outShape, dtype: a.dtype)
115 |     } else {
116 |       let handleA = a.saveForBackward()
117 |       let handleB = b.saveForBackward()
118 |       return Tensor(dataTask: newData, shape: outShape, dtype: a.dtype) { grad in
119 |         handleA.backward(backend) {
120 |           batchedMatmul(
121 |             a: grad, transA: transOut, b: b.noGrad(), transB: !transB, transOut: transA)
122 |         }
123 |         handleB.backward(backend) {
124 |           batchedMatmul(
125 |             a: a.noGrad(), transA: !transA, b: grad, transB: transOut, transOut: transB)
126 |         }
127 |       }
128 |     }
129 |   }
130 | 
131 |   public static func &* (_ lhs: Tensor, _ rhs: Tensor) -> Tensor {
132 |     if lhs.shape.count <= 2 {
133 |       matmul(a: lhs, transA: false, b: rhs, transB: false, transOut: false)
134 |     } else {
135 |       batchedMatmul(a: lhs, transA: false, b: rhs, transB: false, transOut: false)
136 |     }
137 |   }
138 | 
139 |   @recordCaller
140 |   private func _tril(offset: Int = 0) -> Tensor {
141 |     triangular(upper: false, offset: offset)
142 |   }
143 | 
144 |   @recordCaller
145 |   private func _triu(offset: Int = 0) -> Tensor {
146 |     triangular(upper: true, offset: offset)
147 |   }
148 | 
149 |   private func triangular(upper: Bool, offset: Int = 0) -> Tensor {
150 |     #alwaysAssert(shape.count >= 2, "tensor of shape \(shape) is not a matrix")
151 |     let backend = Backend.current
152 |     let newData = createDataTask { t in
153 |       return try await backend.triangular(
154 |         try await t.data,
155 |         batch: t.shape[..<(t.shape.count - 2)].product(),
156 |         rows: t.shape[t.shape.count - 2],
157 |         cols: t.shape[t.shape.count - 1],
158 |         upper: upper,
159 |         offset: offset,
160 |         dtype: t.dtype
161 |       )
162 |     }
163 |     if !needsGrad || !Tensor.isGradEnabled {
164 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype)
165 |     } else {
166 |       let handle = saveForBackward()
167 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype) { grad in
168 |         handle.backward(backend) { grad.triangular(upper: upper, offset: offset) }
169 |       }
170 |     }
171 |   }
172 | 
173 |   @recordCaller
174 |   private func _qrDecomposition(full: Bool = false) -> (q: Tensor, r: Tensor) {
175 |     #alwaysAssert(shape.count >= 2, "tensor of shape \(shape) is not a matrix")
176 |     let batchShape = shape[..<(shape.count - 2)]
177 |     let batch = batchShape.product()
178 |     let rows = shape[shape.count - 2]
179 |     let cols = shape[shape.count - 1]
180 |     let full = full || rows <= cols
181 |     let qShape = Array(batchShape + (full ? [rows, rows] : [rows, cols]))
182 |     let rShape = Array(batchShape + (full ? [rows, cols] : [cols, cols]))
183 |     let backend = Backend.current
184 |     let newData = createDataTask { t in
185 |       return try await backend.qrDecomposition(
186 |         try await t.data,
187 |         batch: batch,
188 |         rows: rows,
189 |         cols: cols,
190 |         full: full,
191 |         dtype: t.dtype
192 |       )
193 |     }
194 |     #alwaysAssert(
195 |       !needsGrad || !Tensor.isGradEnabled, "QR decomposition does not currently support gradients")
196 |     return (
197 |       q: Tensor(dataTask: Task { try await newData.value.q }, shape: qShape, dtype: dtype),
198 |       r: Tensor(dataTask: Task { try await newData.value.r }, shape: rShape, dtype: dtype)
199 |     )
200 |   }
201 | 
202 |   @recordCaller
203 |   private func _svd(full: Bool = false) -> (u: Tensor, s: Tensor, vt: Tensor) {
204 |     #alwaysAssert(shape.count >= 2, "tensor of shape \(shape) is not a matrix")
205 |     let batchShape = shape[..<(shape.count - 2)]
206 |     let batch = batchShape.product()
207 |     let rows = shape[shape.count - 2]
208 |     let cols = shape[shape.count - 1]
209 |     let k = Swift.min(rows, cols)
210 |     let sShape = Array(batchShape + [k])
211 |     let uShape = Array(batchShape + (full ? [rows, rows] : [rows, k]))
212 |     let vtShape = Array(batchShape + (full ? [cols, cols] : [k, cols]))
213 |     let backend = Backend.current
214 |     let newData = createDataTask { t in
215 |       return try await backend.svd(
216 |         try await t.data,
217 |         batch: batch,
218 |         rows: rows,
219 |         cols: cols,
220 |         full: full,
221 |         dtype: t.dtype
222 |       )
223 |     }
224 |     #alwaysAssert(
225 |       !needsGrad || !Tensor.isGradEnabled, "SVD does not currently support gradients")
226 |     return (
227 |       u: Tensor(dataTask: Task { try await newData.value.u }, shape: uShape, dtype: dtype),
228 |       s: Tensor(dataTask: Task { try await newData.value.s }, shape: sShape, dtype: dtype),
229 |       vt: Tensor(dataTask: Task { try await newData.value.vt }, shape: vtShape, dtype: dtype)
230 |     )
231 |   }
232 | 
233 | }
234 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/OneHot.swift:
--------------------------------------------------------------------------------
 1 | import HCBacktrace
 2 | 
 3 | extension Tensor {
 4 | 
 5 |   public convenience init(
 6 |     oneHot: Int, count: Int, dtype: DType = .float32, function: StaticString = #function,
 7 |     file: StaticString = #filePath, line: UInt = #line
 8 |   ) {
 9 |     Backtrace.record(function: function, file: file, line: line) {
10 |       #alwaysAssert(oneHot >= 0 && oneHot < count, "oneHot \(oneHot) out of range [0, \(count))")
11 |     }
12 |     var data = Array(repeating: Float(0), count: count)
13 |     data[oneHot] = 1
14 |     self.init(data: data, shape: [count], dtype: dtype)
15 |   }
16 | 
17 |   public convenience init(
18 |     oneHot: [Int], count: Int, dtype: DType = .float32, function: StaticString = #function,
19 |     file: StaticString = #filePath, line: UInt = #line
20 |   ) {
21 |     var data = Array(repeating: Float(0), count: count * oneHot.count)
22 |     Backtrace.record(function: function, file: file, line: line) {
23 |       for (i, idx) in oneHot.enumerated() {
24 |         #alwaysAssert(idx >= 0 && idx < count, "oneHot index \(idx) out of range [0, \(count))")
25 |         data[i * count + idx] = 1
26 |       }
27 |     }
28 |     self.init(data: data, shape: [oneHot.count, count], dtype: dtype)
29 |   }
30 | 
31 | }
32 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Optimizer.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import HCBacktrace
  3 | 
  4 | extension Tensor {
  5 |   @recordCaller
  6 |   private static func adamW(
  7 |     param: Tensor,
  8 |     grad: Tensor,
  9 |     moment1: Tensor,
 10 |     moment2: Tensor,
 11 |     beta1: Float,
 12 |     beta2: Float,
 13 |     eps: Float,
 14 |     weightDecay: Float,
 15 |     lr: Float,
 16 |     step: Float
 17 |   ) -> (param: Tensor, moment1: Tensor, moment2: Tensor) {
 18 |     if Tensor.isGradEnabled {
 19 |       #alwaysAssert(!param.needsGrad, "adamW does not support gradients")
 20 |       #alwaysAssert(!grad.needsGrad, "adamW does not support gradients")
 21 |       #alwaysAssert(!moment1.needsGrad, "adamW does not support gradients")
 22 |       #alwaysAssert(!moment2.needsGrad, "adamW does not support gradients")
 23 |     }
 24 |     #alwaysAssert(
 25 |       grad.dtype == param.dtype,
 26 |       "mismatching dtypes for param \(param.dtype) and grad \(grad.dtype)")
 27 |     #alwaysAssert(
 28 |       grad.dtype == moment1.dtype,
 29 |       "mismatching dtypes for param \(param.dtype) and moment1 \(moment1.dtype)")
 30 |     #alwaysAssert(
 31 |       grad.dtype == moment2.dtype,
 32 |       "mismatching dtypes for param \(param.dtype) and moment2 \(moment2.dtype)")
 33 |     #alwaysAssert(
 34 |       grad.shape == param.shape,
 35 |       "mismatching shapes for param \(param.shape) and grad \(grad.shape)")
 36 |     #alwaysAssert(
 37 |       grad.shape == moment1.shape,
 38 |       "mismatching shapes for param \(param.shape) and moment1 \(moment1.shape)")
 39 |     #alwaysAssert(
 40 |       grad.shape == moment2.shape,
 41 |       "mismatching shapes for param \(param.shape) and moment2 \(moment2.shape)")
 42 | 
 43 |     let backend = Backend.current
 44 | 
 45 |     let shape = param.shape
 46 |     let dtype = param.dtype
 47 | 
 48 |     let param = param.noGrad()
 49 |     let grad = grad.noGrad()
 50 |     let moment1 = moment1.noGrad()
 51 |     let moment2 = moment2.noGrad()
 52 |     let newData = createDataTask {
 53 |       try await backend.adamW(
 54 |         param: try await param.data,
 55 |         grad: try await grad.data,
 56 |         moment1: try await moment1.data,
 57 |         moment2: try await moment2.data,
 58 |         beta1: beta1,
 59 |         beta2: beta2,
 60 |         eps: eps,
 61 |         weightDecay: weightDecay,
 62 |         lr: lr,
 63 |         step: step,
 64 |         count: shape.product(),
 65 |         dtype: dtype
 66 |       )
 67 |     }
 68 |     return (
 69 |       param: Tensor(
 70 |         dataTask: Task { try await newData.value.param }, shape: shape, dtype: dtype),
 71 |       moment1: Tensor(
 72 |         dataTask: Task { try await newData.value.moment1 }, shape: shape, dtype: dtype),
 73 |       moment2: Tensor(
 74 |         dataTask: Task { try await newData.value.moment2 }, shape: shape, dtype: dtype)
 75 |     )
 76 |   }
 77 | }
 78 | 
 79 | /// A base class for gradient-based optimizers.
 80 | open class Optimizer {
 81 |   public typealias Parameter = Trainable.Parameter
 82 | 
 83 |   public let parameters: [String: Parameter]
 84 | 
 85 |   public init(_ parameters: [(String, Parameter)]) {
 86 |     self.parameters = [String: Parameter](uniqueKeysWithValues: parameters)
 87 |   }
 88 | 
 89 |   /// Reset the gradients of the parameters.
 90 |   ///
 91 |   /// This should be called between steps to avoid accumulating gradients incorrectly.
 92 |   public func clearGrads() {
 93 |     for var p in self.parameters.values {
 94 |       p.grad = nil
 95 |     }
 96 |   }
 97 | }
 98 | 
 99 | /// An ``Optimizer`` implementation for [Adam: A Method for Stochastic Optimization](https://arxiv.org/abs/1412.6980).
100 | ///
101 | /// This implements [AdamW](https://arxiv.org/abs/1711.05101) when using `weightDecay`.
102 | public class Adam: Optimizer {
103 |   public var lr: Float
104 |   public var beta1: Float
105 |   public var beta2: Float
106 |   public var eps: Float
107 |   public var weightDecay: Float
108 | 
109 |   public var stepIndex: [String: Int] = [:]
110 |   public var moment1: [String: Tensor] = [:]
111 |   public var moment2: [String: Tensor] = [:]
112 | 
113 |   /// Create an optimizer wrapping the given parameters.
114 |   ///
115 |   /// The default arguments match those from the original paper.
116 |   public init(
117 |     _ parameters: [(String, Parameter)], lr: Float, beta1: Float = 0.9, beta2: Float = 0.999,
118 |     eps: Float = 1e-8, weightDecay: Float = 0.0
119 |   ) {
120 |     self.lr = lr
121 |     self.beta1 = beta1
122 |     self.beta2 = beta2
123 |     self.eps = eps
124 |     self.weightDecay = weightDecay
125 |     super.init(parameters)
126 |   }
127 | 
128 |   @recordCaller
129 |   private func _step() {
130 |     for (name, var param) in parameters {
131 |       guard let grad = param.grad else {
132 |         continue
133 |       }
134 |       let t = stepIndex[name] ?? 1
135 |       stepIndex[name] = t + 1
136 | 
137 |       let mt = moment1[name] ?? Tensor(zerosLike: grad)
138 |       let vt = moment2[name] ?? Tensor(zerosLike: grad)
139 | 
140 |       (param.data!, moment1[name], moment2[name]) = Tensor.adamW(
141 |         param: param.data!,
142 |         grad: grad,
143 |         moment1: mt,
144 |         moment2: vt,
145 |         beta1: beta1,
146 |         beta2: beta2,
147 |         eps: eps,
148 |         weightDecay: weightDecay,
149 |         lr: lr,
150 |         step: Float(t)
151 |       )
152 |     }
153 |   }
154 | 
155 |   /// An encodable object that contains all of the values that this optimizer
156 |   /// tracks during optimization trajectories.
157 |   public struct State: Codable, Sendable {
158 |     public let stepIndex: [String: Int]
159 |     public let moment1: [String: TensorState]
160 |     public let moment2: [String: TensorState]
161 | 
162 |     public init(
163 |       stepIndex: [String: Int] = [:],
164 |       moment1: [String: TensorState] = [:],
165 |       moment2: [String: TensorState] = [:]
166 |     ) {
167 |       self.stepIndex = stepIndex
168 |       self.moment1 = moment1
169 |       self.moment2 = moment2
170 |     }
171 |   }
172 | 
173 |   public var state: TracedBlock<State> {
174 |     let moment1 = moment1
175 |     let moment2 = moment2
176 |     let stepIndex = stepIndex
177 |     return TracedBlock {
178 |       State(
179 |         stepIndex: stepIndex,
180 |         moment1: try await tensorsToStates(moment1),
181 |         moment2: try await tensorsToStates(moment2)
182 |       )
183 |     }
184 |   }
185 | 
186 |   @recordCaller
187 |   private func _loadState(_ state: State) throws {
188 |     stepIndex = state.stepIndex
189 |     moment1 = statesToTensors(state.moment1)
190 |     moment2 = statesToTensors(state.moment2)
191 |   }
192 | }
193 | 
194 | /// A stateful object for scaling down gradients when they are unusually large.
195 | ///
196 | /// A history of previous gradient norms is recorded, and gradients are clipped
197 | /// when they exceed some number of standard deviations from the mean of previous
198 | /// gradients.
199 | public class GradClipper {
200 |   public struct State: Codable, Sendable {
201 |     let history: [Float]
202 |   }
203 | 
204 |   public let historySize: Int
205 |   public let recentCount: Int
206 |   public let maxStds: Float
207 |   private var history: [Float] = []
208 | 
209 |   public init(historySize: Int = 30, recentCount: Int = 5, maxStds: Float = 2.0) {
210 |     self.historySize = historySize
211 |     self.recentCount = recentCount
212 |     self.maxStds = maxStds
213 |   }
214 | 
215 |   public var state: State {
216 |     get { State(history: history) }
217 |     set { history = newValue.history }
218 |   }
219 | 
220 |   @recordCaller
221 |   private func _clipGrads(model: TrainableProto) async throws -> (Float, Float) {
222 |     var gradNorm = Tensor(data: [0.0])
223 |     for (_, p) in model.parameters {
224 |       if let g = p.grad {
225 |         gradNorm = gradNorm + g.pow(2).sum()
226 |       }
227 |     }
228 |     let actualNorm = try await gradNorm.sqrt().item()
229 | 
230 |     let (flag, scale) = shouldClip(norm: actualNorm)
231 |     history.append(actualNorm)
232 |     if history.count > historySize + recentCount {
233 |       history.remove(at: 0)
234 |     }
235 |     if flag {
236 |       for (_, var p) in model.parameters {
237 |         if let g = p.grad {
238 |           p.grad = g * scale
239 |         }
240 |       }
241 |     }
242 |     return (actualNorm, scale)
243 |   }
244 | 
245 |   private func shouldClip(norm: Float) -> (Bool, Float) {
246 |     if history.count < historySize + recentCount {
247 |       return (false, 1.0)
248 |     }
249 |     let past = history[..<historySize]
250 |     let mean = past.reduce(0.0, +) / Float(historySize)
251 |     let std = sqrt(
252 |       past.map { pow($0 - mean, 2) }.reduce(0.0, +) / Float(historySize)
253 |     )
254 |     let threshold = mean + std * maxStds
255 |     return (norm > threshold, min(1, threshold / norm))
256 |   }
257 | }
258 | 
259 | private func tensorsToStates(_ d: [String: Tensor]) async throws -> [String: TensorState] {
260 |   var result = [String: TensorState]()
261 |   for (k, v) in d {
262 |     result[k] = try await v.state()
263 |   }
264 |   return result
265 | }
266 | 
267 | private func statesToTensors(_ d: [String: TensorState]) -> [String: Tensor] {
268 |   var result = [String: Tensor]()
269 |   for (k, v) in d {
270 |     result[k] = Tensor(state: v)
271 |   }
272 |   return result
273 | }
274 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/PrimitiveExtensions.swift:
--------------------------------------------------------------------------------
 1 | extension Sequence where Element: Numeric {
 2 |   func product() -> Element {
 3 |     reduce(Element(exactly: 1)!, *)
 4 |   }
 5 | 
 6 |   func sum() -> Element {
 7 |     reduce(Element(exactly: 0)!, +)
 8 |   }
 9 | }
10 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Random.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import HCBacktrace
  3 | 
  4 | /// A probability distribution over continuous values.
  5 | public enum RandomDist: Sendable {
  6 |   case uniform
  7 |   case normal
  8 | }
  9 | 
 10 | /// A random number generator associated with a ``Backend``.
 11 | ///
 12 | /// Methods can be called from any thread, but the caller may want to synchronize
 13 | /// operations to make sure results are produced in a deterministic order.
 14 | /// For example, it may be desirable to serialize calls to ``RandomGenerator/sample(count:dist:dtype:)``
 15 | /// and ``RandomGenerator/seed(_:)`` to ensure sequences of tensors are generated in a
 16 | /// deterministic order.
 17 | ///
 18 | /// Often times, generation methods will be called indirectly and asynchronously via ``Tensor``
 19 | /// initializers like ``Tensor/init(rand:dtype:generator:function:file:line:)``.
 20 | /// These calls with synchronously update ``RandomGenerator/state``, ensuring that the generator
 21 | /// is used in a deterministic order.
 22 | open class RandomGenerator: @unchecked Sendable {
 23 |   public let backend: Backend
 24 |   private var _state: Tensor
 25 |   private let _opLock: NSLock = NSLock()
 26 | 
 27 |   open var stateCount: Int {
 28 |     tracedFatalError("must override stateCount")
 29 |   }
 30 | 
 31 |   open var stateDType: Tensor.DType {
 32 |     tracedFatalError("must override stateDType")
 33 |   }
 34 | 
 35 |   /// All the information necessary to determine how the generator will behave.
 36 |   ///
 37 |   /// This can be accessed and restored to ensure deterministic reproducibility of a random
 38 |   /// operation or sequence of random operations.
 39 |   public var state: Tensor {
 40 |     get {
 41 |       _opLock.withLock { _state }
 42 |     }
 43 |     set {
 44 |       _opLock.withLock {
 45 |         #alwaysAssert(newValue.shape == [stateCount])
 46 |         #alwaysAssert(newValue.dtype == stateDType)
 47 |         _state = newValue
 48 |       }
 49 |     }
 50 |   }
 51 | 
 52 |   public init(backend: Backend, state: Tensor) {
 53 |     self.backend = backend
 54 |     self._state = state
 55 |   }
 56 | 
 57 |   /// Update the state of the generator given the seed.
 58 |   public func seed(_ x: Int) {
 59 |     _opLock.withLock {
 60 |       _state = Tensor(
 61 |         dataTask: Tensor.createDataTask {
 62 |           try await self._seed(x)
 63 |         }, shape: [stateCount], dtype: stateDType)
 64 |     }
 65 |   }
 66 | 
 67 |   open func _seed(_ x: Int) async throws -> Tensor.Data {
 68 |     tracedFatalError("_seed() is not implemented")
 69 |   }
 70 | 
 71 |   /// Sample a numeric tensor from a given continuous distribution.
 72 |   ///
 73 |   /// This will synchronously update ``state`` to the state after the operation.
 74 |   public func sample(count: Int, dist: RandomDist, dtype: Tensor.DType) -> Task<Tensor.Data, Error>
 75 |   {
 76 |     _opLock.withLock {
 77 |       let s = _state.noGrad()
 78 |       let task = Tensor.createDataTask {
 79 |         try await self._sample(state: try await s.data, count: count, dist: dist, dtype: dtype)
 80 |       }
 81 |       _state = Tensor(
 82 |         dataTask: Task {
 83 |           try await task.value.state
 84 |         }, shape: [stateCount], dtype: stateDType)
 85 |       return Task {
 86 |         try await task.value.sample
 87 |       }
 88 |     }
 89 |   }
 90 | 
 91 |   open func _sample(state: Tensor.Data, count: Int, dist: RandomDist, dtype: Tensor.DType)
 92 |     async throws -> (
 93 |       sample: Tensor.Data, state: Tensor.Data
 94 |     )
 95 |   {
 96 |     tracedFatalError("_sample(state:count:dist:dtype:) is not implemented")
 97 |   }
 98 | 
 99 |   /// Sample a tensor of int64 values uniformly in the given range.
100 |   ///
101 |   /// This will synchronously update ``state`` to the state after the operation.
102 |   public func sample(count: Int, in range: Range<Int64>) -> Task<Tensor.Data, Error> {
103 |     _opLock.withLock {
104 |       let s = _state.noGrad()
105 |       let task = Tensor.createDataTask {
106 |         try await self._sample(state: try await s.data, count: count, in: range)
107 |       }
108 |       _state = Tensor(
109 |         dataTask: Task {
110 |           try await task.value.state
111 |         }, shape: [stateCount], dtype: stateDType)
112 |       return Task {
113 |         try await task.value.sample
114 |       }
115 |     }
116 |   }
117 | 
118 |   open func _sample(state: Tensor.Data, count: Int, in range: Range<Int64>) async throws -> (
119 |     sample: Tensor.Data, state: Tensor.Data
120 |   ) {
121 |     tracedFatalError("_sample(state:count:in:) is not implemented")
122 |   }
123 | }
124 | 
125 | extension Tensor {
126 |   /// Sample values in the range [0, 1).
127 |   public convenience init(
128 |     rand shape: [Int],
129 |     dtype: DType = .float32,
130 |     generator: RandomGenerator? = nil,
131 |     function: StaticString = #function,
132 |     file: StaticString = #filePath,
133 |     line: UInt = #line
134 |   ) {
135 |     self.init(
136 |       rand: shape, dist: .uniform, dtype: dtype, generator: generator, function: function,
137 |       file: file, line: line)
138 |   }
139 | 
140 |   /// Sample values from the Normal distribution.
141 |   public convenience init(
142 |     randn shape: [Int],
143 |     dtype: DType = .float32,
144 |     generator: RandomGenerator? = nil,
145 |     function: StaticString = #function,
146 |     file: StaticString = #filePath,
147 |     line: UInt = #line
148 |   ) {
149 |     self.init(
150 |       rand: shape, dist: .normal, dtype: dtype, generator: generator, function: function,
151 |       file: file, line: line)
152 |   }
153 | 
154 |   private convenience init(
155 |     rand shape: [Int],
156 |     dist: RandomDist,
157 |     dtype: DType = .float32,
158 |     generator: RandomGenerator? = nil,
159 |     function: StaticString = #function,
160 |     file: StaticString = #filePath,
161 |     line: UInt = #line
162 |   ) {
163 |     let backend = Backend.current
164 |     let dataTask = Backtrace.record(function: function, file: file, line: line) {
165 |       #alwaysAssert(
166 |         generator == nil || generator!.backend === backend,
167 |         "backend for provided generator is not the current backend")
168 |       let generator = generator ?? backend.defaultRandom()
169 |       return generator.sample(count: shape.product(), dist: dist, dtype: dtype)
170 |     }
171 |     self.init(dataTask: dataTask, shape: shape, dtype: dtype)
172 |   }
173 | 
174 |   public convenience init(
175 |     randPerm shape: [Int],
176 |     axis: Int = -1,
177 |     generator: RandomGenerator? = nil,
178 |     function: StaticString = #function,
179 |     file: StaticString = #filePath,
180 |     line: UInt = #line
181 |   ) {
182 |     let dataTask = Backtrace.record(function: function, file: file, line: line) {
183 |       let values = Tensor(
184 |         randInt: shape, in: -0x8000_0000_0000_0000..<0x7fff_ffff_ffff_ffff, generator: generator)
185 |       return values.argsort(axis: axis, stable: true).dataTask
186 |     }
187 |     self.init(dataTask: dataTask, shape: shape, dtype: .int64)
188 |   }
189 | 
190 |   /// Sample values from the Normal distribution with the shape and dtype of a given `Tensor`.
191 |   public convenience init(
192 |     randnLike other: Tensor,
193 |     generator: RandomGenerator? = nil,
194 |     function: StaticString = #function,
195 |     file: StaticString = #filePath,
196 |     line: UInt = #line
197 |   ) {
198 |     self.init(
199 |       rand: other.shape, dist: .normal, dtype: other.dtype, generator: generator,
200 |       function: function, file: file, line: line)
201 |   }
202 | 
203 |   /// Sample values uniformly in [0, 1) with the shape and dtype of a given `Tensor`.
204 |   public convenience init(
205 |     randLike other: Tensor,
206 |     generator: RandomGenerator? = nil,
207 |     function: StaticString = #function,
208 |     file: StaticString = #filePath,
209 |     line: UInt = #line
210 |   ) {
211 |     self.init(
212 |       rand: other.shape, dist: .uniform, dtype: other.dtype, generator: generator,
213 |       function: function, file: file, line: line)
214 |   }
215 | 
216 |   /// Sample random integers in the given range.
217 |   public convenience init(
218 |     randInt shape: [Int],
219 |     in range: Range<Int64>,
220 |     generator: RandomGenerator? = nil,
221 |     function: StaticString = #function,
222 |     file: StaticString = #filePath,
223 |     line: UInt = #line
224 |   ) {
225 |     let dataTask = Backtrace.record(function: function, file: file, line: line) {
226 |       let backend = Backend.current
227 |       #alwaysAssert(
228 |         generator == nil || generator!.backend === backend,
229 |         "backend for provided generator is not the current backend")
230 |       let generator = generator ?? backend.defaultRandom()
231 |       return generator.sample(count: shape.product(), in: range)
232 |     }
233 |     self.init(
234 |       dataTask: dataTask, shape: shape, dtype: .int64, function: function, file: file, line: line)
235 |   }
236 | 
237 |   @recordCaller
238 |   private func _multinomial(
239 |     sampleCount: Int, replacement: Bool = false, generator: RandomGenerator? = nil
240 |   ) -> Tensor {
241 |     #alwaysAssert(
242 |       (shape.count == 1 || shape.count == 2) && shape.last! > 0,
243 |       "cannot use tensor of shape \(shape) as multinomial weights")
244 |     #alwaysAssert(
245 |       replacement || sampleCount <= shape.last!,
246 |       "cannot sample \(sampleCount) indices from only \(shape.last!) possible values without replacement"
247 |     )
248 |     #alwaysAssert(dtype.isFloat, "cannot use dtype \(dtype) as multinomial weights")
249 |     if shape.count == 1 {
250 |       return unsqueeze(axis: 0).multinomial(
251 |         sampleCount: sampleCount, replacement: replacement, generator: generator
252 |       ).squeeze(axis: 0)
253 |     }
254 |     if !replacement {
255 |       #alwaysAssert(
256 |         shape[1] >= sampleCount,
257 |         "cannot sample \(sampleCount) elements from only \(shape[1]) options without replacement"
258 |       )
259 |     }
260 |     let ng = noGrad()
261 |     let probs = (ng / ng.sum(axis: 1, keepdims: true)).cast(.float32)
262 |     if replacement || sampleCount == 1 {
263 |       let cumProbs = probs.cast(.float32).cumulativeSum(axis: -1).unsqueeze(axis: 1)
264 |       let noise = Tensor(rand: [shape[0], sampleCount, 1])
265 |       return (cumProbs >= noise).argmax(axis: -1)
266 |     } else {
267 |       let logits = probs.clamp(min: 1e-8).log()
268 |       let gumbels = -(-Tensor(rand: [shape[0], shape[1]]).clamp(min: 1e-8).log()).log()
269 |       let indices = (logits + gumbels).argsort(axis: -1, descending: true)
270 |       return indices[..., ..<sampleCount]
271 |     }
272 |   }
273 | }
274 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/ScatterGather.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | /// A low-level description of indexing for a gather or scatter operation.
  4 | public struct ScatterGatherIndices: Sendable {
  5 |   /// The shape of the tensor of input values before the gather.
  6 |   public let valueShape: [Int]
  7 | 
  8 |   /// The axis to perform the gather along.
  9 |   public let axis: Int
 10 | 
 11 |   /// The int64 tensor data containing the indices.
 12 |   /// Must have the same shape as valueShape.
 13 |   public let indices: BroadcastData
 14 | 
 15 |   /// If `true`, then no index is repeated inside of ``ScatterGatherIndices/indices``
 16 |   /// within the gathered axis.
 17 |   ///
 18 |   /// This is used to determine if reduction is necessary during a scatter.
 19 |   public let indicesAreUnique: Bool
 20 | 
 21 |   var outerCount: Int {
 22 |     valueShape[..<axis].product()
 23 |   }
 24 | 
 25 |   var middleCount: Int {
 26 |     valueShape[axis]
 27 |   }
 28 | 
 29 |   var innerCount: Int {
 30 |     valueShape[(axis + 1)...].product()
 31 |   }
 32 | 
 33 |   var outCount: Int {
 34 |     indices.strides.shape[axis]
 35 |   }
 36 | 
 37 |   /// The number of input elements for a gather, or output elements for a scatter.
 38 |   public var gatherInCount: Int {
 39 |     valueShape.product()
 40 |   }
 41 | 
 42 |   /// The number of output elements for a gather, or input elements for a scatter.
 43 |   public var gatherOutCount: Int {
 44 |     return indices.strides.shape.product()
 45 |   }
 46 | }
 47 | 
 48 | extension Tensor {
 49 | 
 50 |   @recordCaller
 51 |   internal func _expandIndices(axis: Int, indices: Tensor) -> (Tensor, BroadcastStrides) {
 52 |     let indices =
 53 |       if indices.shape.count == 1 {
 54 |         {
 55 |           var bcastShape = Array(repeating: 1, count: shape.count)
 56 |           bcastShape[axis] = indices.shape[0]
 57 |           return indices.reshape(bcastShape)
 58 |         }()
 59 |       } else {
 60 |         indices
 61 |       }
 62 | 
 63 |     var expandShape = shape
 64 |     expandShape[axis] = indices.shape[axis]
 65 |     let indicesStrides = indices.expandStrides(shape: expandShape)
 66 | 
 67 |     return (indices, indicesStrides)
 68 |   }
 69 | 
 70 |   @recordCaller
 71 |   private func _gather(axis: Int, indices: Tensor, indicesAreUnique: Bool = false) -> Tensor {
 72 |     #alwaysAssert(shape.count > 0, "cannot gather() on a zero-dimensional tensor")
 73 |     #alwaysAssert(
 74 |       indices.dtype == .int64,
 75 |       "can only gather with indices of dtype int64, but got \(indices.dtype)")
 76 | 
 77 |     let axis = positiveAxis(axis)
 78 |     let (indices, indicesStrides) = expandIndices(axis: axis, indices: indices)
 79 | 
 80 |     let newShape = indicesStrides.shape
 81 |     let backend = Backend.current
 82 |     let newData = Tensor.createDataTask(self, indices) { t, indices in
 83 |       try await backend.gather(
 84 |         try await t.data,
 85 |         ScatterGatherIndices(
 86 |           valueShape: t.shape,
 87 |           axis: axis,
 88 |           indices: BroadcastData(strides: indicesStrides, data: try await indices.data),
 89 |           indicesAreUnique: indicesAreUnique
 90 |         ),
 91 |         dtype: t.dtype
 92 |       )
 93 |     }
 94 |     if !needsGrad || !Tensor.isGradEnabled {
 95 |       return Tensor(dataTask: newData, shape: newShape, dtype: dtype)
 96 |     } else {
 97 |       let handle = self.saveForBackward()
 98 |       return Tensor(dataTask: newData, shape: newShape, dtype: dtype) { [self] grad in
 99 |         let shape = shape
100 |         handle.backward(backend) {
101 |           grad.scatter(
102 |             axis: axis,
103 |             count: shape[axis],
104 |             indices: indices,
105 |             indicesAreUnique: indicesAreUnique
106 |           )
107 |         }
108 |       }
109 |     }
110 |   }
111 | 
112 |   @recordCaller
113 |   private func _scatter(axis: Int, count: Int, indices: Tensor, indicesAreUnique: Bool = false)
114 |     -> Tensor
115 |   {
116 |     #alwaysAssert(shape.count > 0, "cannot scatter() on a zero-dimensional tensor")
117 |     #alwaysAssert(
118 |       indices.dtype == .int64,
119 |       "can only scatter with indices of dtype int64, but got \(indices.dtype)")
120 | 
121 |     let axis = positiveAxis(axis)
122 |     let (indices, indicesStrides) = expandIndices(axis: axis, indices: indices)
123 | 
124 |     #alwaysAssert(
125 |       indices.shape[axis] == shape[axis],
126 |       "shape \(shape) is not compatible with indices shape \(indices.shape) for scatter along axis \(axis)"
127 |     )
128 | 
129 |     var newShape = shape
130 |     newShape[axis] = count
131 |     let valueShape = newShape
132 | 
133 |     let backend = Backend.current
134 |     let newData = Tensor.createDataTask(self, indices) { t, indices in
135 |       try await backend.scatter(
136 |         try await t.data,
137 |         ScatterGatherIndices(
138 |           valueShape: valueShape,
139 |           axis: axis,
140 |           indices: BroadcastData(strides: indicesStrides, data: try await indices.data),
141 |           indicesAreUnique: indicesAreUnique
142 |         ), dtype: t.dtype)
143 |     }
144 |     if !needsGrad || !Tensor.isGradEnabled {
145 |       return Tensor(dataTask: newData, shape: newShape, dtype: dtype)
146 |     } else {
147 |       let handle = self.saveForBackward()
148 |       return Tensor(dataTask: newData, shape: newShape, dtype: dtype) { grad in
149 |         handle.backward(backend) {
150 |           grad.gather(axis: axis, indices: indices, indicesAreUnique: indicesAreUnique)
151 |         }
152 |       }
153 |     }
154 |   }
155 | 
156 | }
157 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Softmax.swift:
--------------------------------------------------------------------------------
 1 | import HCBacktrace
 2 | 
 3 | extension Tensor {
 4 | 
 5 |   @recordCaller
 6 |   private func _logSoftmax(axis: Int = -1) -> Tensor {
 7 |     #alwaysAssert(dtype.isNumeric, "dtype \(dtype) not supported for logSoftmax")
 8 |     let posAxis = positiveAxis(axis)
 9 |     #alwaysAssert(posAxis >= 0 && posAxis < shape.count, "invalid axis \(axis) for shape \(shape)")
10 | 
11 |     let backend = Backend.current
12 |     let newData = createDataTask { t in
13 |       try await backend.logSoftmax(
14 |         try await t.data,
15 |         dims: ReduceDims(
16 |           outerCount: t.shape[..<posAxis].product(),
17 |           reduceCount: t.shape[posAxis],
18 |           innerCount: t.shape[(posAxis + 1)...].product()
19 |         ),
20 |         dtype: t.dtype)
21 |     }
22 |     if !needsGrad || !Tensor.isGradEnabled {
23 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype)
24 |     } else {
25 |       let handle = self.saveForBackward()
26 |       return Tensor(dataTask: newData, shape: shape, dtype: dtype) { [self] grad in
27 |         handle.backward(backend) {
28 |           Tensor.logSoftmaxGrad(inputs: self.noGrad(), grads: grad, axis: axis)
29 |         }
30 |       }
31 |     }
32 |   }
33 | 
34 |   @recordCaller
35 |   private static func _logSoftmaxGrad(inputs: Tensor, grads: Tensor, axis: Int = -1) -> Tensor {
36 |     #alwaysAssert(inputs.dtype.isNumeric)
37 |     #alwaysAssert(inputs.dtype == grads.dtype)
38 |     #alwaysAssert(inputs.shape == grads.shape)
39 |     let posAxis = inputs.positiveAxis(axis)
40 |     #alwaysAssert(
41 |       posAxis >= 0 && posAxis < inputs.shape.count, "invalid axis \(axis) for shape \(inputs.shape)"
42 |     )
43 | 
44 |     let backend = Backend.current
45 |     let newData = createDataTask(inputs, grads) { inputs, grads in
46 |       try await backend.logSoftmaxGrad(
47 |         try await inputs.data,
48 |         try await grads.data,
49 |         dims: ReduceDims(
50 |           outerCount: inputs.shape[..<posAxis].product(),
51 |           reduceCount: inputs.shape[posAxis],
52 |           innerCount: inputs.shape[(posAxis + 1)...].product()
53 |         ),
54 |         dtype: inputs.dtype)
55 |     }
56 |     if (!inputs.needsGrad && !grads.needsGrad) || !Tensor.isGradEnabled {
57 |       return Tensor(dataTask: newData, shape: inputs.shape, dtype: inputs.dtype)
58 |     } else {
59 |       fatalError("gradients not supported for logSoftmaxGrad operation")
60 |     }
61 |   }
62 | 
63 |   @recordCaller
64 |   private func _softmax(axis: Int = -1) -> Tensor {
65 |     logSoftmax().exp()
66 |   }
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/Sort.swift:
--------------------------------------------------------------------------------
 1 | import HCBacktrace
 2 | 
 3 | extension Tensor {
 4 |   @recordCaller
 5 |   private func _argsort(axis origAxis: Int, descending: Bool = false, stable: Bool = false)
 6 |     -> Tensor
 7 |   {
 8 |     let axis = positiveAxis(origAxis)
 9 |     let backend = Backend.current
10 |     let newData = createDataTask { t in
11 |       try await backend.argsort(
12 |         try await t.data,
13 |         dims: t.reduceDims(axis),
14 |         descending: descending,
15 |         stable: stable,
16 |         dtype: t.dtype
17 |       )
18 |     }
19 |     return Tensor(dataTask: newData, shape: shape, dtype: .int64)
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/State.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import HCBacktrace
 3 | 
 4 | /// A concrete, CPU-accessible description of the state of a ``Tensor``.
 5 | ///
 6 | /// This is intended to be used with ``Tensor/init(state:)`` and obtained via
 7 | /// ``Tensor/state(function:file:line:)``.
 8 | public struct TensorState: Codable, Sendable {
 9 |   public enum TensorData: Sendable {
10 |     case floats([Float])
11 |     case ints([Int64])
12 |     case bools([Bool])
13 |   }
14 | 
15 |   public enum DecodeError: Error {
16 |     case invalidDataSize
17 |   }
18 | 
19 |   enum CodingKeys: String, CodingKey, Sendable {
20 |     case data
21 |     case shape
22 |     case dtype
23 |   }
24 | 
25 |   public let data: TensorData
26 |   public let shape: [Int]
27 |   public let dtype: Tensor.DType
28 | 
29 |   public init(data: TensorData, shape: [Int], dtype: Tensor.DType) {
30 |     self.data = data
31 |     self.shape = shape
32 |     self.dtype = dtype
33 |   }
34 | 
35 |   public init(from decoder: Decoder) throws {
36 |     let values = try decoder.container(keyedBy: CodingKeys.self)
37 |     let data = try values.decode(Data.self, forKey: .data)
38 |     shape = try values.decode([Int].self, forKey: .shape)
39 |     dtype = try values.decode(Tensor.DType.self, forKey: .dtype)
40 |     if data.count % dtype.byteSize != 0 {
41 |       throw DecodeError.invalidDataSize
42 |     }
43 |     self.data =
44 |       switch dtype {
45 |       case .bool:
46 |         .bools(data.map { $0 != 0 })
47 |       case .int64:
48 |         .ints(
49 |           data.withUnsafeBytes { $0.bindMemory(to: Int64.self).map { Int64(littleEndian: $0) } })
50 |       case .float32, .float16:
51 |         .floats(
52 |           data.withUnsafeBytes {
53 |             $0.bindMemory(to: UInt32.self).map { Float(bitPattern: UInt32(littleEndian: $0)) }
54 |           })
55 |       }
56 |   }
57 | 
58 |   public func encode(to encoder: Encoder) throws {
59 |     var values = encoder.container(keyedBy: CodingKeys.self)
60 |     let data =
61 |       switch data {
62 |       case .bools(let x):
63 |         Data(x.map { $0 ? 1 : 0 })
64 |       case .ints(let x):
65 |         x.map { $0.littleEndian }.withUnsafeBufferPointer { Data(buffer: $0) }
66 |       case .floats(let x):
67 |         x.map { $0.bitPattern.littleEndian }.withUnsafeBufferPointer { Data(buffer: $0) }
68 |       }
69 |     try values.encode(data, forKey: .data)
70 |     try values.encode(shape, forKey: .shape)
71 |     try values.encode(dtype, forKey: .dtype)
72 |   }
73 | }
74 | 
75 | extension Tensor {
76 |   public convenience init(state: TensorState) {
77 |     switch state.data {
78 |     case .floats(let x):
79 |       self.init(data: x, shape: state.shape, dtype: state.dtype)
80 |     case .ints(let x):
81 |       self.init(data: x, shape: state.shape, dtype: state.dtype)
82 |     case .bools(let x):
83 |       self.init(data: x, shape: state.shape, dtype: state.dtype)
84 |     }
85 |   }
86 | 
87 |   @recordCaller
88 |   private func _state() async throws -> TensorState {
89 |     switch self.dtype {
90 |     case .float16, .float32:
91 |       TensorState(data: .floats(try await floats()), shape: shape, dtype: dtype)
92 |     case .int64:
93 |       TensorState(data: .ints(try await int64s()), shape: shape, dtype: dtype)
94 |     case .bool:
95 |       TensorState(data: .bools(try await bools()), shape: shape, dtype: dtype)
96 |     }
97 |   }
98 | }
99 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/TensorElement.swift:
--------------------------------------------------------------------------------
  1 | // Borrowed from https://forums.developer.apple.com/forums/thread/93282
  2 | 
  3 | import Accelerate
  4 | import Foundation
  5 | 
  6 | enum ConversionError: Error {
  7 |   case vImageError(String)
  8 | }
  9 | 
 10 | /// A native numeric or boolean value which can represent a single element of a ``Tensor``.
 11 | public protocol TensorElement: Comparable, Sendable {
 12 |   /// If true, then ``TensorElement/toFloat()`` may lose information.
 13 |   static var isFloatLossy: Bool { get }
 14 | 
 15 |   /// If true, then ``TensorElement/toInt64()`` may lose information.
 16 |   static var isInt64Lossy: Bool { get }
 17 | 
 18 |   /// If false, then ``TensorElement/toInt64()`` will return `0` or `1`.
 19 |   static var isBoolLossy: Bool { get }
 20 | 
 21 |   /// Get the default tensor datatype for this scalar type.
 22 |   static var dtype: Tensor.DType { get }
 23 | 
 24 |   init(_ value: Float)
 25 |   init(_ value: Int64)
 26 | 
 27 |   /// Obtain a floating-point representation of this value.
 28 |   func toFloat() -> Float
 29 | 
 30 |   /// Obtain an integer representation of this value, possibly rounding.
 31 |   func toInt64() -> Int64
 32 | 
 33 |   static func == (lhs: Self, rhs: Self) -> Bool
 34 | }
 35 | 
 36 | /// A ``TensorElement`` which supports numerical operations.
 37 | public protocol NumericTensorElement: TensorElement, Strideable {
 38 |   func pow<T: TensorElement>(_ exponent: T) -> Self
 39 | 
 40 |   static func + (lhs: Self, rhs: Self) -> Self
 41 |   static func * (lhs: Self, rhs: Self) -> Self
 42 |   prefix static func - (t: Self) -> Self
 43 |   static func - (lhs: Self, rhs: Self) -> Self
 44 |   static func / (lhs: Self, rhs: Self) -> Self
 45 |   static func modulus(_ lhs: Self, _ rhs: Self) -> Self
 46 | }
 47 | 
 48 | extension Double: NumericTensorElement {
 49 |   public static var isInt64Lossy: Bool { true }
 50 |   public static var isFloatLossy: Bool { false }
 51 |   public static var isBoolLossy: Bool { true }
 52 |   public static var dtype: Tensor.DType { .float32 }
 53 | 
 54 |   public func toFloat() -> Float {
 55 |     return Float(self)
 56 |   }
 57 | 
 58 |   public func toInt64() -> Int64 {
 59 |     return Int64(self)
 60 |   }
 61 | 
 62 |   public func pow<T: TensorElement>(_ exponent: T) -> Double {
 63 |     return Foundation.pow(self, Double(exponent.toFloat()))
 64 |   }
 65 | 
 66 |   public static func modulus(_ lhs: Self, _ rhs: Self) -> Self {
 67 |     if rhs < 0 {
 68 |       -modulus(-lhs, -rhs)
 69 |     } else if lhs < 0 {
 70 |       fmod(rhs - fmod(rhs - lhs, rhs), rhs)
 71 |     } else {
 72 |       fmod(lhs, rhs)
 73 |     }
 74 |   }
 75 | }
 76 | 
 77 | extension Int: NumericTensorElement {
 78 |   public static var isInt64Lossy: Bool { false }
 79 |   public static var isFloatLossy: Bool { false }
 80 |   public static var isBoolLossy: Bool { true }
 81 |   public static var dtype: Tensor.DType { .int64 }
 82 | 
 83 |   public func toFloat() -> Float {
 84 |     return Float(self)
 85 |   }
 86 | 
 87 |   public func toInt64() -> Int64 {
 88 |     return Int64(self)
 89 |   }
 90 | 
 91 |   public func pow<T: TensorElement>(_ exponent: T) -> Int {
 92 |     return Int(Foundation.pow(Double(self), Double(exponent.toFloat())))
 93 |   }
 94 | 
 95 |   public static func modulus(_ lhs: Self, _ rhs: Self) -> Self {
 96 |     if rhs < 0 {
 97 |       -modulus(-lhs, -rhs)
 98 |     } else if lhs < 0 {
 99 |       (rhs - ((rhs - lhs) % rhs)) % rhs
100 |     } else {
101 |       lhs % rhs
102 |     }
103 |   }
104 | }
105 | 
106 | extension Float: NumericTensorElement {
107 |   public static var isInt64Lossy: Bool { true }
108 |   public static var isFloatLossy: Bool { false }
109 |   public static var isBoolLossy: Bool { true }
110 |   public static var dtype: Tensor.DType { .float32 }
111 | 
112 |   public func toFloat() -> Float {
113 |     return self
114 |   }
115 | 
116 |   public func toInt64() -> Int64 {
117 |     return Int64(self)
118 |   }
119 | 
120 |   public func pow<T: TensorElement>(_ exponent: T) -> Float {
121 |     return Foundation.pow(self, exponent.toFloat())
122 |   }
123 | 
124 |   public static func modulus(_ lhs: Self, _ rhs: Self) -> Self {
125 |     if rhs < 0 {
126 |       -modulus(-lhs, -rhs)
127 |     } else if lhs < 0 {
128 |       fmod(rhs - fmod(rhs - lhs, rhs), rhs)
129 |     } else {
130 |       fmod(lhs, rhs)
131 |     }
132 |   }
133 | }
134 | 
135 | #if compiler(>=6)
136 |   extension Bool: @retroactive Comparable {
137 |     public static func < (lhs: Self, rhs: Self) -> Bool {
138 |       lhs == false && rhs == true
139 |     }
140 | 
141 |     public static func <= (lhs: Self, rhs: Self) -> Bool {
142 |       lhs == false
143 |     }
144 | 
145 |     public static func >= (lhs: Self, rhs: Self) -> Bool {
146 |       lhs == true
147 |     }
148 | 
149 |     public static func > (lhs: Self, rhs: Self) -> Bool {
150 |       lhs == true && rhs == false
151 |     }
152 |   }
153 | #else
154 |   extension Bool: Comparable {
155 |     public static func < (lhs: Self, rhs: Self) -> Bool {
156 |       lhs == false && rhs == true
157 |     }
158 | 
159 |     public static func <= (lhs: Self, rhs: Self) -> Bool {
160 |       lhs == false
161 |     }
162 | 
163 |     public static func >= (lhs: Self, rhs: Self) -> Bool {
164 |       lhs == true
165 |     }
166 | 
167 |     public static func > (lhs: Self, rhs: Self) -> Bool {
168 |       lhs == true && rhs == false
169 |     }
170 |   }
171 | #endif
172 | 
173 | extension Bool: TensorElement {
174 |   public static var isInt64Lossy: Bool { false }
175 |   public static var isFloatLossy: Bool { false }
176 |   public static var isBoolLossy: Bool { false }
177 |   public static var dtype: Tensor.DType { .bool }
178 | 
179 |   public init(_ value: Float) {
180 |     self.init(value == 0 ? false : true)
181 |   }
182 | 
183 |   public init(_ value: Int64) {
184 |     self.init(value == 0 ? false : true)
185 |   }
186 | 
187 |   public func toFloat() -> Float {
188 |     return self ? 1 : 0
189 |   }
190 | 
191 |   public func toInt64() -> Int64 {
192 |     return self ? 1 : 0
193 |   }
194 | }
195 | 
196 | extension Int64: NumericTensorElement {
197 |   public static var isInt64Lossy: Bool { false }
198 |   public static var isFloatLossy: Bool { true }
199 |   public static var isBoolLossy: Bool { true }
200 |   public static var dtype: Tensor.DType { .int64 }
201 | 
202 |   public func toFloat() -> Float {
203 |     return Float(self)
204 |   }
205 | 
206 |   public func toInt64() -> Int64 {
207 |     return self
208 |   }
209 | 
210 |   public func pow<T: TensorElement>(_ exponent: T) -> Int64 {
211 |     return Int64(Foundation.pow(Double(self), Double(exponent.toFloat())))
212 |   }
213 | 
214 |   public static func modulus(_ lhs: Self, _ rhs: Self) -> Self {
215 |     if rhs < 0 {
216 |       -modulus(-lhs, -rhs)
217 |     } else if lhs < 0 {
218 |       (rhs - ((rhs - lhs) % rhs)) % rhs
219 |     } else {
220 |       lhs % rhs
221 |     }
222 |   }
223 | }
224 | 
225 | func arrayToPointer<T: TensorElement>(
226 |   _ input: [T], output: UnsafeMutableRawPointer, dtype: Tensor.DType
227 | ) throws {
228 |   switch dtype {
229 |   case .int64:
230 |     arrayToPointer(input, UnsafeMutablePointer<Int64>(OpaquePointer(output)))
231 |   case .bool:
232 |     arrayToPointer(input, UnsafeMutablePointer<Bool>(OpaquePointer(output)))
233 |   case .float32:
234 |     arrayToPointer(input, UnsafeMutablePointer<Float>(OpaquePointer(output)))
235 |   case .float16:
236 |     convertFloatToHalf(input.map { $0.toFloat() }, output)
237 |   }
238 | }
239 | 
240 | private func arrayToPointer<A: TensorElement, B: TensorElement>(
241 |   _ input: [A], _ output: UnsafeMutablePointer<B>
242 | ) {
243 |   if A.self == B.self {
244 |     input.withUnsafeBufferPointer({ srcBuf in
245 |       UnsafeMutableRawPointer(output).copyMemory(
246 |         from: UnsafeRawPointer(srcBuf.baseAddress!), byteCount: input.count * MemoryLayout<A>.stride
247 |       )
248 |     })
249 |   } else {
250 |     if A.isFloatLossy {
251 |       for (i, x) in input.enumerated() {
252 |         output[i] = B(x.toInt64())
253 |       }
254 |     } else {
255 |       for (i, x) in input.enumerated() {
256 |         output[i] = B(x.toFloat())
257 |       }
258 |     }
259 |   }
260 | }
261 | 
262 | func convertFloatToHalf(
263 |   _ input: [Float], _ output: UnsafeMutableRawPointer
264 | ) {
265 |   input.withUnsafeBufferPointer { inputPtr in
266 |     var bufferFloat32 = vImage_Buffer(
267 |       data: UnsafeMutableRawPointer(mutating: inputPtr.baseAddress), height: 1,
268 |       width: UInt(input.count),
269 |       rowBytes: input.count * 4)
270 |     var bufferFloat16 = vImage_Buffer(
271 |       data: output, height: 1, width: UInt(input.count), rowBytes: input.count * 2)
272 | 
273 |     if vImageConvert_PlanarFtoPlanar16F(&bufferFloat32, &bufferFloat16, 0) != kvImageNoError {
274 |       print("Error converting float32 to float16")
275 |     }
276 |   }
277 | }
278 | 
279 | func pointerToArray<T: TensorElement>(
280 |   _ input: UnsafeRawPointer, output: inout [T], dtype: Tensor.DType
281 | ) throws {
282 |   switch dtype {
283 |   case .bool:
284 |     return pointerToArray(UnsafeMutablePointer<Bool>(OpaquePointer(input)), &output)
285 |   case .float32:
286 |     return pointerToArray(UnsafeMutablePointer<Float>(OpaquePointer(input)), &output)
287 |   case .int64:
288 |     return pointerToArray(UnsafeMutablePointer<Int64>(OpaquePointer(input)), &output)
289 |   case .float16:
290 |     var resultFloats = [Float](repeating: 0, count: output.count)
291 |     try resultFloats.withUnsafeMutableBufferPointer { buffer in
292 |       try convertHalfToFloat(
293 |         UnsafeMutableRawPointer(mutating: input),
294 |         UnsafeMutableRawPointer(OpaquePointer(buffer.baseAddress!)), count: output.count)
295 |     }
296 |     for (i, x) in resultFloats.enumerated() {
297 |       output[i] = T(x)
298 |     }
299 |   }
300 | }
301 | 
302 | private func pointerToArray<A: TensorElement, B: TensorElement>(
303 |   _ input: UnsafeMutablePointer<A>, _ output: inout [B]
304 | ) {
305 |   if A.self == B.self {
306 |     let count = output.count
307 |     output.withUnsafeMutableBufferPointer({ dstBuf in
308 |       UnsafeMutableRawPointer(dstBuf.baseAddress!).copyMemory(
309 |         from: UnsafeRawPointer(input), byteCount: count * MemoryLayout<A>.stride
310 |       )
311 |     })
312 |   } else {
313 |     if A.isFloatLossy {
314 |       for i in 0..<output.count {
315 |         output[i] = B(input[i].toInt64())
316 |       }
317 |     } else {
318 |       for i in 0..<output.count {
319 |         output[i] = B(input[i].toFloat())
320 |       }
321 |     }
322 |   }
323 | }
324 | 
325 | private func convertHalfToFloat(
326 |   _ input: UnsafeMutableRawPointer, _ output: UnsafeMutableRawPointer, count: Int
327 | ) throws {
328 |   var bufferFloat16 = vImage_Buffer(
329 |     data: input, height: 1, width: UInt(count), rowBytes: count * 2)
330 |   var bufferFloat32 = vImage_Buffer(
331 |     data: output, height: 1, width: UInt(count),
332 |     rowBytes: count * 4)
333 | 
334 |   let err = vImageConvert_Planar16FtoPlanarF(&bufferFloat16, &bufferFloat32, 0)
335 |   if err != kvImageNoError {
336 |     throw ConversionError.vImageError("\(err)")
337 |   }
338 | }
339 | 


--------------------------------------------------------------------------------
/Sources/Honeycrisp/When.swift:
--------------------------------------------------------------------------------
  1 | import HCBacktrace
  2 | 
  3 | extension Tensor {
  4 |   @recordCaller
  5 |   private func _when(isTrue: Tensor, isFalse: Tensor) -> Tensor {
  6 |     #alwaysAssert(dtype == .bool, "can only call when() on boolean Tensor")
  7 |     #alwaysAssert(
  8 |       isTrue.dtype == isFalse.dtype,
  9 |       "when() argument dtypes differ: \(isTrue.dtype) vs \(isFalse.dtype)")
 10 | 
 11 |     let (newShape, allStrides) = Tensor.lazyBroadcast([self, isTrue, isFalse])
 12 |     let tStrides = allStrides[0]
 13 |     let isTrueStrides = allStrides[1]
 14 |     let isFalseStrides = allStrides[2]
 15 | 
 16 |     let backend = Backend.current
 17 |     let newData = Tensor.createDataTask(self, isTrue, isFalse) { t, isTrue, isFalse in
 18 |       try await backend.when(
 19 |         BroadcastData(strides: tStrides, data: try await t.data),
 20 |         .tensor(BroadcastData(strides: isTrueStrides, data: try await isTrue.data)),
 21 |         .tensor(BroadcastData(strides: isFalseStrides, data: try await isFalse.data)),
 22 |         Float.self,
 23 |         dtype: isTrue.dtype)
 24 |     }
 25 |     if !Tensor.isGradEnabled || (!isTrue.needsGrad && !isFalse.needsGrad) {
 26 |       return Tensor(dataTask: newData, shape: newShape, dtype: isTrue.dtype)
 27 |     } else {
 28 |       let lhsHandle = isTrue.saveForBackward()
 29 |       let rhsHandle = isFalse.saveForBackward()
 30 |       return Tensor(dataTask: newData, shape: newShape, dtype: isTrue.dtype) { grad in
 31 |         lhsHandle.backward(backend) {
 32 |           self.when(isTrue: grad, isFalse: 0).reduceBroadcast(isTrueStrides, as: isTrue)
 33 |         }
 34 |         rhsHandle.backward(backend) {
 35 |           self.when(isTrue: 0, isFalse: grad).reduceBroadcast(isFalseStrides, as: isFalse)
 36 |         }
 37 |       }
 38 |     }
 39 |   }
 40 | 
 41 |   @recordCaller
 42 |   private func _when<T: TensorElement>(isTrue: Tensor, isFalse: T) -> Tensor {
 43 |     #alwaysAssert(dtype == .bool, "can only call when() on boolean Tensor")
 44 | 
 45 |     let (newShape, (tStrides, isTrueStrides)) = Tensor.lazyBroadcast(self, isTrue)
 46 | 
 47 |     let backend = Backend.current
 48 |     let newData = Tensor.createDataTask(self, isTrue) { t, isTrue in
 49 |       try await backend.when(
 50 |         BroadcastData(strides: tStrides, data: try await t.data),
 51 |         .tensor(BroadcastData(strides: isTrueStrides, data: try await isTrue.data)),
 52 |         .scalar(isFalse, newShape),
 53 |         T.self,
 54 |         dtype: isTrue.dtype)
 55 |     }
 56 |     if !Tensor.isGradEnabled || (!isTrue.needsGrad) {
 57 |       return Tensor(dataTask: newData, shape: newShape, dtype: isTrue.dtype)
 58 |     } else {
 59 |       let handle = isTrue.saveForBackward()
 60 |       return Tensor(dataTask: newData, shape: newShape, dtype: isTrue.dtype) { grad in
 61 |         handle.backward(backend) {
 62 |           self.when(isTrue: grad, isFalse: 0).reduceBroadcast(isTrueStrides, as: isTrue)
 63 |         }
 64 |       }
 65 |     }
 66 |   }
 67 | 
 68 |   @recordCaller
 69 |   private func _when<T: TensorElement>(isTrue: T, isFalse: Tensor) -> Tensor {
 70 |     #alwaysAssert(dtype == .bool, "can only call when() on boolean Tensor")
 71 | 
 72 |     let (newShape, (tStrides, isFalseStrides)) = Tensor.lazyBroadcast(self, isFalse)
 73 | 
 74 |     let backend = Backend.current
 75 |     let newData = Tensor.createDataTask(self, isFalse) { t, isFalse in
 76 |       try await backend.when(
 77 |         BroadcastData(strides: tStrides, data: try await t.data),
 78 |         .scalar(isTrue, newShape),
 79 |         .tensor(BroadcastData(strides: isFalseStrides, data: try await isFalse.data)),
 80 |         T.self,
 81 |         dtype: isFalse.dtype)
 82 |     }
 83 |     if !Tensor.isGradEnabled || (!isFalse.needsGrad) {
 84 |       return Tensor(dataTask: newData, shape: newShape, dtype: isFalse.dtype)
 85 |     } else {
 86 |       let handle = isFalse.saveForBackward()
 87 |       return Tensor(dataTask: newData, shape: newShape, dtype: isFalse.dtype) { grad in
 88 |         handle.backward(backend) {
 89 |           self.when(isTrue: 0, isFalse: grad).reduceBroadcast(isFalseStrides, as: isFalse)
 90 |         }
 91 |       }
 92 |     }
 93 |   }
 94 | 
 95 |   @recordCaller
 96 |   private func _when<T: TensorElement>(isTrue: T, isFalse: T, dtype outDType: DType) -> Tensor {
 97 |     #alwaysAssert(dtype == .bool, "can only call when() on boolean Tensor")
 98 |     let backend = Backend.current
 99 |     let newData = createDataTask { t in
100 |       try await backend.when(
101 |         BroadcastData.simple(data: try await t.data, shape: t.shape),
102 |         .scalar(isTrue, t.shape),
103 |         .scalar(isFalse, t.shape),
104 |         T.self,
105 |         dtype: outDType)
106 |     }
107 |     return Tensor(dataTask: newData, shape: shape, dtype: outDType)
108 |   }
109 | }
110 | 


--------------------------------------------------------------------------------
/Sources/HoneycrispMacros/TracebackMacro.swift:
--------------------------------------------------------------------------------
  1 | import SwiftSyntax
  2 | import SwiftSyntaxMacros
  3 | 
  4 | public enum MacroError: Error {
  5 |   case message(String)
  6 | }
  7 | 
  8 | public struct TracebackMacro: PeerMacro {
  9 |   public static func expansion<
 10 |     Context: MacroExpansionContext,
 11 |     Declaration: DeclSyntaxProtocol
 12 |   >(
 13 |     of node: AttributeSyntax,
 14 |     providingPeersOf declaration: Declaration,
 15 |     in context: Context
 16 |   ) throws -> [DeclSyntax] {
 17 | 
 18 |     guard var newNode = declaration.as(FunctionDeclSyntax.self) else {
 19 |       throw MacroError.message("@addAsync only works on functions")
 20 |     }
 21 | 
 22 |     var newParams = Array(newNode.signature.parameterClause.parameters)
 23 | 
 24 |     if var oldLastParam = newParams.popLast() {
 25 |       oldLastParam.trailingComma = .commaToken()
 26 |       newParams.append(oldLastParam)
 27 |     }
 28 | 
 29 |     // Add `file` and `line` parameters
 30 |     newParams.append(
 31 |       FunctionParameterSyntax(
 32 |         firstName: TokenSyntax.identifier("tracebackFile"),
 33 |         type: TypeSyntax(IdentifierTypeSyntax(name: TokenSyntax.identifier("StaticString"))),
 34 |         defaultValue: InitializerClauseSyntax(value: ExprSyntax("#filePath")),
 35 |         trailingComma: TokenSyntax.commaToken())
 36 |     )
 37 |     newParams.append(
 38 |       FunctionParameterSyntax(
 39 |         firstName: TokenSyntax.identifier("tracebackLine"),
 40 |         type: TypeSyntax(IdentifierTypeSyntax(name: TokenSyntax.identifier("UInt"))),
 41 |         defaultValue: InitializerClauseSyntax(value: ExprSyntax("#line")))
 42 |     )
 43 | 
 44 |     var newSignature = newNode.signature
 45 |     var newParamClause = newNode.signature.parameterClause
 46 |     newParamClause.parameters = FunctionParameterListSyntax(newParams)
 47 |     newSignature.parameterClause = newParamClause
 48 |     newNode.signature = newSignature
 49 | 
 50 |     // Optionally remove leading _ from name.
 51 |     let rawName = "\(newNode.name)"
 52 |     let newName =
 53 |       if let underscoreIndex = rawName.firstIndex(of: "_") {
 54 |         String(rawName[rawName.index(after: underscoreIndex)...])
 55 |       } else {
 56 |         rawName
 57 |       }
 58 |     newNode.name = TokenSyntax(stringLiteral: newName)
 59 | 
 60 |     // Make method public if it is marked as private.
 61 |     var newMods = [DeclModifierSyntax]()
 62 |     var doesThrow = false
 63 |     for mod in newNode.modifiers {
 64 |       let name = "\(mod.name)"
 65 |       if name.contains("private") {
 66 |         newMods.append(
 67 |           DeclModifierSyntax(
 68 |             name: TokenSyntax(stringLiteral: "public"), trailingTrivia: mod.trailingTrivia))
 69 |       } else if name.contains("throws") {
 70 |         doesThrow = true
 71 |       } else {
 72 |         newMods.append(mod)
 73 |       }
 74 |     }
 75 |     newNode.modifiers = DeclModifierListSyntax(newMods)
 76 | 
 77 |     let stackCall = CodeBlockItemSyntax(
 78 |       """
 79 |       ThreadTraceback.current.push(tracebackFile, tracebackLine)
 80 |       defer { ThreadTraceback.current.pop() }
 81 |       """
 82 |     )
 83 | 
 84 |     var newBodyStatements = [CodeBlockItemSyntax]()
 85 |     newBodyStatements.append(stackCall)
 86 |     if doesThrow {
 87 |       newBodyStatements.append(
 88 |         CodeBlockItemSyntax("return try {\n\(newNode.body!.statements)\n}()"))
 89 |     } else {
 90 |       newBodyStatements.append(CodeBlockItemSyntax("return {\n\(newNode.body!.statements)\n}()"))
 91 |     }
 92 |     var newBody = newNode.body!
 93 |     newBody.statements = CodeBlockItemListSyntax(newBodyStatements)
 94 |     newNode.body = newBody
 95 | 
 96 |     // Don't accidentally use macro recursively.
 97 |     newNode.attributes = AttributeListSyntax([])
 98 | 
 99 |     // We must add newlines before the new function, but we want to
100 |     // keep any docstrings.
101 |     newNode.leadingTrivia = [.newlines(2)] + newNode.leadingTrivia
102 | 
103 |     return [DeclSyntax(newNode)]
104 |   }
105 | }
106 | 


--------------------------------------------------------------------------------