├── .github └── workflows │ ├── docc.yml │ ├── lint.yml │ └── test.yml ├── .gitignore ├── .swift-format.json ├── Benchmarks ├── BuildingSimulation │ ├── PyTorch │ │ └── PyTorchSimulator.py │ ├── README.md │ ├── Swift │ │ └── main.swift │ └── TensorFlow │ │ └── TensorFlowSimulator.py ├── LanguageSuite │ ├── Benchmarks │ │ └── LanguageCoverageBenchmarks │ │ │ ├── FuzzedFunctions.swift │ │ │ ├── LanguageCoverageBenchmarks.swift │ │ │ ├── LoopedFunctions.swift │ │ │ └── SimpleFunctions.swift │ ├── Package.resolved │ ├── Package.swift │ └── README.md └── README.md ├── LICENSE.txt ├── Package.resolved ├── Package.swift ├── README.md └── Sources ├── BasicDifferentiation └── main.swift ├── BasicGradientDescent └── main.swift ├── CustomDerivatives └── main.swift └── DifferentiableSwiftExamplesDocumentation ├── DifferentiableSwiftExamples.docc ├── DifferentiableSwiftExamples.md ├── Resources │ └── Code │ │ ├── DifferentiableFunctions │ │ ├── DifferentiableFunctions-01-01.swift │ │ ├── DifferentiableFunctions-01-02.swift │ │ ├── DifferentiableFunctions-01-03.swift │ │ ├── DifferentiableFunctions-01-04.swift │ │ ├── DifferentiableFunctions-01-05.swift │ │ ├── DifferentiableFunctions-02-01.swift │ │ ├── DifferentiableFunctions-02-02.swift │ │ ├── DifferentiableFunctions-02-03.swift │ │ ├── DifferentiableFunctions-02-04.swift │ │ ├── DifferentiableFunctions-02-05.swift │ │ ├── DifferentiableFunctions-03-01.swift │ │ ├── DifferentiableFunctions-03-02.swift │ │ └── DifferentiableFunctions-03-03.swift │ │ ├── DifferentiableTypes │ │ ├── DifferentiableTypes-01-01.swift │ │ ├── DifferentiableTypes-01-02.swift │ │ ├── DifferentiableTypes-01-03.swift │ │ ├── DifferentiableTypes-01-04.swift │ │ ├── DifferentiableTypes-01-05.swift │ │ ├── DifferentiableTypes-02-01.swift │ │ ├── DifferentiableTypes-02-02.swift │ │ └── DifferentiableTypes-02-03.swift │ │ └── GradientDescent │ │ ├── GradientDescent-01-01.swift │ │ ├── GradientDescent-01-02.swift │ │ ├── GradientDescent-01-03.swift │ │ ├── GradientDescent-01-04.swift │ │ ├── GradientDescent-01-05.swift │ │ ├── GradientDescent-01-06.swift │ │ ├── GradientDescent-01-07.swift │ │ ├── GradientDescent-01-08.swift │ │ ├── GradientDescent-01-09.swift │ │ ├── GradientDescent-01-10.swift │ │ ├── GradientDescent-01-11.swift │ │ ├── GradientDescent-01-12.swift │ │ └── GradientDescent-01-13.swift ├── Setup.md ├── SharpEdgesInDifferentiableSwift.md ├── Tutorials │ ├── DifferentiableFunctions.tutorial │ ├── DifferentiableTypes.tutorial │ ├── GradientDescent.tutorial │ └── UsingDifferentiableSwift.tutorial └── UsingDifferentiableSwift.md ├── EmptyFile.swift └── README.md /.github/workflows/docc.yml: -------------------------------------------------------------------------------- 1 | name: docc 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | permissions: 8 | pages: write 9 | id-token: write 10 | contents: read 11 | 12 | jobs: 13 | docc: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | - name: install swift 20 | uses: slashmo/install-swift@v0.4.0 21 | with: 22 | version: swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a 23 | - name: build docs 24 | run: swift package --allow-writing-to-directory docs generate-documentation --target DifferentiableSwiftExamples --transform-for-static-hosting --hosting-base-path differentiable-swift-examples --output-path docs 25 | - name: update index 26 | run: echo '' > docs/index.html 27 | - name: setup pages 28 | id: pages 29 | uses: actions/configure-pages@v3 30 | - name: upload artifact 31 | 
uses: actions/upload-pages-artifact@v1 32 | with: 33 | path: docs 34 | - name: deploy to GitHub Pages 35 | id: deployment 36 | uses: actions/deploy-pages@v2 37 | environment: 38 | name: github-pages 39 | url: ${{ steps.deployment.outputs.page_url }} 40 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - '*' 10 | 11 | jobs: 12 | mac: 13 | name: lint-macOS 14 | runs-on: macOS-13 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: install 18 | run: brew install swift-format 19 | - name: run 20 | run: swift-format lint --recursive --parallel --strict --configuration .swift-format.json Package.swift Sources 21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - '*' 10 | 11 | jobs: 12 | mac: 13 | name: test-ubuntu 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: install swift 18 | uses: slashmo/install-swift@v0.4.0 19 | with: 20 | version: swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a 21 | - name: run 22 | run: swift build 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.xcuserstate 3 | project.xcworkspace/ 4 | xcuserdata/ 5 | DerivedData 6 | .idea 7 | *~ 8 | .swiftpm/ 9 | .build/ 10 | .swift-version 11 | *.swp 12 | main 13 | SwiftBenchmark 14 | -------------------------------------------------------------------------------- /.swift-format.json: -------------------------------------------------------------------------------- 1 | { 2 | "fileScopedDeclarationPrivacy" : { 3 | "accessLevel" : "private" 4 | }, 5 | "indentation" : { 6 | "spaces" : 4 7 | }, 8 | "indentConditionalCompilationBlocks" : true, 9 | "indentSwitchCaseLabels" : false, 10 | "lineBreakAroundMultilineExpressionChainComponents" : false, 11 | "lineBreakBeforeControlFlowKeywords" : false, 12 | "lineBreakBeforeEachArgument" : false, 13 | "lineBreakBeforeEachGenericRequirement" : false, 14 | "lineLength" : 100, 15 | "maximumBlankLines" : 1, 16 | "multiElementCollectionTrailingCommas" : true, 17 | "noAssignmentInExpressions" : { 18 | "allowedFunctions" : [ 19 | "XCTAssertNoThrow" 20 | ] 21 | }, 22 | "prioritizeKeepingFunctionOutputTogether" : false, 23 | "respectsExistingLineBreaks" : true, 24 | "rules" : { 25 | "AllPublicDeclarationsHaveDocumentation" : false, 26 | "AlwaysUseLiteralForEmptyCollectionInit" : false, 27 | "AlwaysUseLowerCamelCase" : true, 28 | "AmbiguousTrailingClosureOverload" : true, 29 | "BeginDocumentationCommentWithOneLineSummary" : false, 30 | "DoNotUseSemicolons" : true, 31 | "DontRepeatTypeInStaticProperties" : true, 32 | "FileScopedDeclarationPrivacy" : true, 33 | "FullyIndirectEnum" : true, 34 | "GroupNumericLiterals" : true, 35 | "IdentifiersMustBeASCII" : true, 36 | "NeverForceUnwrap" : false, 37 | "NeverUseForceTry" : false, 38 | "NeverUseImplicitlyUnwrappedOptionals" : false, 39 | "NoAccessLevelOnExtensionDeclaration" : true, 40 | "NoAssignmentInExpressions" : true, 41 | "NoBlockComments" : true, 42 | "NoCasesWithOnlyFallthrough" : true, 43 | 
"NoEmptyTrailingClosureParentheses" : true, 44 | "NoLabelsInCasePatterns" : true, 45 | "NoLeadingUnderscores" : false, 46 | "NoParensAroundConditions" : true, 47 | "NoPlaygroundLiterals" : true, 48 | "NoVoidReturnOnFunctionSignature" : true, 49 | "OmitExplicitReturns" : false, 50 | "OneCasePerLine" : true, 51 | "OneVariableDeclarationPerLine" : true, 52 | "OnlyOneTrailingClosureArgument" : true, 53 | "OrderedImports" : true, 54 | "ReplaceForEachWithForLoop" : true, 55 | "ReturnVoidInsteadOfEmptyTuple" : true, 56 | "TypeNamesShouldBeCapitalized" : true, 57 | "UseEarlyExits" : false, 58 | "UseLetInEveryBoundCaseVariable" : true, 59 | "UseShorthandTypeNames" : true, 60 | "UseSingleLinePropertyGetter" : true, 61 | "UseSynthesizedInitializer" : true, 62 | "UseTripleSlashForDocumentationComments" : true, 63 | "UseWhereClausesInForLoops" : false, 64 | "ValidateDocumentationComments" : false 65 | }, 66 | "spacesAroundRangeFormationOperators" : false, 67 | "tabWidth" : 4, 68 | "version" : 1 69 | } 70 | -------------------------------------------------------------------------------- /Benchmarks/BuildingSimulation/PyTorch/PyTorchSimulator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Simulation parameters 4 | trials = 100 5 | timesteps = 20 6 | warmup = 3 7 | dTime = 0.1 8 | printGradToCompare = False 9 | 10 | # Definitions 11 | π = 3.14159265359 12 | 13 | # TubeType and other custom object holding primitives will be represented with a 1D Tensor, 14 | # and SimParams will compose them into a 2D tensor 15 | 16 | # make each 1D Tensor the same length, to avoid having to use Ragged Tensors 17 | # with padding added to match length of other 1D Tensors 18 | TubeType = torch.tensor([0.50292, 0.019, 0.001588, 2.43, 0.0], requires_grad=True) 19 | 20 | # define indexes for sanity's sake 21 | class TubeTypeIndices: 22 | itubeSpacing = 0 23 | idiameter = 1 24 | ithickness = 2 25 | iresistivity = 3 26 | 27 | SlabType = torch.tensor([21.1111111, 100.0, 0.2, 2242.58, 0.101], requires_grad=True) 28 | 29 | class SlabTypeIndices: 30 | itemp = 0 31 | iarea = 1 32 | iCp = 2 33 | idensity = 3 34 | ithickness = 4 35 | 36 | QuantaType = torch.tensor([0.0, 60.0, 0.0006309, 1000.0, 4180.0], requires_grad=True) 37 | 38 | class QuantaIndices: 39 | ipower = 0 40 | itemp = 1 41 | iflow = 2 42 | idensity = 3 43 | iCp = 4 44 | 45 | TankType = torch.tensor([70.0, 0.0757082, 4180.0, 1000.0, 75.708], requires_grad=True) 46 | 47 | class TankTypeIndices: 48 | itemp = 0 49 | ivolume = 1 50 | iCp = 2 51 | idensity = 3 52 | imass = 4 53 | 54 | #------------------------------------------------------------------------ 55 | # represent starting temp as a 5 length padded Tensor to match other Tensor sizes 56 | # (to avoid having to use Ragged Tensors) 57 | startingTemperature = torch.tensor([33.3, 0, 0, 0, 0], requires_grad=True) 58 | 59 | 60 | # SimParams will be represented with a 2D Tensor, where each 61 | # member (a custom type itself) is represented by a 1D Tensor 62 | SimParamsConstant = torch.stack([TubeType, SlabType, QuantaType, TankType, startingTemperature]) 63 | assert SimParamsConstant.size() == (5,5) 64 | 65 | class SimParamsIndices: 66 | itube = 0 67 | islab = 1 68 | iquanta = 2 69 | itank = 3 70 | istartingTemp = 4 71 | 72 | 73 | # Computations 74 | 75 | def computeResistance(floor, tube, quanta): 76 | geometry_coeff = 10.0 77 | 78 | tubingSurfaceArea = (floor[SlabTypeIndices.iarea] / tube[TubeTypeIndices.itubeSpacing]) * π * tube[TubeTypeIndices.idiameter] 79 
| resistance_abs = tube[TubeTypeIndices.iresistivity] * tube[TubeTypeIndices.ithickness] / tubingSurfaceArea 80 | 81 | resistance_corrected = resistance_abs * geometry_coeff 82 | 83 | return resistance_corrected 84 | 85 | 86 | def computeLoadPower(floor, tube, quanta): 87 | resistance_abs = computeResistance(floor, tube, quanta) 88 | 89 | conductance = 1/resistance_abs 90 | dTemp = floor[SlabTypeIndices.itemp] - quanta[QuantaIndices.itemp] 91 | power = dTemp * conductance 92 | 93 | loadPower = -power 94 | 95 | resultQuanta = quanta * torch.tensor([0.0, 1, 1, 1, 1], requires_grad=True) + power * torch.tensor([1.0, 0, 0, 0, 0], requires_grad=True) 96 | 97 | return (resultQuanta, loadPower) 98 | 99 | def updateQuanta(quanta): 100 | workingVolume = (quanta[QuantaIndices.iflow] * dTime) 101 | workingMass = (workingVolume * quanta[QuantaIndices.idensity]) 102 | workingEnergy = quanta[QuantaIndices.ipower] * dTime 103 | TempRise = workingEnergy / quanta[QuantaIndices.iCp] / workingMass 104 | 105 | resultQuanta = quanta + TempRise * torch.tensor([0.0, 1, 0, 0, 0]) 106 | resultQuanta = resultQuanta * torch.tensor([0.0, 1, 1, 1, 1]) 107 | 108 | return resultQuanta 109 | 110 | def updateBuildingModel(power, floor): 111 | floorVolume = floor[SlabTypeIndices.iarea] * floor[SlabTypeIndices.ithickness] 112 | floorMass = floorVolume * floor[SlabTypeIndices.idensity] 113 | floorTempChange = (power * dTime) / floor[SlabTypeIndices.iCp] / floorMass 114 | 115 | resultFloor = floor + floorTempChange * torch.Tensor([1.0, 0, 0, 0, 0]) 116 | 117 | return resultFloor 118 | 119 | def updateSourceTank(store, quanta): 120 | massPerTime = quanta[QuantaIndices.iflow] * quanta[QuantaIndices.idensity] 121 | dTemp = store[TankTypeIndices.itemp] - quanta[QuantaIndices.itemp] 122 | power = dTemp * massPerTime * quanta[QuantaIndices.iCp] 123 | 124 | updatedQuanta = quanta * torch.Tensor([0.0, 1, 1, 1, 1]) + power * torch.Tensor([1.0, 0, 0, 0, 0]) 125 | 126 | tankMass = store[TankTypeIndices.ivolume] * store[TankTypeIndices.idensity] 127 | TempRise = (power * dTime) / store[TankTypeIndices.iCp] / tankMass 128 | 129 | updatedStore = store + TempRise * torch.Tensor([1.0, 0, 0, 0, 0]) 130 | 131 | return (updatedStore, updatedQuanta) 132 | 133 | def lossCalc(pred, gt): 134 | return torch.abs(pred - gt) 135 | 136 | # Simulations 137 | 138 | def simulate(simParams): 139 | pexTube = simParams[SimParamsIndices.itube] 140 | slab = simParams[SimParamsIndices.islab] 141 | tank = simParams[SimParamsIndices.itank] 142 | quanta = simParams[SimParamsIndices.iquanta] 143 | 144 | startingTemp = simParams[SimParamsIndices.istartingTemp][0] 145 | slab = slab * torch.Tensor([0.0, 1, 1, 1, 1]) + startingTemp * torch.Tensor([1.0, 0, 0, 0, 0]) 146 | 147 | for i in range(0, timesteps): 148 | tankAndQuanta = updateSourceTank(tank, quanta) 149 | tank = tankAndQuanta[0] 150 | quanta = tankAndQuanta[1] 151 | 152 | quanta = updateQuanta(quanta) 153 | 154 | quantaAndPower = computeLoadPower(slab, pexTube, quanta) 155 | quanta = quantaAndPower[0] 156 | powerToBuilding = quantaAndPower[1] 157 | quanta = updateQuanta(quanta) 158 | 159 | slab = updateBuildingModel(powerToBuilding, slab) 160 | 161 | return slab[SlabTypeIndices.itemp] 162 | 163 | import time 164 | 165 | def measure(function, arguments): 166 | start = time.time() 167 | result = function(arguments) 168 | end = time.time() 169 | return (end - start, result) 170 | 171 | 172 | def fullPipe(simParams): 173 | pred = simulate(simParams) 174 | loss = lossCalc(pred, 27.344767) 175 | return loss 176 | 177 
| 178 | totalForwardTime = 0 179 | totalGradientTime = 0 180 | 181 | 182 | for i in range(trials + warmup): 183 | 184 | inputs = SimParamsConstant 185 | forwardTime, forwardOutput = measure(fullPipe, inputs) 186 | 187 | simParams = SimParamsConstant 188 | def getGradient(simParams): 189 | gradient = torch.autograd.grad(forwardOutput, inputs) # backward pass only: differentiates the forwardOutput captured above w.r.t. inputs; the simParams argument is just a payload for measure() 190 | return gradient 191 | 192 | 193 | gradientTime, gradient = measure(getGradient, simParams) 194 | 195 | if printGradToCompare: 196 | print(gradient) 197 | 198 | if i >= warmup: 199 | totalForwardTime += forwardTime 200 | totalGradientTime += gradientTime 201 | 202 | 203 | averageForwardTime = totalForwardTime / trials 204 | averageGradientTime = totalGradientTime / trials 205 | 206 | print("trials:", trials) 207 | print("timesteps:", timesteps) 208 | print(f"average forward only time: {averageForwardTime} seconds") 209 | print(f"average forward and backwards (gradient) time: {averageGradientTime} seconds") 210 | -------------------------------------------------------------------------------- /Benchmarks/BuildingSimulation/README.md: -------------------------------------------------------------------------------- 1 | # Differentiable Simulator Benchmarks 2 | 3 | [PassiveLogic](https://passivelogic.com) is constructing autonomous systems for building control and 4 | more, utilizing physics-based digital twins. As a motivating use case for differentiable Swift, a 5 | simple thermal model of a building was constructed and optimized via gradient descent in several 6 | languages and frameworks. 7 | 8 | Differentiable Swift proves to be the best of the available solutions, and that has driven 9 | PassiveLogic's investment in the language feature. This directory contains a representative benchmark 10 | for a thermal model of a building implemented in differentiable Swift, 11 | [PyTorch](https://pytorch.org), and [TensorFlow](https://www.tensorflow.org). 12 | 13 | In this benchmark, the average time for a full forward + backward pass through the simulation is 14 | measured across multiple trials. The lower the time, the better. 15 | 16 | ## Running Benchmarks 17 | 18 | To evaluate the benchmarks yourself, the following sections provide setup instructions for the 19 | environments needed for each language / framework. These instructions should be valid for macOS and 20 | Ubuntu 20.04, but may require slight modification for other platforms. 21 | 22 | ### Swift 23 | 24 | A Swift toolchain with support for differentiation must be installed and in your current path. We 25 | recommend using one [downloaded from Swift.org](https://www.swift.org/download/) for your platform. 26 | Nightly toolchain snapshots tend to have better performance, due to new optimizations and 27 | architectural improvements constantly being upstreamed. More information on toolchain installation 28 | and management can be found [here](https://passivelogic.github.io/differentiable-swift-examples/documentation/differentiableswiftexamples/setup).
29 | 30 | When using a recent Swift.org nightly toolchain snapshot on macOS, you may need to set the following environment variables to point to the correct macOS SDK and Swift runtime: 31 | ```bash 32 | export SDKROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.sdk 33 | ``` 34 | ```bash 35 | export DYLD_LIBRARY_PATH=/Library/Developer/Toolchains/swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a.xctoolchain/usr/lib/swift/macosx 36 | ``` 37 | 38 | To build the benchmark, change into the `Swift` subdirectory and run the following: 39 | ```bash 40 | swiftc -O main.swift -o SwiftBenchmark 41 | ``` 42 | 43 | and then run it via: 44 | ```bash 45 | ./SwiftBenchmark 46 | ``` 47 | 48 | ### PyTorch 49 | 50 | For these benchmarks, we've used PyTorch on the CPU, running in a dedicated Python environment. If 51 | you have such an environment, you can activate it and jump ahead to running the benchmark. To 52 | set up such an environment, start in your home directory and type: 53 | 54 | ```bash 55 | python3 -m venv pytorch-cpu 56 | source pytorch-cpu/bin/activate 57 | pip install torch torchvision 58 | ``` 59 | 60 | and then run the benchmark by going to the `PyTorch` subdirectory here and using: 61 | 62 | ```bash 63 | python3 PyTorchSimulator.py 64 | ``` 65 | 66 | ### TensorFlow 67 | 68 | For these benchmarks, we've used TensorFlow on the CPU, running in a dedicated Python environment. If 69 | you have such an environment, you can activate it and jump ahead to running the benchmark. To 70 | set up such an environment, start in your home directory and type: 71 | 72 | ```bash 73 | python3 -m venv tensorflow-cpu 74 | source tensorflow-cpu/bin/activate 75 | pip install tensorflow 76 | ``` 77 | 78 | and then run the benchmark by going to the `TensorFlow` subdirectory here and using: 79 | 80 | ```bash 81 | python3 TensorFlowSimulator.py 82 | ``` 83 | 84 | ## Current Results 85 | 86 | ### 2024-07-30 87 | 88 | Various optimizations in Differentiable Swift landed in the nightly toolchain dated 2024-06-03. The following benchmarks were run primarily to measure the effect of those optimizations. This optimized version of Differentiable Swift was compared to PyTorch and TensorFlow, as well as the most recent toolchain _without_ these optimizations, which resolved to the nightly toolchain dated 2024-05-15. 89 | 90 | In addition to Forward Only and Gradient measurements, Memory Utilization and Power Consumption were also recorded for comparison. The dimensions of each simulation were scaled from 100 to 100,000 in both number of `trials` and `timesteps`. 91 | 92 | Note that 'Swift Improvement' in the tables below is calculated by dividing each measurement by the corresponding measurement from the optimized Swift column. In other words, a 'Swift Improvement' of 5.2 translates to a measurement being 5.2x longer/larger/more than optimized Swift's measurement. 93 | 94 | #### Environment Setup 95 | Forward Only/Gradient and Memory Utilization results were gathered from the same hardware, with the following specs: 96 | - Model: MacBook Pro, 2021 97 | - CPU: Apple M1 Max 98 | - Memory: 32 GB 99 | - OS: Sonoma 14.5 100 | 101 | Power consumption results were gathered from Jetson Orin NX hardware: 102 | - Model: Jetson Orin NX 16GB 103 | - CPU: 8-core Arm® Cortex®-A78AE v8.2 64-bit CPU 2MB L2 + 4MB L3 104 | - Memory: 16 GB 105 | - OS: Ubuntu 20.04.6 LTS 106 | 107 | #### Forward Only and Gradient times 108 | Results were recorded from each script's execution output. 
Example Swift output:
```
$ ./SwiftBenchmark
trials: 1000
timesteps: 1000
average forward only time: 2.1570954999999886e-05 seconds
average forward and back (gradient) time: 0.0002565037070000004 seconds
```

---
##### Forward only time
All times are in seconds.

| N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 1.0133E-06 | 1.30496E-06 | 1.3 | 0.00237510204315186 | 2,344 | 0.000805909633636475 | 795 |
| 1000 | 9.86573000000009E-07 | 6.88607999999986E-07 | 0.7 | 0.00232325196266174 | 2,355 | 0.00071248984336853 | 722 |
| 10000 | 5.82377500000031E-07 | 4.38613799999972E-07 | 0.8 | 0.00217494251728058 | 3,735 | 0.000711746025085449 | 1222 |
| 100000 | 4.26006110000326E-07 | 4.15276820000269E-07 | 1.0 | 0.00216188388347626 | 5,075 | 0.000706250309944153 | 1658 |

| N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 2.708E-06 | 1.5333E-05 | 5.7 | 0.0117650032043457 | 4344 | 0.0032660961151123 | 1206 |
| 1000 | 2.6625E-05 | 3.9333E-05 | 1.5 | 0.134914875030518 | 5067 | 0.0305349826812744 | 1146 |
| 10000 | 0.0002945 | 0.000286833 | 1.0 | 1.36807107925415 | 4645 | 0.283676862716675 | 963 |
| 100000 | 0.002944209 | 0.002668 | 0.9 | 14.5915961265564 | 4956 | 2.96268224716187 | 1006 |
---

##### Gradient time
All times are in seconds.

| N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 1.15975E-05 | 7.078584E-05 | 6.1 | 0.00431931495666504 | 372 | 0.00388913154602051 | 335 |
| 1000 | 1.0679188E-05 | 4.90754170000001E-05 | 4.6 | 0.00436905145645142 | 409 | 0.00370328974723816 | 347 |
| 10000 | 6.28462230000005E-06 | 3.25445147000002E-05 | 5.2 | 0.00417288513183594 | 664 | 0.00359320862293243 | 572 |
| 100000 | 4.59303872000145E-06 | 3.11354585500016E-05 | 6.8 | 0.0042010071516037 | 915 | 0.00364944223880768 | 795 |

| N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 4.8334E-05 | 0.000240042 | 5.0 | 0.0222558975219727 | 460 | 0.0169031620025635 | 349 |
| 1000 | 0.000373375 | 0.002190209 | 5.9 | 0.242650985717773 | 649 | 0.169112920761108 | 452 |
| 10000 | 0.003654458 | 0.021470334 | 5.9 | 2.81965517997742 | 771 | 48.2916069030762 | 13214\* |
| 100000 | 0.0372425 | 0.179142666 | 4.8 | 36.6493611335754 | 984 | 983.002796888351 | 26394\* |
\* \- Two recommended performance improvements were adopted in TensorFlowSimulator.py before running benchmarks. The first was to decorate `getGradient` with `@tf.function` to disable default eager execution, which generally increased performance. The second was to use `tf.range` instead of Python's `range` function, in order to avoid 'Large unrolled loop' warnings. This greatly reduced overall memory usage, but seemed to trigger a severe increase in gradient times in high-timestep cases (10k and 100k). The underlying cause has not yet been identified. A minimal sketch of these two changes appears below.
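For reference, a minimal sketch of those two changes, assuming `simulate` and `timesteps` are defined as in `TensorFlowSimulator.py` (whose final version already incorporates both):

```python
import tensorflow as tf

# Wrapping the gradient computation in @tf.function traces it into a graph,
# avoiding TensorFlow 2.x's default eager execution on every call.
@tf.function
def getGradient(simParams):
    with tf.GradientTape() as tape:
        endTemperature = simulate(simParams)  # simulate() as defined in the script
    return tape.gradient(endTemperature, [simParams])

# Inside simulate(), iterating with tf.range keeps the loop as a single graph
# op instead of unrolling `timesteps` copies of the loop body during tracing:
#     for i in tf.range(timesteps):
#         ...
```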
---
#### Memory Utilization
For memory utilization, the `time` utility was used to measure 'maximum resident set size' and 'peak memory footprint'.
Example output:
```
/usr/bin/time -l ./SwiftBenchmark
trials: 1000
timesteps: 1000
average forward only time: 2.138608299999987e-05 seconds
average forward and back (gradient) time: 0.00025673441800000017 seconds
        0.28 real         0.26 user         0.02 sys
             5029888  maximum resident set size
                   0  average shared memory size
                   0  average unshared data size
                   0  average unshared stack size
                 455  page reclaims
                   1  page faults
                   0  swaps
                   0  block input operations
                   0  block output operations
                   0  messages sent
                   0  messages received
                   0  signals received
                   0  voluntary context switches
                  22  involuntary context switches
          3567193705  instructions retired
           854329255  cycles elapsed
             3278336  peak memory footprint
```

---
##### Maximum resident set size

All sizes are in bytes.

| N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 3899392 | 3391488 | 0.9 | 198754304 | 51 | 433504256 | 111 |
| 1000 | 3391488 | 3751936 | 1.1 | 196542464 | 58 | 430637056 | 127 |
| 10000 | 3604480 | 3751936 | 1.0 | 199507968 | 55 | 433668096 | 120 |
| 100000 | 3538944 | 3915776 | 1.1 | 201326592 | 57 | 437469184 | 124 |

| N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 3375104 | 3866624 | 1.1 | 205455360 | 60 | 428326912 | 126 |
| 1000 | 3866624 | 5537792 | 1.4 | 379322368 | 98 | 426360832 | 110 |
| 10000 | 8421376 | 24870912 | 3.0 | 2404892672 | 285 | 609255424 | 72 |
| 100000 | 55050240 | 220332032 | 4.0 | 10271408128 | 186 | 1661583360 | 30 |
---

##### Peak memory footprint
All sizes are in bytes.

| N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 2458816 | 1901696 | 0.8 | 133400896 | 54 | 239210560 | 97 |
| 1000 | 1950912 | 2294976 | 1.2 | 132860352 | 68 | 236031808 | 121 |
| 10000 | 2163904 | 2278656 | 1.1 | 133253440 | 62 | 236719872 | 109 |
| 100000 | 2098368 | 2409728 | 1.1 | 134318336 | 64 | 240111552 | 114 |

| N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
|---|---|---|---|---|---|---|---|
| 100 | 1934528 | 2393280 | 1.2 | 143346176 | 74 | 238309248 | 123 |
| 1000 | 2393280 | 4064448 | 1.7 | 292129920 | 122 | 239030080 | 99 |
| 10000 | 6964480 | 23430464 | 3.4 | 1819930944 | 261 | 293621952 | 42 |
| 100000 | 48531072 | 214257152 | 4.4 | 17550355200 | 361 | 806886848 | 16 |
---
#### Power Consumption
To measure the energy consumed during program execution, we used a current shunt, a differential probe, and an oscilloscope.

The current shunt resistor was placed in series with the positive input power terminal on the Orin. Both the voltage and the current consumed were captured at high speed using two channels on an oscilloscope.

The capture length of the oscilloscope trace is 1.2 seconds. Because 3 of the 4 tests are too long to be fully captured, we measured the total time that each test took and used the oscilloscope measurements to determine the average power consumed during each program's test. We then extrapolated the total energy consumed by each program by multiplying that average power level by the length of the test in seconds.

The overall number of compute operations was calculated by multiplying the number of trials (5000) by the number of timesteps (1000) by each program's number of mathematical operations per timestep (37 for Swift, 49 for TensorFlow/PyTorch). It is worth noting that some calculations required extra steps to work with compatible tensor shapes (for example, compare the `updateQuanta` function in each program). While we do factor this into our results, it highlights the difference in using automatic differentiation to operate on heterogeneous neural networks as opposed to conforming to shape-defined tensors.

The following results show a few different views of energy consumption: Operations computed per kiloJoule consumed, and Joules consumed per giga-Operation (J/GOps). A worked example of this arithmetic follows the table below.

##### Power consumption of a 5000-trial 1000-timestep simulation
| Configuration | Test Power (avg W) | Test Length (s) | Total Energy (J) | Normalized Ratio | Ops / kiloJoule | Joules / GigaOp |
|---|---|---|---|---|---|---|
| Swift nightly toolchain 2024-05-15 | 10.37 | 3.828 | 39.70 | 6 | 4,660 | 215 |
| Swift nightly toolchain 2024-06-03 | 10.20 | 0.616 | 6.28 | 1 | 29,452 | 34 |
| Tensorflow | 12.54 | 658.846 | 8259.66 | 1315 | 30 | 33713 |
| PyTorch | 12.34 | 3340.826 | 41220.14 | 6562 | 6 | 168245 |
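To make the derivation concrete, here is the arithmetic behind the derived columns for one row, as a quick sanity check in Python (not part of the benchmark code; values are copied from the table above, and the Ops / kiloJoule column appears to be expressed in millions of operations per kilojoule):

```python
# Worked example of the table's derived columns, using the
# Swift nightly toolchain 2024-06-03 row.
trials, timesteps = 5000, 1000
ops_per_timestep = 37                              # Swift, per the methodology above
total_ops = trials * timesteps * ops_per_timestep  # 1.85e8 operations

avg_power_w, test_length_s = 10.20, 0.616
total_energy_j = avg_power_w * test_length_s       # ~6.28 J

# "Ops / kiloJoule", in millions of operations per kilojoule:
ops_per_kj_millions = total_ops / (total_energy_j / 1000) / 1e6  # ~29,450 (table: 29,452)
# "Joules / GigaOp":
joules_per_gigaop = total_energy_j / (total_ops / 1e9)           # ~34 (table: 34)
```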
---

## Previous Results
### 2023-12-10
The following timings were gathered using these benchmarks on an M1 Pro MacBook Pro (14", 2021):

| **Version** | **Time (ms)** | **Slowdown Compared to Swift** |
|---|:---:|:---:|
| **Swift** | 0.03 | 1X |
| **PyTorch** | 8.16 | 238X |
| **TensorFlow** | 11.0 | 322X |
-------------------------------------------------------------------------------- /Benchmarks/BuildingSimulation/Swift/main.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | import Foundation 3 | 4 | // Simulation parameters 5 | let trials = 100 6 | let timesteps = 20 7 | let dTime: Float = 0.1 8 | let printGradToCompare = false 9 | 10 | // Definitions 11 | let π = Float.pi 12 | 13 | struct SimParams: Differentiable { 14 | var tube: TubeType = .init() 15 | var slab: SlabType = .init() 16 | var quanta: QuantaType = .init() 17 | var tank: TankType = .init() 18 | var startingTemp: Float 19 | } 20 | 21 | struct TubeType: Differentiable { 22 | var tubeSpacing: Float = 0.50292 // meters 23 | var diameter: Float = 0.019 // m (3/4") 24 | var thickness: Float = 0.001588 // m (1/16") 25 | var resistivity: Float = 2.43 // (K/W)m 26 | } 27 | 28 | struct SlabType: Differentiable { 29 | var temp: Float = 21.1111111 // °C 30 | var area: Float = 100.0 // m^2 31 | var Cp: Float = 0.2 32 | var density: Float = 2242.58 // kg/m^3 33 | var thickness: Float = 0.101 // m 34 | } 35 | 36 | struct QuantaType: Differentiable { 37 | var power: Float = 0.0 // Watt 38 | var temp: Float = 60.0 // °C 39 | var flow: Float = 0.0006309 // m^3/sec 40 | var density: Float = 1000.0 // kg/m^3 41 | var Cp: Float = 4180.0 // ws/(kg • K) 42 | } 43 | 44 | struct TankType: Differentiable { 45 | var temp: Float = 70.0 46 | var volume: Float = 0.0757082 47 | var Cp: Float = 4180.000 48 | var density: Float = 1000.000 49 | var mass: Float = 75.708 50 | } 51 | 52 | // Computations 53 | 54 | @differentiable(reverse) 55 | func computeResistance(floor: SlabType, tube: TubeType, quanta _: QuantaType) -> Float { 56 | let geometry_coeff: Float = 10.0 57 | // let f_coff = 0.3333333 58 | 59 | let tubingSurfaceArea = (floor.area / tube.tubeSpacing) * π * tube.diameter 60 | let resistance_abs = tube.resistivity * tube.thickness / tubingSurfaceArea 61 | 62 | let resistance_corrected = resistance_abs * geometry_coeff // * (quanta.flow * f_coff) 63 | 64 | return resistance_corrected 65 | } 66 | 67 | struct QuantaAndPower: Differentiable { 68 | var quanta: QuantaType 69 | var power: Float 70 | } 71 | 72 | 73 | extension Differentiable { 74 | /// Applies the given closure to the derivative of `self`. 75 | /// 76 | /// Returns `self` like an identity function. When the return value is used in 77 | /// a context where it is differentiated with respect to, applies the given 78 | /// closure to the derivative of the return value.
79 | @inlinable 80 | @differentiable(reverse, wrt: self) 81 | func withDerivative(_: @escaping (inout TangentVector) -> Void) -> Self { 82 | return self 83 | } 84 | 85 | @inlinable 86 | @derivative(of: withDerivative) 87 | func _vjpWithDerivative( 88 | _ body: @escaping (inout TangentVector) -> Void 89 | ) -> (value: Self, pullback: (TangentVector) -> TangentVector) { 90 | return (self, { grad in 91 | var grad = grad 92 | body(&grad) 93 | return grad 94 | }) 95 | } 96 | } 97 | 98 | @differentiable(reverse) 99 | func computeLoadPower(floor: SlabType, tube: TubeType, quanta: QuantaType) -> QuantaAndPower { 100 | let resistance_abs = computeResistance(floor: floor, tube: tube, quanta: quanta) 101 | 102 | let conductance: Float = 1 / resistance_abs 103 | let dTemp = floor.temp - quanta.temp 104 | let power = dTemp * conductance 105 | 106 | var updatedQuanta = quanta 107 | updatedQuanta.power = power 108 | let loadPower = -power 109 | 110 | return QuantaAndPower(quanta: updatedQuanta, power: loadPower) 111 | } 112 | 113 | @differentiable(reverse) 114 | func updateQuanta(quanta: QuantaType) -> QuantaType { 115 | let workingVolume = (quanta.flow * dTime) 116 | let workingMass = (workingVolume * quanta.density) 117 | let workingEnergy = quanta.power * dTime 118 | let TempRise = workingEnergy / quanta.Cp / workingMass 119 | var updatedQuanta = quanta 120 | updatedQuanta.temp = quanta.temp + TempRise 121 | 122 | updatedQuanta.power = 0 123 | return updatedQuanta 124 | } 125 | 126 | @differentiable(reverse) 127 | func updateBuildingModel(power: Float, floor: SlabType) -> SlabType { 128 | var updatedFloor = floor 129 | 130 | let floorVolume = floor.area * floor.thickness 131 | let floorMass = floorVolume * floor.density 132 | 133 | updatedFloor.temp = floor.temp + ((power * dTime) / floor.Cp / floorMass) 134 | return updatedFloor 135 | } 136 | 137 | struct TankAndQuanta: Differentiable { 138 | var tank: TankType 139 | var quanta: QuantaType 140 | } 141 | 142 | @differentiable(reverse) 143 | func updateSourceTank(store: TankType, quanta: QuantaType) -> TankAndQuanta { 144 | var updatedStore = store 145 | var updatedQuanta = quanta 146 | 147 | let massPerTime = quanta.flow * quanta.density 148 | let dTemp = store.temp - quanta.temp 149 | let power = dTemp * massPerTime * quanta.Cp 150 | 151 | updatedQuanta.power = power 152 | 153 | let tankMass = store.volume * store.density 154 | let TempRise = (power * dTime) / store.Cp / tankMass 155 | updatedStore.temp = store.temp + TempRise 156 | 157 | return TankAndQuanta(tank: updatedStore, quanta: updatedQuanta) 158 | } 159 | 160 | var simParams = SimParams(startingTemp: 33.3) 161 | 162 | @differentiable(reverse) 163 | @inlinable public func absDifferentiable(_ value: Float) -> Float { 164 | if value < 0 { 165 | return -value 166 | } 167 | return value 168 | } 169 | 170 | func lossCalc(pred: Float, gt: Float) -> Float { 171 | let diff = pred - gt 172 | return absDifferentiable(diff) 173 | } 174 | 175 | // Simulations 176 | 177 | @differentiable(reverse) 178 | func simulate(simParams: SimParams) -> Float { 179 | let pexTube = simParams.tube 180 | var slab = simParams.slab 181 | var tank = simParams.tank 182 | var quanta = simParams.quanta 183 | 184 | slab.temp = simParams.startingTemp 185 | for _ in 0 ..< timesteps { 186 | let tankAndQuanta = updateSourceTank(store: tank, quanta: quanta) 187 | tank = tankAndQuanta.tank 188 | quanta = tankAndQuanta.quanta 189 | 190 | quanta = updateQuanta(quanta: quanta) 191 | 192 | let quantaAndPower = computeLoadPower(floor: 
slab, tube: pexTube, quanta: quanta) 193 | quanta = quantaAndPower.quanta 194 | let powerToBuilding = quantaAndPower.power 195 | quanta = updateQuanta(quanta: quanta) 196 | 197 | slab = updateBuildingModel(power: powerToBuilding, floor: slab) 198 | } 199 | return slab.temp 200 | } 201 | 202 | var blackHole: Any? 203 | @inline(never) 204 | func dontLetTheCompilerOptimizeThisAway<T>(_ x: T) { 205 | blackHole = x 206 | } 207 | 208 | func measure<T>(_ block: () throws -> T) throws -> (time: Double, result: T) { 209 | let t0 = DispatchTime.now() 210 | let result = try block() 211 | let t1 = DispatchTime.now() 212 | let elapsed = Double(t1.uptimeNanoseconds - t0.uptimeNanoseconds) / 1E9 213 | return (elapsed, result) 214 | } 215 | 216 | @differentiable(reverse) 217 | func fullPipe(simParams: SimParams) -> Float { 218 | let pred = simulate(simParams: simParams) 219 | let loss = lossCalc(pred: pred, gt: 27.344767) 220 | return loss 221 | } 222 | 223 | var totalPureForwardTime: Double = 0 224 | var totalGradientTime: Double = 0 225 | 226 | for _ in 0 ..< trials { 227 | let (forwardOnly, _) = try measure { 228 | return fullPipe(simParams: simParams) 229 | } 230 | dontLetTheCompilerOptimizeThisAway(forwardOnly) 231 | 232 | let (gradientTime, grad) = try measure { 233 | return gradient(at: simParams, of: fullPipe) 234 | } 235 | dontLetTheCompilerOptimizeThisAway(grad) 236 | 237 | if printGradToCompare { 238 | print(grad) 239 | } 240 | 241 | totalPureForwardTime += forwardOnly 242 | totalGradientTime += gradientTime 243 | } 244 | 245 | let averagePureForward = totalPureForwardTime / Double(trials) 246 | let averageGradient = totalGradientTime / Double(trials) 247 | 248 | print("trials: \(trials)") 249 | print("timesteps: \(timesteps)") 250 | print("average forward only time: \(averagePureForward) seconds") 251 | print("average forward and back (gradient) time: \(averageGradient) seconds") 252 | -------------------------------------------------------------------------------- /Benchmarks/BuildingSimulation/TensorFlow/TensorFlowSimulator.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | 4 | # Simulation parameters 5 | trials = 100 6 | timesteps = 20 7 | warmup = 3 8 | dTime = 0.1 9 | printGradToCompare = False 10 | 11 | @tf.function 12 | def doMath(a): 13 | return a * 2 14 | 15 | # Definitions 16 | 17 | π = 3.14159265359 18 | 19 | 20 | # TubeType and other custom object holding primitives will be represented with a 1D Tensor, 21 | # and SimParams will compose them into a 2D tensor 22 | 23 | # make each 1D Tensor the same length, to avoid having to use Ragged Tensors 24 | # with padding added to match length of other 1D Tensors 25 | TubeType = tf.constant([0.50292, 0.019, 0.001588, 2.43, 0.0]) 26 | 27 | # define indexes for sanity's sake 28 | class TubeTypeIndices: 29 | itubeSpacing = 0 30 | idiameter = 1 31 | ithickness = 2 32 | iresistivity = 3 33 | 34 | SlabType = tf.constant([21.1111111, 100.0, 0.2, 2242.58, 0.101]) 35 | 36 | class SlabTypeIndices: 37 | itemp = 0 38 | iarea = 1 39 | iCp = 2 40 | idensity = 3 41 | ithickness = 4 42 | 43 | QuantaType = tf.constant([0.0, 60.0, 0.0006309, 1000.0, 4180.0]) 44 | 45 | class QuantaIndices: 46 | ipower = 0 47 | itemp = 1 48 | iflow = 2 49 | idensity = 3 50 | iCp = 4 51 | 52 | TankType = tf.constant([70.0, 0.0757082, 4180.0, 1000.0, 75.708]) 53 | 54 | class TankTypeIndices: 55 | itemp = 0 56 | ivolume = 1 57 | iCp = 2 58 | idensity = 3 59 | imass = 4 60 | 61 | # represent starting temp as a 5
length padded Tensor to match other Tensor sizes 62 | # (to avoid having to use Ragged Tensors) 63 | startingTemperature = tf.constant([33.3, 0, 0, 0, 0]) 64 | 65 | 66 | # SimParams will be represented with a 2D Tensor, where each 67 | # member (a custom type itself) is represented by a 1D Tensor 68 | SimParamsConstant = tf.convert_to_tensor([TubeType, SlabType, QuantaType, TankType, startingTemperature]) 69 | 70 | class SimParamsIndices: 71 | itube = 0 72 | islab = 1 73 | iquanta = 2 74 | itank = 3 75 | istartingTemp = 4 76 | 77 | 78 | # Computations 79 | 80 | @tf.function 81 | def computeResistance(floor, tube, quanta): 82 | geometry_coeff = 10.0 83 | 84 | tubingSurfaceArea = (floor[SlabTypeIndices.iarea] / tube[TubeTypeIndices.itubeSpacing]) * π * tube[TubeTypeIndices.idiameter] 85 | resistance_abs = tube[TubeTypeIndices.iresistivity] * tube[TubeTypeIndices.ithickness] / tubingSurfaceArea 86 | 87 | resistance_corrected = resistance_abs * geometry_coeff 88 | 89 | return resistance_corrected 90 | 91 | 92 | @tf.function 93 | def computeLoadPower(floor, tube, quanta): 94 | resistance_abs = computeResistance(floor, tube, quanta) 95 | 96 | conductance = 1/resistance_abs 97 | dTemp = floor[SlabTypeIndices.itemp] - quanta[QuantaIndices.itemp] 98 | power = dTemp * conductance 99 | 100 | loadPower = -power 101 | 102 | resultQuanta = quanta * tf.constant([0.0, 1, 1, 1, 1]) + power * tf.constant([1.0, 0, 0, 0, 0]) 103 | 104 | return (resultQuanta, loadPower) 105 | 106 | 107 | slab, tube, quanta = tf.Variable(SlabType), tf.Variable(TubeType), tf.Variable(QuantaType) 108 | with tf.GradientTape() as tape: 109 | quantaAndPower = computeLoadPower(slab, tube, quanta) 110 | 111 | gradient = tape.gradient(quantaAndPower, [slab, tube, quanta]) 112 | 113 | @tf.function 114 | def updateQuanta(quanta: tf.Tensor) -> tf.Tensor: 115 | workingVolume = (quanta[QuantaIndices.iflow] * dTime) 116 | workingMass = (workingVolume * quanta[QuantaIndices.idensity]) 117 | workingEnergy = quanta[QuantaIndices.ipower] * dTime 118 | TempRise = workingEnergy / quanta[QuantaIndices.iCp] / workingMass 119 | 120 | resultQuanta = quanta + TempRise * tf.constant([0.0, 1, 0, 0, 0]) 121 | resultQuanta = resultQuanta * tf.constant([0.0, 1, 1, 1, 1]) 122 | 123 | return resultQuanta 124 | 125 | quanta = tf.Variable(QuantaType) 126 | with tf.GradientTape() as tape: 127 | tape.watch(quanta) 128 | newQuanta = updateQuanta(quanta) 129 | 130 | gradient = tape.gradient(newQuanta, [quanta]) 131 | 132 | @tf.function 133 | def updateBuildingModel(power, floor): 134 | floorVolume = floor[SlabTypeIndices.iarea] * floor[SlabTypeIndices.ithickness] 135 | floorMass = floorVolume * floor[SlabTypeIndices.idensity] 136 | floorTempChange = (power * dTime) / floor[SlabTypeIndices.iCp] / floorMass 137 | 138 | resultFloor = floor + floorTempChange * tf.constant([1.0, 0, 0, 0, 0]) 139 | 140 | return resultFloor 141 | 142 | inputPower = tf.constant([1.0])[0] 143 | 144 | inputPower = tf.Variable(inputPower) 145 | slab = tf.Variable(SlabType) 146 | with tf.GradientTape() as tape: 147 | tape.watch(inputPower) 148 | tape.watch(slab) 149 | newSlab = updateBuildingModel(inputPower, slab) 150 | 151 | gradient = tape.gradient(newSlab, [inputPower, slab]) 152 | 153 | @tf.function 154 | def updateSourceTank(store, quanta): 155 | massPerTime = quanta[QuantaIndices.iflow] * quanta[QuantaIndices.idensity] 156 | dTemp = store[TankTypeIndices.itemp] - quanta[QuantaIndices.itemp] 157 | power = dTemp * massPerTime * quanta[QuantaIndices.iCp] 158 | 159 | updatedQuanta = quanta 
* tf.constant([0.0, 1, 1, 1, 1]) + power * tf.constant([1.0, 0, 0, 0, 0]) 160 | 161 | tankMass = store[TankTypeIndices.ivolume] * store[TankTypeIndices.idensity] 162 | TempRise = (power * dTime) / store[TankTypeIndices.iCp] / tankMass 163 | 164 | updatedStore = store + TempRise * tf.constant([1.0, 0, 0, 0, 0]) 165 | 166 | return (updatedStore, updatedQuanta) 167 | 168 | store = tf.Variable(TankType) 169 | quanta = tf.Variable(QuantaType) 170 | with tf.GradientTape() as tape: 171 | tape.watch(store) 172 | tape.watch(quanta) 173 | tankAndQuanta = updateSourceTank(store, quanta) 174 | 175 | gradient = tape.gradient(tankAndQuanta, [store, quanta]) 176 | 177 | simParams = tf.Variable(SimParamsConstant) 178 | 179 | 180 | @tf.function 181 | def lossCalc(pred, gt): 182 | return tf.abs(pred - gt) 183 | 184 | # Simulations 185 | 186 | @tf.function 187 | def simulate(simParams): 188 | pexTube = simParams[SimParamsIndices.itube] 189 | slab = simParams[SimParamsIndices.islab] 190 | tank = simParams[SimParamsIndices.itank] 191 | quanta = simParams[SimParamsIndices.iquanta] 192 | 193 | startingTemp = simParams[SimParamsIndices.istartingTemp][0] 194 | slab = slab * tf.constant([0.0, 1, 1, 1, 1]) + startingTemp * tf.constant([1.0, 0, 0, 0, 0]) 195 | 196 | for i in tf.range(timesteps): 197 | tankAndQuanta = updateSourceTank(tank, quanta) 198 | tank = tankAndQuanta[0] 199 | quanta = tankAndQuanta[1] 200 | 201 | quanta = updateQuanta(quanta) 202 | 203 | quantaAndPower = computeLoadPower(slab, pexTube, quanta) 204 | quanta = quantaAndPower[0] 205 | powerToBuilding = quantaAndPower[1] 206 | quanta = updateQuanta(quanta) 207 | 208 | slab = updateBuildingModel(powerToBuilding, slab) 209 | 210 | return slab[SlabTypeIndices.itemp] 211 | 212 | 213 | 214 | import time 215 | 216 | def measure(function, arguments): 217 | start = time.time() 218 | result = function(arguments) 219 | end = time.time() 220 | return (end - start, result) 221 | 222 | 223 | @tf.function 224 | def fullPipe(simParams): 225 | pred = simulate(simParams) 226 | loss = lossCalc(pred, 27.344767) 227 | return loss 228 | 229 | 230 | @tf.function 231 | def getGradient(simParams): 232 | with tf.GradientTape() as tape: 233 | endTemperature = simulate(simParams) 234 | 235 | gradient = tape.gradient(endTemperature, [simParams]) 236 | return gradient 237 | 238 | 239 | totalForwardTime = 0 240 | totalGradientTime = 0 241 | 242 | for i in range(trials + warmup): 243 | 244 | forwardTime, forwardOutput = measure(fullPipe, SimParamsConstant) 245 | 246 | simParams = tf.Variable(SimParamsConstant) 247 | 248 | gradientTime, gradient = measure(getGradient, simParams) 249 | 250 | if printGradToCompare: 251 | print(gradient) 252 | 253 | if i >= warmup: 254 | totalForwardTime += forwardTime 255 | totalGradientTime += gradientTime 256 | 257 | 258 | averageForwardTime = totalForwardTime / trials 259 | averageGradientTime = totalGradientTime / trials 260 | 261 | print("trials:", trials) 262 | print("timesteps:", timesteps) 263 | print(f"average forward only time: {averageForwardTime} seconds") 264 | print(f"average forward and backwards (gradient) time: {averageGradientTime} seconds") 265 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/FuzzedFunctions.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | import Foundation 3 | 4 | // Functions generated via a fuzzer using standard math operators. 
5 | 6 | @differentiable(reverse) 7 | func fuzzedMath1(_ x0: Float, _ x1: Float, _ x2: Float) -> Float { 8 | var y = x0; 9 | let t3 = x0 + x2 + x2; 10 | let t4 = x1 + t3; 11 | let t5 = x0 + t3; 12 | let t6 = t3 - x1; 13 | let t7 = x1 * t5; 14 | let t8 = t7 + x1 + x0; 15 | let t10 = t8 + x0; 16 | let t11 = t4 * cos(t10 * (180 / Float.pi)); 17 | let t12 = sin(x2 * t8); 18 | let t13 = t3 * t8 * t4; 19 | let t14 = t11 - t11; 20 | let t15 = x1 - t4 - x1; 21 | let t16 = t8 * sin(t6 * (180 / Float.pi)); 22 | let t17 = t3 * t3; 23 | let t18 = t11 - x1 - t13; 24 | let t19 = sin(t10 * t15); 25 | let t20 = sin(t17 * t14); 26 | let t22 = t17 * t13; 27 | let t23 = x2 * t12 * t11; 28 | let t24 = t13 - t23 - t17 - t22; 29 | let t25 = t6 - t6; 30 | let t27 = x1 + x0; 31 | let t31 = t25 - t19 - t20; 32 | let t33 = t18 + t19 + x1; 33 | let t35 = (t3 * t12 - 1); 34 | let t37 = t15 * cos(t16 * (180 / Float.pi)); 35 | let t41 = sin(t35 * t15); 36 | let t49 = t31 / (0.001 + t24); 37 | let t51 = (x2 * t49 - 1); 38 | let t54 = t8 * sin(t25 * (180 / Float.pi)); 39 | let t64 = t20 * t24 * t25; 40 | let t72 = t41 * t27 * t33; 41 | let t78 = t14 - t72 - t54; 42 | let t86 = t78 + t64; 43 | let t102 = t37 * t86 * t51; 44 | let t = t102; 45 | y += t; 46 | return y; 47 | } 48 | @differentiable(reverse) 49 | func fuzzedMath2(_ x0: Float, _ x1: Float, _ x2: Float) -> Float { 50 | var y = x0; 51 | let t3 = x2 * cos(x2 * (180 / Float.pi)); 52 | let t4 = t3 * x0; 53 | let t5 = (t4 * t4 - 1); 54 | let t6 = x2 * x1 * t3; 55 | let t7 = t4 * cos(x1 * (180 / Float.pi)); 56 | let t8 = x2 * sin(x1 * (180 / Float.pi)); 57 | let t9 = t7 / (0.001 + t3); 58 | let t10 = x0 * cos(x0 * (180 / Float.pi)); 59 | let t12 = sin(t9 * t8); 60 | let t13 = t5 * cos(t10 * (180 / Float.pi)); 61 | let t14 = (t7 * t8 - 1); 62 | let t15 = t10 + t4 + x2; 63 | let t16 = (t3 * t7 - 1); 64 | let t17 = (t16 * t4 - 1); 65 | let t18 = t5 + t3 + t16; 66 | let t19 = t4 + t16; 67 | let t22 = t3 + t19 + t6; 68 | let t23 = t22 / (0.001 + t22); 69 | let t24 = t15 * cos(x0 * (180 / Float.pi)); 70 | let t26 = sin(t8 * t15); 71 | let t27 = t26 - x1; 72 | let t31 = t7 * sin(t12 * (180 / Float.pi)); 73 | let t32 = t7 - t22 - t26 - t23; 74 | let t33 = t16 * cos(t3 * (180 / Float.pi)); 75 | let t35 = t15 - t14 - t33; 76 | let t36 = t8 + x0 + x1; 77 | let t39 = t6 / (0.001 + t6); 78 | let t40 = t27 * cos(t8 * (180 / Float.pi)); 79 | let t41 = t16 / (0.001 + t35); 80 | let t46 = (t17 * t32 - 1); 81 | let t50 = t18 + t41 + t46; 82 | let t52 = x2 + t39; 83 | let t54 = t40 * t4 * t31; 84 | let t61 = t36 / (0.001 + t52); 85 | let t64 = t50 * cos(t24 * (180 / Float.pi)); 86 | let t74 = t14 + t13 + t54; 87 | let t90 = t74 - t61 - t10; 88 | let t98 = t90 / (0.001 + t64); 89 | let t102 = t98 / (0.001 + t9); 90 | let t = t102; 91 | y += t; 92 | return y; 93 | } 94 | 95 | 96 | // Functions generated via a fuzzer incorporating a ternary operator. 
97 | 98 | @differentiable(reverse) 99 | func fuzzedMathTernary1(_ x0: Float, _ x1: Float, _ x2: Float) -> Float { 100 | var y = x0; 101 | let t3 = x1 + x1 + x1; 102 | let t4 = x1 * x1 * x0; 103 | let t5 = x0 - x2 - t4; 104 | let t6 = (t4 + t3) / (t4 - t3 + 0.001); 105 | let t7 = x2 + x0 + t5; 106 | let t9 = (x1 * t7 - 1); 107 | let t10 = sin(t4) * sin(t6); 108 | let t11 = sin(t6) * sin(t6); 109 | let t12 = cos(t9) * cos(t6); 110 | let t15 = t12 / (0.001 + x1); 111 | let t16 = x0 * t7 * x1; 112 | let t17 = t6 / (0.001 + x1); 113 | let t18 = sin(t10) * sin(t4); 114 | let t19 = (t11 + t16) / (t11 - t16 + 0.001); 115 | let t22 = (t11 * t11 - 1); 116 | let t23 = (x1 * t10 - 1); 117 | let t25 = t17 < t23 ? t17 : t23; 118 | let t26 = t16 / (0.001 + t12); 119 | let t28 = t26 / (0.001 + t16); 120 | let t30 = t28 * sin(t23 * (180 / Float.pi)); 121 | let t31 = t28 * t18 * t19; 122 | let t33 = t18 + t28 + t5 + t31 + t15; 123 | let t41 = (t33 + t6) / (t33 - t6 + 0.001); 124 | let t42 = t7 * t6 * t30; 125 | let t43 = t16 < t18 ? t16 : t18; 126 | let t59 = cos(t12) * cos(t25); 127 | let t81 = t42 + t59 + t22 + t43 + t41; 128 | let t102 = t81 + t33 + t11; 129 | let t = t102; 130 | y += t; 131 | return y; 132 | } 133 | 134 | @differentiable(reverse) 135 | func fuzzedMathTernary2(_ x0: Float, _ x1: Float, _ x2: Float) -> Float { 136 | var y = x0; 137 | let t3 = x2 * x1; 138 | let t4 = t3 / (0.001 + t3); 139 | let t5 = x2 + t3 + x0; 140 | let t6 = t4 - t3; 141 | let t8 = x1 * sin(x1 * (180 / Float.pi)); 142 | let t9 = t5 * sin(x2 * (180 / Float.pi)); 143 | let t10 = t8 - t6 - t9; 144 | let t11 = t6 * t8; 145 | let t12 = (t10 + t4) / (t10 - t4 + 0.001); 146 | let t13 = x2 * t12; 147 | let t14 = t6 / (0.001 + t11); 148 | let t15 = t8 - x1 - x2; 149 | let t18 = sin(x2) * sin(t14); 150 | let t19 = t12 < t6 ? t12 : t6; 151 | let t20 = t4 * x0; 152 | let t21 = (t14 + t8) / (t14 - t8 + 0.001); 153 | let t22 = (t6 + x1) / (t6 - x1 + 0.001); 154 | let t23 = sin(x1 * t5); 155 | let t25 = t18 * t20 * t13; 156 | let t31 = t21 - t6 - t19 - t23; 157 | let t34 = t15 - t31 - t13 - t25; 158 | let t49 = t5 > t22 ? 
t5 : t22; 159 | let t102 = (t34 * t49 - 1); 160 | let t = t102; 161 | y += t; 162 | return y; 163 | } 164 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/LanguageCoverageBenchmarks.swift: -------------------------------------------------------------------------------- 1 | import Benchmark 2 | import Foundation 3 | import _Differentiation 4 | 5 | enum CustomMeasurement { 6 | static let forward = BenchmarkMetric.custom("run forward (ns)", polarity: .prefersSmaller, useScalingFactor: true) 7 | static let reverse = BenchmarkMetric.custom("run reverse (ns)", polarity: .prefersSmaller, useScalingFactor: true) 8 | static let ratio = BenchmarkMetric.custom("ratio", polarity: .prefersSmaller, useScalingFactor: true) 9 | } 10 | 11 | extension Benchmark { 12 | @discardableResult 13 | convenience init?(_ name: String, forward: @escaping (Benchmark) -> (), reverse: @escaping (Benchmark) -> ()) { 14 | self.init(name, configuration: .init(metrics: [CustomMeasurement.forward, CustomMeasurement.reverse, CustomMeasurement.ratio])) { benchmark in 15 | let startForward = BenchmarkClock.now 16 | forward(benchmark) 17 | let endForward = BenchmarkClock.now 18 | let startReverse = BenchmarkClock.now 19 | reverse(benchmark) 20 | let endReverse = BenchmarkClock.now 21 | 22 | let forward = Int((endForward - startForward).nanoseconds()) 23 | let reverse = Int((endReverse - startReverse).nanoseconds()) 24 | 25 | benchmark.measurement(CustomMeasurement.forward, forward) 26 | benchmark.measurement(CustomMeasurement.reverse, reverse) 27 | benchmark.measurement(CustomMeasurement.ratio, reverse / forward) 28 | } 29 | } 30 | } 31 | 32 | let benchmarks = { 33 | Benchmark.defaultConfiguration = .init( 34 | warmupIterations: 1, 35 | scalingFactor: .kilo 36 | ) 37 | 38 | // Simple functions. 39 | 40 | Benchmark( 41 | "one operation", 42 | forward: { benchmark in 43 | for _ in benchmark.scaledIterations { 44 | blackHole(oneOperation(a: 2)) 45 | } 46 | }, 47 | reverse: { benchmark in 48 | for _ in benchmark.scaledIterations { 49 | blackHole(gradient(at: 2, of: oneOperation)) 50 | } 51 | } 52 | ) 53 | Benchmark( 54 | "sixteen operations", 55 | forward: { benchmark in 56 | for _ in benchmark.scaledIterations { 57 | blackHole(sixteenOperations(a: 2)) 58 | } 59 | }, 60 | reverse: { benchmark in 61 | for _ in benchmark.scaledIterations { 62 | blackHole(gradient(at: 2, of: sixteenOperations)) 63 | } 64 | } 65 | ) 66 | Benchmark( 67 | "two composed operations", 68 | forward: { benchmark in 69 | for _ in benchmark.scaledIterations { 70 | blackHole(twoComposedOperations(a: 2)) 71 | } 72 | }, 73 | reverse: { benchmark in 74 | for _ in benchmark.scaledIterations { 75 | blackHole(gradient(at: 2, of: twoComposedOperations)) 76 | } 77 | } 78 | ) 79 | Benchmark( 80 | "sixteen composed operations", 81 | forward: { benchmark in 82 | for _ in benchmark.scaledIterations { 83 | blackHole(sixteenComposedOperations(a: 2)) 84 | } 85 | }, 86 | reverse: { benchmark in 87 | for _ in benchmark.scaledIterations { 88 | blackHole(gradient(at: 2, of: sixteenComposedOperations)) 89 | } 90 | } 91 | ) 92 | 93 | // Functions with loops. 
94 | 95 | Benchmark( 96 | "one operation looped (small)", 97 | forward: { benchmark in 98 | for _ in benchmark.scaledIterations { 99 | blackHole(oneOperationLoopedSmall(a: 2)) 100 | } 101 | }, 102 | reverse: { benchmark in 103 | for _ in benchmark.scaledIterations { 104 | blackHole(gradient(at: 2, of: oneOperationLoopedSmall)) 105 | } 106 | } 107 | ) 108 | Benchmark( 109 | "four operations looped (small)", 110 | forward: { benchmark in 111 | for _ in benchmark.scaledIterations { 112 | blackHole(fourOperationsLoopedSmall(a: 2)) 113 | } 114 | }, 115 | reverse: { benchmark in 116 | for _ in benchmark.scaledIterations { 117 | blackHole(gradient(at: 2, of: fourOperationsLoopedSmall)) 118 | } 119 | } 120 | ) 121 | Benchmark( 122 | "sixteen operations looped (small)", 123 | forward: { benchmark in 124 | for _ in benchmark.scaledIterations { 125 | blackHole(sixteenOperationsLoopedSmall(a: 2)) 126 | } 127 | }, 128 | reverse: { benchmark in 129 | for _ in benchmark.scaledIterations { 130 | blackHole(gradient(at: 2, of: sixteenOperationsLoopedSmall)) 131 | } 132 | } 133 | ) 134 | Benchmark( 135 | "one operation looped", 136 | forward: { benchmark in 137 | for _ in benchmark.scaledIterations { 138 | blackHole(oneOperationLooped(a: 2)) 139 | } 140 | }, 141 | reverse: { benchmark in 142 | for _ in benchmark.scaledIterations { 143 | blackHole(gradient(at: 2, of: oneOperationLooped)) 144 | } 145 | } 146 | ) 147 | Benchmark( 148 | "two operations looped", 149 | forward: { benchmark in 150 | for _ in benchmark.scaledIterations { 151 | blackHole(twoOperationsLooped(a: 2)) 152 | } 153 | }, 154 | reverse: { benchmark in 155 | for _ in benchmark.scaledIterations { 156 | blackHole(gradient(at: 2, of: twoOperationsLooped)) 157 | } 158 | } 159 | ) 160 | Benchmark( 161 | "four operations looped", 162 | forward: { benchmark in 163 | for _ in benchmark.scaledIterations { 164 | blackHole(fourOperationsLooped(a: 2)) 165 | } 166 | }, 167 | reverse: { benchmark in 168 | for _ in benchmark.scaledIterations { 169 | blackHole(gradient(at: 2, of: fourOperationsLooped)) 170 | } 171 | } 172 | ) 173 | Benchmark( 174 | "eight operations looped", 175 | forward: { benchmark in 176 | for _ in benchmark.scaledIterations { 177 | blackHole(eightOperationsLooped(a: 2)) 178 | } 179 | }, 180 | reverse: { benchmark in 181 | for _ in benchmark.scaledIterations { 182 | blackHole(gradient(at: 2, of: eightOperationsLooped)) 183 | } 184 | } 185 | ) 186 | Benchmark( 187 | "sixteen operations looped", 188 | forward: { benchmark in 189 | for _ in benchmark.scaledIterations { 190 | blackHole(sixteenOperationsLooped(a: 2)) 191 | } 192 | }, 193 | reverse: { benchmark in 194 | for _ in benchmark.scaledIterations { 195 | blackHole(gradient(at: 2, of: sixteenOperationsLooped)) 196 | } 197 | } 198 | ) 199 | Benchmark( 200 | "two composed operations looped", 201 | forward: { benchmark in 202 | for _ in benchmark.scaledIterations { 203 | blackHole(twoComposedOperationsLooped(a: 2)) 204 | } 205 | }, 206 | reverse: { benchmark in 207 | for _ in benchmark.scaledIterations { 208 | blackHole(gradient(at: 2, of: twoComposedOperationsLooped)) 209 | } 210 | } 211 | ) 212 | Benchmark( 213 | "sixteen composed operations looped", 214 | forward: { benchmark in 215 | for _ in benchmark.scaledIterations { 216 | blackHole(sixteenComposedOperationsLooped(a: 2)) 217 | } 218 | }, 219 | reverse: { benchmark in 220 | for _ in benchmark.scaledIterations { 221 | blackHole(gradient(at: 2, of: sixteenComposedOperationsLooped)) 222 | } 223 | } 224 | ) 225 | 226 | // Arithmetic 
and control flow functions generated by a fuzzer. 227 | 228 | Benchmark( 229 | "fuzzed arithmetic 1", 230 | forward: { benchmark in 231 | for _ in benchmark.scaledIterations { 232 | blackHole(fuzzedMath1(1.0, 2.0, 3.0)) 233 | } 234 | }, 235 | reverse: { benchmark in 236 | for _ in benchmark.scaledIterations { 237 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMath1)) 238 | } 239 | } 240 | ) 241 | Benchmark( 242 | "fuzzed arithmetic 2", 243 | forward: { benchmark in 244 | for _ in benchmark.scaledIterations { 245 | blackHole(fuzzedMath2(1.0, 2.0, 3.0)) 246 | } 247 | }, 248 | reverse: { benchmark in 249 | for _ in benchmark.scaledIterations { 250 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMath2)) 251 | } 252 | } 253 | ) 254 | 255 | Benchmark( 256 | "fuzzed arithmetic with ternary operators 1", 257 | forward: { benchmark in 258 | for _ in benchmark.scaledIterations { 259 | blackHole(fuzzedMathTernary1(1.0, 2.0, 3.0)) 260 | } 261 | }, 262 | reverse: { benchmark in 263 | for _ in benchmark.scaledIterations { 264 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMathTernary1)) 265 | } 266 | } 267 | ) 268 | Benchmark( 269 | "fuzzed arithmetic with ternary operators 2", 270 | forward: { benchmark in 271 | for _ in benchmark.scaledIterations { 272 | blackHole(fuzzedMathTernary2(1.0, 2.0, 3.0)) 273 | } 274 | }, 275 | reverse: { benchmark in 276 | for _ in benchmark.scaledIterations { 277 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMathTernary2)) 278 | } 279 | } 280 | ) 281 | } 282 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/LoopedFunctions.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | // Simple functions in short, constant-sized loops. 4 | 5 | let smallLoopIterations = 8 6 | 7 | @differentiable(reverse) 8 | func oneOperationLoopedSmall(a: Float) -> Float { 9 | var a = a 10 | for _ in 0..<smallLoopIterations { 11 | a = a * 2 12 | } 13 | return a 14 | } 15 | 16 | @differentiable(reverse) 17 | func fourOperationsLoopedSmall(a: Float) -> Float { 18 | var a = a 19 | for _ in 0..<smallLoopIterations { 20 | a = 3 / a * 2 21 | a = 3 / a * 2 22 | } 23 | return a 24 | } 25 | 26 | @differentiable(reverse) 27 | func sixteenOperationsLoopedSmall(a: Float) -> Float { 28 | var a = a 29 | for _ in 0..<smallLoopIterations { 30 | a = 3 / a * 2 31 | a = 3 / a * 2 32 | a = 3 / a * 2 33 | a = 3 / a * 2 34 | a = 3 / a * 2 35 | a = 3 / a * 2 36 | a = 3 / a * 2 37 | a = 3 / a * 2 38 | } 39 | return a 40 | } 41 | 42 | // Simple functions in longer loops. 43 | 44 | let loopIterations = 1000 45 | 46 | @differentiable(reverse) 47 | func oneOperationLooped(a: Float) -> Float { 48 | var a = a 49 | for _ in 0..<loopIterations { 50 | a = a * 2 51 | } 52 | return a 53 | } 54 | 55 | @differentiable(reverse) 56 | func twoOperationsLooped(a: Float) -> Float { 57 | var a = a 58 | for _ in 0..<loopIterations { 59 | a = 3 / a * 2 60 | } 61 | return a 62 | } 63 | 64 | @differentiable(reverse) 65 | func fourOperationsLooped(a: Float) -> Float { 66 | var a = a 67 | for _ in 0..<loopIterations { 68 | a = 3 / a * 2 69 | a = 3 / a * 2 70 | } 71 | return a 72 | } 73 | 74 | @differentiable(reverse) 75 | func eightOperationsLooped(a: Float) -> Float { 76 | var a = a 77 | for _ in 0..<loopIterations { 78 | a = 3 / a * 2 79 | a = 3 / a * 2 80 | a = 3 / a * 2 81 | a = 3 / a * 2 82 | } 83 | return a 84 | } 85 | 86 | @differentiable(reverse) 87 | func sixteenOperationsLooped(a: Float) -> Float { 88 | var a = a 89 | for _ in 0..<loopIterations { 90 | a = 3 / a * 2 91 | a = 3 / a * 2 92 | a = 3 / a * 2 93 | a = 3 / a * 2 94 | a = 3 / a * 2 95 | a = 3 / a * 2 96 | a = 3 / a * 2 97 | a = 3 / a * 2 98 | } 99 | return a 100 | } 101 | 102 | // Composed functions in loops. 103 | 104 | @differentiable(reverse) 105 | func twoComposedOperationsLooped(a: Float) -> Float { 106 | var a = a 107 | for _ in 0..<loopIterations { 108 | a = oneOperationHelper(a: oneOperation(a: a)) 109 | } 110 | return a 111 | } 112 | 113 | @differentiable(reverse) 114 | func sixteenComposedOperationsLooped(a: Float) -> Float { 115 | var a = a 116 | for _ in 0..<loopIterations { 117 | a = oneOperationHelper(a: oneOperation(a: a)) 118 | a = oneOperationHelper(a: oneOperation(a: a)) 119 | a = oneOperationHelper(a: oneOperation(a: a)) 120 | a = oneOperationHelper(a: oneOperation(a: a)) 121 | a = oneOperationHelper(a: oneOperation(a: a)) 122 | a = oneOperationHelper(a: oneOperation(a: a)) 123 | a = oneOperationHelper(a: oneOperation(a: a)) 124 | a = oneOperationHelper(a: oneOperation(a: a)) 125 | } 126 | return a 127 | } 128 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/SimpleFunctions.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | // Simple sequences of basic operations. 4 | 5 | @differentiable(reverse) 6 | func oneOperation(a: Float) -> Float { 7 | return a * 2 8 | } 9 | 10 | @differentiable(reverse) 11 | func sixteenOperations(a: Float) -> Float { 12 | let b = 3 / a * 2 13 | let c = 3 / b * 2 14 | let d = 3 / c * 2 15 | let e = 3 / d * 2 16 | let f = 3 / e * 2 17 | let g = 3 / f * 2 18 | let h = 3 / g * 2 19 | return 3 / h * 2 20 | } 21 | 22 | // Simple function composition.
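// Each composed call's pullback captures and invokes the pullback of the function it wraps, so
// composition depth directly stresses how well the compiler optimizes chains of pullback closures.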
23 | 24 | @differentiable(reverse) 25 | func oneOperationHelper(a: Float) -> Float { 26 | return 3 / a 27 | } 28 | 29 | @differentiable(reverse) 30 | func twoComposedOperations(a: Float) -> Float { 31 | oneOperationHelper(a: oneOperation(a: a)) 32 | } 33 | 34 | @differentiable(reverse) 35 | func sixteenComposedOperations(a: Float) -> Float { 36 | let b = oneOperation(a: a) 37 | let c = oneOperationHelper(a: b) 38 | let d = oneOperation(a: c) 39 | let e = oneOperationHelper(a: d) 40 | let f = oneOperation(a: e) 41 | let g = oneOperationHelper(a: f) 42 | let h = oneOperation(a: g) 43 | let i = oneOperationHelper(a: h) 44 | let b2 = oneOperation(a: i) 45 | let c2 = oneOperationHelper(a: b2) 46 | let d2 = oneOperation(a: c2) 47 | let e2 = oneOperationHelper(a: d2) 48 | let f2 = oneOperation(a: e2) 49 | let g2 = oneOperationHelper(a: f2) 50 | let h2 = oneOperation(a: g2) 51 | let i2 = oneOperationHelper(a: h2) 52 | return i2 53 | } 54 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "pins" : [ 3 | { 4 | "identity" : "hdrhistogram-swift", 5 | "kind" : "remoteSourceControl", 6 | "location" : "https://github.com/HdrHistogram/hdrhistogram-swift", 7 | "state" : { 8 | "revision" : "a69fa24d7b70421870cafa86340ece900489e17e", 9 | "version" : "0.1.2" 10 | } 11 | }, 12 | { 13 | "identity" : "package-benchmark", 14 | "kind" : "remoteSourceControl", 15 | "location" : "https://github.com/ordo-one/package-benchmark", 16 | "state" : { 17 | "revision" : "ddf6c1ae01e139120bcdb917ece52819ee69d47a", 18 | "version" : "1.22.1" 19 | } 20 | }, 21 | { 22 | "identity" : "swift-argument-parser", 23 | "kind" : "remoteSourceControl", 24 | "location" : "https://github.com/apple/swift-argument-parser", 25 | "state" : { 26 | "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41", 27 | "version" : "1.3.0" 28 | } 29 | }, 30 | { 31 | "identity" : "swift-atomics", 32 | "kind" : "remoteSourceControl", 33 | "location" : "https://github.com/apple/swift-atomics", 34 | "state" : { 35 | "revision" : "cd142fd2f64be2100422d658e7411e39489da985", 36 | "version" : "1.2.0" 37 | } 38 | }, 39 | { 40 | "identity" : "swift-numerics", 41 | "kind" : "remoteSourceControl", 42 | "location" : "https://github.com/apple/swift-numerics", 43 | "state" : { 44 | "revision" : "0a5bc04095a675662cf24757cc0640aa2204253b", 45 | "version" : "1.0.2" 46 | } 47 | }, 48 | { 49 | "identity" : "swift-system", 50 | "kind" : "remoteSourceControl", 51 | "location" : "https://github.com/apple/swift-system", 52 | "state" : { 53 | "revision" : "025bcb1165deab2e20d4eaba79967ce73013f496", 54 | "version" : "1.2.1" 55 | } 56 | }, 57 | { 58 | "identity" : "texttable", 59 | "kind" : "remoteSourceControl", 60 | "location" : "https://github.com/ordo-one/TextTable", 61 | "state" : { 62 | "revision" : "a27a07300cf4ae322e0079ca0a475c5583dd575f", 63 | "version" : "0.0.2" 64 | } 65 | } 66 | ], 67 | "version" : 2 68 | } 69 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.9 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "LanguageCoverageBenchmark", 7 | platforms: [ 8 | .macOS(.v13) 9 | ], 10 | products: [ 11 | .executable(name: "LanguageCoverageBenchmarks", targets: ["LanguageCoverageBenchmarks"]) 12 | ], 13 | 
dependencies: [ 14 | .package(url: "https://github.com/ordo-one/package-benchmark", from: "1.22.1"), 15 | ], 16 | targets: [ 17 | .executableTarget( 18 | name: "LanguageCoverageBenchmarks", 19 | dependencies: [ 20 | .product(name: "Benchmark", package: "package-benchmark"), 21 | ], 22 | path: "Benchmarks/LanguageCoverageBenchmarks", 23 | plugins: [ 24 | .plugin(name: "BenchmarkPlugin", package: "package-benchmark"), 25 | ] 26 | ) 27 | ] 28 | ) 29 | -------------------------------------------------------------------------------- /Benchmarks/LanguageSuite/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks of Language Coverage 2 | 3 | A primary capability of differentiable Swift is the automatic generation of reverse-mode 4 | derivatives (pullbacks) from arbitrary Swift functions. Ideally, those generated pullbacks 5 | would have roughly the same performance as running the original code (the forward pass). 6 | 7 | However, Swift is a complex language, and the performance of the generated pullback code currently varies 8 | with the structure of the original functions. This benchmark suite is intended to cover a range 9 | of representative Swift code to verify pullback performance and guide future optimizations. 10 | 11 | ## Running Benchmarks 12 | 13 | A Swift toolchain with support for differentiation must be installed and in your current path. We 14 | recommend using one [downloaded from Swift.org](https://www.swift.org/download/) for your platform. 15 | Nightly toolchain snapshots tend to have better performance, because new optimizations and 16 | architectural improvements are constantly being upstreamed. More information on toolchain installation 17 | and management can be found [here](https://passivelogic.github.io/differentiable-swift-examples/documentation/differentiableswiftexamples/setup). 18 | 19 | Build and run the benchmarks via the following: 20 | ```bash 21 | swift package benchmark 22 | ``` 23 | 24 | When using a recent Swift.org nightly toolchain snapshot on macOS, you may run into segfault issues when running from the terminal. This happens because the executable uses the system runtime instead of the toolchain-provided one. 25 | It is also possible to run the benchmarks from Xcode ([more info here](https://swiftpackageindex.com/ordo-one/package-benchmark/1.22.1/documentation/benchmark/runningbenchmarks#Running-benchmarks-in-Xcode-and-using-Instruments-for-profiling-benchmarks)). 26 | Make sure Xcode is closed and run the following to open Xcode with jemalloc disabled: 27 | ```bash 28 | open --env BENCHMARK_DISABLE_JEMALLOC=true Package.swift 29 | ``` 30 | Set the executable's scheme to release mode and run it by pressing `Cmd+R`. -------------------------------------------------------------------------------- /Benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Differentiable Swift Benchmarks 2 | 3 | This directory hosts benchmarks of differentiable Swift's runtime performance. 4 | 5 | Currently, it contains the following benchmark: 6 | 7 | - [A building simulator](BuildingSimulation/) implemented in Swift, PyTorch, and TensorFlow to compare timings between the three.
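For a quick, self-contained timing probe outside these suites, Swift's built-in clocks are enough to compare a forward pass against its gradient. The sketch below is illustrative only; the function `f` and the iteration count are arbitrary and not part of the benchmark suites:

```swift
import _Differentiation

@differentiable(reverse)
func f(_ x: Double) -> Double {
    3 / x * 2
}

let clock = ContinuousClock()
// Time the forward pass alone.
let forward = clock.measure {
    for _ in 0..<100_000 { _ = f(2.0) }
}
// Time the forward-plus-reverse pass via the gradient.
let reverse = clock.measure {
    for _ in 0..<100_000 { _ = gradient(at: 2.0, of: f) }
}
print("forward: \(forward), reverse: \(reverse)")
```

Note that without a black-hole mechanism the optimizer may eliminate some of this work, which is one reason the real suites route results through package-benchmark's `blackHole`.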
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | 204 | 205 | ## Runtime Library Exception to the Apache 2.0 License: ## 206 | 207 | 208 | As an exception, if you use this Software to compile your source code and 209 | portions of this Software are embedded into the binary product as a result, 210 | you may redistribute such product without providing attribution as would 211 | otherwise be required by Sections 4(a), 4(b) and 4(d) of the License. 212 | -------------------------------------------------------------------------------- /Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "pins" : [ 3 | { 4 | "identity" : "swift-docc-plugin", 5 | "kind" : "remoteSourceControl", 6 | "location" : "https://github.com/apple/swift-docc-plugin", 7 | "state" : { 8 | "revision" : "26ac5758409154cc448d7ab82389c520fa8a8247", 9 | "version" : "1.3.0" 10 | } 11 | }, 12 | { 13 | "identity" : "swift-docc-symbolkit", 14 | "kind" : "remoteSourceControl", 15 | "location" : "https://github.com/apple/swift-docc-symbolkit", 16 | "state" : { 17 | "revision" : "b45d1f2ed151d057b54504d653e0da5552844e34", 18 | "version" : "1.0.0" 19 | } 20 | } 21 | ], 22 | "version" : 2 23 | } 24 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.6 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "DifferentiableSwiftExamples", 7 | products: [ 8 | .executable(name: "BasicDifferentiation", targets: ["BasicDifferentiation"]), 9 | .executable(name: "BasicGradientDescent", targets: ["BasicGradientDescent"]), 10 | .executable(name: "CustomDerivatives", targets: ["CustomDerivatives"]), 11 | .library(name: "DifferentiableSwiftExamples", targets: ["DifferentiableSwiftExamples"]), 12 | ], 13 | dependencies: [ 14 | .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.3.0") 15 | ], 16 | targets: [ 17 | .executableTarget(name: "BasicDifferentiation"), 18 | .executableTarget(name: "BasicGradientDescent"), 19 | .executableTarget(name: "CustomDerivatives"), 20 | .target( 21 | name: "DifferentiableSwiftExamples", 22 | path: "Sources/DifferentiableSwiftExamplesDocumentation" 23 | ), 24 | ] 25 | ) 26 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # Differentiable Swift Examples 2 | 3 | Differentiable Swift is an experimental language feature for the [Swift language](https://www.swift.org) that is currently 4 | in the [pitch phase](https://forums.swift.org/t/differentiable-programming-for-gradient-based-machine-learning/42147) of 5 | the Swift Evolution process. The goal of this feature is to provide first-class, language-integrated support for 6 | differentiable programming, making Swift the first general-purpose, statically typed programming language to have automatic 7 | differentiation built in. Originally developed as part of the 8 | [Swift for TensorFlow](https://www.tensorflow.org/swift/guide/overview) project, it is now being advanced by teams 9 | at [PassiveLogic](https://passivelogic.com) and elsewhere. 10 | 11 | Differentiable Swift is purely a language feature and isn't tied to any specific machine learning framework or platform. 12 | It provides a means of building such frameworks in Swift, and works wherever Swift does: from Linux to macOS to 13 | [WebAssembly](https://swiftwasm.org). 14 | 15 | The goal of this repository is to provide examples and documentation for differentiable Swift, to illustrate how it can be 16 | used, and to show the power of automatic differentiation in various applications. We hope to grow this over time with new 17 | examples and documentation, and welcome contributions to that end. 18 | 19 | ## Documentation 20 | DocC-generated documentation can be found at https://passivelogic.github.io/differentiable-swift-examples 21 | 22 | ## Getting started 23 | 24 | Differentiable Swift is present as an experimental feature in modern Swift toolchains. Because it is evolving rapidly, 25 | for best results we recommend using a Swift toolchain downloaded [from Swift.org](https://www.swift.org/download/) 26 | from either the Swift 5.9 development snapshots or the nightly development snapshots. The latter will more closely track 27 | the latest additions and fixes being upstreamed but may be slightly less stable overall. 28 | 29 | It is possible to use differentiable Swift with the default Swift toolchains that ship inside Xcode; however, only the 30 | compiler additions are present in those toolchains. The standard library support needed to use the `_Differentiation` module 31 | is not provided in those toolchains and needs to be added after the fact. One example of how to do this can be found 32 | in [this project](https://github.com/philipturner/differentiation). 33 | 34 | No special compiler flags are needed to activate differentiable Swift, but you do need to place the following: 35 | 36 | ```swift 37 | import _Differentiation 38 | ``` 39 | 40 | in any file where differentiation will be used. The compiler will warn you if you forget to add this import 41 | and try to use any differentiable Swift capabilities. 42 | 43 | ## Examples 44 | 45 | The following examples are present in the repository, and can be built and run via: 46 | 47 | ```bash 48 | swift run [example] 49 | ``` 50 | 51 | - [BasicDifferentiation](Sources/BasicDifferentiation/main.swift): A very simple example of using automatic differentiation with a few different functions and types. 52 | - [CustomDerivatives](Sources/CustomDerivatives/main.swift): Differentiable Swift lets you register custom derivatives for functions, and this shows how to do so.
53 | - [BasicGradientDescent](Sources/BasicGradientDescent/main.swift): How to perform gradient descent optimization in Swift. 54 | 55 | 56 | ## Benchmarks 57 | 58 | A motivating benchmark of a building thermal model, optimized via gradient descent, is implemented 59 | in several languages and frameworks to compare against differentiable Swift in the [Benchmarks/BuildingSimulation](Benchmarks/BuildingSimulation/) directory. 60 | 61 | ## Differentiable Swift resources 62 | 63 | If you want to learn more about differentiable Swift, there are a variety of resources out there. The API has changed over time, 64 | so some older documentation may provide great background on the feature but not fully reflect code as it is written today. 65 | 66 | - [Differentiable programming for gradient-based machine learning](https://forums.swift.org/t/differentiable-programming-for-gradient-based-machine-learning/42147) 67 | - The Intro to Differentiable Swift series: 68 | - [Part 0: Why Automatic Differentiation is Awesome](https://medium.com/passivelogic/intro-to-differentiable-swift-part-0-why-automatic-differentiation-is-awesome-a522128ca9e3) 69 | - [Part 1: Gradient Descent](https://medium.com/passivelogic/intro-to-differentiable-swift-part-1-gradient-descent-181a06aaa596) 70 | - [Part 2: Differentiable Swift](https://medium.com/passivelogic/intro-to-differentiable-swift-part-2-differentiable-swift-25a99b97087f) 71 | - [Part 3: Differentiable API Introduction](https://medium.com/passivelogic/intro-to-differentiable-swift-part-3-differentiable-api-introduction-2d8d747e0ac8) 72 | - [Part 4: Differentiable Swift API Details](https://medium.com/passivelogic/intro-to-differentiable-swift-part-4-differentiable-swift-api-details-b6368c2dae5) 73 | - [Differentiable Programming Manifesto](https://github.com/apple/swift/blob/main/docs/DifferentiableProgramming.md) (note: slightly out of date) 74 | - The Swift for TensorFlow project explored the use of differentiable Swift paired with machine learning frameworks: 75 | - [Overview of Swift for TensorFlow](https://www.tensorflow.org/swift/guide/overview) 76 | - [Main Swift for TensorFlow GitHub repository](https://github.com/tensorflow/swift) 77 | - [Swift for TensorFlow machine learning APIs](https://github.com/tensorflow/swift-apis) 78 | - [Machine learning models and libraries](https://github.com/tensorflow/swift-models) 79 | 80 | -------------------------------------------------------------------------------- /Sources/BasicDifferentiation/main.swift: -------------------------------------------------------------------------------- 1 | // First, we need to enable differentiable Swift via a special import: 2 | 3 | import _Differentiation 4 | 5 | // You can mark a function as being differentiable if it has at least one differentiable 6 | // parameter and a differentiable result. The `@differentiable` annotation is used to mark the 7 | // function, and the `reverse` specifier further clarifies that we want to use reverse-mode 8 | // differentiation. In the current implementation of differentiable Swift, only reverse-mode 9 | // differentiation is fully functional. 10 | 11 | @differentiable(reverse) 12 | func square(_ x: Float) -> Float { 13 | return x * x 14 | } 15 | 16 | // Note that an inout value takes the place of both parameter and result, and a mutating function 17 | // implicitly passes `self` as inout.
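// As an illustrative sketch of the mutating case (this hypothetical `Scale` type isn't used
// elsewhere in this example), a mutating method on a Differentiable type is marked the same way:
//
//     struct Scale: Differentiable {
//         var factor: Float
//
//         @differentiable(reverse)
//         mutating func double() {
//             factor = factor * 2
//         }
//     }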
18 | 19 | @differentiable(reverse) 20 | func squared(_ x: inout Float) { 21 | x = x * x 22 | } 23 | 24 | // To declare a type as being differentiable, it needs to conform to the Differentiable protocol. 25 | // Generally, types are differentiable if they are continuous or if all of their properties are 26 | // continuous and Differentiable. However, Differentiable types can have non-Differentiable 27 | // properties, if those properties are annotated with @noDerivative. Those non-Differentiable 28 | // properties will then not participate in differentiation. 29 | // 30 | // Differentiable properties must also be declared as `var` and not `let`, because gradient descent 31 | // must be able to move them by a tangent vector. 32 | 33 | struct MyValue: Differentiable { 34 | var x: Float 35 | var y: Double 36 | @noDerivative 37 | let isTracked: Bool 38 | } 39 | 40 | // To activate the differentiation machinery, there are some special built-in functions in the 41 | // Differentiation module within the Swift standard library that can give you the value from 42 | // the forward pass through a differentiable function as well as the backward pass. 43 | // 44 | // For functions with scalar outputs, `valueWithGradient(at:of:)` will return both the value and 45 | // the calculated gradient at a given input value: 46 | 47 | let (value, gradient) = valueWithGradient(at: 3.0, of: square) 48 | print("The value is \(value), and the gradient is \(gradient)") 49 | 50 | // In the more general case, `valueWithPullback(at:of:)` will provide the value and a pullback 51 | // function for a differentiable function. For the Float-returning function above, the gradient 52 | // is obtained by passing 1 into the pullback function: 53 | 54 | let (value2, pullback) = valueWithPullback(at: 3.0, of: square) 55 | print("The value is \(value2), and the pullback at 1.0 is \(pullback(1.0))") 56 | -------------------------------------------------------------------------------- /Sources/BasicGradientDescent/main.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | // In this example, we'll set up a very simple perceptron neural network and try to use gradient 4 | // descent to have it mimic the functionality of an AND gate. 5 | 6 | struct Perceptron: Differentiable { 7 | var weight1: Float = .random(in: -1..<1) 8 | var weight2: Float = .random(in: -1..<1) 9 | var bias: Float = 0.0 10 | 11 | @differentiable(reverse) 12 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 13 | // Determine the weighted contribution from each input, plus bias. 14 | let output = (weight1 * x1) + (weight2 * x2) + bias 15 | // Apply a nonlinear activation function to the output. 16 | if output >= 0.0 { 17 | return output 18 | } else { 19 | return 0.1 * output 20 | } 21 | } 22 | } 23 | 24 | // This is our truth table for the expected output from various inputs. 25 | 26 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 27 | (x1: 0, x2: 0, y: 0), 28 | (x1: 0, x2: 1, y: 0), 29 | (x1: 1, x2: 0, y: 0), 30 | (x1: 1, x2: 1, y: 1), 31 | ] 32 | 33 | // A loss function provides a measure of how far off we are from our target behavior.
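// Concretely, the loss below computes L = Σ (y - prediction)² / 2 over the four truth-table rows;
// the factor of 1/2 makes each term's derivative with respect to the prediction come out as a
// clean (prediction - y).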
34 | 35 | @differentiable(reverse) 36 | func loss(model: Perceptron) -> Float { 37 | var loss: Float = 0 38 | for (x1, x2, y) in andGateData { 39 | let prediction = model(x1, x2) 40 | let error = y - prediction 41 | loss = loss + error * error / 2 42 | } 43 | return loss 44 | } 45 | 46 | // Finally, we initialize the model with random weights and a zero bias: 47 | 48 | var model = Perceptron() 49 | 50 | // and then we perform training by finding the loss, determining a tangent vector that would 51 | // take us in a direction that should reduce that loss, and moving our model parameters by 52 | // that tangent vector. Over the course of training, we'll watch our loss values decrease as the 53 | // model is trained to replicate an AND gate. 54 | 55 | for _ in 0..<100 { 56 | let (loss, pullback) = valueWithPullback(at: model, of: loss) 57 | print("Loss: \(loss)") 58 | let gradient = pullback(-0.1) // Seeding the pullback with -0.1 scales the gradient by the learning rate and flips its sign, yielding a downhill step. 59 | model.move(by: gradient) 60 | } 61 | 62 | // Let's try out our trained model on some test values: 63 | 64 | print("Trained model results:") 65 | 66 | let value1 = model(1.0, 0.0) 67 | 68 | print("Value at (1.0, 0.0): \(value1)") 69 | 70 | let value2 = model(1.0, 1.0) 71 | 72 | print("Value at (1.0, 1.0): \(value2)") 73 | -------------------------------------------------------------------------------- /Sources/CustomDerivatives/main.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import _Differentiation 3 | 4 | // In addition to compiler-generated derivatives, you can register your own custom derivatives 5 | // for any function to make them differentiable. This is particularly useful for functions that 6 | // have been defined in C libraries, like basic math functions. 7 | // 8 | // As an example of this, the following is a custom derivative defined for the `sqrt()` function. 9 | // `sqrt()` is a function where we don't have access to modify the original source code, so we 10 | // need to be able to register a derivative for it so that it can be used as part of differentiable 11 | // functions. 12 | // 13 | // To do so, we define a vector-Jacobian product (VJP) (for more, see the excellent JAX 14 | // documentation: https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html#vector-jacobian-products-vjps-aka-reverse-mode-autodiff ) 15 | // The VJP takes as its input the original parameters to the main function and provides as output 16 | // a tuple containing the value produced by the original function and a pullback function. The 17 | // pullback has as its inputs the tangent vectors of each differentiable result and as its output 18 | // the tangent vectors of each differentiable parameter. Note that for some types, like Double, the 19 | // type of the tangent vector is the same as the base type.
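// As a quick sanity check on the code below: d/dx sqrt(x) = 1 / (2 * sqrt(x)), so the pullback
// scales the incoming tangent vector by 1 / (2 * output).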
20 | 21 | @derivative(of: sqrt) 22 | public func sqrtVJP(_ value: Double) -> (value: Double, pullback: (Double) -> Double) { 23 | let output = sqrt(value) 24 | func pullback(_ tangentVector: Double) -> Double { 25 | return tangentVector / (2 * output) 26 | } 27 | return (value: output, pullback: pullback) 28 | } 29 | 30 | // Once a custom derivative has been defined for a function, that function is now differentiable: 31 | 32 | let (value, gradient) = valueWithGradient(at: 9.0, of: sqrt) 33 | print("The sqrt() value is \(value), and the gradient is \(gradient)") 34 | 35 | // Custom derivatives are also useful in cases where the function may not be continuous across 36 | // all values, and thus may not have a derivative at all points. We can then provide custom 37 | // derivatives that specify an approximation that we can use, such as in the case of `min()`: 38 | // 39 | // For min(): "Returns: The lesser of `x` and `y`. If `x` is equal to `y`, returns `x`." 40 | // https://github.com/apple/swift/blob/main/stdlib/public/core/Algorithm.swift#L18 41 | 42 | @derivative(of: min) 43 | public func minVJP<T: Comparable & Differentiable>( 44 | _ lhs: T, 45 | _ rhs: T 46 | ) -> (value: T, pullback: (T.TangentVector) -> (T.TangentVector, T.TangentVector)) { 47 | func pullback(_ tangentVector: T.TangentVector) -> (T.TangentVector, T.TangentVector) { 48 | if lhs <= rhs { 49 | return (tangentVector, .zero) 50 | } else { 51 | return (.zero, tangentVector) 52 | } 53 | } 54 | return (value: min(lhs, rhs), pullback: pullback) 55 | } 56 | 57 | let (value2, gradient2) = valueWithGradient(at: 3.0, 4.0, of: min) 58 | print("The min() value is \(value2), and the gradient is \(gradient2)") 59 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/DifferentiableSwiftExamples.md: -------------------------------------------------------------------------------- 1 | # ``DifferentiableSwiftExamples`` 2 | 3 | @Metadata { 4 | @DisplayName("Differentiable Swift examples") 5 | } 6 | 7 | Articles and tutorials on the experimental language feature of differentiable Swift. 8 | 9 | ## Overview 10 | 11 | Differentiable Swift is an experimental language feature for the [Swift language](https://www.swift.org) that is currently 12 | in the [pitch phase](https://forums.swift.org/t/differentiable-programming-for-gradient-based-machine-learning/42147) of 13 | the Swift Evolution process. The goal of this feature is to provide first-class, language-integrated support for 14 | differentiable programming, making Swift the first general-purpose, statically typed programming language to have automatic 15 | differentiation built in. 16 | 17 | Differentiable Swift is purely a language feature and isn't tied to any specific machine learning framework or platform. 18 | It provides a means of building such frameworks in Swift, and works wherever Swift does: from Linux to macOS to 19 | [WebAssembly](https://swiftwasm.org).
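For a quick sense of what the feature looks like in practice, here is a minimal sketch (the `cube` function is just an illustration):

```swift
import _Differentiation

@differentiable(reverse)
func cube(_ x: Double) -> Double {
    x * x * x
}

// The derivative of x³ is 3x², so the gradient at 2.0 is 12.0.
let (value, gradient) = valueWithGradient(at: 2.0, of: cube)
print("value: \(value), gradient: \(gradient)")  // value: 8.0, gradient: 12.0
```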
20 | 21 | 22 | ## Topics 23 | 24 | ### Articles 25 | 26 | - <doc:Setup> 27 | - <doc:UsingDifferentiableSwift> 28 | - <doc:SharpEdgesInDifferentiableSwift> 29 | 30 | ### Tutorials 31 | 32 | - <doc:UsingDifferentiableSwift> 33 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-01.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-02.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | func squared(_ input: Double) -> Double { 4 | input * input 5 | } 6 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-03.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | @differentiable(reverse) 4 | func squared(_ input: Double) -> Double { 5 | input * input 6 | } 7 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-04.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | @differentiable(reverse) 4 | func squared(_ input: Double) -> Double { 5 | input * input 6 | } 7 | 8 | let (value, gradient) = valueWithGradient(at: 3.0, of: squared) 9 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-05.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | @differentiable(reverse) 4 | func squared(_ input: Double) -> Double { 5 | input * input 6 | } 7 | 8 | let (value, gradient) = valueWithGradient(at: 3.0, of: squared) 9 | print("The value is \(value), and the gradient is \(gradient).") 10 | 11 | // The value is 9.0, and the gradient is 6.0.
12 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-01.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | func squared(_ input: Double) -> Double { 4 | input * input 5 | } 6 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-02.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | func squared(_ input: Double) -> Double { 4 | input * input 5 | } 6 | 7 | @derivative(of: squared) 8 | func vjpSquared() 9 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-03.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | func squared(_ input: Double) -> Double { 4 | input * input 5 | } 6 | 7 | @derivative(of: squared) 8 | func vjpSquared(_ input: Double) -> ( 9 | value: Double, 10 | pullback: (Double.TangentVector) -> Double.TangentVector 11 | ) 12 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-04.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | func squared(_ input: Double) -> Double { 4 | input * input 5 | } 6 | 7 | @derivative(of: squared) 8 | func vjpSquared(_ input: Double) -> ( 9 | value: Double, 10 | pullback: (Double) -> Double 11 | ) 12 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-05.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | func squared(_ input: Double) -> Double { 4 | input * input 5 | } 6 | 7 | @derivative(of: squared) 8 | func vjpSquared(_ input: Double) -> ( 9 | value: Double, 10 | pullback: (Double) -> Double 11 | ) { 12 | let output = squared(input) 13 | func pullback(_ tangentVector: Double) -> Double { 14 | return tangentVector * 2 * input 15 | } 16 | return (value: output, pullback: pullback) 17 | } 18 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-03-01.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import _Differentiation 3 | 4 | let (value, gradient) = valueWithGradient(at: 3.0, 4.0, of: min) 5 | 6 | // error: expression is not differentiable 7 | -------------------------------------------------------------------------------- 
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-03-02.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import _Differentiation 3 | 4 | @derivative(of: min) 5 | public func minVJP<T: Comparable & Differentiable>( 6 | _ lhs: T, 7 | _ rhs: T 8 | ) -> (value: T, pullback: (T.TangentVector) -> (T.TangentVector, T.TangentVector)) { 9 | func pullback(_ tangentVector: T.TangentVector) -> (T.TangentVector, T.TangentVector) { 10 | if lhs <= rhs { 11 | return (tangentVector, .zero) 12 | } else { 13 | return (.zero, tangentVector) 14 | } 15 | } 16 | return (value: min(lhs, rhs), pullback: pullback) 17 | } 18 | 19 | let (value, gradient) = valueWithGradient(at: 3.0, 4.0, of: min) 20 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-03-03.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import _Differentiation 3 | 4 | @derivative(of: min) 5 | public func minVJP<T: Comparable & Differentiable>( 6 | _ lhs: T, 7 | _ rhs: T 8 | ) -> (value: T, pullback: (T.TangentVector) -> (T.TangentVector, T.TangentVector)) { 9 | func pullback(_ tangentVector: T.TangentVector) -> (T.TangentVector, T.TangentVector) { 10 | if lhs <= rhs { 11 | return (tangentVector, .zero) 12 | } else { 13 | return (.zero, tangentVector) 14 | } 15 | } 16 | return (value: min(lhs, rhs), pullback: pullback) 17 | } 18 | 19 | let (value, gradient) = valueWithGradient(at: 3.0, 4.0, of: min) 20 | print("The min() value is \(value), and the gradient is \(gradient).") 21 | 22 | // The min() value is 3.0, and the gradient is (1.0, 0.0).
23 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-01.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-02.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct MyValue { 4 | var x: Float 5 | var y: Double 6 | } 7 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-03.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct MyValue: Differentiable { 4 | var x: Float 5 | var y: Double 6 | } 7 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-04.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct MyValue: Differentiable { 4 | var x: Float 5 | var y: Double 6 | let isTracked: Bool 7 | } 8 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-05.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct MyValue: Differentiable { 4 | var x: Float 5 | var y: Double 6 | @noDerivative 7 | let isTracked: Bool 8 | } 9 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-02-01.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Simple: Differentiable { 4 | var value1: Float 5 | var value2: Double 6 | } 7 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-02-02.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Simple: Differentiable { 4 | var value1: Float 5 | var value2: Double 6 | 7 | struct TangentVector: AdditiveArithmetic, Differentiable { 8 | var otherValue1: Float.TangentVector 9 | var otherValue2: Double.TangentVector 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-02-03.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Simple: Differentiable { 4 | var value1: Float 5 | var 
value2: Double 6 | 7 | struct TangentVector: AdditiveArithmetic, Differentiable { 8 | var otherValue1: Float.TangentVector 9 | var otherValue2: Double.TangentVector 10 | } 11 | 12 | mutating func move(by offset: TangentVector) { 13 | self.value1.move(by: offset.otherValue1) 14 | self.value2.move(by: offset.otherValue2) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-01.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-02.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | } 8 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-03.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | } 8 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-04.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-05.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-06.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * 
x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-07.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | 19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 20 | (x1: 0, x2: 0, y: 0), 21 | (x1: 0, x2: 1, y: 0), 22 | (x1: 1, x2: 0, y: 0), 23 | (x1: 1, x2: 1, y: 1), 24 | ] 25 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-08.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | 19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 20 | (x1: 0, x2: 0, y: 0), 21 | (x1: 0, x2: 1, y: 0), 22 | (x1: 1, x2: 0, y: 0), 23 | (x1: 1, x2: 1, y: 1), 24 | ] 25 | 26 | @differentiable(reverse) 27 | func loss(model: Perceptron) -> Float { 28 | var loss: Float = 0 29 | for (x1, x2, y) in andGateData { 30 | let prediction = model(x1, x2) 31 | let error = y - prediction 32 | loss = loss + error * error / 2 33 | } 34 | return loss 35 | } 36 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-09.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | 19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 20 | (x1: 0, x2: 0, y: 0), 21 | (x1: 0, x2: 1, y: 0), 22 | (x1: 1, x2: 0, y: 0), 23 | (x1: 1, x2: 1, y: 1), 24 | ] 25 | 26 | @differentiable(reverse) 27 | func loss(model: Perceptron) -> Float { 28 | var loss: Float = 0 29 | for (x1, x2, y) in andGateData { 30 | let prediction = model(x1, x2) 31 | let error = y - prediction 32 | loss = loss + error * error / 2 33 | } 34 | return loss 35 | } 36 | 37 | var model = Perceptron() 38 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-10.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | 19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 20 | (x1: 0, x2: 0, y: 0), 21 | (x1: 0, x2: 1, y: 0), 22 | (x1: 1, x2: 0, y: 0), 23 | (x1: 1, x2: 1, y: 1), 24 | ] 25 | 26 | @differentiable(reverse) 27 | func loss(model: Perceptron) -> Float { 28 | var loss: Float = 0 29 | for (x1, x2, y) in andGateData { 30 | let prediction = model(x1, x2) 31 | let error = y - prediction 32 | loss = loss + error * error / 2 33 | } 34 | return loss 35 | } 36 | 37 | var model = Perceptron() 38 | 39 | for _ in 0..<100 { 40 | } 41 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-11.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | 19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 20 | (x1: 0, x2: 0, y: 0), 21 | (x1: 0, x2: 1, y: 0), 22 | (x1: 1, x2: 0, y: 0), 23 | (x1: 1, x2: 1, y: 1), 24 | ] 25 | 26 | @differentiable(reverse) 27 | func loss(model: Perceptron) -> Float { 28 | var loss: Float = 0 29 | for (x1, x2, y) in andGateData { 30 | let prediction = model(x1, x2) 31 | let error = y - prediction 32 | loss = loss + error * error / 2 33 | } 34 | return loss 35 | } 36 | 37 | var model = Perceptron() 38 | 39 | for _ in 0..<100 { 40 | let (loss, pullback) = valueWithPullback(at: model, of: loss) 41 | print("Loss: \(loss)") 42 | } 43 | -------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-12.swift: -------------------------------------------------------------------------------- 1 | import _Differentiation 2 | 3 | struct Perceptron: Differentiable { 4 | var weight1: Float = .random(in: -1..<1) 5 | var weight2: Float = .random(in: -1..<1) 6 | var bias: Float = 0.0 7 | 8 | @differentiable(reverse) 9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float { 10 | let output = (weight1 * x1) + (weight2 * x2) + bias 11 | if output >= 0.0 { 12 | return output 13 | } else { 14 | return 0.1 * output 15 | } 16 | } 17 | } 18 | 19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [ 20 | (x1: 0, x2: 0, y: 0), 21 | (x1: 0, x2: 1, y: 0), 22 | (x1: 1, x2: 0, y: 0), 
23 |     (x1: 1, x2: 1, y: 1),
24 | ]
25 | 
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 |     var loss: Float = 0
29 |     for (x1, x2, y) in andGateData {
30 |         let prediction = model(x1, x2)
31 |         let error = y - prediction
32 |         loss = loss + error * error / 2
33 |     }
34 |     return loss
35 | }
36 | 
37 | var model = Perceptron()
38 | 
39 | for _ in 0..<100 {
40 |     let (loss, pullback) = valueWithPullback(at: model, of: loss)
41 |     print("Loss: \(loss)")
42 |     let gradient = pullback(-0.1)
43 |     model.move(by: gradient)
44 | }
45 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-13.swift: --------------------------------------------------------------------------------
1 | import _Differentiation
2 | 
3 | struct Perceptron: Differentiable {
4 |     var weight1: Float = .random(in: -1..<1)
5 |     var weight2: Float = .random(in: -1..<1)
6 |     var bias: Float = 0.0
7 | 
8 |     @differentiable(reverse)
9 |     func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 |         let output = (weight1 * x1) + (weight2 * x2) + bias
11 |         if output >= 0.0 {
12 |             return output
13 |         } else {
14 |             return 0.1 * output
15 |         }
16 |     }
17 | }
18 | 
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 |     (x1: 0, x2: 0, y: 0),
21 |     (x1: 0, x2: 1, y: 0),
22 |     (x1: 1, x2: 0, y: 0),
23 |     (x1: 1, x2: 1, y: 1),
24 | ]
25 | 
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 |     var loss: Float = 0
29 |     for (x1, x2, y) in andGateData {
30 |         let prediction = model(x1, x2)
31 |         let error = y - prediction
32 |         loss = loss + error * error / 2
33 |     }
34 |     return loss
35 | }
36 | 
37 | var model = Perceptron()
38 | 
39 | for _ in 0..<100 {
40 |     let (loss, pullback) = valueWithPullback(at: model, of: loss)
41 |     print("Loss: \(loss)")
42 |     let gradient = pullback(-0.1)
43 |     model.move(by: gradient)
44 | }
45 | 
46 | let value1 = model(1.0, 0.0)
47 | print("Value at (1.0, 0.0): \(value1)")
48 | // Value at (1.0, 0.0): 0.1
49 | let value2 = model(1.0, 1.0)
50 | print("Value at (1.0, 1.0): \(value2)")
51 | // Value at (1.0, 1.0): 0.9
52 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Setup.md: --------------------------------------------------------------------------------
1 | # Setup
2 | 
3 | How to set up your environment and get started with differentiable Swift.
4 | 
5 | ## Overview
6 | 
7 | Swift toolchains that ship with Xcode lack the `_Differentiation` module needed by differentiable Swift. In order to use differentiable Swift, you will need to install a toolchain from Swift.org. There are multiple ways to download, install, and manage different versions of toolchains, depending on your needs and platform.
8 | 
9 | After a toolchain has been installed, there are a few things to take into consideration in order to successfully run your differentiable Swift code.
10 | 
11 | ### Installing a toolchain
12 | 
13 | Toolchains can be manually downloaded and installed from [swift.org](https://swift.org/download). If you do so, you can pick either a stable release or a nightly snapshot. The nightlies often include newer features and performance improvements.
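14 | 
15 | For example, a manual install of a nightly snapshot on Ubuntu 22.04 might look like the following sketch. The exact URL and archive name depend on your platform and the snapshot you choose on [swift.org](https://swift.org/download), so treat the values below as placeholders rather than a definitive recipe:
16 | ```bash
17 | # Hypothetical snapshot name; substitute the toolchain you actually want.
18 | SNAPSHOT=swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a
19 | wget "https://download.swift.org/development/ubuntu2204/${SNAPSHOT}/${SNAPSHOT}-ubuntu22.04.tar.gz"
20 | tar xzf "${SNAPSHOT}-ubuntu22.04.tar.gz"
21 | # Put the snapshot's binaries first on PATH for this shell session.
22 | export PATH="$PWD/${SNAPSHOT}-ubuntu22.04/usr/bin:$PATH"
23 | swift --version  # should report the snapshot just installed
24 | ```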
25 | 
26 | When working with different toolchain versions across different projects, we suggest using either of the following tools to manage your toolchains:
27 | 
28 | - [swiftenv](https://github.com/kylef/swiftenv), available for macOS and Linux
29 | - [swiftly](https://github.com/swift-server/swiftly), available for Linux (macOS support is on the roadmap)
30 | 
31 | ### Compiling differentiable Swift code
32 | 
33 | Compiling differentiable Swift code on Linux is easy! There's no extra setup needed, and if you're working on a Swift package you can simply run:
34 | ```bash
35 | swift run
36 | ```
37 | 
38 | On macOS, when running code directly from Xcode, the IDE handles all toolchain-specific configuration for you.
39 | 
40 | However, when compiling differentiable Swift code on macOS at the command line, we have to set the following environment variables to make sure our custom toolchain uses the right macOS SDK and Swift runtime:
41 | 
42 | ```bash
43 | export SDKROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.sdk
44 | ```
45 | ```bash
46 | export DYLD_LIBRARY_PATH=/Library/Developer/Toolchains/swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a.xctoolchain/usr/lib/swift/macosx
47 | ```
48 | 
49 | Now everything is set up and you can simply run:
50 | ```bash
51 | swift run
52 | ```
53 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/SharpEdgesInDifferentiableSwift.md: --------------------------------------------------------------------------------
1 | # Sharp edges in differentiable Swift
2 | 
3 | An overview of some of the currently missing capabilities in differentiable Swift.
4 | 
5 | ## Overview
6 | 
7 | This is an overview of some of the capabilities currently missing from differentiable Swift. Inspired by the original [Swift for TensorFlow notebook](https://www.tensorflow.org/swift/tutorials/Swift_autodiff_sharp_edges), we intend for this to be an up-to-date and comprehensive list of common issues you may encounter when working with the feature.
8 | 
9 | - Loops
10 | - Map and Reduce
11 | - Array subscript setters
12 | - Floating point type conversions
13 | - Keypath subscripting
14 | - Other
15 | 
16 | ### Loops
17 | Loops over collections of `Differentiable` values unfortunately aren't differentiable yet, so the compiler cannot currently determine the derivative of the following function:
18 | ```swift
19 | @differentiable(reverse)
20 | func loopy(values: [Double]) -> Double {
21 |     var total = 0.0
22 |     for value in values {
23 |         total += value
24 |     }
25 |     return total
26 | }
27 | ```
28 | 
29 | Luckily, there are ways around this! Reading a value at a certain index is differentiable, so as a workaround we can loop over the indices instead of over the values directly, as seen in the example below. The one thing to note here is that we access the indices by wrapping them in `withoutDerivative(at:)`. This tells the compiler that we don't want to take the derivative of the property that returns all indices (which is not differentiable, since indices are discrete values).
30 | ```swift
31 | @differentiable(reverse)
32 | func loopy(values: [Double]) -> Double {
33 |     var total = 0.0
34 |     for index in withoutDerivative(at: values.indices) {
35 |         total += values[index]
36 |     }
37 |     return total
38 | }
39 | ```
40 | 
41 | This will return the correct gradient for this function given a certain input:
42 | ```swift
43 | let (value, gradient) = valueWithGradient(at: [1.0, 2.0, 3.0], of: loopy)
44 | // value = 6.0
45 | // gradient = [1.0, 1.0, 1.0], i.e. a change in any of the values in the array will affect the output of the function equally.
46 | ```
47 | 
48 | ### Map and Reduce
49 | The `map` and `reduce` methods do not currently accept closures marked with `@differentiable`, but there are special differentiable versions of these methods that work exactly like the ones you're used to:
50 | ```swift
51 | let a = [1.0, 2.0, 3.0]
52 | let aPlusOne = a.differentiableMap { $0 + 1.0 } // [2.0, 3.0, 4.0]
53 | let aSum = a.differentiableReduce(0) { $0 + $1 } // 6.0
54 | ```
55 | 
56 | ### Array subscript setters
57 | Currently, the subscript setters on arrays (`array[0] = 1.0`) are not differentiable. Under the hood, this is due to `_modify` subscript accessors not supporting differentiability yet. (Work is ongoing, and this feature should land in Swift soon.)
58 | We can currently get around this, however, by extending the `Array` type with a mutating `update(at:with:)` function:
59 | ```swift
60 | extension Array where Element: Differentiable {
61 |     @differentiable(reverse) // the extension constraint already requires Element: Differentiable
62 |     mutating func update(at index: Int, with newValue: Element) {
63 |         self[index] = newValue
64 |     }
65 | 
66 |     @derivative(of: update)
67 |     mutating func vjpUpdate(at index: Int, with newValue: Element)
68 |         -> (value: Void, pullback: (inout TangentVector) -> (Element.TangentVector))
69 |     {
70 |         self.update(at: index, with: newValue)
71 |         return ((), { v in
72 |             let dElement = v.base[index]
73 |             v.base[index] = .zero
74 |             return dElement
75 |         })
76 |     }
77 | }
78 | ```
79 | The first function wraps the subscript setter and marks it as differentiable. The second function defines a custom vjp (vector-Jacobian product), telling the compiler what the derivative of this wrapped function is.
80 | 
81 | Considering:
82 | ```swift
83 | var b: [Double] = [1.0, 2.0, 3.0]
84 | ```
85 | Then, instead of writing the following (which unfortunately is not differentiable yet; support is coming soon):
86 | ```swift
87 | b[0] = 17.0
88 | ```
89 | We can now write:
90 | ```swift
91 | b.update(at: 0, with: 17.0)
92 | ```
93 | 
94 | ### Floating point type conversions
95 | 
96 | If you're converting between floating-point types such as `Float` and `Double`, be aware that their initializers currently aren't differentiable. This can be remedied by adding a variation of the following extension for the floating-point types you need:
97 | ```swift
98 | extension Float {
99 |     @usableFromInline
100 |     @derivative(of: init(_:))
101 |     static func vjpInit(_ a: Double) -> (value: Float, pullback: (Float) -> Double) {
102 |         func pullback(_ v: Float) -> Double {
103 |             return Double(v)
104 |         }
105 |         return (value: Float(a), pullback: pullback)
106 |     }
107 | }
108 | ```
109 | This allows the following differentiable code to now compile:
110 | ```swift
111 | @differentiable(reverse)
112 | func convertToFloat(value: Double) -> Float {
113 |     Float(value)
114 | }
115 | ```
116 | Hopefully this will be part of the Swift standard library in the near future!
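117 | 
118 | As a quick sanity check (a sketch, assuming the `vjpInit` extension and `convertToFloat` function above are compiled into the same module), the registered derivative behaves like that of an identity function, carrying the tangent back across the type boundary:
119 | ```swift
120 | import _Differentiation
121 | 
122 | // The conversion is linear, so the gradient with respect to the Double input
123 | // is 1.0, and it comes back typed as a Double.
124 | let (converted, gradient) = valueWithGradient(at: 3.0, of: convertToFloat)
125 | print("Converted: \(converted), gradient: \(gradient)")
126 | // Converted: 3.0, gradient: 1.0
127 | ```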
128 | 
129 | ### Keypath subscripting
130 | `KeyPath` subscripting (get or set) doesn't work out of the box, but once again there's a workaround to get similar behaviour:
131 | ```swift
132 | extension Differentiable {
133 |     //-----------------------------------------------------------
134 |     // A read that's O(n) on the backwards pass (because of zeroTangentVector materialization).
135 |     @inlinable
136 |     @differentiable(reverse where Self == TangentVector, T: Differentiable, T == T.TangentVector)
137 |     public func read<T>(at member: WritableKeyPath<Self, T>) -> T {
138 |         return self[keyPath: member]
139 |     }
140 | 
141 |     @inlinable
142 |     @derivative(of: read)
143 |     public func vjpRead<T>(at member: WritableKeyPath<Self, T>) -> (value: T, pullback: (T.TangentVector) -> Self.TangentVector)
144 |     where Self == TangentVector, T: Differentiable, T == T.TangentVector
145 |     {
146 |         return (value: self[keyPath: member], pullback: { downstream in
147 |             var zeroes = self.zeroTangentVector
148 |             zeroes[keyPath: member] = downstream
149 |             return zeroes
150 |         })
151 |     }
152 | }
153 | ```
154 | 
155 | ### Other
156 | 
157 | - Forward-mode differentiation (`@differentiable(forward)` JVPs) is only partially implemented.
158 | - Differentiation through functions tagged with `@_alwaysEmitIntoClient` isn't yet supported. The most common cases of these are SIMD functions, like `.sum()`.
159 | - There is no support yet in the standard library for `Dictionary` differentiation.
160 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/DifferentiableFunctions.tutorial: --------------------------------------------------------------------------------
1 | @Tutorial(time: 15) {
2 |     @Intro(title: "Differentiable functions") {
3 |         This tutorial explores the ways that you can create, use, and customize differentiable Swift functions.
4 |     }
5 | 
6 |     @Section(title: "Marking functions as differentiable") {
7 |         @ContentAndMedia {
8 |             Add the `@differentiable` attribute to a function and obtain its derivative.
9 |         }
10 | 
11 |         @Steps {
12 |             @Step {
13 |                 Start by importing the `_Differentiation` module. Differentiable Swift is an experimental language feature, and is activated only for files that import this module.
14 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-01-01.swift")
15 |             }
16 | 
17 |             @Step {
18 |                 Then create a function that we want to make differentiable.
19 |                 In this case, we'll use a simple function that takes an input and squares it.
20 | 
21 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-01-02.swift")
22 |             }
23 | 
24 |             @Step {
25 |                 Mark the function with `@differentiable(reverse)` to indicate that we want it to be used for reverse-mode differentiation.
26 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-01-03.swift")
27 |             }
28 | 
29 |             Note that all arguments and results of a `@differentiable` function must have types conforming to the `Differentiable` protocol.
30 |             Such types are generally continuous; `Float` and `Double` are among the standard library types that conform.
31 | 
32 |             @Step {
33 |                 The compiler will automatically generate the reverse-mode derivative (pullback) of a `@differentiable(reverse)` function. We can access that automatically-generated pullback via one of several built-in functions.
34 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-01-04.swift")
35 |             }
36 | 
37 |             @Step {
38 |                 Printing the computed value and gradient shows the result of the function and its derivative, as expected.
39 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-01-05.swift")
40 |             }
41 |         }
42 |     }
43 | 
44 |     @Section(title: "Defining custom derivatives") {
45 |         @ContentAndMedia {
46 |             Add a custom derivative to a Swift function.
47 | 
48 |             Functions with differentiable arguments and results can have automatically generated derivatives provided by the compiler.
49 |             However, sometimes you may want to provide your own custom derivatives to override this machinery.
50 |         }
51 | 
52 |         @Steps {
53 |             @Step {
54 |                 We'll start with a function that squares a number.
55 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-02-01.swift")
56 |             }
57 | 
58 |             @Step {
59 |                 Normally, we would use the `@differentiable(reverse)` attribute to let the compiler build a derivative of this function. Instead, we'll register a custom derivative to be associated with this function.
60 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-02-02.swift")
61 |             }
62 | 
63 |             Note: the `vjp` prefix used here stands for vector-Jacobian product.
64 | 
65 |             @Step {
66 |                 The function signature of this custom derivative has a particular shape. It returns a tuple, with the first element being the value that would normally be returned from the base function, and the second a pullback closure.
67 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-02-03.swift")
68 |             }
69 | 
70 |             @Step {
71 |                 Differentiable types have an associated `TangentVector` type that is used in derivatives. In the case of a simple type like `Double`, it is its own `TangentVector`, so we can simplify this.
72 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-02-04.swift")
73 |             }
74 | 
75 |             @Step {
76 |                 Finally, we define the body of the custom derivative by calculating the normal result of the function and then constructing the pullback function.
77 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-02-05.swift")
78 |             }
79 |         }
80 |     }
81 | 
82 |     @Section(title: "Making external functions differentiable") {
83 |         @ContentAndMedia {
84 |             Register a manual derivative for a system function to make it differentiable.
85 | 
86 |             The Swift compiler can automatically generate derivatives for Swift functions within a module, but non-Swift functions or Swift functions defined in an external module and not marked with `@differentiable` will not have automatic derivatives. To make those functions differentiable, you need to register your own derivatives for them.
87 |         }
88 | 
89 |         @Steps {
90 |             @Step {
91 |                 If we try to access the derivative of an external function, we may get an error that it is not differentiable.
92 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-03-01.swift")
93 |             }
94 | 
95 |             @Step {
96 |                 In this case, `min()` has not been marked as `@differentiable` and thus has no automatically-provided derivative. Even though the function definition exists outside of this module, we can register our own derivative for it.
97 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-03-02.swift")
98 |             }
99 | 
100 |             @Step {
101 |                 By doing so, the function is now differentiable and can participate in the rest of the differentiable Swift machinery.
102 |                 @Code(name: "main.swift", file: "DifferentiableFunctions-03-03.swift")
103 |             }
104 |         }
105 |     }
106 | }
107 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/DifferentiableTypes.tutorial: --------------------------------------------------------------------------------
1 | @Tutorial(time: 15) {
2 |     @Intro(title: "Differentiable types") {
3 |         `@differentiable` functions require that their arguments and results conform to the `Differentiable` protocol. Some of the standard library types such as `Float`, `Double`, and `Array` already conform to this protocol.
4 | 
5 |         In this tutorial, we will write our own `Differentiable` types for use with more complex `@differentiable` functions.
6 |     }
7 | 
8 |     @Section(title: "Conforming types to `Differentiable`") {
9 |         @ContentAndMedia {
10 |             Conform custom types to `Differentiable`, and use them as arguments to `@differentiable` functions.
11 | 
12 |             Differentiable functions must have at least one argument and one result each that conform to the `Differentiable` protocol. Many continuous types present in the standard library, like `Float` or `Double`, conform to `Differentiable`. To use your own custom types as active arguments or results in differentiable functions, they too must conform to `Differentiable`.
13 |         }
14 | 
15 |         @Steps {
16 |             @Step {
17 |                 Differentiable Swift is an experimental language feature. To enable it for a file, you must first import the `_Differentiation` module.
18 |                 @Code(name: "main.swift", file: "DifferentiableTypes-01-01.swift")
19 |             }
20 | 
21 |             @Step {
22 |                 Custom types that only contain other `Differentiable`-conforming members are easy to make `Differentiable` themselves.
23 |                 @Code(name: "main.swift", file: "DifferentiableTypes-01-02.swift")
24 |             }
25 | 
26 |             @Step {
27 |                 To do so, simply add a `Differentiable` conformance when they are defined.
28 |                 @Code(name: "main.swift", file: "DifferentiableTypes-01-03.swift")
29 |             }
30 | 
31 |             @Step {
32 |                 A custom type can have non-`Differentiable` members.
33 |                 @Code(name: "main.swift", file: "DifferentiableTypes-01-04.swift")
34 |             }
35 | 
36 |             @Step {
37 |                 In that case, annotate those members with the `@noDerivative` attribute.
38 |                 @Code(name: "main.swift", file: "DifferentiableTypes-01-05.swift")
39 |             }
40 |         }
41 |     }
42 | 
43 |     @Section(title: "Defining custom tangent vectors for types") {
44 |         @ContentAndMedia {
45 |             Define a custom tangent vector for a type.
46 | 
47 |             Every differentiable type has a `TangentVector` associated with it, which represents the rate of change of that type when used in a derivative.
48 |             The compiler normally synthesizes these tangent vectors for you on types conforming to `Differentiable`.
49 | 
50 |             However, there are situations in which you may want to define a custom tangent vector for a type.
51 |             For example, `Array` doesn't conform to `AdditiveArithmetic`, and therefore can't have a tangent vector automatically synthesized.
52 |         }
53 | 
54 |         @Steps {
55 |             @Step {
56 |                 We begin with a simple differentiable type.
57 |                 @Code(name: "main.swift", file: "DifferentiableTypes-02-01.swift")
58 |             }
59 | 
60 |             @Step {
61 |                 A custom tangent vector must either be a nested type named `TangentVector` or be aliased to that name. It can contain a custom internal structure, but must conform to `AdditiveArithmetic` and `Differentiable`.
62 |                 @Code(name: "main.swift", file: "DifferentiableTypes-02-02.swift")
63 |             }
64 | 
65 |             @Step {
66 |                 Finally, a `move(by:)` function must be defined that moves an instance of the base type by a small step along a direction. If the `TangentVector` you are stepping by is composed of other types' `TangentVector`s, you can use those existing `move(by:)` functions within this new one.
67 |                 @Code(name: "main.swift", file: "DifferentiableTypes-02-03.swift")
68 |             }
69 |         }
70 |     }
71 | }
72 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/GradientDescent.tutorial: --------------------------------------------------------------------------------
1 | @Tutorial(time: 20) {
2 |     @Intro(title: "Gradient descent") {
3 |         Gradient descent optimization is a powerful technique for optimizing arbitrary parameters in a complex system to arrive at a desired result.
4 | 
5 |         This tutorial will demonstrate the use of gradient descent to train a perceptron to perform a Boolean logic function.
6 |     }
7 | 
8 |     @Section(title: "Training a perceptron") {
9 |         @ContentAndMedia {
10 |             Train a basic perceptron to mimic an AND gate.
11 | 
12 |             A perceptron is a simple, single-layer neural network. In this case, we'll build one with two inputs, one for each operand of the AND gate. The input connections have trainable weights, and the neuron has a trainable bias, along with a nonlinear activation function.
13 | 
14 |             This perceptron will be trained against the desired behavior of an AND gate, and its weights and bias will be optimized via gradient descent.
15 |         }
16 |         @Steps {
17 |             @Step {
18 |                 Differentiable Swift is an experimental language feature. To begin with, we must enable it by importing the `_Differentiation` module.
19 |                 @Code(name: "main.swift", file: "GradientDescent-01-01.swift")
20 |             }
21 |             @Step {
22 |                 Let's create the perceptron as a custom data type. The weights for the input connections will be randomly initialized floating-point values, and the bias will start at zero.
23 |                 @Code(name: "main.swift", file: "GradientDescent-01-02.swift")
24 |             }
25 |             @Step {
26 |                 Because all of the properties of our perceptron conform to `Differentiable`, it's easy to make the perceptron itself differentiable by adding the conformance to `Differentiable`.
27 |                 @Code(name: "main.swift", file: "GradientDescent-01-03.swift")
28 |             }
29 |             @Step {
30 |                 A convenient Swift language feature is the ability for structs to be called as if they were functions. To do so, you need to define a `callAsFunction()` method on the struct.
31 |                 @Code(name: "main.swift", file: "GradientDescent-01-04.swift")
32 |             }
33 |             @Step {
34 |                 Within the body of that function, we'll define how our perceptron model works. As a first step, we'll calculate the activation strength from the inbound activation of each input, multiplied by the respective connection weight. The bias is then added to the result.
35 |                 @Code(name: "main.swift", file: "GradientDescent-01-05.swift")
36 |             }
37 |             @Step {
38 |                 To determine how strongly the outbound connection "fires", we'll apply a nonlinear function to it: if the value is positive, pass it through unchanged; if negative, pass along only a small fraction of it. (This nonlinearity is known as a leaky ReLU.)
39 |                 @Code(name: "main.swift", file: "GradientDescent-01-06.swift")
40 |             }
41 |             @Step {
42 |                 The goal is to train this perceptron to behave like an AND gate, so that will be set up as our target.
43 |                 @Code(name: "main.swift", file: "GradientDescent-01-07.swift")
44 |             }
45 |             @Step {
46 |                 To optimize the parameters of the perceptron, we'll set up a loss function that represents how closely our perceptron matches the target behavior. In this case, the lower the loss, the closer the perceptron models an AND gate.
47 |                 @Code(name: "main.swift", file: "GradientDescent-01-08.swift")
48 |             }
49 |             @Step {
50 |                 To start the training process, a new perceptron is initialized.
51 |                 @Code(name: "main.swift", file: "GradientDescent-01-09.swift")
52 |             }
53 |             @Step {
54 |                 We'll train this perceptron for 100 steps in a loop.
55 |                 @Code(name: "main.swift", file: "GradientDescent-01-10.swift")
56 |             }
57 |             @Step {
58 |                 The first part of a training step is to obtain both the current loss value of the perceptron, compared against our AND gate data, and a pullback closure.
59 |                 @Code(name: "main.swift", file: "GradientDescent-01-11.swift")
60 |             }
61 |             @Step {
62 |                 From the pullback closure, we'll determine how to modify the parameters of the perceptron by taking a small step in a direction that should reduce the loss value. The result of calling the pullback is a tangent vector for the perceptron, a type that reflects the rate of change of the perceptron's parameters. Seeding the pullback with `-0.1` folds the learning rate (`0.1`) and the descent direction (the negative sign) into a single value, so the resulting tangent vector already points downhill.
63 | 
64 |                 The `move(by:)` method is provided by the `Differentiable` protocol and causes all of the perceptron's parameters to be adjusted by the tangent vector.
65 |                 @Code(name: "main.swift", file: "GradientDescent-01-12.swift")
66 |             }
67 |             @Step {
68 |                 This stepwise training of a model by continually nudging it in a desired direction is the powerful technique of gradient descent optimization in action.
69 | 
70 |                 At the end of this process, we should have a perceptron that roughly approximates the functioning of an AND gate.
71 |                 @Code(name: "main.swift", file: "GradientDescent-01-13.swift")
72 |             }
73 |         }
74 |     }
75 | }
76 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/UsingDifferentiableSwift.tutorial: --------------------------------------------------------------------------------
1 | @Tutorials(name: "Differentiable Swift") {
2 |     @Intro(title: "Using differentiable Swift") {
3 |         A collection of tutorials on the use of the differentiable Swift language feature.
4 |     }
5 | 
6 |     @Chapter(name: "Differentiable functions and types") {
7 |         How to work with differentiable functions and types.
8 |         @TutorialReference(tutorial: "doc:DifferentiableFunctions")
9 |         @TutorialReference(tutorial: "doc:DifferentiableTypes")
10 |     }
11 | 
12 |     @Chapter(name: "Gradient descent optimization") {
13 |         A simple example of how to harness the power of gradient descent optimization via differentiable Swift.
14 |         @TutorialReference(tutorial: "doc:GradientDescent")
15 |     }
16 | }
17 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/UsingDifferentiableSwift.md: --------------------------------------------------------------------------------
1 | # Using differentiable Swift
2 | Introduces differentiable Swift and how to use it to define differentiable functions and types.
3 | 
4 | ## Overview
5 | 
6 | Differentiable Swift integrates first-class support for automatic differentiation right into the Swift language.
7 | This means that the compiler can generate derivatives of arbitrary Swift code, and the type system can identify and provide clear messages for many common programming errors around differentiability.
8 | 
9 | Differentiable functions are a key enabler of the extremely powerful technique of gradient descent optimization, which powers much of deep learning, and they are useful in many other applications.
10 | 
11 | As an experimental feature, differentiable Swift is gated behind the following import statement:
12 | 
13 | ```swift
14 | import _Differentiation
15 | ```
16 | 
17 | which must be present in any Swift file taking advantage of differentiation.
18 | 
19 | 
20 | ### Differentiable functions
21 | 
22 | You can mark a function as differentiable if it has at least one differentiable parameter and at
23 | least one differentiable result. The `@differentiable` attribute is used to mark the function, and
24 | the `reverse` specifier further clarifies that we want to use reverse-mode differentiation.
25 | 
26 | ```swift
27 | @differentiable(reverse)
28 | func squared(_ x: Float) -> Float {
29 |     return x * x
30 | }
31 | ```
32 | 
33 | In addition to letting the compiler define derivatives for Swift functions, you can register custom
34 | derivatives for any differentiable function. This is necessary if you want non-Swift functions, or
35 | functions that reside in external modules you don't control, to be differentiable. For
36 | example, registering a derivative for the above `squared()` function might look like the following:
37 | 
38 | ```swift
39 | @derivative(of: squared)
40 | func vjpSquared(_ input: Float) -> (
41 |     value: Float,
42 |     pullback: (Float) -> Float
43 | ) {
44 |     let output = squared(input)
45 |     func pullback(_ tangentVector: Float) -> Float {
46 |         return tangentVector * 2 * input
47 |     }
48 |     return (value: output, pullback: pullback)
49 | }
50 | ```
51 | 
52 | ### Differentiable types
53 | 
54 | To declare a type as being differentiable, it needs to conform to the `Differentiable` protocol.
55 | Generally, types are differentiable if they are continuous or if all of their properties are
56 | continuous and `Differentiable`. Differentiable types can have non-`Differentiable` properties, if
57 | those properties are annotated with `@noDerivative`. For example, the following is a custom struct
58 | that is `Differentiable`:
59 | 
60 | ```swift
61 | struct MyValue: Differentiable {
62 |     var x: Float
63 |     var y: Double
64 |     @noDerivative
65 |     let isTracked: Bool
66 | }
67 | ```
68 | 
69 | 
70 | ### Obtaining and working with gradients and pullbacks
71 | 
72 | The `_Differentiation` module in the Swift standard library provides special built-in functions
73 | that give you both the value from the forward pass through a differentiable function and the
74 | results of the backward pass.
75 | 
76 | For functions with scalar outputs, `valueWithGradient(at:of:)` will return both the value and the
77 | calculated gradient at a given input value:
78 | 
79 | ```swift
80 | let (value, gradient) = valueWithGradient(at: 3.0, of: squared)
81 | print("The value is \(value), and the gradient is \(gradient)")
82 | // Prints a value of 9.0 and a gradient of 6.0.
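83 | 
84 | // We can also obtain the pullback itself (a sketch reusing the squared(_:)
85 | // function above): valueWithPullback(at:of:) returns the forward-pass value
86 | // along with a closure mapping an output tangent back to an input tangent.
87 | let (result, pullback) = valueWithPullback(at: 3.0, of: squared)
88 | print("The pullback applied to 1.0 gives \(pullback(1.0))")
89 | // Prints 6.0, matching the gradient above.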
90 | ```
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/EmptyFile.swift: --------------------------------------------------------------------------------
1 | // This is an empty source file used to make DifferentiableSwiftExamplesDocumentation a valid build target.
2 | // DifferentiableSwiftExamplesDocumentation is an otherwise empty target that includes high-level documentation about using automatic differentiation in Swift.
3 | 
-------------------------------------------------------------------------------- /Sources/DifferentiableSwiftExamplesDocumentation/README.md: --------------------------------------------------------------------------------
1 | # Differentiable Swift Examples Documentation
2 | `DifferentiableSwiftExamplesDocumentation` is an otherwise empty target that includes high-level documentation about using the experimental language feature of differentiable Swift.
3 | 
4 | The documentation content in the `DifferentiableSwiftExamples.docc` catalog is published on GitHub Pages at https://passivelogic.github.io/differentiable-swift-examples
5 | 
--------------------------------------------------------------------------------