├── .github
│   └── workflows
│       ├── docc.yml
│       ├── lint.yml
│       └── test.yml
├── .gitignore
├── .swift-format.json
├── Benchmarks
│   ├── BuildingSimulation
│   │   ├── PyTorch
│   │   │   └── PyTorchSimulator.py
│   │   ├── README.md
│   │   ├── Swift
│   │   │   └── main.swift
│   │   └── TensorFlow
│   │       └── TensorFlowSimulator.py
│   ├── LanguageSuite
│   │   ├── Benchmarks
│   │   │   └── LanguageCoverageBenchmarks
│   │   │       ├── FuzzedFunctions.swift
│   │   │       ├── LanguageCoverageBenchmarks.swift
│   │   │       ├── LoopedFunctions.swift
│   │   │       └── SimpleFunctions.swift
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   └── README.md
│   └── README.md
├── LICENSE.txt
├── Package.resolved
├── Package.swift
├── README.md
└── Sources
    ├── BasicDifferentiation
    │   └── main.swift
    ├── BasicGradientDescent
    │   └── main.swift
    ├── CustomDerivatives
    │   └── main.swift
    └── DifferentiableSwiftExamplesDocumentation
        ├── DifferentiableSwiftExamples.docc
        │   ├── DifferentiableSwiftExamples.md
        │   ├── Resources
        │   │   └── Code
        │   │       ├── DifferentiableFunctions
        │   │       │   ├── DifferentiableFunctions-01-01.swift
        │   │       │   ├── DifferentiableFunctions-01-02.swift
        │   │       │   ├── DifferentiableFunctions-01-03.swift
        │   │       │   ├── DifferentiableFunctions-01-04.swift
        │   │       │   ├── DifferentiableFunctions-01-05.swift
        │   │       │   ├── DifferentiableFunctions-02-01.swift
        │   │       │   ├── DifferentiableFunctions-02-02.swift
        │   │       │   ├── DifferentiableFunctions-02-03.swift
        │   │       │   ├── DifferentiableFunctions-02-04.swift
        │   │       │   ├── DifferentiableFunctions-02-05.swift
        │   │       │   ├── DifferentiableFunctions-03-01.swift
        │   │       │   ├── DifferentiableFunctions-03-02.swift
        │   │       │   └── DifferentiableFunctions-03-03.swift
        │   │       ├── DifferentiableTypes
        │   │       │   ├── DifferentiableTypes-01-01.swift
        │   │       │   ├── DifferentiableTypes-01-02.swift
        │   │       │   ├── DifferentiableTypes-01-03.swift
        │   │       │   ├── DifferentiableTypes-01-04.swift
        │   │       │   ├── DifferentiableTypes-01-05.swift
        │   │       │   ├── DifferentiableTypes-02-01.swift
        │   │       │   ├── DifferentiableTypes-02-02.swift
        │   │       │   └── DifferentiableTypes-02-03.swift
        │   │       └── GradientDescent
        │   │           ├── GradientDescent-01-01.swift
        │   │           ├── GradientDescent-01-02.swift
        │   │           ├── GradientDescent-01-03.swift
        │   │           ├── GradientDescent-01-04.swift
        │   │           ├── GradientDescent-01-05.swift
        │   │           ├── GradientDescent-01-06.swift
        │   │           ├── GradientDescent-01-07.swift
        │   │           ├── GradientDescent-01-08.swift
        │   │           ├── GradientDescent-01-09.swift
        │   │           ├── GradientDescent-01-10.swift
        │   │           ├── GradientDescent-01-11.swift
        │   │           ├── GradientDescent-01-12.swift
        │   │           └── GradientDescent-01-13.swift
        │   ├── Setup.md
        │   ├── SharpEdgesInDifferentiableSwift.md
        │   ├── Tutorials
        │   │   ├── DifferentiableFunctions.tutorial
        │   │   ├── DifferentiableTypes.tutorial
        │   │   ├── GradientDescent.tutorial
        │   │   └── UsingDifferentiableSwift.tutorial
        │   └── UsingDifferentiableSwift.md
        ├── EmptyFile.swift
        └── README.md
/.github/workflows/docc.yml:
--------------------------------------------------------------------------------
1 | name: docc
2 |
3 | on:
4 |   push:
5 |     branches: [main]
6 |
7 | permissions:
8 |   pages: write
9 |   id-token: write
10 |   contents: read
11 |
12 | jobs:
13 |   docc:
14 |     environment:
15 |       name: github-pages
16 |       url: ${{ steps.deployment.outputs.page_url }}
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |       - uses: actions/checkout@v3
20 |         with:
21 |           fetch-depth: 0
22 |       - name: install swift
23 |         uses: slashmo/install-swift@v0.4.0
24 |         with:
25 |           version: swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a
26 |       - name: build docs
27 |         run: swift package --allow-writing-to-directory docs generate-documentation --target DifferentiableSwiftExamples --transform-for-static-hosting --hosting-base-path differentiable-swift-examples --output-path docs
28 |       - name: update index
29 |         run: echo "<script>window.location.href += '/documentation/differentiableswiftexamples'</script>" > docs/index.html
30 |       - name: setup pages
31 |         id: pages
32 |         uses: actions/configure-pages@v3
33 |       - name: upload artifact
34 |         uses: actions/upload-pages-artifact@v1
35 |         with:
36 |           path: docs
37 |       - name: deploy to GitHub Pages
38 |         id: deployment
39 |         uses: actions/deploy-pages@v2
40 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |     branches:
9 |       - '*'
10 |
11 | jobs:
12 |   mac:
13 |     name: lint-macOS
14 |     runs-on: macOS-13
15 |     steps:
16 |       - uses: actions/checkout@v3
17 |       - name: install
18 |         run: brew install swift-format
19 |       - name: run
20 |         run: swift-format lint --recursive --parallel --strict --configuration .swift-format.json Package.swift Sources
21 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |     branches:
9 |       - '*'
10 |
11 | jobs:
12 |   linux:
13 |     name: test-ubuntu
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - uses: actions/checkout@v3
17 |       - name: install swift
18 |         uses: slashmo/install-swift@v0.4.0
19 |         with:
20 |           version: swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a
21 |       - name: run
22 |         run: swift build
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.xcuserstate
3 | project.xcworkspace/
4 | xcuserdata/
5 | DerivedData
6 | .idea
7 | *~
8 | .swiftpm/
9 | .build/
10 | .swift-version
11 | *.swp
12 | main
13 | SwiftBenchmark
14 |
--------------------------------------------------------------------------------
/.swift-format.json:
--------------------------------------------------------------------------------
1 | {
2 | "fileScopedDeclarationPrivacy" : {
3 | "accessLevel" : "private"
4 | },
5 | "indentation" : {
6 | "spaces" : 4
7 | },
8 | "indentConditionalCompilationBlocks" : true,
9 | "indentSwitchCaseLabels" : false,
10 | "lineBreakAroundMultilineExpressionChainComponents" : false,
11 | "lineBreakBeforeControlFlowKeywords" : false,
12 | "lineBreakBeforeEachArgument" : false,
13 | "lineBreakBeforeEachGenericRequirement" : false,
14 | "lineLength" : 100,
15 | "maximumBlankLines" : 1,
16 | "multiElementCollectionTrailingCommas" : true,
17 | "noAssignmentInExpressions" : {
18 | "allowedFunctions" : [
19 | "XCTAssertNoThrow"
20 | ]
21 | },
22 | "prioritizeKeepingFunctionOutputTogether" : false,
23 | "respectsExistingLineBreaks" : true,
24 | "rules" : {
25 | "AllPublicDeclarationsHaveDocumentation" : false,
26 | "AlwaysUseLiteralForEmptyCollectionInit" : false,
27 | "AlwaysUseLowerCamelCase" : true,
28 | "AmbiguousTrailingClosureOverload" : true,
29 | "BeginDocumentationCommentWithOneLineSummary" : false,
30 | "DoNotUseSemicolons" : true,
31 | "DontRepeatTypeInStaticProperties" : true,
32 | "FileScopedDeclarationPrivacy" : true,
33 | "FullyIndirectEnum" : true,
34 | "GroupNumericLiterals" : true,
35 | "IdentifiersMustBeASCII" : true,
36 | "NeverForceUnwrap" : false,
37 | "NeverUseForceTry" : false,
38 | "NeverUseImplicitlyUnwrappedOptionals" : false,
39 | "NoAccessLevelOnExtensionDeclaration" : true,
40 | "NoAssignmentInExpressions" : true,
41 | "NoBlockComments" : true,
42 | "NoCasesWithOnlyFallthrough" : true,
43 | "NoEmptyTrailingClosureParentheses" : true,
44 | "NoLabelsInCasePatterns" : true,
45 | "NoLeadingUnderscores" : false,
46 | "NoParensAroundConditions" : true,
47 | "NoPlaygroundLiterals" : true,
48 | "NoVoidReturnOnFunctionSignature" : true,
49 | "OmitExplicitReturns" : false,
50 | "OneCasePerLine" : true,
51 | "OneVariableDeclarationPerLine" : true,
52 | "OnlyOneTrailingClosureArgument" : true,
53 | "OrderedImports" : true,
54 | "ReplaceForEachWithForLoop" : true,
55 | "ReturnVoidInsteadOfEmptyTuple" : true,
56 | "TypeNamesShouldBeCapitalized" : true,
57 | "UseEarlyExits" : false,
58 | "UseLetInEveryBoundCaseVariable" : true,
59 | "UseShorthandTypeNames" : true,
60 | "UseSingleLinePropertyGetter" : true,
61 | "UseSynthesizedInitializer" : true,
62 | "UseTripleSlashForDocumentationComments" : true,
63 | "UseWhereClausesInForLoops" : false,
64 | "ValidateDocumentationComments" : false
65 | },
66 | "spacesAroundRangeFormationOperators" : false,
67 | "tabWidth" : 4,
68 | "version" : 1
69 | }
70 |
--------------------------------------------------------------------------------
/Benchmarks/BuildingSimulation/PyTorch/PyTorchSimulator.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | # Simulation parameters
4 | trials = 100
5 | timesteps = 20
6 | warmup = 3
7 | dTime = 0.1
8 | printGradToCompare = False
9 |
10 | # Definitions
11 | π = 3.14159265359
12 |
13 | # TubeType and the other custom objects holding primitives will each be represented
14 | # by a 1D Tensor, and SimParams will compose them into a 2D Tensor.
15 |
16 | # Make each 1D Tensor the same length (padded with zeros where needed) so that
17 | # ragged tensors are not required.
18 | TubeType = torch.tensor([0.50292, 0.019, 0.001588, 2.43, 0.0], requires_grad=True)
19 |
20 | # define indexes for sanity's sake
21 | class TubeTypeIndices:
22 |     itubeSpacing = 0
23 |     idiameter = 1
24 |     ithickness = 2
25 |     iresistivity = 3
26 |
27 | SlabType = torch.tensor([21.1111111, 100.0, 0.2, 2242.58, 0.101], requires_grad=True)
28 |
29 | class SlabTypeIndices:
30 |     itemp = 0
31 |     iarea = 1
32 |     iCp = 2
33 |     idensity = 3
34 |     ithickness = 4
35 |
36 | QuantaType = torch.tensor([0.0, 60.0, 0.0006309, 1000.0, 4180.0], requires_grad=True)
37 |
38 | class QuantaIndices:
39 |     ipower = 0
40 |     itemp = 1
41 |     iflow = 2
42 |     idensity = 3
43 |     iCp = 4
44 |
45 | TankType = torch.tensor([70.0, 0.0757082, 4180.0, 1000.0, 75.708], requires_grad=True)
46 |
47 | class TankTypeIndices:
48 |     itemp = 0
49 |     ivolume = 1
50 |     iCp = 2
51 |     idensity = 3
52 |     imass = 4
53 |
54 | #------------------------------------------------------------------------
55 | # represent the starting temp as a length-5 padded Tensor to match the other Tensor sizes
56 | # (to avoid having to use ragged tensors)
57 | startingTemperature = torch.tensor([33.3, 0, 0, 0, 0], requires_grad=True)
58 |
59 |
60 | # SimParams will be represented with a 2D Tensor, where each
61 | # member (a custom type itself) is represented by a 1D Tensor
62 | SimParamsConstant = torch.stack([TubeType, SlabType, QuantaType, TankType, startingTemperature])
63 | assert SimParamsConstant.size() == (5,5)
64 |
65 | class SimParamsIndices:
66 |     itube = 0
67 |     islab = 1
68 |     iquanta = 2
69 |     itank = 3
70 |     istartingTemp = 4
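
# Worked example of the index scheme above (illustration only, not used below):
# SimParamsConstant[SimParamsIndices.islab][SlabTypeIndices.itemp] reads the slab
# temperature (21.1111111) back out of the packed 2D tensor.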
71 |
72 |
73 | # Computations
74 |
75 | def computeResistance(floor, tube, quanta):
76 |     geometry_coeff = 10.0
77 |
78 |     tubingSurfaceArea = (floor[SlabTypeIndices.iarea] / tube[TubeTypeIndices.itubeSpacing]) * π * tube[TubeTypeIndices.idiameter]
79 |     resistance_abs = tube[TubeTypeIndices.iresistivity] * tube[TubeTypeIndices.ithickness] / tubingSurfaceArea
80 |
81 |     resistance_corrected = resistance_abs * geometry_coeff
82 |
83 |     return resistance_corrected
84 |
85 |
86 | def computeLoadPower(floor, tube, quanta):
87 |     resistance_abs = computeResistance(floor, tube, quanta)
88 |
89 |     conductance = 1/resistance_abs
90 |     dTemp = floor[SlabTypeIndices.itemp] - quanta[QuantaIndices.itemp]
91 |     power = dTemp * conductance
92 |
93 |     loadPower = -power
94 |     # differentiable "field update": mask out the power slot, then add the new value
95 |     resultQuanta = quanta * torch.tensor([0.0, 1, 1, 1, 1], requires_grad=True) + power * torch.tensor([1.0, 0, 0, 0, 0], requires_grad=True)
96 |
97 |     return (resultQuanta, loadPower)
98 |
99 | def updateQuanta(quanta):
100 |     workingVolume = (quanta[QuantaIndices.iflow] * dTime)
101 |     workingMass = (workingVolume * quanta[QuantaIndices.idensity])
102 |     workingEnergy = quanta[QuantaIndices.ipower] * dTime
103 |     TempRise = workingEnergy / quanta[QuantaIndices.iCp] / workingMass
104 |
105 |     resultQuanta = quanta + TempRise * torch.tensor([0.0, 1, 0, 0, 0])
106 |     resultQuanta = resultQuanta * torch.tensor([0.0, 1, 1, 1, 1])
107 |
108 |     return resultQuanta
109 |
110 | def updateBuildingModel(power, floor):
111 |     floorVolume = floor[SlabTypeIndices.iarea] * floor[SlabTypeIndices.ithickness]
112 |     floorMass = floorVolume * floor[SlabTypeIndices.idensity]
113 |     floorTempChange = (power * dTime) / floor[SlabTypeIndices.iCp] / floorMass
114 |
115 |     resultFloor = floor + floorTempChange * torch.Tensor([1.0, 0, 0, 0, 0])
116 |
117 |     return resultFloor
118 |
119 | def updateSourceTank(store, quanta):
120 |     massPerTime = quanta[QuantaIndices.iflow] * quanta[QuantaIndices.idensity]
121 |     dTemp = store[TankTypeIndices.itemp] - quanta[QuantaIndices.itemp]
122 |     power = dTemp * massPerTime * quanta[QuantaIndices.iCp]
123 |
124 |     updatedQuanta = quanta * torch.Tensor([0.0, 1, 1, 1, 1]) + power * torch.Tensor([1.0, 0, 0, 0, 0])
125 |
126 |     tankMass = store[TankTypeIndices.ivolume] * store[TankTypeIndices.idensity]
127 |     TempRise = (power * dTime) / store[TankTypeIndices.iCp] / tankMass
128 |
129 |     updatedStore = store + TempRise * torch.Tensor([1.0, 0, 0, 0, 0])
130 |
131 |     return (updatedStore, updatedQuanta)
132 |
133 | def lossCalc(pred, gt):
134 |     return torch.abs(pred - gt)
135 |
136 | # Simulations
137 |
138 | def simulate(simParams):
139 |     pexTube = simParams[SimParamsIndices.itube]
140 |     slab = simParams[SimParamsIndices.islab]
141 |     tank = simParams[SimParamsIndices.itank]
142 |     quanta = simParams[SimParamsIndices.iquanta]
143 |
144 |     startingTemp = simParams[SimParamsIndices.istartingTemp][0]
145 |     slab = slab * torch.Tensor([0.0, 1, 1, 1, 1]) + startingTemp * torch.Tensor([1.0, 0, 0, 0, 0])
146 |
147 |     for i in range(0, timesteps):
148 |         tankAndQuanta = updateSourceTank(tank, quanta)
149 |         tank = tankAndQuanta[0]
150 |         quanta = tankAndQuanta[1]
151 |
152 |         quanta = updateQuanta(quanta)
153 |
154 |         quantaAndPower = computeLoadPower(slab, pexTube, quanta)
155 |         quanta = quantaAndPower[0]
156 |         powerToBuilding = quantaAndPower[1]
157 |         quanta = updateQuanta(quanta)
158 |
159 |         slab = updateBuildingModel(powerToBuilding, slab)
160 |
161 |     return slab[SlabTypeIndices.itemp]
162 |
163 | import time
164 |
165 | def measure(function, arguments):
166 |     start = time.time()
167 |     result = function(arguments)
168 |     end = time.time()
169 |     return (end - start, result)
170 |
171 |
172 | def fullPipe(simParams):
173 |     pred = simulate(simParams)
174 |     loss = lossCalc(pred, 27.344767)
175 |     return loss
176 |
177 |
178 | totalForwardTime = 0
179 | totalGradientTime = 0
180 |
181 |
182 | for i in range(trials + warmup):
183 |
184 |     inputs = SimParamsConstant
185 |     forwardTime, forwardOutput = measure(fullPipe, inputs)
186 |
187 |     simParams = SimParamsConstant
188 |     def getGradient(simParams):
189 |         # differentiates the already-computed forwardOutput, so only the backward pass is timed
190 |         gradient = torch.autograd.grad(forwardOutput, inputs)
191 |         return gradient
192 |
193 |     gradientTime, gradient = measure(getGradient, simParams)
194 |
195 |     if printGradToCompare:
196 |         print(gradient)
197 |
198 |     if i >= warmup:
199 |         totalForwardTime += forwardTime
200 |         totalGradientTime += gradientTime
201 |
202 |
203 | averageForwardTime = totalForwardTime / trials
204 | averageGradientTime = totalGradientTime / trials
205 |
206 | print("trials:", trials)
207 | print("timesteps:", timesteps)
208 | print(f"average forward only time: {averageForwardTime} seconds")
209 | print(f"average forward and backwards (gradient) time: {averageGradientTime} seconds")
210 |
--------------------------------------------------------------------------------
/Benchmarks/BuildingSimulation/README.md:
--------------------------------------------------------------------------------
1 | # Differentiable Simulator Benchmarks
2 |
3 | [PassiveLogic](https://passivelogic.com) is constructing autonomous systems for building control and
4 | more, utilizing physics-based digital twins. As a motivating use case for differentiable Swift, a
5 | simple thermal model of a building was constructed and optimized via gradient descent in several
6 | languages and frameworks.
7 |
8 | Differentiable Swift proved to be the fastest and most resource-efficient of the solutions
9 | tested, which has driven PassiveLogic's investment in the language feature. This directory
10 | contains a representative benchmark for a thermal model of a building implemented in
11 | differentiable Swift, [PyTorch](https://pytorch.org), and [TensorFlow](https://www.tensorflow.org).
12 |
13 | In this benchmark, the average time for a full forward + backward pass through the simulation is
14 | measured across multiple trials. The lower the time, the better.
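
In differentiable Swift, one forward + backward pass boils down to a single call to `gradient(at:of:)`, which evaluates the function and then runs its pullback. As a minimal sketch of the kind of call being timed (the actual benchmark lives in `Swift/main.swift`):

```swift
import _Differentiation

@differentiable(reverse)
func loss(_ x: Float) -> Float {
    x * x + 2 * x
}

// One forward + backward pass: evaluates loss(3) and its derivative there.
let dLossAtThree = gradient(at: 3, of: loss)  // d/dx (x * x + 2 * x) at 3 = 8
```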
15 |
16 | ## Running Benchmarks
17 |
18 | To evaluate the benchmarks yourself, the following sections provide setup instructions for the
19 | environments needed for each language / framework. These instructions should be valid for macOS and
20 | Ubuntu 20.04, but may require slight modification for other platforms.
21 |
22 | ### Swift
23 |
24 | A Swift toolchain with support for differentiation must be installed and in your current path. We
25 | recommend using one [downloaded from Swift.org](https://www.swift.org/download/) for your platform.
26 | Nightly toolchain snapshots tend to have better performance, due to new optimizations and
27 | architectural improvements constantly being upstreamed. More information on toolchain installation
28 | and management can be found [here](https://passivelogic.github.io/differentiable-swift-examples/documentation/differentiableswiftexamples/setup).
29 |
30 | When using a recent Swift.org nightly toolchain snapshot on macOS, you may need to set the following environment variables to point to the correct macOS SDK and Swift runtime:
31 | ```bash
32 | export SDKROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.sdk
33 | ```
34 | ```bash
35 | export DYLD_LIBRARY_PATH=/Library/Developer/Toolchains/swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a.xctoolchain/usr/lib/swift/macosx
36 | ```
37 |
38 | To build the benchmark, change into the `Swift` subdirectory and run the following:
39 | ```bash
40 | swiftc -O main.swift -o SwiftBenchmark
41 | ```
42 |
43 | and then run it via:
44 | ```bash
45 | ./SwiftBenchmark
46 | ```
47 |
48 | ### PyTorch
49 |
50 | For these benchmarks, we've used PyTorch on the CPU, running in a dedicated Python environment. If
51 | you have such an environment, you can activate it and jump ahead to running the benchmark. To
52 | set up such an environment, start in your home directory and type:
53 |
54 | ```bash
55 | python3 -m venv pytorch-cpu
56 | source pytorch-cpu/bin/activate
57 | pip install torch torchvision
58 | ```
59 |
60 | and then run the benchmark by going to the `PyTorch` subdirectory here and using:
61 |
62 | ```bash
63 | python3 PyTorchSimulator.py
64 | ```
65 |
66 | ### TensorFlow
67 |
68 | For these benchmarks, we've used TensorFlow on the CPU, running in a dedicated Python environment. If
69 | you have such an environment, you can activate it and jump ahead to running the benchmark. To
70 | set up such an environment, start in your home directory and type:
71 |
72 | ```bash
73 | python3 -m venv tensorflow-cpu
74 | source tensorflow-cpu/bin/activate
75 | pip install tensorflow
76 | ```
77 |
78 | and then run the benchmark by going to the `TensorFlow` subdirectory here and using:
79 |
80 | ```bash
81 | python3 TensorFlowSimulator.py
82 | ```
83 |
84 | ## Current Results
85 |
86 | ### 2024-07-30
87 |
88 | Various optimizations in Differentiable Swift landed in the nightly toolchain dated 2024-06-03. The following benchmarks were run primarily to measure the effect of those optimizations. This optimized version of Differentiable Swift was compared to PyTorch and TensorFlow, as well as the most recent toolchain _without_ these optimizations, which resolved to the nightly toolchain dated 2024-05-15.
89 |
90 | In addition to Forward Only and Gradient measurements, Memory Utilization and Power Consumption were also recorded for comparison. The dimensions of each simulation were scaled from 100 to 100,000 in both number of `trials` and `timesteps`.
91 |
92 | Note that 'Swift Improvement' in the tables below is calculated by dividing each measurement by the corresponding measurement from the optimized Swift column. In other words, a 'Swift Improvement' of 5.2 translates to a measurement being 5.2x longer/larger/more than optimized Swift's measurement.
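
For example, in the 100-trial row of the gradient table below, PyTorch's 0.00431931 seconds divided by optimized Swift's 1.15975E-05 seconds gives a 'Swift Improvement' of roughly 372.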
93 |
94 | #### Environment Setup
95 | Forward Only/Gradient and Memory Utilization results were gathered from the same hardware, with the following specs:
96 | - Model: MacBook Pro, 2021
97 | - CPU: Apple M1 Max
98 | - Memory: 32 GB
99 | - OS: Sonoma 14.5
100 |
101 | Power consumption results were gathered from Jetson Orin NX hardware:
102 | - Model: Jetson Orin NX 16GB
103 | - CPU: 8-core Arm® Cortex®-A78AE v8.2 64-bit CPU 2MB L2 + 4MB L3
104 | - Memory: 16 GB
105 | - OS: Ubuntu 20.04.6 LTS
106 |
107 | #### Forward Only and Gradient times
108 | Results were recorded from each script's execution output. Example Swift output:
109 | ```
110 | $ ./SwiftBenchmark
111 | trials: 1000
112 | timesteps: 1000
113 | average forward only time: 2.1570954999999886e-05 seconds
114 | average forward and back (gradient) time: 0.0002565037070000004 seconds
115 | ```
116 |
117 | ---
118 | ##### Forward only time
119 |
120 |
121 |
122 | | N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
123 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
124 | | 100 | 1.0133E-06 | 1.30496E-06 | 1.3 | 0.00237510204315186 | 2,344 | 0.000805909633636475 | 795 |
125 | | 1000 | 9.86573000000009E-07 | 6.88607999999986E-07 | 0.7 | 0.00232325196266174 | 2,355 | 0.00071248984336853 | 722 |
126 | | 10000 | 5.82377500000031E-07 | 4.38613799999972E-07 | 0.8 | 0.00217494251728058 | 3,735 | 0.000711746025085449 | 1222 |
127 | | 100000 | 4.26006110000326E-07 | 4.15276820000269E-07 | 1.0 | 0.00216188388347626 | 5,075 | 0.000706250309944153 | 1658 |
128 |
129 | | N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
130 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
131 | | 100 | 2.708E-06 | 1.5333E-05 | 5.7 | 0.0117650032043457 | 4344 | 0.0032660961151123 | 1206 |
132 | | 1000 | 2.6625E-05 | 3.9333E-05 | 1.5 | 0.134914875030518 | 5067 | 0.0305349826812744 | 1146 |
133 | | 10000 | 0.0002945 | 0.000286833 | 1.0 | 1.36807107925415 | 4645 | 0.283676862716675 | 963 |
134 | | 100000 | 0.002944209 | 0.002668 | 0.9 | 14.5915961265564 | 4956 | 2.96268224716187 | 1006 |
224 | ---
225 |
226 | ##### Gradient time
227 |
228 |
229 |
230 | | N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
231 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
232 | | 100 | 1.15975E-05 | 7.078584E-05 | 6.1 | 0.00431931495666504 | 372 | 0.00388913154602051 | 335 |
233 | | 1000 | 1.0679188E-05 | 4.90754170000001E-05 | 4.6 | 0.00436905145645142 | 409 | 0.00370328974723816 | 347 |
234 | | 10000 | 6.28462230000005E-06 | 3.25445147000002E-05 | 5.2 | 0.00417288513183594 | 664 | 0.00359320862293243 | 572 |
235 | | 100000 | 4.59303872000145E-06 | 3.11354585500016E-05 | 6.8 | 0.0042010071516037 | 915 | 0.00364944223880768 | 795 |
236 |
237 | | N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
238 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
239 | | 100 | 4.8334E-05 | 0.000240042 | 5.0 | 0.0222558975219727 | 460 | 0.0169031620025635 | 349 |
240 | | 1000 | 0.000373375 | 0.002190209 | 5.9 | 0.242650985717773 | 649 | 0.169112920761108 | 452 |
241 | | 10000 | 0.003654458 | 0.021470334 | 5.9 | 2.81965517997742 | 771 | 48.2916069030762 | 13214\* |
242 | | 100000 | 0.0372425 | 0.179142666 | 4.8 | 36.6493611335754 | 984 | 983.002796888351 | 26394\* |
332 | \* \- Two recommended performance improvements were adopted in TensorFlowSimulator.py before running benchmarks. The first was to decorate `getGradient` with `@tf.function` to disable default eager execution, which generally increased performance. The second was to use `tf.range` instead of Python's `range` function, to avoid 'Large unrolled loop' warnings. This greatly reduced overall memory usage, but seemed to trigger a severe increase in gradient times in the high-timestep cases (10k and 100k). The underlying cause has not yet been identified.
333 |
334 | ---
335 | #### Memory Utilization
336 | For memory utilization, the `time` utility was used to measure 'maximum resident set size' and 'peak memory footprint' (both reported in bytes by macOS's `/usr/bin/time -l`).
337 | Example output:
338 | ```
339 | /usr/bin/time -l ./SwiftBenchmark
340 | trials: 1000
341 | timesteps: 1000
342 | average forward only time: 2.138608299999987e-05 seconds
343 | average forward and back (gradient) time: 0.00025673441800000017 seconds
344 | 0.28 real 0.26 user 0.02 sys
345 | 5029888 maximum resident set size
346 | 0 average shared memory size
347 | 0 average unshared data size
348 | 0 average unshared stack size
349 | 455 page reclaims
350 | 1 page faults
351 | 0 swaps
352 | 0 block input operations
353 | 0 block output operations
354 | 0 messages sent
355 | 0 messages received
356 | 0 signals received
357 | 0 voluntary context switches
358 | 22 involuntary context switches
359 | 3567193705 instructions retired
360 | 854329255 cycles elapsed
361 | 3278336 peak memory footprint
362 | ```
363 |
364 | ---
365 | ##### Maximum resident set size
366 |
367 |
368 | | N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
369 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
370 | | 100 | 3899392 | 3391488 | 0.9 | 198754304 | 51 | 433504256 | 111 |
371 | | 1000 | 3391488 | 3751936 | 1.1 | 196542464 | 58 | 430637056 | 127 |
372 | | 10000 | 3604480 | 3751936 | 1.0 | 199507968 | 55 | 433668096 | 120 |
373 | | 100000 | 3538944 | 3915776 | 1.1 | 201326592 | 57 | 437469184 | 124 |
374 |
375 | | N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
376 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
377 | | 100 | 3375104 | 3866624 | 1.1 | 205455360 | 60 | 428326912 | 126 |
378 | | 1000 | 3866624 | 5537792 | 1.4 | 379322368 | 98 | 426360832 | 110 |
379 | | 10000 | 8421376 | 24870912 | 3.0 | 2404892672 | 285 | 609255424 | 72 |
380 | | 100000 | 55050240 | 220332032 | 4.0 | 10271408128 | 186 | 1661583360 | 30 |
470 | ---
471 | ##### Peak memory footprint
472 |
473 |
474 | | N trials (timesteps=20, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
475 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
476 | | 100 | 2458816 | 1901696 | 0.8 | 133400896 | 54 | 239210560 | 97 |
477 | | 1000 | 1950912 | 2294976 | 1.2 | 132860352 | 68 | 236031808 | 121 |
478 | | 10000 | 2163904 | 2278656 | 1.1 | 133253440 | 62 | 236719872 | 109 |
479 | | 100000 | 2098368 | 2409728 | 1.1 | 134318336 | 64 | 240111552 | 114 |
480 |
481 | | N timesteps (trials=1, warmup=3) | Swift nightly toolchain 2024-06-03 | Swift nightly toolchain 2024-05-15 | Swift Improvement | PyTorch 2.3.1 | Swift Improvement | TensorFlow 2.16.2 | Swift Improvement |
482 | |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
483 | | 100 | 1934528 | 2393280 | 1.2 | 143346176 | 74 | 238309248 | 123 |
484 | | 1000 | 2393280 | 4064448 | 1.7 | 292129920 | 122 | 239030080 | 99 |
485 | | 10000 | 6964480 | 23430464 | 3.4 | 1819930944 | 261 | 293621952 | 42 |
486 | | 100000 | 48531072 | 214257152 | 4.4 | 17550355200 | 361 | 806886848 | 16 |
576 | ---
577 | #### Power Consumption
578 | To measure the energy consumed during program execution, we used a current shunt, a differential probe, and an oscilloscope.
579 |
580 | The current shunt resistor was placed in series with the positive input power terminal on the Orin. Both the voltage and the current consumed were captured at high speed using two channels on an oscilloscope.
581 |
582 | The capture length of the oscilloscope trace is 1.2 seconds. Because three of the four tests run longer than that window, we measured the total time each test took and used the oscilloscope measurements to determine the average power each program consumed during the test. We then extrapolated the total energy consumed by each program by multiplying the average power level by the length of the test in seconds.
583 |
584 | The overall number of compute operations was calculated by multiplying the number of trials (5000) by the number of timesteps (1000) by each program's number of mathematical operations per timestep (37 for Swift, 49 for TensorFlow/PyTorch). It is worth noting that some calculations required extra steps to work with compatible tensor shapes (for example, compare the `updateQuanta` function in each program). While we do factor this into our results, it highlights the difference between using automatic differentiation to operate on heterogeneous models and conforming to shape-defined tensors.
585 |
586 | The following results show two views of energy consumption: operations computed per kilojoule consumed, and joules consumed per giga-operation (J/GOp).
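
As a sanity check on that arithmetic, here is the optimized-Swift row of the table below worked through in a small sketch (the constants come straight from the table and the operation counts described above):

```swift
// Energy arithmetic for the Swift nightly toolchain 2024-06-03 row.
let averagePower = 10.20                                // watts (joules per second)
let testLength = 0.616                                  // seconds
let totalEnergy = averagePower * testLength             // ≈ 6.28 J
let operations = 5000.0 * 1000.0 * 37.0                 // trials × timesteps × ops per timestep
let joulesPerGigaOp = totalEnergy / (operations / 1e9)  // ≈ 34 J/GOp
```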
587 |
588 | ##### Power consumption of a 5000-trial 1000-timestep simulation
589 |
590 |
591 | | | Test Power (avg W) | Test Length (s) | Total Energy (J) | Normalized Ratio | Ops / kiloJoule | Joules / GigaOp |
592 | |---|:---:|:---:|:---:|:---:|:---:|:---:|
593 | | Swift nightly toolchain 2024-05-15 | 10.37 | 3.828 | 39.70 | 6 | 4,660 | 215 |
594 | | Swift nightly toolchain 2024-06-03 | 10.20 | 0.616 | 6.28 | 1 | 29,452 | 34 |
595 | | TensorFlow | 12.54 | 658.846 | 8259.66 | 1315 | 30 | 33713 |
596 | | PyTorch | 12.34 | 3340.826 | 41220.14 | 6562 | 6 | 168245 |
639 | ---
640 |
641 | ## Previous Results
642 | ### 2023-12-10
643 | The following timings were gathered using these benchmarks on an M1 Pro MacBook Pro (14", 2021):
644 |
645 | | **Version** | **Time (ms)** | **Slowdown Compared to Swift** |
646 | |---|:---:|:---:|
647 | | **Swift** | 0.03 | 1X |
648 | | **PyTorch** | 8.16 | 238X |
649 | | **TensorFlow** | 11.0 | 322X |
650 |
--------------------------------------------------------------------------------
/Benchmarks/BuildingSimulation/Swift/main.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 | import Foundation
3 |
4 | // Simulation parameters
5 | let trials = 100
6 | let timesteps = 20
7 | let dTime: Float = 0.1
8 | let printGradToCompare = false
9 |
10 | // Definitions
11 | let π = Float.pi
12 |
13 | struct SimParams: Differentiable {
14 | var tube: TubeType = .init()
15 | var slab: SlabType = .init()
16 | var quanta: QuantaType = .init()
17 | var tank: TankType = .init()
18 | var startingTemp: Float
19 | }
20 |
21 | struct TubeType: Differentiable {
22 | var tubeSpacing: Float = 0.50292 // meters
23 | var diameter: Float = 0.019 // m (3/4")
24 | var thickness: Float = 0.001588 // m (1/16")
25 | var resistivity: Float = 2.43 // (K/W)m
26 | }
27 |
28 | struct SlabType: Differentiable {
29 | var temp: Float = 21.1111111 // °C
30 | var area: Float = 100.0 // m^2
31 | var Cp: Float = 0.2
32 | var density: Float = 2242.58 // kg/m^3
33 | var thickness: Float = 0.101 // m
34 | }
35 |
36 | struct QuantaType: Differentiable {
37 | var power: Float = 0.0 // Watt
38 | var temp: Float = 60.0 // °C
39 | var flow: Float = 0.0006309 // m^3/sec
40 | var density: Float = 1000.0 // kg/m^3
41 | var Cp: Float = 4180.0 // ws/(kg • K)
42 | }
43 |
44 | struct TankType: Differentiable {
45 | var temp: Float = 70.0
46 | var volume: Float = 0.0757082
47 | var Cp: Float = 4180.000
48 | var density: Float = 1000.000
49 | var mass: Float = 75.708
50 | }
51 |
52 | // Computations
53 |
54 | @differentiable(reverse)
55 | func computeResistance(floor: SlabType, tube: TubeType, quanta _: QuantaType) -> Float {
56 | let geometry_coeff: Float = 10.0
57 | // let f_coff = 0.3333333
58 |
59 | let tubingSurfaceArea = (floor.area / tube.tubeSpacing) * π * tube.diameter
60 | let resistance_abs = tube.resistivity * tube.thickness / tubingSurfaceArea
61 |
62 | let resistance_corrected = resistance_abs * geometry_coeff // * (quanta.flow * f_coff)
63 |
64 | return resistance_corrected
65 | }
66 |
67 | struct QuantaAndPower: Differentiable {
68 | var quanta: QuantaType
69 | var power: Float
70 | }
71 |
72 |
73 | extension Differentiable {
74 | /// Applies the given closure to the derivative of `self`.
75 | ///
76 | /// Returns `self` like an identity function. When the return value is used in
77 | /// a context where it is differentiated with respect to, applies the given
78 | /// closure to the derivative of the return value.
79 | @inlinable
80 | @differentiable(reverse, wrt: self)
81 | func withDerivative(_: @escaping (inout TangentVector) -> Void) -> Self {
82 | return self
83 | }
84 |
85 | @inlinable
86 | @derivative(of: withDerivative)
87 | func _vjpWithDerivative(
88 | _ body: @escaping (inout TangentVector) -> Void
89 | ) -> (value: Self, pullback: (TangentVector) -> TangentVector) {
90 | return (self, { grad in
91 | var grad = grad
92 | body(&grad)
93 | return grad
94 | })
95 | }
96 | }
97 |
98 | @differentiable(reverse)
99 | func computeLoadPower(floor: SlabType, tube: TubeType, quanta: QuantaType) -> QuantaAndPower {
100 | let resistance_abs = computeResistance(floor: floor, tube: tube, quanta: quanta)
101 |
102 | let conductance: Float = 1 / resistance_abs
103 | let dTemp = floor.temp - quanta.temp
104 | let power = dTemp * conductance
105 |
106 | var updatedQuanta = quanta
107 | updatedQuanta.power = power
108 | let loadPower = -power
109 |
110 | return QuantaAndPower(quanta: updatedQuanta, power: loadPower)
111 | }
112 |
113 | @differentiable(reverse)
114 | func updateQuanta(quanta: QuantaType) -> QuantaType {
115 | let workingVolume = (quanta.flow * dTime)
116 | let workingMass = (workingVolume * quanta.density)
117 | let workingEnergy = quanta.power * dTime
118 | let TempRise = workingEnergy / quanta.Cp / workingMass
119 | var updatedQuanta = quanta
120 | updatedQuanta.temp = quanta.temp + TempRise
121 |
122 | updatedQuanta.power = 0
123 | return updatedQuanta
124 | }
125 |
126 | @differentiable(reverse)
127 | func updateBuildingModel(power: Float, floor: SlabType) -> SlabType {
128 | var updatedFloor = floor
129 |
130 | let floorVolume = floor.area * floor.thickness
131 | let floorMass = floorVolume * floor.density
132 |
133 | updatedFloor.temp = floor.temp + ((power * dTime) / floor.Cp / floorMass)
134 | return updatedFloor
135 | }
136 |
137 | struct TankAndQuanta: Differentiable {
138 | var tank: TankType
139 | var quanta: QuantaType
140 | }
141 |
142 | @differentiable(reverse)
143 | func updateSourceTank(store: TankType, quanta: QuantaType) -> TankAndQuanta {
144 | var updatedStore = store
145 | var updatedQuanta = quanta
146 |
147 | let massPerTime = quanta.flow * quanta.density
148 | let dTemp = store.temp - quanta.temp
149 | let power = dTemp * massPerTime * quanta.Cp
150 |
151 | updatedQuanta.power = power
152 |
153 | let tankMass = store.volume * store.density
154 | let TempRise = (power * dTime) / store.Cp / tankMass
155 | updatedStore.temp = store.temp + TempRise
156 |
157 | return TankAndQuanta(tank: updatedStore, quanta: updatedQuanta)
158 | }
159 |
160 | var simParams = SimParams(startingTemp: 33.3)
161 |
162 | @differentiable(reverse)
163 | @inlinable public func absDifferentiable(_ value: Float) -> Float {
164 | if value < 0 {
165 | return -value
166 | }
167 | return value
168 | }
169 |
170 | func lossCalc(pred: Float, gt: Float) -> Float {
171 | let diff = pred - gt
172 | return absDifferentiable(diff)
173 | }
174 |
175 | // Simulations
176 |
177 | @differentiable(reverse)
178 | func simulate(simParams: SimParams) -> Float {
179 | let pexTube = simParams.tube
180 | var slab = simParams.slab
181 | var tank = simParams.tank
182 | var quanta = simParams.quanta
183 |
184 | slab.temp = simParams.startingTemp
185 | for _ in 0 ..< timesteps {
186 | let tankAndQuanta = updateSourceTank(store: tank, quanta: quanta)
187 | tank = tankAndQuanta.tank
188 | quanta = tankAndQuanta.quanta
189 |
190 | quanta = updateQuanta(quanta: quanta)
191 |
192 | let quantaAndPower = computeLoadPower(floor: slab, tube: pexTube, quanta: quanta)
193 | quanta = quantaAndPower.quanta
194 | let powerToBuilding = quantaAndPower.power
195 | quanta = updateQuanta(quanta: quanta)
196 |
197 | slab = updateBuildingModel(power: powerToBuilding, floor: slab)
198 | }
199 | return slab.temp
200 | }
201 |
202 | var blackHole: Any?
203 | @inline(never)
204 | func dontLetTheCompilerOptimizeThisAway<T>(_ x: T) {
205 | blackHole = x
206 | }
207 |
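/// Times `block`, returning the elapsed time in seconds alongside the block's result.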
208 | func measure<T>(_ block: () throws -> T) throws -> (time: Double, result: T) {
209 | let t0 = DispatchTime.now()
210 | let result = try block()
211 | let t1 = DispatchTime.now()
212 | let elapsed = Double(t1.uptimeNanoseconds - t0.uptimeNanoseconds) / 1E9
213 | return (elapsed, result)
214 | }
215 |
216 | @differentiable(reverse)
217 | func fullPipe(simParams: SimParams) -> Float {
218 | let pred = simulate(simParams: simParams)
219 | let loss = lossCalc(pred: pred, gt: 27.344767)
220 | return loss
221 | }
222 |
223 | var totalPureForwardTime: Double = 0
224 | var totalGradientTime: Double = 0
225 |
226 | for _ in 0 ..< trials {
227 | let (forwardOnly, forwardResult) = try measure {
228 | return fullPipe(simParams: simParams)
229 | }
230 | dontLetTheCompilerOptimizeThisAway(forwardResult)
231 |
232 | let (gradientTime, grad) = try measure {
233 | return gradient(at: simParams, of: fullPipe)
234 | }
235 | dontLetTheCompilerOptimizeThisAway(grad)
236 |
237 | if printGradToCompare {
238 | print(grad)
239 | }
240 |
241 | totalPureForwardTime += forwardOnly
242 | totalGradientTime += gradientTime
243 | }
244 |
245 | let averagePureForward = totalPureForwardTime / Double(trials)
246 | let averageGradient = totalGradientTime / Double(trials)
247 |
248 | print("trials: \(trials)")
249 | print("timesteps: \(timesteps)")
250 | print("average forward only time: \(averagePureForward) seconds")
251 | print("average forward and back (gradient) time: \(averageGradient) seconds")
252 |
--------------------------------------------------------------------------------
/Benchmarks/BuildingSimulation/TensorFlow/TensorFlowSimulator.py:
--------------------------------------------------------------------------------
1 |
2 | import tensorflow as tf
3 |
4 | # Simulation parameters
5 | trials = 100
6 | timesteps = 20
7 | warmup = 3
8 | dTime = 0.1
9 | printGradToCompare = False
10 |
11 | @tf.function
12 | def doMath(a):  # small @tf.function example; not used by the benchmark below
13 |     return a * 2
14 |
15 | # Definitions
16 |
17 | π = 3.14159265359
18 |
19 |
20 | # TubeType and the other custom objects holding primitives will each be represented
21 | # by a 1D Tensor, and SimParams will compose them into a 2D Tensor.
22 |
23 | # Make each 1D Tensor the same length (padded with zeros where needed) so that
24 | # ragged tensors are not required.
25 | TubeType = tf.constant([0.50292, 0.019, 0.001588, 2.43, 0.0])
26 |
27 | # define indexes for sanity's sake
28 | class TubeTypeIndices:
29 |     itubeSpacing = 0
30 |     idiameter = 1
31 |     ithickness = 2
32 |     iresistivity = 3
33 |
34 | SlabType = tf.constant([21.1111111, 100.0, 0.2, 2242.58, 0.101])
35 |
36 | class SlabTypeIndices:
37 |     itemp = 0
38 |     iarea = 1
39 |     iCp = 2
40 |     idensity = 3
41 |     ithickness = 4
42 |
43 | QuantaType = tf.constant([0.0, 60.0, 0.0006309, 1000.0, 4180.0])
44 |
45 | class QuantaIndices:
46 |     ipower = 0
47 |     itemp = 1
48 |     iflow = 2
49 |     idensity = 3
50 |     iCp = 4
51 |
52 | TankType = tf.constant([70.0, 0.0757082, 4180.0, 1000.0, 75.708])
53 |
54 | class TankTypeIndices:
55 |     itemp = 0
56 |     ivolume = 1
57 |     iCp = 2
58 |     idensity = 3
59 |     imass = 4
60 |
61 | # represent the starting temp as a length-5 padded Tensor to match the other Tensor sizes
62 | # (to avoid having to use ragged tensors)
63 | startingTemperature = tf.constant([33.3, 0, 0, 0, 0])
64 |
65 |
66 | # SimParams will be represented with a 2D Tensor, where each
67 | # member (a custom type itself) is represented by a 1D Tensor
68 | SimParamsConstant = tf.convert_to_tensor([TubeType, SlabType, QuantaType, TankType, startingTemperature])
69 |
70 | class SimParamsIndices:
71 |     itube = 0
72 |     islab = 1
73 |     iquanta = 2
74 |     itank = 3
75 |     istartingTemp = 4
76 |
77 |
78 | # Computations
79 |
80 | @tf.function
81 | def computeResistance(floor, tube, quanta):
82 |     geometry_coeff = 10.0
83 |
84 |     tubingSurfaceArea = (floor[SlabTypeIndices.iarea] / tube[TubeTypeIndices.itubeSpacing]) * π * tube[TubeTypeIndices.idiameter]
85 |     resistance_abs = tube[TubeTypeIndices.iresistivity] * tube[TubeTypeIndices.ithickness] / tubingSurfaceArea
86 |
87 |     resistance_corrected = resistance_abs * geometry_coeff
88 |
89 |     return resistance_corrected
90 |
91 |
92 | @tf.function
93 | def computeLoadPower(floor, tube, quanta):
94 |     resistance_abs = computeResistance(floor, tube, quanta)
95 |
96 |     conductance = 1/resistance_abs
97 |     dTemp = floor[SlabTypeIndices.itemp] - quanta[QuantaIndices.itemp]
98 |     power = dTemp * conductance
99 |
100 |     loadPower = -power
101 |
102 |     resultQuanta = quanta * tf.constant([0.0, 1, 1, 1, 1]) + power * tf.constant([1.0, 0, 0, 0, 0])
103 |
104 |     return (resultQuanta, loadPower)
105 |
106 |
107 | slab, tube, quanta = tf.Variable(SlabType), tf.Variable(TubeType), tf.Variable(QuantaType)
108 | with tf.GradientTape() as tape:
109 |     quantaAndPower = computeLoadPower(slab, tube, quanta)
110 |
111 | gradient = tape.gradient(quantaAndPower, [slab, tube, quanta])
112 |
113 | @tf.function
114 | def updateQuanta(quanta: tf.Tensor) -> tf.Tensor:
115 |     workingVolume = (quanta[QuantaIndices.iflow] * dTime)
116 |     workingMass = (workingVolume * quanta[QuantaIndices.idensity])
117 |     workingEnergy = quanta[QuantaIndices.ipower] * dTime
118 |     TempRise = workingEnergy / quanta[QuantaIndices.iCp] / workingMass
119 |
120 |     resultQuanta = quanta + TempRise * tf.constant([0.0, 1, 0, 0, 0])
121 |     resultQuanta = resultQuanta * tf.constant([0.0, 1, 1, 1, 1])
122 |
123 |     return resultQuanta
124 |
125 | quanta = tf.Variable(QuantaType)
126 | with tf.GradientTape() as tape:
127 |     tape.watch(quanta)
128 |     newQuanta = updateQuanta(quanta)
129 |
130 | gradient = tape.gradient(newQuanta, [quanta])
131 |
132 | @tf.function
133 | def updateBuildingModel(power, floor):
134 |     floorVolume = floor[SlabTypeIndices.iarea] * floor[SlabTypeIndices.ithickness]
135 |     floorMass = floorVolume * floor[SlabTypeIndices.idensity]
136 |     floorTempChange = (power * dTime) / floor[SlabTypeIndices.iCp] / floorMass
137 |
138 |     resultFloor = floor + floorTempChange * tf.constant([1.0, 0, 0, 0, 0])
139 |
140 |     return resultFloor
141 |
142 | inputPower = tf.constant([1.0])[0]
143 |
144 | inputPower = tf.Variable(inputPower)
145 | slab = tf.Variable(SlabType)
146 | with tf.GradientTape() as tape:
147 |     tape.watch(inputPower)
148 |     tape.watch(slab)
149 |     newSlab = updateBuildingModel(inputPower, slab)
150 |
151 | gradient = tape.gradient(newSlab, [inputPower, slab])
152 |
153 | @tf.function
154 | def updateSourceTank(store, quanta):
155 |     massPerTime = quanta[QuantaIndices.iflow] * quanta[QuantaIndices.idensity]
156 |     dTemp = store[TankTypeIndices.itemp] - quanta[QuantaIndices.itemp]
157 |     power = dTemp * massPerTime * quanta[QuantaIndices.iCp]
158 |
159 |     updatedQuanta = quanta * tf.constant([0.0, 1, 1, 1, 1]) + power * tf.constant([1.0, 0, 0, 0, 0])
160 |
161 |     tankMass = store[TankTypeIndices.ivolume] * store[TankTypeIndices.idensity]
162 |     TempRise = (power * dTime) / store[TankTypeIndices.iCp] / tankMass
163 |
164 |     updatedStore = store + TempRise * tf.constant([1.0, 0, 0, 0, 0])
165 |
166 |     return (updatedStore, updatedQuanta)
167 |
168 | store = tf.Variable(TankType)
169 | quanta = tf.Variable(QuantaType)
170 | with tf.GradientTape() as tape:
171 |     tape.watch(store)
172 |     tape.watch(quanta)
173 |     tankAndQuanta = updateSourceTank(store, quanta)
174 |
175 | gradient = tape.gradient(tankAndQuanta, [store, quanta])
176 |
177 | simParams = tf.Variable(SimParamsConstant)
178 |
179 |
180 | @tf.function
181 | def lossCalc(pred, gt):
182 |     return tf.abs(pred - gt)
183 |
184 | # Simulations
185 |
186 | @tf.function
187 | def simulate(simParams):
188 |     pexTube = simParams[SimParamsIndices.itube]
189 |     slab = simParams[SimParamsIndices.islab]
190 |     tank = simParams[SimParamsIndices.itank]
191 |     quanta = simParams[SimParamsIndices.iquanta]
192 |
193 |     startingTemp = simParams[SimParamsIndices.istartingTemp][0]
194 |     slab = slab * tf.constant([0.0, 1, 1, 1, 1]) + startingTemp * tf.constant([1.0, 0, 0, 0, 0])
195 |
196 |     for i in tf.range(timesteps):
197 |         tankAndQuanta = updateSourceTank(tank, quanta)
198 |         tank = tankAndQuanta[0]
199 |         quanta = tankAndQuanta[1]
200 |
201 |         quanta = updateQuanta(quanta)
202 |
203 |         quantaAndPower = computeLoadPower(slab, pexTube, quanta)
204 |         quanta = quantaAndPower[0]
205 |         powerToBuilding = quantaAndPower[1]
206 |         quanta = updateQuanta(quanta)
207 |
208 |         slab = updateBuildingModel(powerToBuilding, slab)
209 |
210 |     return slab[SlabTypeIndices.itemp]
211 |
212 |
213 |
214 | import time
215 |
216 | def measure(function, arguments):
217 |     start = time.time()
218 |     result = function(arguments)
219 |     end = time.time()
220 |     return (end - start, result)
221 |
222 |
223 | @tf.function
224 | def fullPipe(simParams):
225 |     pred = simulate(simParams)
226 |     loss = lossCalc(pred, 27.344767)
227 |     return loss
228 |
229 |
230 | @tf.function  # graph-compiled rather than run eagerly; see the README's benchmark notes
231 | def getGradient(simParams):
232 |     with tf.GradientTape() as tape:
233 |         endTemperature = simulate(simParams)
234 |
235 |     gradient = tape.gradient(endTemperature, [simParams])
236 |     return gradient
237 |
238 |
239 | totalForwardTime = 0
240 | totalGradientTime = 0
241 |
242 | for i in range(trials + warmup):
243 |
244 |     forwardTime, forwardOutput = measure(fullPipe, SimParamsConstant)
245 |
246 |     simParams = tf.Variable(SimParamsConstant)
247 |
248 |     gradientTime, gradient = measure(getGradient, simParams)
249 |
250 |     if printGradToCompare:
251 |         print(gradient)
252 |
253 |     if i >= warmup:
254 |         totalForwardTime += forwardTime
255 |         totalGradientTime += gradientTime
256 |
257 |
258 | averageForwardTime = totalForwardTime / trials
259 | averageGradientTime = totalGradientTime / trials
260 |
261 | print("trials:", trials)
262 | print("timesteps:", timesteps)
263 | print(f"average forward only time: {averageForwardTime} seconds")
264 | print(f"average forward and backwards (gradient) time: {averageGradientTime} seconds")
265 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/FuzzedFunctions.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 | import Foundation
3 |
4 | // Functions generated via a fuzzer using standard math operators.
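// Recurring patterns below: denominators carry a +0.001 guard against division by zero,
// and trigonometric arguments are scaled by (180 / Float.pi).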
5 |
6 | @differentiable(reverse)
7 | func fuzzedMath1(_ x0: Float, _ x1: Float, _ x2: Float) -> Float {
8 | var y = x0;
9 | let t3 = x0 + x2 + x2;
10 | let t4 = x1 + t3;
11 | let t5 = x0 + t3;
12 | let t6 = t3 - x1;
13 | let t7 = x1 * t5;
14 | let t8 = t7 + x1 + x0;
15 | let t10 = t8 + x0;
16 | let t11 = t4 * cos(t10 * (180 / Float.pi));
17 | let t12 = sin(x2 * t8);
18 | let t13 = t3 * t8 * t4;
19 | let t14 = t11 - t11;
20 | let t15 = x1 - t4 - x1;
21 | let t16 = t8 * sin(t6 * (180 / Float.pi));
22 | let t17 = t3 * t3;
23 | let t18 = t11 - x1 - t13;
24 | let t19 = sin(t10 * t15);
25 | let t20 = sin(t17 * t14);
26 | let t22 = t17 * t13;
27 | let t23 = x2 * t12 * t11;
28 | let t24 = t13 - t23 - t17 - t22;
29 | let t25 = t6 - t6;
30 | let t27 = x1 + x0;
31 | let t31 = t25 - t19 - t20;
32 | let t33 = t18 + t19 + x1;
33 | let t35 = (t3 * t12 - 1);
34 | let t37 = t15 * cos(t16 * (180 / Float.pi));
35 | let t41 = sin(t35 * t15);
36 | let t49 = t31 / (0.001 + t24);
37 | let t51 = (x2 * t49 - 1);
38 | let t54 = t8 * sin(t25 * (180 / Float.pi));
39 | let t64 = t20 * t24 * t25;
40 | let t72 = t41 * t27 * t33;
41 | let t78 = t14 - t72 - t54;
42 | let t86 = t78 + t64;
43 | let t102 = t37 * t86 * t51;
44 | let t = t102;
45 | y += t;
46 | return y;
47 | }
48 | @differentiable(reverse)
49 | func fuzzedMath2(_ x0: Float, _ x1: Float, _ x2: Float) -> Float {
50 | var y = x0;
51 | let t3 = x2 * cos(x2 * (180 / Float.pi));
52 | let t4 = t3 * x0;
53 | let t5 = (t4 * t4 - 1);
54 | let t6 = x2 * x1 * t3;
55 | let t7 = t4 * cos(x1 * (180 / Float.pi));
56 | let t8 = x2 * sin(x1 * (180 / Float.pi));
57 | let t9 = t7 / (0.001 + t3);
58 | let t10 = x0 * cos(x0 * (180 / Float.pi));
59 | let t12 = sin(t9 * t8);
60 | let t13 = t5 * cos(t10 * (180 / Float.pi));
61 | let t14 = (t7 * t8 - 1);
62 | let t15 = t10 + t4 + x2;
63 | let t16 = (t3 * t7 - 1);
64 | let t17 = (t16 * t4 - 1);
65 | let t18 = t5 + t3 + t16;
66 | let t19 = t4 + t16;
67 | let t22 = t3 + t19 + t6;
68 | let t23 = t22 / (0.001 + t22);
69 | let t24 = t15 * cos(x0 * (180 / Float.pi));
70 | let t26 = sin(t8 * t15);
71 | let t27 = t26 - x1;
72 | let t31 = t7 * sin(t12 * (180 / Float.pi));
73 | let t32 = t7 - t22 - t26 - t23;
74 | let t33 = t16 * cos(t3 * (180 / Float.pi));
75 | let t35 = t15 - t14 - t33;
76 | let t36 = t8 + x0 + x1;
77 | let t39 = t6 / (0.001 + t6);
78 | let t40 = t27 * cos(t8 * (180 / Float.pi));
79 | let t41 = t16 / (0.001 + t35);
80 | let t46 = (t17 * t32 - 1);
81 | let t50 = t18 + t41 + t46;
82 | let t52 = x2 + t39;
83 | let t54 = t40 * t4 * t31;
84 | let t61 = t36 / (0.001 + t52);
85 | let t64 = t50 * cos(t24 * (180 / Float.pi));
86 | let t74 = t14 + t13 + t54;
87 | let t90 = t74 - t61 - t10;
88 | let t98 = t90 / (0.001 + t64);
89 | let t102 = t98 / (0.001 + t9);
90 | let t = t102;
91 | y += t;
92 | return y;
93 | }
94 |
95 |
96 | // Functions generated via a fuzzer incorporating a ternary operator.
97 |
98 | @differentiable(reverse)
99 | func fuzzedMathTernary1(_ x0: Float, _ x1: Float, _ x2: Float) -> Float {
100 | var y = x0;
101 | let t3 = x1 + x1 + x1;
102 | let t4 = x1 * x1 * x0;
103 | let t5 = x0 - x2 - t4;
104 | let t6 = (t4 + t3) / (t4 - t3 + 0.001);
105 | let t7 = x2 + x0 + t5;
106 | let t9 = (x1 * t7 - 1);
107 | let t10 = sin(t4) * sin(t6);
108 | let t11 = sin(t6) * sin(t6);
109 | let t12 = cos(t9) * cos(t6);
110 | let t15 = t12 / (0.001 + x1);
111 | let t16 = x0 * t7 * x1;
112 | let t17 = t6 / (0.001 + x1);
113 | let t18 = sin(t10) * sin(t4);
114 | let t19 = (t11 + t16) / (t11 - t16 + 0.001);
115 | let t22 = (t11 * t11 - 1);
116 | let t23 = (x1 * t10 - 1);
117 | let t25 = t17 < t23 ? t17 : t23;
118 | let t26 = t16 / (0.001 + t12);
119 | let t28 = t26 / (0.001 + t16);
120 | let t30 = t28 * sin(t23 * (180 / Float.pi));
121 | let t31 = t28 * t18 * t19;
122 | let t33 = t18 + t28 + t5 + t31 + t15;
123 | let t41 = (t33 + t6) / (t33 - t6 + 0.001);
124 | let t42 = t7 * t6 * t30;
125 | let t43 = t16 < t18 ? t16 : t18;
126 | let t59 = cos(t12) * cos(t25);
127 | let t81 = t42 + t59 + t22 + t43 + t41;
128 | let t102 = t81 + t33 + t11;
129 | let t = t102;
130 | y += t;
131 | return y;
132 | }
133 |
134 | @differentiable(reverse)
135 | func fuzzedMathTernary2(_ x0: Float, _ x1: Float, _ x2: Float) -> Float {
136 | var y = x0;
137 | let t3 = x2 * x1;
138 | let t4 = t3 / (0.001 + t3);
139 | let t5 = x2 + t3 + x0;
140 | let t6 = t4 - t3;
141 | let t8 = x1 * sin(x1 * (180 / Float.pi));
142 | let t9 = t5 * sin(x2 * (180 / Float.pi));
143 | let t10 = t8 - t6 - t9;
144 | let t11 = t6 * t8;
145 | let t12 = (t10 + t4) / (t10 - t4 + 0.001);
146 | let t13 = x2 * t12;
147 | let t14 = t6 / (0.001 + t11);
148 | let t15 = t8 - x1 - x2;
149 | let t18 = sin(x2) * sin(t14);
150 | let t19 = t12 < t6 ? t12 : t6;
151 | let t20 = t4 * x0;
152 | let t21 = (t14 + t8) / (t14 - t8 + 0.001);
153 | let t22 = (t6 + x1) / (t6 - x1 + 0.001);
154 | let t23 = sin(x1 * t5);
155 | let t25 = t18 * t20 * t13;
156 | let t31 = t21 - t6 - t19 - t23;
157 | let t34 = t15 - t31 - t13 - t25;
158 | let t49 = t5 > t22 ? t5 : t22;
159 | let t102 = (t34 * t49 - 1);
160 | let t = t102;
161 | y += t;
162 | return y;
163 | }
164 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/LanguageCoverageBenchmarks.swift:
--------------------------------------------------------------------------------
1 | import Benchmark
2 | import Foundation
3 | import _Differentiation
4 |
5 | enum CustomMeasurement {
6 | static let forward = BenchmarkMetric.custom("run forward (ns)", polarity: .prefersSmaller, useScalingFactor: true)
7 | static let reverse = BenchmarkMetric.custom("run reverse (ns)", polarity: .prefersSmaller, useScalingFactor: true)
8 | static let ratio = BenchmarkMetric.custom("ratio", polarity: .prefersSmaller, useScalingFactor: true)
9 | }
10 |
11 | extension Benchmark {
12 | @discardableResult
13 | convenience init?(_ name: String, forward: @escaping (Benchmark) -> (), reverse: @escaping (Benchmark) -> ()) {
14 | self.init(name, configuration: .init(metrics: [CustomMeasurement.forward, CustomMeasurement.reverse, CustomMeasurement.ratio])) { benchmark in
15 | let startForward = BenchmarkClock.now
16 | forward(benchmark)
17 | let endForward = BenchmarkClock.now
18 | let startReverse = BenchmarkClock.now
19 | reverse(benchmark)
20 | let endReverse = BenchmarkClock.now
21 |
22 | let forward = Int((endForward - startForward).nanoseconds())
23 | let reverse = Int((endReverse - startReverse).nanoseconds())
24 |
25 | benchmark.measurement(CustomMeasurement.forward, forward)
26 | benchmark.measurement(CustomMeasurement.reverse, reverse)
27 | benchmark.measurement(CustomMeasurement.ratio, reverse / forward)  // integer division: whole-number reverse-to-forward ratio
28 | }
29 | }
30 | }
31 |
32 | let benchmarks = {
33 | Benchmark.defaultConfiguration = .init(
34 | warmupIterations: 1,
35 | scalingFactor: .kilo
36 | )
37 |
38 | // Simple functions.
39 |
40 | Benchmark(
41 | "one operation",
42 | forward: { benchmark in
43 | for _ in benchmark.scaledIterations {
44 | blackHole(oneOperation(a: 2))
45 | }
46 | },
47 | reverse: { benchmark in
48 | for _ in benchmark.scaledIterations {
49 | blackHole(gradient(at: 2, of: oneOperation))
50 | }
51 | }
52 | )
53 | Benchmark(
54 | "sixteen operations",
55 | forward: { benchmark in
56 | for _ in benchmark.scaledIterations {
57 | blackHole(sixteenOperations(a: 2))
58 | }
59 | },
60 | reverse: { benchmark in
61 | for _ in benchmark.scaledIterations {
62 | blackHole(gradient(at: 2, of: sixteenOperations))
63 | }
64 | }
65 | )
66 | Benchmark(
67 | "two composed operations",
68 | forward: { benchmark in
69 | for _ in benchmark.scaledIterations {
70 | blackHole(twoComposedOperations(a: 2))
71 | }
72 | },
73 | reverse: { benchmark in
74 | for _ in benchmark.scaledIterations {
75 | blackHole(gradient(at: 2, of: twoComposedOperations))
76 | }
77 | }
78 | )
79 | Benchmark(
80 | "sixteen composed operations",
81 | forward: { benchmark in
82 | for _ in benchmark.scaledIterations {
83 | blackHole(sixteenComposedOperations(a: 2))
84 | }
85 | },
86 | reverse: { benchmark in
87 | for _ in benchmark.scaledIterations {
88 | blackHole(gradient(at: 2, of: sixteenComposedOperations))
89 | }
90 | }
91 | )
92 |
93 | // Functions with loops.
94 |
95 | Benchmark(
96 | "one operation looped (small)",
97 | forward: { benchmark in
98 | for _ in benchmark.scaledIterations {
99 | blackHole(oneOperationLoopedSmall(a: 2))
100 | }
101 | },
102 | reverse: { benchmark in
103 | for _ in benchmark.scaledIterations {
104 | blackHole(gradient(at: 2, of: oneOperationLoopedSmall))
105 | }
106 | }
107 | )
108 | Benchmark(
109 | "four operations looped (small)",
110 | forward: { benchmark in
111 | for _ in benchmark.scaledIterations {
112 | blackHole(fourOperationsLoopedSmall(a: 2))
113 | }
114 | },
115 | reverse: { benchmark in
116 | for _ in benchmark.scaledIterations {
117 | blackHole(gradient(at: 2, of: fourOperationsLoopedSmall))
118 | }
119 | }
120 | )
121 | Benchmark(
122 | "sixteen operations looped (small)",
123 | forward: { benchmark in
124 | for _ in benchmark.scaledIterations {
125 | blackHole(sixteenOperationsLoopedSmall(a: 2))
126 | }
127 | },
128 | reverse: { benchmark in
129 | for _ in benchmark.scaledIterations {
130 | blackHole(gradient(at: 2, of: sixteenOperationsLoopedSmall))
131 | }
132 | }
133 | )
134 | Benchmark(
135 | "one operation looped",
136 | forward: { benchmark in
137 | for _ in benchmark.scaledIterations {
138 | blackHole(oneOperationLooped(a: 2))
139 | }
140 | },
141 | reverse: { benchmark in
142 | for _ in benchmark.scaledIterations {
143 | blackHole(gradient(at: 2, of: oneOperationLooped))
144 | }
145 | }
146 | )
147 | Benchmark(
148 | "two operations looped",
149 | forward: { benchmark in
150 | for _ in benchmark.scaledIterations {
151 | blackHole(twoOperationsLooped(a: 2))
152 | }
153 | },
154 | reverse: { benchmark in
155 | for _ in benchmark.scaledIterations {
156 | blackHole(gradient(at: 2, of: twoOperationsLooped))
157 | }
158 | }
159 | )
160 | Benchmark(
161 | "four operations looped",
162 | forward: { benchmark in
163 | for _ in benchmark.scaledIterations {
164 | blackHole(fourOperationsLooped(a: 2))
165 | }
166 | },
167 | reverse: { benchmark in
168 | for _ in benchmark.scaledIterations {
169 | blackHole(gradient(at: 2, of: fourOperationsLooped))
170 | }
171 | }
172 | )
173 | Benchmark(
174 | "eight operations looped",
175 | forward: { benchmark in
176 | for _ in benchmark.scaledIterations {
177 | blackHole(eightOperationsLooped(a: 2))
178 | }
179 | },
180 | reverse: { benchmark in
181 | for _ in benchmark.scaledIterations {
182 | blackHole(gradient(at: 2, of: eightOperationsLooped))
183 | }
184 | }
185 | )
186 | Benchmark(
187 | "sixteen operations looped",
188 | forward: { benchmark in
189 | for _ in benchmark.scaledIterations {
190 | blackHole(sixteenOperationsLooped(a: 2))
191 | }
192 | },
193 | reverse: { benchmark in
194 | for _ in benchmark.scaledIterations {
195 | blackHole(gradient(at: 2, of: sixteenOperationsLooped))
196 | }
197 | }
198 | )
199 | Benchmark(
200 | "two composed operations looped",
201 | forward: { benchmark in
202 | for _ in benchmark.scaledIterations {
203 | blackHole(twoComposedOperationsLooped(a: 2))
204 | }
205 | },
206 | reverse: { benchmark in
207 | for _ in benchmark.scaledIterations {
208 | blackHole(gradient(at: 2, of: twoComposedOperationsLooped))
209 | }
210 | }
211 | )
212 | Benchmark(
213 | "sixteen composed operations looped",
214 | forward: { benchmark in
215 | for _ in benchmark.scaledIterations {
216 | blackHole(sixteenComposedOperationsLooped(a: 2))
217 | }
218 | },
219 | reverse: { benchmark in
220 | for _ in benchmark.scaledIterations {
221 | blackHole(gradient(at: 2, of: sixteenComposedOperationsLooped))
222 | }
223 | }
224 | )
225 |
226 | // Arithmetic and control flow functions generated by a fuzzer.
227 |
228 | Benchmark(
229 | "fuzzed arithmetic 1",
230 | forward: { benchmark in
231 | for _ in benchmark.scaledIterations {
232 | blackHole(fuzzedMath1(1.0, 2.0, 3.0))
233 | }
234 | },
235 | reverse: { benchmark in
236 | for _ in benchmark.scaledIterations {
237 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMath1))
238 | }
239 | }
240 | )
241 | Benchmark(
242 | "fuzzed arithmetic 2",
243 | forward: { benchmark in
244 | for _ in benchmark.scaledIterations {
245 | blackHole(fuzzedMath2(1.0, 2.0, 3.0))
246 | }
247 | },
248 | reverse: { benchmark in
249 | for _ in benchmark.scaledIterations {
250 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMath2))
251 | }
252 | }
253 | )
254 |
255 | Benchmark(
256 | "fuzzed arithmetic with ternary operators 1",
257 | forward: { benchmark in
258 | for _ in benchmark.scaledIterations {
259 | blackHole(fuzzedMathTernary1(1.0, 2.0, 3.0))
260 | }
261 | },
262 | reverse: { benchmark in
263 | for _ in benchmark.scaledIterations {
264 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMathTernary1))
265 | }
266 | }
267 | )
268 | Benchmark(
269 | "fuzzed arithmetic with ternary operators 2",
270 | forward: { benchmark in
271 | for _ in benchmark.scaledIterations {
272 | blackHole(fuzzedMathTernary2(1.0, 2.0, 3.0))
273 | }
274 | },
275 | reverse: { benchmark in
276 | for _ in benchmark.scaledIterations {
277 | blackHole(gradient(at: 1.0, 2.0, 3.0, of: fuzzedMathTernary2))
278 | }
279 | }
280 | )
281 | }
282 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/LoopedFunctions.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | // Simple functions in short, constant-sized loops.
4 |
5 | let smallLoopIterations = 8
6 |
7 | @differentiable(reverse)
8 | func oneOperationLoopedSmall(a: Float) -> Float {
9 | var a = a
10 | for _ in 0..<smallLoopIterations {
11 | a = 3 / a
12 | }
13 | return a
14 | }
15 |
16 | @differentiable(reverse)
17 | func fourOperationsLoopedSmall(a: Float) -> Float {
18 | var a = a
19 | for _ in 0..<smallLoopIterations {
20 | a = 3 / a * 2
21 | a = 3 / a * 2
22 | }
23 | return a
24 | }
25 |
26 | @differentiable(reverse)
27 | func sixteenOperationsLoopedSmall(a: Float) -> Float {
28 | var a = a
29 | for _ in 0..<smallLoopIterations {
30 | a = 3 / a * 2
31 | a = 3 / a * 2
32 | a = 3 / a * 2
33 | a = 3 / a * 2
34 | a = 3 / a * 2
35 | a = 3 / a * 2
36 | a = 3 / a * 2
37 | a = 3 / a * 2
38 | }
39 | return a
40 | }
41 |
42 | // Simple functions in longer, constant-sized loops.
43 |
44 | let loopIterations = 1000 // value reconstructed; the original constant was lost in this copy
45 |
46 | @differentiable(reverse)
47 | func oneOperationLooped(a: Float) -> Float {
48 | var a = a
49 | for _ in 0..<loopIterations {
50 | a = 3 / a
51 | }
52 | return a
53 | }
54 |
55 | @differentiable(reverse)
56 | func twoOperationsLooped(a: Float) -> Float {
57 | var a = a
58 | for _ in 0..<loopIterations {
59 | a = 3 / a * 2
60 | }
61 | return a
62 | }
63 |
64 | @differentiable(reverse)
65 | func fourOperationsLooped(a: Float) -> Float {
66 | var a = a
67 | for _ in 0..<loopIterations {
68 | a = 3 / a * 2
69 | a = 3 / a * 2
70 | }
71 | return a
72 | }
73 |
74 | @differentiable(reverse)
75 | func eightOperationsLooped(a: Float) -> Float {
76 | var a = a
77 | for _ in 0..<loopIterations {
78 | a = 3 / a * 2
79 | a = 3 / a * 2
80 | a = 3 / a * 2
81 | a = 3 / a * 2
82 | }
83 | return a
84 | }
85 |
86 | @differentiable(reverse)
87 | func sixteenOperationsLooped(a: Float) -> Float {
88 | var a = a
89 | for _ in 0..<loopIterations {
90 | a = 3 / a * 2
91 | a = 3 / a * 2
92 | a = 3 / a * 2
93 | a = 3 / a * 2
94 | a = 3 / a * 2
95 | a = 3 / a * 2
96 | a = 3 / a * 2
97 | a = 3 / a * 2
98 | }
99 | return a
100 | }
101 |
102 | // Composed functions in constant-sized loops.
103 |
104 | @differentiable(reverse)
105 | func twoComposedOperationsLooped(a: Float) -> Float {
106 | var a = a
107 | for _ in 0..<loopIterations {
108 | a = twoComposedOperations(a: a)
109 | }
110 | return a
111 | }
112 |
113 | @differentiable(reverse)
114 | func sixteenComposedOperationsLooped(a: Float) -> Float {
115 | var a = a
116 | for _ in 0..<loopIterations {
117 | a = sixteenComposedOperations(a: a)
118 | }
119 | return a
120 | }
121 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/Benchmarks/LanguageCoverageBenchmarks/SimpleFunctions.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | // Simple operations.
4 |
5 | @differentiable(reverse)
6 | func oneOperation(a: Float) -> Float {
7 | return a * 2
8 | }
9 |
10 | @differentiable(reverse)
11 | func sixteenOperations(a: Float) -> Float {
12 | let b = 3 / a * 2
13 | let c = 3 / b * 2
14 | let d = 3 / c * 2
15 | let e = 3 / d * 2
16 | let f = 3 / e * 2
17 | let g = 3 / f * 2
18 | let h = 3 / g * 2
19 | return 3 / h * 2
20 | }
21 |
22 | // Simple function composition.
23 |
24 | @differentiable(reverse)
25 | func oneOperationHelper(a: Float) -> Float {
26 | return 3 / a
27 | }
28 |
29 | @differentiable(reverse)
30 | func twoComposedOperations(a: Float) -> Float {
31 | oneOperationHelper(a: oneOperation(a: a))
32 | }
33 |
34 | @differentiable(reverse)
35 | func sixteenComposedOperations(a: Float) -> Float {
36 | let b = oneOperation(a: a)
37 | let c = oneOperationHelper(a: b)
38 | let d = oneOperation(a: c)
39 | let e = oneOperationHelper(a: d)
40 | let f = oneOperation(a: e)
41 | let g = oneOperationHelper(a: f)
42 | let h = oneOperation(a: g)
43 | let i = oneOperationHelper(a: h)
44 | let b2 = oneOperation(a: i)
45 | let c2 = oneOperationHelper(a: b2)
46 | let d2 = oneOperation(a: c2)
47 | let e2 = oneOperationHelper(a: d2)
48 | let f2 = oneOperation(a: e2)
49 | let g2 = oneOperationHelper(a: f2)
50 | let h2 = oneOperation(a: g2)
51 | let i2 = oneOperationHelper(a: h2)
52 | return i2
53 | }
54 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/Package.resolved:
--------------------------------------------------------------------------------
1 | {
2 | "pins" : [
3 | {
4 | "identity" : "hdrhistogram-swift",
5 | "kind" : "remoteSourceControl",
6 | "location" : "https://github.com/HdrHistogram/hdrhistogram-swift",
7 | "state" : {
8 | "revision" : "a69fa24d7b70421870cafa86340ece900489e17e",
9 | "version" : "0.1.2"
10 | }
11 | },
12 | {
13 | "identity" : "package-benchmark",
14 | "kind" : "remoteSourceControl",
15 | "location" : "https://github.com/ordo-one/package-benchmark",
16 | "state" : {
17 | "revision" : "ddf6c1ae01e139120bcdb917ece52819ee69d47a",
18 | "version" : "1.22.1"
19 | }
20 | },
21 | {
22 | "identity" : "swift-argument-parser",
23 | "kind" : "remoteSourceControl",
24 | "location" : "https://github.com/apple/swift-argument-parser",
25 | "state" : {
26 | "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41",
27 | "version" : "1.3.0"
28 | }
29 | },
30 | {
31 | "identity" : "swift-atomics",
32 | "kind" : "remoteSourceControl",
33 | "location" : "https://github.com/apple/swift-atomics",
34 | "state" : {
35 | "revision" : "cd142fd2f64be2100422d658e7411e39489da985",
36 | "version" : "1.2.0"
37 | }
38 | },
39 | {
40 | "identity" : "swift-numerics",
41 | "kind" : "remoteSourceControl",
42 | "location" : "https://github.com/apple/swift-numerics",
43 | "state" : {
44 | "revision" : "0a5bc04095a675662cf24757cc0640aa2204253b",
45 | "version" : "1.0.2"
46 | }
47 | },
48 | {
49 | "identity" : "swift-system",
50 | "kind" : "remoteSourceControl",
51 | "location" : "https://github.com/apple/swift-system",
52 | "state" : {
53 | "revision" : "025bcb1165deab2e20d4eaba79967ce73013f496",
54 | "version" : "1.2.1"
55 | }
56 | },
57 | {
58 | "identity" : "texttable",
59 | "kind" : "remoteSourceControl",
60 | "location" : "https://github.com/ordo-one/TextTable",
61 | "state" : {
62 | "revision" : "a27a07300cf4ae322e0079ca0a475c5583dd575f",
63 | "version" : "0.0.2"
64 | }
65 | }
66 | ],
67 | "version" : 2
68 | }
69 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:5.9
2 |
3 | import PackageDescription
4 |
5 | let package = Package(
6 | name: "LanguageCoverageBenchmark",
7 | platforms: [
8 | .macOS(.v13)
9 | ],
10 | products: [
11 | .executable(name: "LanguageCoverageBenchmarks", targets: ["LanguageCoverageBenchmarks"])
12 | ],
13 | dependencies: [
14 | .package(url: "https://github.com/ordo-one/package-benchmark", from: "1.22.1"),
15 | ],
16 | targets: [
17 | .executableTarget(
18 | name: "LanguageCoverageBenchmarks",
19 | dependencies: [
20 | .product(name: "Benchmark", package: "package-benchmark"),
21 | ],
22 | path: "Benchmarks/LanguageCoverageBenchmarks",
23 | plugins: [
24 | .plugin(name: "BenchmarkPlugin", package: "package-benchmark"),
25 | ]
26 | )
27 | ]
28 | )
29 |
--------------------------------------------------------------------------------
/Benchmarks/LanguageSuite/README.md:
--------------------------------------------------------------------------------
1 | # Benchmarks of Language Coverage
2 |
3 | A primary capability of differentiable Swift is the automatic generation of reverse-mode
4 | derivatives (pullbacks) from arbitrary Swift functions. Ideally, those generated pullbacks
5 | would have roughly the same performance as running the original code (the forward pass).
6 |
7 | However, Swift is a complex language, and the performance of the generated pullback code currently varies
8 | based on the structure of the original functions. This benchmark suite is intended to cover a range
9 | of representative Swift code to verify pullback performance and guide future optimizations.
10 |
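The comparison being made is between an ordinary call to a function (the forward pass) and a call to its compiler-generated pullback via `gradient(at:of:)` (the reverse pass). As a minimal sketch, using one of the functions defined in this suite:

```swift
import _Differentiation

@differentiable(reverse)
func oneOperation(a: Float) -> Float {
    a * 2
}

let forward = oneOperation(a: 2)                 // Forward pass: run the original function.
let reverse = gradient(at: 2, of: oneOperation)  // Reverse pass: run the generated pullback.
```

The suite times both passes for each function and reports their ratio; ideally that ratio stays close to a small constant.
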
11 | ## Running Benchmarks
12 |
13 | A Swift toolchain with support for differentiation must be installed and in your current path. We
14 | recommend using one [downloaded from Swift.org](https://www.swift.org/download/) for your platform.
15 | Nightly toolchain snapshots tend to have better performance, due to new optimizations and
16 | architectural improvements constantly being upstreamed. More information on toolchain installation
17 | and management can be found [here](https://passivelogic.github.io/differentiable-swift-examples/documentation/differentiableswiftexamples/setup).
18 |
19 | Build and run the benchmark via the following:
20 | ```bash
21 | swift package benchmark
22 | ```
23 |
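To run only a subset of the suite, recent versions of the Benchmark plugin also accept a name filter (a regular expression; consult the plugin's documentation for the exact options your version supports), for example:

```bash
swift package benchmark --filter "fuzzed.*"
```
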
24 | When using a recent Swift.org nightly toolchain snapshot on macOS, you may run into segfaults when running from the terminal. This happens because the executable picks up the system's Swift runtime instead of the toolchain-provided one.
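
One possible workaround, sketched here under the assumption that your toolchain lives in the default location and that `swift-latest.xctoolchain` points at it (adjust the path for your installation), is to point the dynamic loader at the toolchain's own runtime before running:

```bash
export DYLD_LIBRARY_PATH=/Library/Developer/Toolchains/swift-latest.xctoolchain/usr/lib/swift/macosx
swift package benchmark
```
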
25 | It is also possible to run the benchmarks from Xcode ([more info here](https://swiftpackageindex.com/ordo-one/package-benchmark/1.22.1/documentation/benchmark/runningbenchmarks#Running-benchmarks-in-Xcode-and-using-Instruments-for-profiling-benchmarks)).
26 | Make sure Xcode is closed, then run the following to open Xcode with jemalloc disabled:
27 | ```bash
28 | open --env BENCHMARK_DISABLE_JEMALLOC=true Package.swift
29 | ```
30 | Set the executable's scheme to Release and run it by pressing `Cmd+R`.
31 |
--------------------------------------------------------------------------------
/Benchmarks/README.md:
--------------------------------------------------------------------------------
1 | # Differentiable Swift Benchmarks
2 |
3 | This directory hosts benchmarks of differentiable Swift's runtime performance.
4 |
5 | Currently, it contains the following benchmarks:
6 |
7 | - [A building simulator](BuildingSimulation/) implemented in Swift, PyTorch, and TensorFlow to compare timings between the three.
8 | - [A language coverage suite](LanguageSuite/) that measures the performance of compiler-generated pullbacks across a range of representative Swift functions.
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
204 |
205 | ## Runtime Library Exception to the Apache 2.0 License: ##
206 |
207 |
208 | As an exception, if you use this Software to compile your source code and
209 | portions of this Software are embedded into the binary product as a result,
210 | you may redistribute such product without providing attribution as would
211 | otherwise be required by Sections 4(a), 4(b) and 4(d) of the License.
212 |
--------------------------------------------------------------------------------
/Package.resolved:
--------------------------------------------------------------------------------
1 | {
2 | "pins" : [
3 | {
4 | "identity" : "swift-docc-plugin",
5 | "kind" : "remoteSourceControl",
6 | "location" : "https://github.com/apple/swift-docc-plugin",
7 | "state" : {
8 | "revision" : "26ac5758409154cc448d7ab82389c520fa8a8247",
9 | "version" : "1.3.0"
10 | }
11 | },
12 | {
13 | "identity" : "swift-docc-symbolkit",
14 | "kind" : "remoteSourceControl",
15 | "location" : "https://github.com/apple/swift-docc-symbolkit",
16 | "state" : {
17 | "revision" : "b45d1f2ed151d057b54504d653e0da5552844e34",
18 | "version" : "1.0.0"
19 | }
20 | }
21 | ],
22 | "version" : 2
23 | }
24 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:5.6
2 |
3 | import PackageDescription
4 |
5 | let package = Package(
6 | name: "DifferentiableSwiftExamples",
7 | products: [
8 | .executable(name: "BasicDifferentiation", targets: ["BasicDifferentiation"]),
9 | .executable(name: "BasicGradientDescent", targets: ["BasicGradientDescent"]),
10 | .executable(name: "CustomDerivatives", targets: ["CustomDerivatives"]),
11 | .library(name: "DifferentiableSwiftExamples", targets: ["DifferentiableSwiftExamples"]),
12 | ],
13 | dependencies: [
14 | .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.3.0")
15 | ],
16 | targets: [
17 | .executableTarget(name: "BasicDifferentiation"),
18 | .executableTarget(name: "BasicGradientDescent"),
19 | .executableTarget(name: "CustomDerivatives"),
20 | .target(
21 | name: "DifferentiableSwiftExamples",
22 | path: "Sources/DifferentiableSwiftExamplesDocumentation"
23 | ),
24 | ]
25 | )
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Differentiable Swift Examples
2 |
3 | Differentiable Swift is an experimental language feature for the [Swift language](https://www.swift.org) that is currently
4 | in the [pitch phase](https://forums.swift.org/t/differentiable-programming-for-gradient-based-machine-learning/42147) of
5 | the Swift Evolution process. The goal of this feature is to provide first-class, language-integrated support for
6 | differentiable programming, making Swift the first general-purpose, statically typed programming language to have automatic
7 | differentiation built in. Originally developed as part of the
8 | [Swift for TensorFlow](https://www.tensorflow.org/swift/guide/overview) project, the feature is now advanced by teams
9 | at [PassiveLogic](https://passivelogic.com) and elsewhere.
10 |
11 | Differentiable Swift is purely a language feature and isn't tied to any specific machine learning framework or platform.
12 | It provides a means of building such frameworks in Swift, and works wherever Swift does: from Linux to macOS to
13 | [WebAssembly](https://swiftwasm.org).
14 |
15 | The goal of this repository is to provide examples and documentation for differentiable Swift, to illustrate how it can be
16 | used, and to show the power of automatic differentiation in various applications. We hope to grow this over time with new
17 | examples and documentation, and welcome contributions to that end.
18 |
19 | ## Documentation
20 | DocC-generated documentation can be found at https://passivelogic.github.io/differentiable-swift-examples
21 |
22 | ## Getting started
23 |
24 | Differentiable Swift is present as an experimental feature in modern Swift toolchains. Because it is evolving
25 | rapidly, for best results we recommend using a Swift toolchain downloaded [from Swift.org](https://www.swift.org/download/),
26 | either a Swift 5.9 development snapshot or a nightly development snapshot. The latter will more closely track
27 | the latest additions and fixes being upstreamed but may be slightly less stable overall.
28 |
29 | It is possible to use differentiable Swift with the default Swift toolchains that ship inside Xcode; however, only the
30 | compiler additions are present in those toolchains. The standard library support needed to use the `_Differentiation` module
31 | is missing there and needs to be added after the fact. One example of how to do this can be found
32 | in [this project](https://github.com/philipturner/differentiation).
33 |
34 | No special compiler flags are needed to activate differentiable Swift, but you do need to place the following:
35 |
36 | ```swift
37 | import _Differentiation
38 | ```
39 |
40 | in any file where differentiation will be used. The compiler will warn you if you forget this import
41 | and then try to use any differentiable Swift capabilities.
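
As a minimal end-to-end illustration, in the same spirit as the BasicDifferentiation example listed below:

```swift
import _Differentiation

@differentiable(reverse)
func square(_ x: Float) -> Float {
    x * x
}

// Prints "6.0", the derivative of x * x at x = 3.
print(gradient(at: 3, of: square))
```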
42 |
43 | ## Examples
44 |
45 | The following examples are present in the repository, and can be built and run via:
46 |
47 | ```bash
48 | swift run [example]
49 | ```
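
For example, `swift run BasicDifferentiation` builds and runs the first example in the list below.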
50 |
51 | - [BasicDifferentiation](Sources/BasicDifferentiation/main.swift): A very simple example of using automatic differentiation with a few different functions and types.
52 | - [CustomDerivatives](Sources/CustomDerivatives/main.swift): Differentiable Swift lets you register custom derivatives for functions, and this shows how to do so.
53 | - [BasicGradientDescent](Sources/BasicGradientDescent/main.swift): How to perform gradient descent optimization in Swift.
54 |
55 |
56 | ## Benchmarks
57 |
58 | A motivating benchmark of a building thermal model, optimized via gradient descent, is implemented
59 | in several languages and frameworks to compare against differentiable Swift in the [Benchmarks/BuildingSimulation](Benchmarks/BuildingSimulation/) directory.
60 |
61 | ## Differentiable Swift resources
62 |
63 | If you want to learn more about differentiable Swift, there are a variety of resources out there. The API has changed over time,
64 | so some older documentation may provide great background on the feature but not fully reflect code as it is written today.
65 |
66 | - [Differentiable programming for gradient-based machine learning](https://forums.swift.org/t/differentiable-programming-for-gradient-based-machine-learning/42147)
67 | - The Intro to Differentiable Swift series:
68 | - [Part 0: Why Automatic Differentiation is Awesome](https://medium.com/passivelogic/intro-to-differentiable-swift-part-0-why-automatic-differentiation-is-awesome-a522128ca9e3)
69 | - [Part 1: Gradient Descent](https://medium.com/passivelogic/intro-to-differentiable-swift-part-1-gradient-descent-181a06aaa596)
70 | - [Part 2: Differentiable Swift](https://medium.com/passivelogic/intro-to-differentiable-swift-part-2-differentiable-swift-25a99b97087f)
71 | - [Part 3: Differentiable API Introduction](https://medium.com/passivelogic/intro-to-differentiable-swift-part-3-differentiable-api-introduction-2d8d747e0ac8)
72 | - [Part 4: Differentiable Swift API Details](https://medium.com/passivelogic/intro-to-differentiable-swift-part-4-differentiable-swift-api-details-b6368c2dae5)
73 | - [Differentiable Programming Manifesto](https://github.com/apple/swift/blob/main/docs/DifferentiableProgramming.md) (note: slightly out of date)
74 | - The Swift for TensorFlow project explored the use of differentiable Swift paired with machine learning frameworks:
75 | - [Overview of Swift for TensorFlow](https://www.tensorflow.org/swift/guide/overview)
76 | - [Main Swift for TensorFlow GitHub repository](https://github.com/tensorflow/swift)
77 | - [Swift for TensorFlow machine learning APIs](https://github.com/tensorflow/swift-apis)
78 | - [Machine learning models and libraries](https://github.com/tensorflow/swift-models)
79 |
80 |
--------------------------------------------------------------------------------
/Sources/BasicDifferentiation/main.swift:
--------------------------------------------------------------------------------
1 | // First, we need to enable differentiable Swift via a special import:
2 |
3 | import _Differentiation
4 |
5 | // You can mark a function as differentiable if it has at least one differentiable
6 | // parameter and a differentiable result. The `@differentiable` annotation marks the
7 | // function, and the `reverse` specifier indicates that we want reverse-mode
8 | // differentiation. In the current implementation of differentiable Swift, only reverse-mode
9 | // differentiation is fully functional.
10 |
11 | @differentiable(reverse)
12 | func square(_ x: Float) -> Float {
13 | return x * x
14 | }
15 |
16 | // Note that an inout value takes the place of both parameter and result, and a mutating function
17 | // implicitly passes `self` as inout.
18 |
19 | @differentiable(reverse)
20 | func squared(_ x: inout Float) {
21 | x = x * x
22 | }
23 |
24 | // To declare a type as being differentiable, it needs to conform to the Differentiable protocol.
25 | // Generally, types are differentiable if they are continuous or if all of their properties are
26 | // continuous and Differentiable. However, Differentiable types can have non-Differentiable
27 | // properties, if those properties are annotated with @noDerivative. Those non-Differentiable
28 | // properties will then not participate in differentiation.
29 | //
30 | // Differentiable properties must also be declared as `var` and not `let`, because for them
31 | // to be used in gradient descent they must be movable by a tangent vector.
32 |
33 | struct MyValue: Differentiable {
34 | var x: Float
35 | var y: Double
36 | @noDerivative
37 | let isTracked: Bool
38 | }
39 |
40 | // To activate the differentiation machinery, there are some special built-in functions in the
41 | // `_Differentiation` module within the Swift standard library that can give you the value from
42 | // the forward pass through a differentiable function as well as the backward pass.
43 | //
44 | // For functions with scalar outputs, `valueWithGradient(at:of:)` will return both the value and
45 | // the calculated gradient at a given input value:
46 |
47 | let (value, gradient) = valueWithGradient(at: 3.0, of: square)
48 | print("The value is \(value), and the gradient is \(gradient)")
49 |
50 | // In the more general case, `valueWithPullback(at:of:)` will provide the value and a pullback
51 | // function for a differentiable function. For the Float-returning function above, the gradient
52 | // is obtained by passing 1 into the pullback function:
53 |
54 | let (value2, pullback) = valueWithPullback(at: 3.0, of: square)
55 | print("The value is \(value2), and the pullback at 1.0 is \(pullback(1.0))")
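
// The same machinery works with the custom MyValue type defined above. Its synthesized
// TangentVector mirrors the differentiable stored properties: x receives a gradient
// component, while the @noDerivative isTracked property does not participate.

let myValueGradient = gradient(at: MyValue(x: 2.0, y: 3.0, isTracked: true)) { value in
    value.x * 5
}
print("MyValue gradient: \(myValueGradient)")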
56 |
--------------------------------------------------------------------------------
/Sources/BasicGradientDescent/main.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | // In this example, we'll set up a very simple perceptron neural network and try to use gradient
4 | // descent to have it mimic the functionality of an AND gate.
5 |
6 | struct Perceptron: Differentiable {
7 | var weight1: Float = .random(in: -1..<1)
8 | var weight2: Float = .random(in: -1..<1)
9 | var bias: Float = 0.0
10 |
11 | @differentiable(reverse)
12 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
13 | // Determine the weighted contribution from each input, plus bias.
14 | let output = (weight1 * x1) + (weight2 * x2) + bias
15 | // Apply a nonlinear activation function to the output.
16 | if output >= 0.0 {
17 | return output
18 | } else {
19 | return 0.1 * output
20 | }
21 | }
22 | }
23 |
24 | // This is our truth table for the expected output from various inputs.
25 |
26 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
27 | (x1: 0, x2: 0, y: 0),
28 | (x1: 0, x2: 1, y: 0),
29 | (x1: 1, x2: 0, y: 0),
30 | (x1: 1, x2: 1, y: 1),
31 | ]
32 |
33 | // A loss function provides a measure of how far off we are from our target behavior.
34 |
35 | @differentiable(reverse)
36 | func loss(model: Perceptron) -> Float {
37 | var loss: Float = 0
38 | for (x1, x2, y) in andGateData {
39 | let prediction = model(x1, x2)
40 | let error = y - prediction
41 | loss = loss + error * error / 2
42 | }
43 | return loss
44 | }
45 |
46 | // Finally, we initialize the model with random weights and a zero bias:
47 |
48 | var model = Perceptron()
49 |
50 | // and then we perform training by finding the loss, determining a tangent vector that would
51 | // take us in a direction that should reduce that loss, and moving our model parameters by
52 | // that tangent vector. Over the course of training, we'll watch our loss values decrease as the
53 | // model is trained to replicate an AND gate.
54 |
55 | for _ in 0..<100 {
56 | let (loss, pullback) = valueWithPullback(at: model, of: loss)
57 | print("Loss: \(loss)")
58 | let gradient = pullback(-0.1)
59 | model.move(by: gradient)
60 | }
61 |
62 | // Let's try out our trained model on some test values:
63 |
64 | print("Trained model results:")
65 |
66 | let value1 = model(1.0, 0.0)
67 |
68 | print("Value at (1.0, 0.0): \(value1)")
69 |
70 | let value2 = model(1.0, 1.0)
71 |
72 | print("Value at (1.0, 1.0): \(value2)")
73 |
--------------------------------------------------------------------------------
/Sources/CustomDerivatives/main.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 | import _Differentiation
3 |
4 | // In addition to compiler-generated derivatives, you can register your own custom derivatives
5 | // for any function to make them differentiable. This is particularly useful for functions that
6 | // have been defined in C libraries, like basic math functions.
7 | //
8 | // As an example of this, the following is a custom derivative defined for the `sqrt()` function.
9 | // `sqrt()` is a function where we don't have access to modify the original source code, so we
10 | // need to be able to register a derivative for it so that it can be used as part of differentiable
11 | // functions.
12 | //
13 | // To do so, we define a vector-Jacobian product (VJP); for more, see the excellent JAX
14 | // documentation: https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html#vector-jacobian-products-vjps-aka-reverse-mode-autodiff
15 | // The VJP takes as its input the original parameters to the main function and provides as output
16 | // a tuple containing the value produced by the original function and a pullback function. The
17 | // pullback has as its inputs the tangent vectors of each differentiable result and as its output
18 | // the tangent vectors of each differentiable parameter. Note that for some types, like Double, the
19 | // type of the tangent vector is the same as the type of the base type.
20 |
21 | @derivative(of: sqrt)
22 | public func sqrtVJP(_ value: Double) -> (value: Double, pullback: (Double) -> Double) {
23 | let output = sqrt(value)
24 | func pullback(_ tangentVector: Double) -> Double {
25 | return tangentVector / (2 * output)
26 | }
27 | return (value: output, pullback: pullback)
28 | }
29 |
30 | // Once a custom derivative has been defined for a function, that function is now differentiable:
31 |
32 | let (value, gradient) = valueWithGradient(at: 9.0, of: sqrt)
33 | print("The sqrt() value is \(value), and the gradient is \(gradient)")
34 |
35 | // Custom derivatives are also useful in cases where the function may not be smooth across
36 | // all values, and thus may not have a derivative at all points. We can then provide a custom
37 | // derivative that specifies the approximation to use, such as in the case of `min()`:
38 | //
39 | // For min(): "Returns: The lesser of `x` and `y`. If `x` is equal to `y`, returns `x`."
40 | // https://github.com/apple/swift/blob/main/stdlib/public/core/Algorithm.swift#L18
41 |
42 | @derivative(of: min)
43 | public func minVJP<T: Comparable & Differentiable>(
44 | _ lhs: T,
45 | _ rhs: T
46 | ) -> (value: T, pullback: (T.TangentVector) -> (T.TangentVector, T.TangentVector)) {
47 | func pullback(_ tangentVector: T.TangentVector) -> (T.TangentVector, T.TangentVector) {
48 | if lhs <= rhs {
49 | return (tangentVector, .zero)
50 | } else {
51 | return (.zero, tangentVector)
52 | }
53 | }
54 | return (value: min(lhs, rhs), pullback: pullback)
55 | }
56 |
57 | let (value2, gradient2) = valueWithGradient(at: 3.0, 4.0, of: min)
58 | print("The min() value is \(value2), and the gradient is \(gradient2)")
59 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/DifferentiableSwiftExamples.md:
--------------------------------------------------------------------------------
1 | # ``DifferentiableSwiftExamples``
2 |
3 | @Metadata {
4 | @DisplayName("Differentiable Swift examples")
5 | }
6 |
7 | Articles and tutorials on the experimental language feature of differentiable Swift.
8 |
9 | ## Overview
10 |
11 | Differentiable Swift is an experimental language feature for the [Swift language](https://www.swift.org) that is currently
12 | in the [pitch phase](https://forums.swift.org/t/differentiable-programming-for-gradient-based-machine-learning/42147) of
13 | the Swift Evolution process. The goal of this feature is to provide first-class, language-integrated support for
14 | differentiable programming, making Swift the first general-purpose, statically typed programming language to have automatic
15 | differentiation built in.
16 |
17 | Differentiable Swift is purely a language feature and isn't tied to any specific machine learning framework or platform.
18 | It provides a means of building such frameworks in Swift, and works wherever Swift does: from Linux to macOS to
19 | [WebAssembly](https://swiftwasm.org).
20 |
21 |
22 | ## Topics
23 |
24 | ### Articles
25 |
26 | - <doc:Setup>
27 | - <doc:UsingDifferentiableSwift>
28 | - <doc:SharpEdgesInDifferentiableSwift>
29 |
30 | ### Tutorials
31 |
32 | - <doc:Tutorials/UsingDifferentiableSwift>
33 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-01.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-02.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | func squared(_ input: Double) -> Double {
4 | input * input
5 | }
6 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-03.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | @differentiable(reverse)
4 | func squared(_ input: Double) -> Double {
5 | input * input
6 | }
7 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-04.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | @differentiable(reverse)
4 | func squared(_ input: Double) -> Double {
5 | input * input
6 | }
7 |
8 | let (value, gradient) = valueWithGradient(at: 3.0, of: squared)
9 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-01-05.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | @differentiable(reverse)
4 | func squared(_ input: Double) -> Double {
5 | input * input
6 | }
7 |
8 | let (value, gradient) = valueWithGradient(at: 3.0, of: squared)
9 | print("The value is \(value), and the gradient is \(gradient).")
10 |
11 | // The value is 9.0, and the gradient is 6.0.
12 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-01.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | func squared(_ input: Double) -> Double {
4 | input * input
5 | }
6 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-02.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | func squared(_ input: Double) -> Double {
4 | input * input
5 | }
6 |
7 | @derivative(of: squared)
8 | func vjpSquared()
9 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-03.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | func squared(_ input: Double) -> Double {
4 | input * input
5 | }
6 |
7 | @derivative(of: squared)
8 | func vjpSquared(_ input: Double) -> (
9 | value: Double,
10 | pullback: (Double.TangentVector) -> Double.TangentVector
11 | )
12 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-04.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | func squared(_ input: Double) -> Double {
4 | input * input
5 | }
6 |
7 | @derivative(of: squared)
8 | func vjpSquared(_ input: Double) -> (
9 | value: Double,
10 | pullback: (Double) -> Double
11 | )
12 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-02-05.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | func squared(_ input: Double) -> Double {
4 | input * input
5 | }
6 |
7 | @derivative(of: squared)
8 | func vjpSquared(_ input: Double) -> (
9 | value: Double,
10 | pullback: (Double) -> Double
11 | ) {
12 | let output = squared(input)
13 | func pullback(_ tangentVector: Double) -> Double {
14 | return tangentVector * 2 * input
15 | }
16 | return (value: output, pullback: pullback)
17 | }
18 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-03-01.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 | import _Differentiation
3 |
4 | let (value, gradient) = valueWithGradient(at: 3.0, 4.0, of: min)
5 |
6 | // error: expression is not differentiable
7 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-03-02.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 | import _Differentiation
3 |
4 | @derivative(of: min)
5 | public func minVJP<T: Comparable & Differentiable>(
6 | _ lhs: T,
7 | _ rhs: T
8 | ) -> (value: T, pullback: (T.TangentVector) -> (T.TangentVector, T.TangentVector)) {
9 | func pullback(_ tangentVector: T.TangentVector) -> (T.TangentVector, T.TangentVector) {
10 | if lhs <= rhs {
11 | return (tangentVector, .zero)
12 | } else {
13 | return (.zero, tangentVector)
14 | }
15 | }
16 | return (value: min(lhs, rhs), pullback: pullback)
17 | }
18 |
19 | let (value, gradient) = valueWithGradient(at: 3.0, 4.0, of: min)
20 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableFunctions/DifferentiableFunctions-03-03.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 | import _Differentiation
3 |
4 | @derivative(of: min)
5 | public func minVJP<T: Comparable & Differentiable>(
6 | _ lhs: T,
7 | _ rhs: T
8 | ) -> (value: T, pullback: (T.TangentVector) -> (T.TangentVector, T.TangentVector)) {
9 | func pullback(_ tangentVector: T.TangentVector) -> (T.TangentVector, T.TangentVector) {
10 | if lhs <= rhs {
11 | return (tangentVector, .zero)
12 | } else {
13 | return (.zero, tangentVector)
14 | }
15 | }
16 | return (value: min(lhs, rhs), pullback: pullback)
17 | }
18 |
19 | let (value, gradient) = valueWithGradient(at: 3.0, 4.0, of: min)
20 | print("The min() value is \(value), and the gradient is \(gradient).")
21 |
22 | // The min() value is 3.0, and the gradient is (1.0, 0.0).
23 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-01.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-02.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct MyValue {
4 | var x: Float
5 | var y: Double
6 | }
7 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-03.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct MyValue: Differentiable {
4 | var x: Float
5 | var y: Double
6 | }
7 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-04.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct MyValue: Differentiable {
4 | var x: Float
5 | var y: Double
6 | let isTracked: Bool
7 | }
8 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-01-05.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct MyValue: Differentiable {
4 | var x: Float
5 | var y: Double
6 | @noDerivative
7 | let isTracked: Bool
8 | }
9 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-02-01.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Simple: Differentiable {
4 | var value1: Float
5 | var value2: Double
6 | }
7 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-02-02.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Simple: Differentiable {
4 | var value1: Float
5 | var value2: Double
6 |
7 | struct TangentVector: AdditiveArithmetic, Differentiable {
8 | var otherValue1: Float.TangentVector
9 | var otherValue2: Double.TangentVector
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/DifferentiableTypes/DifferentiableTypes-02-03.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Simple: Differentiable {
4 | var value1: Float
5 | var value2: Double
6 |
7 | struct TangentVector: AdditiveArithmetic, Differentiable {
8 | var otherValue1: Float.TangentVector
9 | var otherValue2: Double.TangentVector
10 | }
11 |
12 | mutating func move(by offset: TangentVector) {
13 | self.value1.move(by: offset.otherValue1)
14 | self.value2.move(by: offset.otherValue2)
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-01.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-02.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 | }
8 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-03.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 | }
8 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-04.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-05.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-06.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-07.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-08.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 | var loss: Float = 0
29 | for (x1, x2, y) in andGateData {
30 | let prediction = model(x1, x2)
31 | let error = y - prediction
32 | loss = loss + error * error / 2
33 | }
34 | return loss
35 | }
36 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-09.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 | var loss: Float = 0
29 | for (x1, x2, y) in andGateData {
30 | let prediction = model(x1, x2)
31 | let error = y - prediction
32 | loss = loss + error * error / 2
33 | }
34 | return loss
35 | }
36 |
37 | var model = Perceptron()
38 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-10.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 | var loss: Float = 0
29 | for (x1, x2, y) in andGateData {
30 | let prediction = model(x1, x2)
31 | let error = y - prediction
32 | loss = loss + error * error / 2
33 | }
34 | return loss
35 | }
36 |
37 | var model = Perceptron()
38 |
39 | for _ in 0..<100 {
40 | }
41 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-11.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 | var loss: Float = 0
29 | for (x1, x2, y) in andGateData {
30 | let prediction = model(x1, x2)
31 | let error = y - prediction
32 | loss = loss + error * error / 2
33 | }
34 | return loss
35 | }
36 |
37 | var model = Perceptron()
38 |
39 | for _ in 0..<100 {
40 | let (loss, pullback) = valueWithPullback(at: model, of: loss)
41 | print("Loss: \(loss)")
42 | }
43 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-12.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 | var loss: Float = 0
29 | for (x1, x2, y) in andGateData {
30 | let prediction = model(x1, x2)
31 | let error = y - prediction
32 | loss = loss + error * error / 2
33 | }
34 | return loss
35 | }
36 |
37 | var model = Perceptron()
38 |
39 | for _ in 0..<100 {
40 | let (loss, pullback) = valueWithPullback(at: model, of: loss)
41 | print("Loss: \(loss)")
42 | let gradient = pullback(-0.1)
43 | model.move(by: gradient)
44 | }
45 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Resources/Code/GradientDescent/GradientDescent-01-13.swift:
--------------------------------------------------------------------------------
1 | import _Differentiation
2 |
3 | struct Perceptron: Differentiable {
4 | var weight1: Float = .random(in: -1..<1)
5 | var weight2: Float = .random(in: -1..<1)
6 | var bias: Float = 0.0
7 |
8 | @differentiable(reverse)
9 | func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
10 | let output = (weight1 * x1) + (weight2 * x2) + bias
11 | if output >= 0.0 {
12 | return output
13 | } else {
14 | return 0.1 * output
15 | }
16 | }
17 | }
18 |
19 | let andGateData: [(x1: Float, x2: Float, y: Float)] = [
20 | (x1: 0, x2: 0, y: 0),
21 | (x1: 0, x2: 1, y: 0),
22 | (x1: 1, x2: 0, y: 0),
23 | (x1: 1, x2: 1, y: 1),
24 | ]
25 |
26 | @differentiable(reverse)
27 | func loss(model: Perceptron) -> Float {
28 | var loss: Float = 0
29 | for (x1, x2, y) in andGateData {
30 | let prediction = model(x1, x2)
31 | let error = y - prediction
32 | loss = loss + error * error / 2
33 | }
34 | return loss
35 | }
36 |
37 | var model = Perceptron()
38 |
39 | for _ in 0..<100 {
40 | let (loss, pullback) = valueWithPullback(at: model, of: loss)
41 | print("Loss: \(loss)")
42 | let gradient = pullback(-0.1)
43 | model.move(by: gradient)
44 | }
45 |
46 | let value1 = model(1.0, 0.0)
47 | print("Value at (1.0, 0.0): \(value1)")
48 | // Value at (1.0, 0.0): 0.1
49 | let value2 = model(1.0, 1.0)
50 | print("Value at (1.0, 1.0): \(value2)")
51 | // Value at (1.0, 1.0): 0.9
52 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Setup.md:
--------------------------------------------------------------------------------
1 | # Setup
2 |
3 | How to set up your environment and get started with differentiable Swift.
4 |
5 | ## Overview
6 |
7 | Swift toolchains that ship with Xcode lack the `_Differentiation` module needed by differentiable Swift. In order to use differentiable Swift, you will need to install a toolchain from Swift.org. There are multiple ways to download, install, and manage different toolchain versions, depending on your needs and platform.
8 |
9 | After a toolchain has been installed, there are a few things to take into consideration in order to successfully run your differentiable Swift code.
10 |
11 | ### Installing a toolchain
12 |
13 | Toolchains can be downloaded and installed manually from [swift.org](https://swift.org/download). You can pick either a stable release or a nightly snapshot; the nightlies often include more features and performance improvements.
14 |
15 | When working with different toolchain versions across projects, we suggest using one of the following tools to manage your toolchains:
16 |
17 | - [swiftenv](https://github.com/kylef/swiftenv) available for macOS and Linux
18 | - [swiftly](https://github.com/swift-server/swiftly) available for Linux (macOS support is on the roadmap)
19 |
20 | ### Compiling differentiable Swift code
21 |
22 | Compiling differentiable Swift code on Linux is easy! There's no extra setup needed and you can simply run the following if you're working on a Swift package:
23 | ```bash
24 | swift run
25 | ```
26 |
27 | On macOS, when running code directly from Xcode, the IDE handles all toolchain-specific configuration for you.
28 |
29 | However, when compiling differentiable Swift code on macOS from the command line, you must set the following environment variables to make sure the custom toolchain uses the right macOS SDK and Swift runtime:
30 |
31 | ```bash
32 | export SDKROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.sdk
33 | ```
34 | ```bash
35 | export DYLD_LIBRARY_PATH=/Library/Developer/Toolchains/swift-DEVELOPMENT-SNAPSHOT-2023-11-20-a.xctoolchain/usr/lib/swift/macosx
36 | ```
37 |
38 | Now everything is set up and you can simply run:
39 | ```bash
40 | swift run
41 | ```
42 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/SharpEdgesInDifferentiableSwift.md:
--------------------------------------------------------------------------------
1 | # Sharp edges in differentiable Swift
2 |
3 | An overview of some of the currently missing capabilities in differentiable Swift.
4 |
5 | ## Overview
6 |
7 | Inspired by the original [Swift for TensorFlow notebook](https://www.tensorflow.org/swift/tutorials/Swift_autodiff_sharp_edges), we intend for this to be an up-to-date and comprehensive list of the common issues you may encounter when working with differentiable Swift:
8 |
9 | - Loops
10 | - Map and Reduce
11 | - Array subscript setters
12 | - Floating point type conversions
13 | - Keypath subscripting
14 | - Other
15 |
16 | ### Loops
17 | Loops over collections of `Differentiable` values unfortunately aren't differentiable yet, so the compiler cannot determine the derivative of the following function:
18 | ```swift
19 | @differentiable(reverse)
20 | func loopy(values: [Double]) -> Double {
21 | var total = 0.0
22 | for value in values {
23 | total += value
24 | }
25 | return total
26 | }
27 | ```
28 |
29 | Luckily, there are ways around this! Reading a value at a given index is differentiable, so as a workaround we can loop over the indices instead of the values, as seen in the example below. The one thing to note here is that we access the indices by wrapping them in `withoutDerivative(at:)`. This tells the compiler that we don't want to take the derivative of the property that returns the indices (which is not differentiable, since indices are discrete values).
30 | ```swift
31 | @differentiable(reverse)
32 | func loopy(values: [Double]) -> Double {
33 | var total = 0.0
34 | for index in withoutDerivative(at: values.indices) {
35 | total += values[index]
36 | }
37 | return total
38 | }
39 | ```
40 |
41 | This will return the correct gradient for this function given a certain input:
42 | ```swift
43 | let (value, gradient) = valueWithGradient(at: [1.0, 2.0, 3.0], of: loopy)
44 | // value = 6.0
45 | // gradient = [1.0, 1.0, 1.0], i.e. a change in any of the values in the array will affect the output of the function equally.
46 | ```
47 |
48 | ### Map and Reduce
49 | The `map` and `reduce` methods do not currently support closures marked with `@differentiable`, but there are special differentiable versions of these that work just like the ones you're used to:
50 | ```swift
51 | let a = [1.0, 2.0, 3.0]
52 | let aPlusOne = a.differentiableMap { $0 + 1.0 } // [2.0, 3.0, 4.0]
53 | let aSum = a.differentiableReduce(0.0) { $0 + $1 } // 6.0
54 | ```
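55 | These compose like the standard `map` and `reduce`, so you can differentiate straight through a chained pipeline. A minimal sketch (the function name `incrementedSum` is hypothetical):
56 | ```swift
57 | @differentiable(reverse)
58 | func incrementedSum(_ values: [Double]) -> Double {
59 |     values
60 |         .differentiableMap { $0 + 1.0 }
61 |         .differentiableReduce(0.0) { $0 + $1 }
62 | }
63 | 
64 | let g = gradient(at: [1.0, 2.0, 3.0], of: incrementedSum)
65 | // Each element contributes 1.0 to the sum, so every gradient component is 1.0.
66 | ```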
55 |
56 | ### Array subscript setters
57 | Currently, the subscript setters on arrays (`array[0] = 1.0`) are not differentiable. Under the hood, this is because `_modify` subscript accessors don't support differentiability yet. (Work is ongoing, and this feature should land in Swift soon.)
58 | We can get around this for now by extending the `Array` type with a mutating `update(at:with:)` function:
59 | ```swift
60 | extension Array where Element: Differentiable {
61 | @differentiable(reverse)
62 | mutating func update(at index: Int, with newValue: Element) {
63 | self[index] = newValue
64 | }
65 |
66 | @derivative(of: update)
67 | mutating func vjpUpdate(at index: Int, with newValue: Element)
68 | -> (value: Void, pullback: (inout TangentVector) -> (Element.TangentVector))
69 | {
70 | self.update(at: index, with: newValue)
71 | return ((), { v in
72 | let dElement = v[index]
73 | v.base[index] = .zero
74 | return dElement
75 | })
76 | }
77 | }
78 | ```
79 | The first function wraps the subscript setter and marks it as differentiable. The second defines a custom VJP (vector-Jacobian product), telling the compiler what the derivative of the wrapped function is.
80 |
81 | Considering:
82 | ```swift
83 | var b: [Double] = [1.0, 2.0, 3.0]
84 | ```
85 | Then, instead of writing the following (which unfortunately is not yet differentiable):
86 | ```swift
87 | b[0] = 17.0
88 | ```
89 | We can now write:
90 | ```swift
91 | b.update(at: 0, with: 17.0)
92 | ```
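93 | With that in place, gradients flow through the mutation as expected. A short sketch (the function name `overwriteFirst` is hypothetical, and assumes the `update(at:with:)` extension above):
94 | ```swift
95 | @differentiable(reverse)
96 | func overwriteFirst(_ values: [Double]) -> Double {
97 |     var copy = values
98 |     copy.update(at: 0, with: values[1] * 10.0)
99 |     return copy[0] + copy[2]
100 | }
101 | 
102 | let g = gradient(at: [1.0, 2.0, 3.0], of: overwriteFirst)
103 | // values[0] is overwritten, so it gets a zero gradient; values[1] flows
104 | // through the * 10.0; values[2] passes straight through: [0.0, 10.0, 1.0]
105 | ```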
93 |
94 | ### Floating point type conversions
95 |
96 | If you're converting between floating-point types such as `Float` and `Double`, be aware that their initializers currently aren't differentiable. This can be remedied by adding a variation of the following extension for each of the floating-point types you need:
97 | ```swift
98 | extension Float {
99 | @usableFromInline
100 | @derivative(of: init(_:))
101 | static func vjpInit(_ a: Double) -> (value: Float, pullback: (Float) -> Double) {
102 | func pullback(_ v: Float) -> Double {
103 | return Double(v)
104 | }
105 | return (value: Float(a), pullback: pullback)
106 | }
107 | }
108 | ```
109 | This allows the following differentiable code to now compile:
110 | ```swift
111 | @differentiable(reverse)
112 | func convertToFloat(value: Double) -> Float {
113 | Float(value)
114 | }
115 | ```
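116 | The conversion in the other direction needs its own derivative. As a sketch of such a variation, registering a derivative for the `Double`-from-`Float` initializer would mirror the extension above:
117 | ```swift
118 | extension Double {
119 |     @usableFromInline
120 |     @derivative(of: init(_:))
121 |     static func vjpInit(_ a: Float) -> (value: Double, pullback: (Double) -> Float) {
122 |         func pullback(_ v: Double) -> Float {
123 |             return Float(v)
124 |         }
125 |         return (value: Double(a), pullback: pullback)
126 |     }
127 | }
128 | ```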
116 | Hopefully this will be part of the Swift standard library in the near future!
117 |
118 |
119 | ### Keypath subscripting
120 | `KeyPath` subscripting (get or set) doesn't work out of the box, but once again there's a workaround to get similar behavior:
121 | ```swift
122 | extension Differentiable {
123 | // A read that's O(n) on the backwards pass (because of zero tangent vector materialization).
124 | @inlinable
125 | @differentiable(reverse where Self == TangentVector, T == T.TangentVector)
126 | public func read<T: Differentiable>(at member: WritableKeyPath<Self, T>) -> T {
128 | return self[keyPath: member]
129 | }
130 |
131 | @inlinable
132 | @derivative(of: read)
133 | public func vjpRead<T: Differentiable>(at member: WritableKeyPath<Self, T>) -> (value: T, pullback: (T.TangentVector) -> Self.TangentVector)
134 | where Self == TangentVector, T == T.TangentVector
135 | {
136 | return (value: self[keyPath: member], pullback: { downstream in
137 | var zeroes = TangentVector.zero
138 | zeroes[keyPath: member] = downstream
139 | return zeroes
140 | })
141 | }
142 | }
143 | ```
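144 | As a usage sketch (the `Params` struct and `readA` function are hypothetical, and this assumes the `read(at:)` extension above compiles on your toolchain): a struct declared as both `Differentiable` and `AdditiveArithmetic` is synthesized as its own `TangentVector`, which satisfies the `Self == TangentVector` constraint.
145 | ```swift
146 | struct Params: Differentiable, AdditiveArithmetic {
147 |     var a: Double
148 |     var b: Double
149 | }
150 | 
151 | @differentiable(reverse)
152 | func readA(_ params: Params) -> Double {
153 |     return params.read(at: \Params.a) * 3.0
154 | }
155 | 
156 | let g = gradient(at: Params(a: 1.0, b: 2.0), of: readA)
157 | // g.a == 3.0 and g.b == 0.0: the pullback materializes a zero tangent
158 | // and writes the incoming derivative only at the `a` key path.
159 | ```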
144 |
145 | ### Other
146 |
147 | - Forward-mode differentiation (`@differentiable(forward)`, JVPs) is only partially implemented.
148 | - Differentiation through functions tagged `@_alwaysEmitIntoClient` isn't yet supported. The most common cases are SIMD functions, like `.sum()`.
149 | - The standard library does not yet support `Dictionary` differentiation.
150 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/DifferentiableFunctions.tutorial:
--------------------------------------------------------------------------------
1 | @Tutorial(time: 15) {
2 | @Intro(title: "Differentiable functions") {
3 | This tutorial explores the ways that you can create, use, and customize differentiable Swift functions.
4 | }
5 |
6 | @Section(title: "Marking functions as differentiable") {
7 | @ContentAndMedia {
8 | Add the `@differentiable` attribute to a function and obtain its derivative.
9 | }
10 |
11 | @Steps {
12 | @Step {
13 | Start by importing the `_Differentiation` module. Differentiable Swift is an experimental language feature, and is activated only for files that import this module.
14 | @Code(name: "main.swift", file: "DifferentiableFunctions-01-01.swift")
15 | }
16 |
17 | @Step {
18 | Then create a function that we want to make differentiable.
19 | In this case, we'll go with a simple function that takes an input and squares it.
20 |
21 | @Code(name: "main.swift", file: "DifferentiableFunctions-01-02.swift")
22 | }
23 |
24 | @Step {
25 | Mark the function with `@differentiable(reverse)` to indicate that we want it to be used for reverse-mode differentiation.
26 | @Code(name: "main.swift", file: "DifferentiableFunctions-01-03.swift")
27 | }
28 |
29 | Note that all arguments and results of a `@differentiable` function should have types conforming to the `Differentiable` protocol.
30 | Such types are generally continuous; `Float` and `Double` are among the standard library types conforming to `Differentiable`.
31 |
32 | @Step {
33 | The compiler will automatically generate the reverse-mode derivative (pullback) of a `@differentiable(reverse)` function. We can access that automatically-generated pullback via one of several built-in functions.
34 | @Code(name: "main.swift", file: "DifferentiableFunctions-01-04.swift")
35 | }
36 |
37 | @Step {
38 | Printing the computed value and gradient returns the result of the function and its derivative, as expected.
39 | @Code(name: "main.swift", file: "DifferentiableFunctions-01-05.swift")
40 | }
41 | }
42 | }
43 |
44 | @Section(title: "Defining custom derivatives") {
45 | @ContentAndMedia {
46 | Add a custom derivative to a Swift function.
47 |
48 | Functions with differentiable arguments and results can have automatically generated derivatives provided by the compiler.
49 | However, sometimes you may want to provide your own custom derivatives to override this machinery.
50 | }
51 |
52 | @Steps {
53 | @Step {
54 | We'll start with a function that squares a number.
55 | @Code(name: "main.swift", file: "DifferentiableFunctions-02-01.swift")
56 | }
57 |
58 | @Step {
59 | Normally, we would use the `@differentiable(reverse)` attribute to let the compiler build a derivative of this function. Instead, we'll register a custom derivative to be associated with this function.
60 | @Code(name: "main.swift", file: "DifferentiableFunctions-02-02.swift")
61 | }
62 |
63 | Note: the `vjp` prefix used here stands for vector-Jacobian product.
64 |
65 | @Step {
66 | The function signature of this custom derivative has a particular shape. It returns a tuple, with the first element being the value that would normally be returned from the base function, and the second a pullback closure.
67 | @Code(name: "main.swift", file: "DifferentiableFunctions-02-03.swift")
68 | }
69 |
70 | @Step {
71 | Differentiable types have an associated `TangentVector` type that is used in derivatives. In the case of a simple type like `Double`, it is its own `TangentVector` so we can simplify this.
72 | @Code(name: "main.swift", file: "DifferentiableFunctions-02-04.swift")
73 | }
74 |
75 | @Step {
76 | Finally, we define the body of the custom derivative by calculating the normal result of the function and then constructing the pullback function.
77 | @Code(name: "main.swift", file: "DifferentiableFunctions-02-05.swift")
78 | }
79 | }
80 | }
81 |
82 | @Section(title: "Making external functions differentiable") {
83 | @ContentAndMedia {
84 | Register a manual derivative with a system function to make it differentiable.
85 |
86 | The Swift compiler can automatically generate derivatives for Swift functions within a module, but non-Swift functions or Swift functions defined in an external module and not marked with `@differentiable` will not have automatic derivatives. To make those functions differentiable, you need to register your own derivatives for them.
87 | }
88 |
89 | @Steps {
90 | @Step {
91 | If we try to access the derivative of an external function, we may get an error that it is not differentiable.
92 | @Code(name: "main.swift", file: "DifferentiableFunctions-03-01.swift")
93 | }
94 |
95 | @Step {
96 | In this case, `min()` has not been marked as `@differentiable` and thus has no automatically-provided derivative. Even though the function definition exists outside of this module, we can register our own derivative for it.
97 | @Code(name: "main.swift", file: "DifferentiableFunctions-03-02.swift")
98 | }
99 |
100 | @Step {
101 | By doing so, the function is now differentiable and can participate in the rest of the differentiable Swift machinery.
102 | @Code(name: "main.swift", file: "DifferentiableFunctions-03-03.swift")
103 | }
104 | }
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/DifferentiableTypes.tutorial:
--------------------------------------------------------------------------------
1 | @Tutorial(time: 15) {
2 | @Intro(title: "Differentiable types") {
3 | `@differentiable` functions require that their arguments and results conform to the `Differentiable` protocol. Some of the standard library types such as `Float`, `Double`, and `Array` already conform to this protocol.
4 |
5 | In this tutorial we will go into writing our own `Differentiable` types for use with more complex `@differentiable` functions.
6 | }
7 |
8 | @Section(title: "Conforming types to `Differentiable`") {
9 | @ContentAndMedia {
10 | Conform custom types to `Differentiable`, and use them as arguments to `@differentiable` functions.
11 |
12 | Differentiable functions must have at least one argument and one result that conform to the `Differentiable` protocol. Many continuous types present in the standard library, like `Float` or `Double`, conform to `Differentiable`. For you to use your own custom types as active arguments or results in differentiable functions, they must also conform to `Differentiable`.
13 | }
14 |
15 | @Steps {
16 | @Step {
17 | Differentiable Swift is an experimental language feature. To enable it for a file, you must first import the `_Differentiation` module.
18 | @Code(name: "main.swift", file: "DifferentiableTypes-01-01.swift")
19 | }
20 |
21 | @Step {
22 | Custom types that only contain other `Differentiable`-conforming members are easy to make `Differentiable` themselves.
23 | @Code(name: "main.swift", file: "DifferentiableTypes-01-02.swift")
24 | }
25 |
26 | @Step {
27 | To do so, simply add a `Differentiable` conformance when they are defined.
28 | @Code(name: "main.swift", file: "DifferentiableTypes-01-03.swift")
29 | }
30 |
31 | @Step {
32 | A custom type can have non-`Differentiable` members.
33 | @Code(name: "main.swift", file: "DifferentiableTypes-01-04.swift")
34 | }
35 |
36 | @Step {
37 | In that case, annotate those members with the `@noDerivative` attribute.
38 | @Code(name: "main.swift", file: "DifferentiableTypes-01-05.swift")
39 | }
40 | }
41 | }
42 |
43 | @Section(title: "Defining custom tangent vectors for types") {
44 | @ContentAndMedia {
45 | Define a custom tangent vector for a type.
46 |
47 | Every differentiable type has a `TangentVector` associated with it, which represents the rate of change of that type when used in a derivative.
48 | The compiler normally synthesizes these tangent vectors for you on types conforming to `Differentiable`.
49 |
50 | However, there are situations in which you may want to define a custom tangent vector for a type.
51 | For example, `Array` doesn't conform to `AdditiveArithmetic`, and therefore can't have a tangent vector automatically synthesized.
52 | }
53 |
54 | @Steps {
55 | @Step {
56 | We begin with a simple differentiable type.
57 | @Code(name: "main.swift", file: "DifferentiableTypes-02-01.swift")
58 | }
59 |
60 | @Step {
61 | A custom tangent vector must either be a subtype named `TangentVector` or aliased to that name. It can contain a custom internal structure, but must conform to `AdditiveArithmetic` and `Differentiable`.
62 | @Code(name: "main.swift", file: "DifferentiableTypes-02-02.swift")
63 | }
64 |
65 | @Step {
66 | Finally, a `move(by:)` function must be defined that moves an instance of the base type by a small step along a direction. If the `TangentVector` you are stepping by is composed of other types' `TangentVector`s, you can use those existing `move(by:)` functions within this new one.
67 | @Code(name: "main.swift", file: "DifferentiableTypes-02-03.swift")
68 | }
69 | }
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/GradientDescent.tutorial:
--------------------------------------------------------------------------------
1 | @Tutorial(time: 20) {
2 | @Intro(title: "Gradient descent") {
3 | Gradient descent optimization is a powerful technique for optimizing arbitrary parameters in a complex system to arrive at a desired result.
4 |
5 | This tutorial will demonstrate the use of gradient descent to train a perceptron to perform a boolean logic function.
6 | }
7 |
8 | @Section(title: "Training a perceptron") {
9 | @ContentAndMedia {
10 | Train a basic perceptron to mimic an AND gate.
11 |
12 | A perceptron is a simple, single-level neural network. In this case, we'll build one with two neurons, one for each input in the AND gate. The neurons will have trainable weights and a bias value, along with a nonlinear activation function.
13 |
14 | This perceptron will be trained against the desired behavior of an AND gate, and weights for the neurons will be optimized via gradient descent.
15 | }
16 | @Steps {
17 | @Step {
18 | Differentiable Swift is an experimental language feature. To begin with, we must enable it by importing the `_Differentiation` module.
19 | @Code(name: "main.swift", file: "GradientDescent-01-01.swift")
20 | }
21 | @Step {
22 | Let's create the perceptron as a custom data type. The weights for the connections between neurons will be randomly-initialized floating point values, and the bias will start at zero.
23 | @Code(name: "main.swift", file: "GradientDescent-01-02.swift")
24 | }
25 | @Step {
26 | Because all of the properties of our perceptron conform to `Differentiable`, it's easy to make the perceptron itself differentiable by adding the conformance to `Differentiable`.
27 | @Code(name: "main.swift", file: "GradientDescent-01-03.swift")
28 | }
29 | @Step {
30 | A convenient Swift language feature is the ability to call structs as if they were functions. To enable this, you define a `callAsFunction()` method on the struct.
31 | @Code(name: "main.swift", file: "GradientDescent-01-04.swift")
32 | }
33 | @Step {
34 | Within the body of that function, we'll define how our perceptron model works. As a first step, we'll calculate the activation strength from the inbound activation of each neuron, multiplied by the respective connection weights. The bias is added to the result.
35 | @Code(name: "main.swift", file: "GradientDescent-01-05.swift")
36 | }
37 | @Step {
38 | To determine how strongly the outbound connection "fires", we'll apply a nonlinear function to the result: if it is positive, the value passes through unchanged; if negative, only a small fraction of the value passes along.
39 | @Code(name: "main.swift", file: "GradientDescent-01-06.swift")
40 | }
41 | @Step {
42 | The goal is to train this perceptron to behave like an AND gate, so that will be set up as our target.
43 | @Code(name: "main.swift", file: "GradientDescent-01-07.swift")
44 | }
45 | @Step {
46 | To optimize the parameters of the perceptron, we'll set up a loss function that represents how closely our perceptron matches the target behavior. In this case, the lower the loss, the closer the perceptron models an AND gate.
47 | @Code(name: "main.swift", file: "GradientDescent-01-08.swift")
48 | }
49 | @Step {
50 | To start the training process, a new perceptron is initialized.
51 | @Code(name: "main.swift", file: "GradientDescent-01-09.swift")
52 | }
53 | @Step {
54 | We'll train this perceptron for 100 steps in a loop.
55 | @Code(name: "main.swift", file: "GradientDescent-01-10.swift")
56 | }
57 | @Step {
58 | The first part of a training step is to both obtain the current loss value of the perceptron when compared to our AND gate, along with a pullback closure.
59 | @Code(name: "main.swift", file: "GradientDescent-01-11.swift")
60 | }
61 | @Step {
62 | From the pullback closure, we'll determine how to modify the parameters of the perceptron by taking a small step in a direction that should reduce the loss value. The result of calling the pullback is a tangent vector for the perceptron, a type that reflects the rate of change of the perceptron's parameters.
63 |
64 | The function `.move(by:)` is provided by the `Differentiable` protocol and causes all of the perceptron's parameters to be adjusted by the tangent vector.
65 | @Code(name: "main.swift", file: "GradientDescent-01-12.swift")
66 | }
67 | @Step {
68 | This stepwise training of a model by continually nudging it in a desired direction is the powerful technique of gradient descent optimization in action.
69 |
70 | At the end of this process, we should have a perceptron that roughly approximates the functioning of an AND gate.
71 | @Code(name: "main.swift", file: "GradientDescent-01-13.swift")
72 | }
73 | }
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/Tutorials/UsingDifferentiableSwift.tutorial:
--------------------------------------------------------------------------------
1 | @Tutorials(name: "Differentiable Swift") {
2 | @Intro(title: "Using differentiable Swift") {
3 | A collection of tutorials on the use of the differentiable Swift language feature.
4 | }
5 |
6 | @Chapter(name: "Differentiable functions and types") {
7 | How to work with differentiable functions and types.
8 | @TutorialReference(tutorial: "doc:DifferentiableFunctions")
9 | @TutorialReference(tutorial: "doc:DifferentiableTypes")
10 | }
11 |
12 | @Chapter(name: "Gradient descent optimization") {
13 | A simple example of how to harness the power of gradient descent optimization via differentiable Swift.
14 | @TutorialReference(tutorial: "doc:GradientDescent")
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/DifferentiableSwiftExamples.docc/UsingDifferentiableSwift.md:
--------------------------------------------------------------------------------
1 | # Using differentiable Swift
2 | Introduces differentiable Swift and how to use it to define differentiable functions and types.
3 |
4 | ## Overview
5 |
6 | Differentiable Swift integrates first-class support for automatic differentiation right into the Swift language.
7 | This means that the compiler can generate derivatives of arbitrary Swift code, and the type system can identify and provide clear messages for many common programming errors around differentiability.
8 |
9 | Differentiable functions are a key enabler of the extremely powerful technique of gradient descent optimization, which powers much of deep learning, and are useful in many other applications.
10 |
11 | As an experimental feature, activation of differentiable Swift is gated behind the following import statement:
12 |
13 | ```swift
14 | import _Differentiation
15 | ```
16 |
17 | which must be present in any Swift file taking advantage of differentiation.
18 |
19 |
20 | ### Differentiable functions
21 |
22 | You can mark a function as being differentiable if it has at least one differentiable parameter and
23 | a differentiable result. The `@differentiable` annotation is used to mark the function, and the
24 | `reverse` specifier further clarifies that we want to use reverse-mode differentiation.
25 |
26 | ```swift
27 | @differentiable(reverse)
28 | func squared(_ x: Float) -> Float {
29 | return x * x
30 | }
31 | ```
32 |
33 | In addition to letting the compiler define derivatives for Swift functions, you can register custom
34 | derivatives for any differentiable function. This is necessary for non-Swift functions or ones that
35 | reside in external modules you don't control, if you want these functions to be differentiable. For
36 | example, registering a derivative for the above `squared()` function might look like the following:
37 |
38 | ```swift
39 | @derivative(of: squared)
40 | func vjpSquared(_ input: Float) -> (
41 |     value: Float,
42 |     pullback: (Float) -> Float
43 | ) {
44 |     let output = squared(input)
45 |     func pullback(_ tangentVector: Float) -> Float {
46 |         return tangentVector * 2 * input
47 |     }
48 |     return (value: output, pullback: pullback)
49 | }
50 | ```
51 |
52 | ### Differentiable types
53 |
54 | To declare a type as being differentiable, it needs to conform to the `Differentiable` protocol.
55 | Generally, types are differentiable if they are continuous or if all of their properties are
56 | continuous and `Differentiable`. Differentiable types can have non-Differentiable properties, if
57 | those properties are annotated with `@noDerivative`. For example, the following is a custom struct
58 | that is `Differentiable`:
59 |
60 | ```swift
61 | struct MyValue: Differentiable {
62 | var x: Float
63 | var y: Double
64 | @noDerivative
65 | let isTracked: Bool
66 | }
67 | ```
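68 | 
69 | Once a type conforms, gradients with respect to it arrive as its `TangentVector`. A minimal sketch (the `scaled` function here is hypothetical):
70 | 
71 | ```swift
72 | @differentiable(reverse)
73 | func scaled(_ value: MyValue) -> Double {
74 |     return value.y * 2.0
75 | }
76 | 
77 | let grad = gradient(at: MyValue(x: 1.0, y: 2.0, isTracked: true), of: scaled)
78 | // grad is a MyValue.TangentVector with grad.y == 2.0 and grad.x == 0.0;
79 | // it carries no component for the @noDerivative property isTracked.
80 | ```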
68 |
69 |
70 | ### Obtaining and working with gradients and pullbacks
71 |
72 | To activate the differentiation machinery, the `_Differentiation` module in the Swift standard
73 | library provides special built-in functions that can give you the value from the forward pass
74 | through a differentiable function as well as the results of the backward pass.
75 |
76 | For functions with scalar outputs, `valueWithGradient(at:of:)` will return both the value and the
77 | calculated gradient at a given input value:
78 |
79 | ```swift
80 | let (value, gradient) = valueWithGradient(at: 3.0, of: squared)
81 | print("The value is \(value), and the gradient is \(gradient)")
82 | // Prints a value of 9.0 and a gradient of 6.0.
83 | ```
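84 | 
85 | When you need the backward pass by itself, `valueWithPullback(at:of:)` returns the forward value
86 | together with a pullback closure that maps an output tangent back to an input tangent. A short
87 | sketch using the `squared` function from above:
88 | 
89 | ```swift
90 | let (result, pullback) = valueWithPullback(at: 3.0, of: squared)
91 | print("The value is \(result), and pullback(1.0) gives \(pullback(1.0))")
92 | // Prints a value of 9.0 and a pullback result of 6.0.
93 | ```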
84 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/EmptyFile.swift:
--------------------------------------------------------------------------------
1 | // This is an empty source file used to make DifferentiableSwiftExamplesDocumentation a valid build target.
2 | // DifferentiableSwiftExamplesDocumentation is an otherwise empty target that includes high-level documentation about using automatic differentiation in Swift.
3 |
--------------------------------------------------------------------------------
/Sources/DifferentiableSwiftExamplesDocumentation/README.md:
--------------------------------------------------------------------------------
1 | # Differentiable Swift Examples Documentation
2 | `DifferentiableSwiftExamplesDocumentation` is an otherwise empty target that includes high-level documentation about using the experimental language feature of differentiable Swift.
3 |
4 | The documentation content in the `DifferentiableSwiftExamples.docc` catalog is published on GitHub Pages at https://passivelogic.github.io/differentiable-swift-examples
5 |
--------------------------------------------------------------------------------