├── src ├── FSharp │ └── FSharpExamples │ │ ├── Properties │ │ └── launchSettings.json │ │ ├── FSharpExamples.fsproj │ │ ├── Program.fs │ │ ├── AdversarialExampleGeneration.fs │ │ ├── TextClassification.fs │ │ ├── MNIST.fs │ │ ├── AlexNet.fs │ │ └── SequenceToSequence.fs ├── CSharp │ ├── CSharpExamples │ │ ├── Properties │ │ │ └── launchSettings.json │ │ ├── arguments.json │ │ ├── CSharpExamples.csproj │ │ ├── Program.cs │ │ ├── TextClassification.cs │ │ ├── AdversarialExampleGeneration.cs │ │ ├── MNIST.cs │ │ ├── CIFAR10.cs │ │ └── SequenceToSequence.cs │ └── Models │ │ ├── Models.csproj │ │ ├── TextClassification.cs │ │ ├── MNIST.cs │ │ ├── AlexNet.cs │ │ ├── VGG.cs │ │ ├── MobileNet.cs │ │ ├── SequenceToSequence.cs │ │ └── ResNet.cs ├── Utils │ ├── Examples.Utils.csproj │ ├── BigEndianReader.cs │ ├── TorchText.Data.Utils.cs │ ├── Datasets.cs │ ├── Decompress.cs │ ├── Arguments.cs │ ├── Vocab.cs │ ├── AG_NEWSReader.cs │ ├── CIFARReader .cs │ ├── MNISTReader.cs │ └── ArgumentParser.cs └── TorchSharpExamples.sln ├── CODE_OF_CONDUCT.md ├── tutorials ├── CSharp │ ├── README.md │ ├── tutorial1.ipynb │ ├── tutorial3.ipynb │ └── tutorial7.ipynb ├── FSharp │ ├── README.md │ ├── tutorial1.ipynb │ ├── tutorial3.ipynb │ └── tutorial7.ipynb └── README.md ├── LICENSE ├── SECURITY.md ├── README.md └── .gitignore /src/FSharp/FSharpExamples/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "FSharpExamples": { 4 | "commandName": "Project", 5 | "commandLineArgs": "-e 2 alexnet" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "CSharpExamples": { 4 | "commandName": "Project", 5 | "commandLineArgs": "-e 10 alexnet -l tb_runs" 6 | } 7 | } 8 | } 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | This project has adopted the code of conduct defined by the Contributor Covenant 4 | to clarify expected behavior in our community. 5 | 6 | For more information, see the [.NET Foundation Code of Conduct](https://dotnetfoundation.org/code-of-conduct). 7 | -------------------------------------------------------------------------------- /src/CSharp/Models/Models.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net6.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tutorials/CSharp/README.md: -------------------------------------------------------------------------------- 1 | ## C# Tutorials 2 | 3 | 4 | 5 | [Tutorial 1](tutorial1.ipynb): Setting Things Up 6 | 7 | [Tutorial 2](tutorial2.ipynb): Tensors 8 | 9 | [Tutorial 3](tutorial3.ipynb): Basic Numerics 10 | 11 | [Tutorial 4](tutorial4.ipynb): Random Numbers and Distributions 12 | 13 | [Tutorial 5](tutorial5.ipynb): CUDA 14 | 15 | [Tutorial 6](tutorial6.ipynb): Models 16 | 17 | [Tutorial 7](tutorial7.ipynb): Learning Rate Schedulers 18 | 19 | Tutorial 8: TorchVision 20 | -------------------------------------------------------------------------------- /tutorials/FSharp/README.md: -------------------------------------------------------------------------------- 1 | ## F# Tutorials 2 | 3 | 4 | 5 | [Tutorial 1](tutorial1.ipynb): Setting Things Up 6 | 7 | [Tutorial 2](tutorial2.ipynb): Tensors 8 | 9 | [Tutorial 3](tutorial3.ipynb): Basic Numerics 10 | 11 | [Tutorial 4](tutorial4.ipynb): Random Numbers and Distributions 12 | 13 | [Tutorial 5](tutorial5.ipynb): CUDA 14 | 15 | [Tutorial 6](tutorial6.ipynb): Models 16 | 17 | [Tutorial 7](tutorial7.ipynb): Learning Rate Schedulers 18 | 19 | 
Tutorial 8: TorchVision 20 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/arguments.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "longName": "epochs", 4 | "shortName": "e", 5 | "argType": "integer", 6 | "explanation": "The maximum number of epochs to use for training." 7 | }, 8 | { 9 | "longName": "timeout", 10 | "shortName": "t", 11 | "argType": "integer", 12 | "explanation": "The maximum time, measured in seconds, to use for training." 13 | }, 14 | { 15 | "longName": "logdir", 16 | "shortName": "l", 17 | "argType": "string", 18 | "explanation": "A directory for Tensorboard logging." 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /src/Utils/Examples.Utils.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net6.0 5 | AnyCPU 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/CSharpExamples.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | CSharpExamples.Program 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | PreserveNewest 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/FSharpExamples.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | 3390;$(WarnOn) 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | PreserveNewest 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) .NET Foundation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /src/Utils/BigEndianReader.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
2 | using System; 3 | using System.IO; 4 | 5 | namespace TorchSharp.Examples.Utils 6 | { 7 | public class BigEndianReader 8 | { 9 | public BigEndianReader(BinaryReader baseReader) 10 | { 11 | mBaseReader = baseReader; 12 | } 13 | 14 | public int ReadInt32() 15 | { 16 | return BitConverter.ToInt32(ReadBigEndianBytes(4), 0); 17 | } 18 | 19 | public byte[] ReadBigEndianBytes(int count) 20 | { 21 | byte[] bytes = new byte[count]; 22 | for (int i = count - 1; i >= 0; i--) 23 | bytes[i] = mBaseReader.ReadByte(); 24 | 25 | return bytes; 26 | } 27 | 28 | public byte[] ReadBytes(int count) 29 | { 30 | return mBaseReader.ReadBytes(count); 31 | } 32 | 33 | public void Close() 34 | { 35 | mBaseReader.Close(); 36 | } 37 | 38 | public Stream BaseStream { 39 | get { return mBaseReader.BaseStream; } 40 | } 41 | 42 | private BinaryReader mBaseReader; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/Program.fs: -------------------------------------------------------------------------------- 1 | // Learn more about F# at http://docs.microsoft.com/dotnet/fsharp 2 | 3 | open System 4 | open System.IO 5 | open System.Reflection 6 | 7 | open TorchSharp.Examples 8 | open TorchSharp.Examples.Utils 9 | 10 | 11 | [] 12 | let main args = 13 | 14 | let argumentsPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), "arguments.json") 15 | let argumentParser = new ArgumentParser(new FileInfo(argumentsPath), args) 16 | 17 | if argumentParser.Count = 0 then 18 | argumentParser.UsingMessage("CSharpExamples", "") 19 | 1 // return an integer exit code 20 | else 21 | 22 | let epochs = 23 | match argumentParser.TryGetValueInt "epochs" with 24 | | true,e -> e 25 | | false,_ -> 16 26 | 27 | let timeout = 28 | match argumentParser.TryGetValueInt "timeout" with 29 | | true,t -> t 30 | | false,_ -> 3600 31 | 32 | for idx = 0 to argumentParser.Count-1 do 33 | 34 | let modelName = argumentParser.[idx] 35 | 
36 | match modelName.ToLowerInvariant() with 37 | | "mnist" -> FSharpExamples.MNIST.run epochs 38 | | "fgsm" -> FSharpExamples.AdversarialExampleGeneration.run epochs 39 | | "alexnet" -> FSharpExamples.AlexNet.run epochs 40 | | "seq2seq" -> FSharpExamples.SequenceToSequence.run epochs 41 | | "text" -> FSharpExamples.TextClassification.run epochs 42 | | _ -> eprintf "Unknown model name" 43 | 44 | 0 // return an integer exit code -------------------------------------------------------------------------------- /src/CSharp/Models/TextClassification.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Collections.Generic; 6 | using System.Diagnostics; 7 | 8 | using static TorchSharp.torch; 9 | using static TorchSharp.torch.nn; 10 | using static TorchSharp.torch.nn.functional; 11 | 12 | namespace TorchSharp.Examples 13 | { 14 | /// 15 | /// This example is based on the PyTorch tutorial at: 16 | /// 17 | /// https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html 18 | /// 19 | /// 20 | public class TextClassificationModel : Module 21 | { 22 | private Modules.EmbeddingBag embedding; 23 | private Modules.Linear fc; 24 | 25 | public TextClassificationModel(long vocab_size, long embed_dim, long num_class) : base("TextClassification") 26 | { 27 | embedding = EmbeddingBag(vocab_size, embed_dim, sparse: false); 28 | fc = Linear(embed_dim, num_class); 29 | InitWeights(); 30 | 31 | RegisterComponents(); 32 | } 33 | 34 | private void InitWeights() 35 | { 36 | var initrange = 0.5; 37 | 38 | init.uniform_(embedding.weight, -initrange, initrange); 39 | init.uniform_(fc.weight, -initrange, initrange); 40 | init.zeros_(fc.bias); 41 | } 42 | 43 | public override Tensor forward(Tensor input, Tensor offsets) 44 | { 45 | var t = 
embedding.call(input, offsets); 46 | return fc.forward(t); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /tutorials/README.md: -------------------------------------------------------------------------------- 1 | # TorchSharp Tutorials 2 | 3 | Like its Python-based cousin, TorchSharp is a rich and capable numerics library, especially well suited for machine learning using neural networks, that is, deep learning. 4 | 5 | You can dive into TorchSharp by studying the examples provided, which provides an end-to-end view. On the other hand, if you would like a more methodical, step-by-step introduction to the many concepts and capabilities it offers, these tutorials aim to bring you one step at a time toward a comprehensive understanding of what it can do, and how. 6 | 7 | The tutorials are organized to start with the very basics, creating and using tensors, which are generalized vectors and matrices, and the fundamental data type of all deep learning. Once we have treated tensors in depth, we will move on to using tensors to compute things. There are a ton of numerical operators available, and you can express just about anything using TorchSharp. 8 | 9 | Once we have looked at numerics, it is time to move on to constructing models from numerics, and then to train them using optimizers and learning rate schedulers. 10 | 11 | Most tutorials are presented in the form of an interactive notebook, which is intended to be executed under .NET Interactive. These notebooks have been developed and tested using Visual Studio Code with the .NET Interactive extension installed. 
12 | 13 | For more information on installing the .NET Interactive extension, see: 14 | 15 | [Installing .NET Interactive Notebooks](https://marketplace.visualstudio.com/items?itemName=ms-dotnettools.dotnet-interactive-vscode) 16 | 17 | or 18 | 19 | [Installing .NET Interactive](https://github.com/dotnet/interactive/blob/main/docs/install-dotnet-interactive.md) 20 | 21 | 22 | For your tutorials, please choose one of: [C#](./CSharp/README.md) or [F#](./FSharp/README.md). 23 | 24 | __More tutorials are coming.__ 25 | -------------------------------------------------------------------------------- /src/Utils/TorchText.Data.Utils.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Text.RegularExpressions; 7 | using System.Threading.Tasks; 8 | 9 | namespace TorchText.Data 10 | { 11 | public static partial class Utils 12 | { 13 | public static Func> get_tokenizer(string name) 14 | { 15 | if (name == "basic_english") return BasicEnglish; 16 | throw new NotImplementedException($"The '{name}' text tokenizer is not implemented."); 17 | } 18 | 19 | private static string[] _patterns = new string []{ 20 | "\'", 21 | "\"", 22 | "\\.", 23 | "
", 24 | ",", 25 | "\\(", 26 | "\\)", 27 | "\\!", 28 | "\\?", 29 | "\\;", 30 | "\\:", 31 | "\\\\", 32 | "\\s+", 33 | }; 34 | private static string[] _replacements = new string[] { 35 | " \\' ", 36 | "", 37 | " . ", 38 | " ", 39 | " , ", 40 | " ( ", 41 | " ) ", 42 | " ! ", 43 | " ? ", 44 | " ", 45 | " ", 46 | " ", 47 | " " 48 | }; 49 | 50 | private static IEnumerable BasicEnglish(string input) 51 | { 52 | if (_patterns.Length != _replacements.Length) 53 | throw new InvalidProgramException("internal error: patterns and replacements are not the same length"); 54 | 55 | input = input.Trim().ToLowerInvariant(); 56 | 57 | for (var i = 0; i < _patterns.Length; ++i) { 58 | input = Regex.Replace(input, _patterns[i], _replacements[i]); 59 | } 60 | return input.Split(' '); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/Utils/Datasets.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Linq; 6 | 7 | namespace TorchText 8 | { 9 | /// 10 | /// This belongs in its own package, 'TorchText'. 11 | /// For now, it's useful to keep it with the examples that use it. 12 | /// 13 | public static class Datasets 14 | { 15 | /// 16 | /// WikiText2 17 | /// 18 | /// One of 'train', 'valid', or 'test' 19 | /// The folder where the WikiText2 data set was downloaded and extracted. 20 | /// An enumeration of lines from the text. 
21 | /// 22 | /// Download the data set at: 23 | /// https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 24 | /// 25 | public static IEnumerable WikiText2(string split, string root = ".data") 26 | { 27 | var dataPath = Path.Combine(root, "wikitext-2", $"wiki.{split}.tokens"); 28 | return File.ReadLines(dataPath).Select(line => line.Trim()).Where(line => line.Length > 0); 29 | } 30 | 31 | /// 32 | /// WikiText2 33 | /// 34 | /// The folder where the WikiText2 data set was downloaded and extracted. 35 | /// An enumeration of lines from the text for each of the data sets (training, validation, and test). 36 | /// 37 | /// Download the data set at: 38 | /// https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 39 | /// 40 | public static (IEnumerable, IEnumerable, IEnumerable) WikiText2(string root = ".data") 41 | { 42 | return (WikiText2("train", root), WikiText2("valid", root), WikiText2("test", root)); 43 | } 44 | 45 | /// 46 | /// Hack to get around F# issue. 47 | /// 48 | /// 49 | public static bool cuda_is_available() => TorchSharp.torch.cuda.is_available(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/CSharp/Models/MNIST.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
2 | using System; 3 | using System.IO; 4 | using System.Collections.Generic; 5 | using System.Diagnostics; 6 | using static TorchSharp.torch; 7 | 8 | using static TorchSharp.torch.nn; 9 | using static TorchSharp.torch.nn.functional; 10 | 11 | namespace TorchSharp.Examples.MNIST 12 | { 13 | public class Model : Module 14 | { 15 | private Module conv1 = Conv2d(1, 32, 3); 16 | private Module conv2 = Conv2d(32, 64, 3); 17 | private Module fc1 = Linear(9216, 128); 18 | private Module fc2 = Linear(128, 10); 19 | 20 | // These don't have any parameters, so the only reason to instantiate 21 | // them is performance, since they will be used over and over. 22 | private Module pool1 = MaxPool2d(kernel_size:new long[] { 2, 2 }); 23 | 24 | private Module relu1 = ReLU(); 25 | private Module relu2 = ReLU(); 26 | private Module relu3 = ReLU(); 27 | 28 | private Module dropout1 = Dropout(0.25); 29 | private Module dropout2 = Dropout(0.5); 30 | 31 | private Module flatten = Flatten(); 32 | private Module logsm = LogSoftmax(1); 33 | 34 | public Model(string name, torch.Device device = null) : base(name) 35 | { 36 | RegisterComponents(); 37 | 38 | if (device != null && device.type != DeviceType.CPU) 39 | this.to(device); 40 | } 41 | 42 | public override Tensor forward(Tensor input) 43 | { 44 | var l11 = conv1.forward(input); 45 | var l12 = relu1.forward(l11); 46 | 47 | var l21 = conv2.forward(l12); 48 | var l22 = relu2.forward(l21); 49 | var l23 = pool1.forward(l22); 50 | var l24 = dropout1.forward(l23); 51 | 52 | var x = flatten.forward(l24); 53 | 54 | var l31 = fc1.forward(x); 55 | var l32 = relu3.forward(l31); 56 | var l33 = dropout2.forward(l32); 57 | 58 | var l41 = fc2.forward(l33); 59 | 60 | return logsm.forward(l41); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/Utils/Decompress.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and 
Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Threading; 6 | using System.Threading.Tasks; 7 | using ICSharpCode.SharpZipLib.Core; 8 | using ICSharpCode.SharpZipLib.GZip; 9 | using ICSharpCode.SharpZipLib.Tar; 10 | 11 | //NOTE: This code was inspired by code found int the SciSharpStack-Examples repository. 12 | // https://github.com/SciSharp/SciSharp-Stack-Examples 13 | 14 | namespace TorchSharp.Examples.Utils 15 | { 16 | public static class Decompress 17 | { 18 | public static void DecompressGZipFile(string gzipFileName, string targetDir) 19 | { 20 | byte[] buf = new byte[4096]; 21 | 22 | using (var fs = File.OpenRead(gzipFileName)) 23 | using (var gzipStream = new GZipInputStream(fs)) { 24 | 25 | string fnOut = Path.Combine(targetDir, Path.GetFileNameWithoutExtension(gzipFileName)); 26 | 27 | using (var fsOut = File.Create(fnOut)) { 28 | StreamUtils.Copy(gzipStream, fsOut, buf); 29 | } 30 | } 31 | } 32 | public static void ExtractTGZ(string gzArchiveName, string destFolder) 33 | { 34 | var flag = gzArchiveName.Split(Path.DirectorySeparatorChar).Last().Split('.').First() + ".bin"; 35 | if (File.Exists(Path.Combine(destFolder, flag))) return; 36 | 37 | Console.WriteLine($"Extracting."); 38 | var task = Task.Run(() => { 39 | using (var inStream = File.OpenRead(gzArchiveName)) { 40 | using (var gzipStream = new GZipInputStream(inStream)) { 41 | #pragma warning disable CS0618 // Type or member is obsolete 42 | using (TarArchive tarArchive = TarArchive.CreateInputTarArchive(gzipStream)) 43 | #pragma warning restore CS0618 // Type or member is obsolete 44 | tarArchive.ExtractContents(destFolder); 45 | } 46 | } 47 | }); 48 | 49 | while (!task.IsCompleted) { 50 | Thread.Sleep(200); 51 | Console.Write("."); 52 | } 53 | 54 | File.Create(Path.Combine(destFolder, flag)); 55 | Console.WriteLine(""); 56 | Console.WriteLine("Extraction completed."); 57 | } 
58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/TorchSharpExamples.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.32112.339 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Examples.Utils", "Utils\Examples.Utils.csproj", "{9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Models", "CSharp\Models\Models.csproj", "{AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}" 9 | EndProject 10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSharpExamples", "CSharp\CSharpExamples\CSharpExamples.csproj", "{E56038AD-B99F-4333-BA8C-3F65C95C638E}" 11 | EndProject 12 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FSharpExamples", "FSharp\FSharpExamples\FSharpExamples.fsproj", "{E9B24578-E02C-4B9B-B4A4-2458E876E8C1}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Release|Any CPU = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Release|Any CPU.Build.0 = Release|Any CPU 24 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 25 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Debug|Any CPU.Build.0 = Debug|Any CPU 26 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Release|Any CPU.ActiveCfg = Release|Any CPU 27 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Release|Any CPU.Build.0 = Release|Any CPU 28 | 
{E56038AD-B99F-4333-BA8C-3F65C95C638E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 29 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Debug|Any CPU.Build.0 = Debug|Any CPU 30 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Release|Any CPU.Build.0 = Release|Any CPU 32 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Release|Any CPU.ActiveCfg = Release|Any CPU 35 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Release|Any CPU.Build.0 = Release|Any CPU 36 | EndGlobalSection 37 | GlobalSection(SolutionProperties) = preSolution 38 | HideSolutionNode = FALSE 39 | EndGlobalSection 40 | GlobalSection(ExtensibilityGlobals) = postSolution 41 | SolutionGuid = {FC1609FE-9105-4B47-BA8D-5EBF6D388046} 42 | EndGlobalSection 43 | EndGlobal 44 | -------------------------------------------------------------------------------- /src/Utils/Arguments.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | using Newtonsoft.Json; 8 | 9 | namespace TorchSharp.Examples.Utils 10 | { 11 | [JsonObject] 12 | public sealed class ArgumentDescriptor 13 | { 14 | /// 15 | /// Long names are used with '--' and can be any one word using letters and numbers. 16 | /// The long name spelling are not case-sensitive. 17 | /// 18 | [JsonProperty(Required = Required.Always)] 19 | public string LongName { get; set; } 20 | 21 | /// 22 | /// Short names must be a single character, and are sensitive to case. 23 | /// 24 | [JsonProperty(Required = Required.Default)] 25 | public string ShortName { get; set; } 26 | 27 | /// 28 | /// If true, the parser should allow multiple values. 
29 | /// 30 | [JsonProperty(Required = Required.Default)] 31 | public bool AllowMultiple { get; set; } 32 | 33 | /// 34 | /// The kind of argument. 35 | /// 36 | [JsonProperty(Required = Required.Always)] 37 | public ArgumentType ArgType { get; set; } 38 | 39 | /// 40 | /// An explanation of the argument, intended for human consumption as part of a 'using' message. 41 | /// 42 | public String Explanation { get; set; } 43 | 44 | public enum ArgumentType 45 | { 46 | /// 47 | /// A string argument. 48 | /// 49 | /// 50 | /// --name=foobar 51 | /// 52 | String, 53 | /// 54 | /// An integer argument. 55 | /// 56 | /// 57 | /// --count=10 58 | /// 59 | Integer, 60 | /// 61 | /// An comma-separated list of strings. 62 | /// 63 | /// 64 | /// --options=a,b,c 65 | /// 66 | List, 67 | /// 68 | /// A boolean argument, for example 69 | /// 70 | /// 71 | /// --doit=true 72 | /// 73 | Boolean, 74 | /// 75 | /// A Boolean flag that requires no value. Absence is 'false' 76 | /// 77 | /// 78 | /// --doit 79 | /// 80 | Flag, 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Reflection; 4 | using TorchSharp.Examples.Utils; 5 | 6 | namespace CSharpExamples 7 | { 8 | public class Program 9 | { 10 | static void Main(string[] args) 11 | { 12 | var argumentsPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), "arguments.json"); 13 | var argumentParser = new ArgumentParser(new FileInfo(argumentsPath), args); 14 | 15 | if (argumentParser.Count == 0) 16 | { 17 | argumentParser.UsingMessage("CSharpExamples", ""); 18 | return; 19 | } 20 | 21 | argumentParser.TryGetValue("epochs", out int epochs, 16); 22 | argumentParser.TryGetValue("timeout", out int timeout, 3600); 23 | argumentParser.TryGetValue("logdir", out string logdir, null); 24 | 
25 | for (var idx = 0; idx < argumentParser.Count; idx++) 26 | { 27 | switch(argumentParser[idx].ToLower()) 28 | { 29 | case "mnist": 30 | case "fashion-mnist": 31 | MNIST.Run(epochs, timeout, logdir, argumentParser[idx].ToLower()); 32 | break; 33 | 34 | case "fgsm": 35 | case "fashion-fgsm": 36 | AdversarialExampleGeneration.Run(epochs, timeout, logdir, argumentParser[idx].ToLower()); 37 | break; 38 | 39 | case "alexnet": 40 | case "resnet": 41 | case "mobilenet": 42 | case "resnet18": 43 | case "resnet34": 44 | case "resnet50": 45 | #if false 46 | // The following are disabled, because they require big CUDA processors in order to run. 47 | case "resnet101": 48 | case "resnet152": 49 | #endif 50 | case "vgg11": 51 | case "vgg13": 52 | case "vgg16": 53 | case "vgg19": 54 | CIFAR10.Run(epochs, timeout, logdir, argumentParser[idx]); 55 | break; 56 | 57 | case "text": 58 | TextClassification.Run(epochs, timeout, logdir); 59 | break; 60 | 61 | case "seq2seq": 62 | SequenceToSequence.Run(epochs, timeout, logdir); 63 | break; 64 | 65 | default: 66 | Console.Error.WriteLine($"Unknown model name: {argumentParser[idx]}"); 67 | break; 68 | } 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/CSharp/Models/AlexNet.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Collections.Generic; 6 | using System.Diagnostics; 7 | 8 | using TorchSharp; 9 | using static TorchSharp.torch; 10 | using static TorchSharp.torch.nn; 11 | using static TorchSharp.torch.nn.functional; 12 | 13 | namespace TorchSharp.Examples 14 | { 15 | /// 16 | /// Modified version of original AlexNet to fix CIFAR10 32x32 images. 
17 | /// 18 | public class AlexNet : Module 19 | { 20 | private readonly Module features; 21 | private readonly Module avgPool; 22 | private readonly Module classifier; 23 | 24 | public AlexNet(string name, int numClasses, Device device = null) : base(name) 25 | { 26 | features = Sequential( 27 | ("c1", Conv2d(3, 64, kernel_size:3, stride: 2, padding: 1)), 28 | ("r1", ReLU(inplace: true)), 29 | ("mp1", MaxPool2d(kernel_size:new long[] { 2, 2 })), 30 | ("c2", Conv2d(64, 192, kernel_size:3, padding: 1)), 31 | ("r2", ReLU(inplace: true)), 32 | ("mp2", MaxPool2d(kernel_size:new long[] { 2, 2 })), 33 | ("c3", Conv2d(192, 384, kernel_size:3, padding: 1)), 34 | ("r3", ReLU(inplace: true)), 35 | ("c4", Conv2d(384, 256, kernel_size:3, padding: 1)), 36 | ("r4", ReLU(inplace: true)), 37 | ("c5", Conv2d(256, 256, kernel_size:3, padding: 1)), 38 | ("r5", ReLU(inplace: true)), 39 | ("mp3", MaxPool2d(kernel_size:new long[] { 2, 2 }))); 40 | 41 | avgPool = AdaptiveAvgPool2d(new long[] { 2, 2 }); 42 | 43 | classifier = Sequential( 44 | ("d1", Dropout()), 45 | ("l1", Linear(256 * 2 * 2, 4096)), 46 | ("r1", ReLU(inplace: true)), 47 | ("d2", Dropout()), 48 | ("l2", Linear(4096, 4096)), 49 | ("r3", ReLU(inplace: true)), 50 | ("d3", Dropout()), 51 | ("l3", Linear(4096, numClasses)) 52 | ); 53 | 54 | RegisterComponents(); 55 | 56 | if (device != null && device.type != DeviceType.CPU) 57 | this.to(device); 58 | } 59 | 60 | public override Tensor forward(Tensor input) 61 | { 62 | var f = features.forward(input); 63 | var avg = avgPool.forward(f); 64 | 65 | var x = avg.view(new long[] { avg.shape[0], 256 * 2 * 2 }); 66 | 67 | return classifier.forward(x); 68 | } 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all 
source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /src/CSharp/Models/VGG.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System.Collections.Generic; 3 | using TorchSharp; 4 | using static TorchSharp.torch; 5 | using static TorchSharp.torch.nn; 6 | 7 | namespace TorchSharp.Examples 8 | { 9 | /// 10 | /// Modified version of VGG to classify CIFAR10 32x32 images. 11 | /// 12 | /// 13 | /// With an unaugmented CIFAR-10 data set, the author of this saw training converge 14 | /// at roughly 85% accuracy on the test set, after 50 epochs using VGG-16. 
15 | /// 16 | public class VGG : Module 17 | { 18 | // The code here is is loosely based on https://github.com/kuangliu/pytorch-cifar/blob/master/models/vgg.py 19 | // Licence and copypright notice at: https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE 20 | 21 | private readonly Dictionary _channels = new Dictionary() { 22 | { "vgg11", new long[] { 64, 0, 128, 0, 256, 256, 0, 512, 512, 0, 512, 512, 0 } }, 23 | { "vgg13", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 0, 512, 512, 0, 512, 512, 0 } }, 24 | { "vgg16", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 256, 0, 512, 512, 512, 0, 512, 512, 512, 0 } }, 25 | { "vgg19", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 256, 256, 0, 512, 512, 512, 512, 0, 512, 512, 512, 512, 0 } } 26 | }; 27 | 28 | private readonly Module layers; 29 | 30 | public VGG(string name, int numClasses, Device device = null) : base(name) 31 | { 32 | var modules = new List<(string, Module)>(); 33 | 34 | var channels = _channels[name.ToLower()]; 35 | 36 | long in_channels = 3; 37 | 38 | for (var i = 0; i < channels.Length; i++) { 39 | 40 | if (channels[i] == 0) { 41 | modules.Add(($"MaxPool2d-{i}a", MaxPool2d(kernel_size:2, stride: 2))); 42 | } else { 43 | modules.Add(($"conv2d-{i}a", Conv2d(in_channels, channels[i], kernel_size:3, padding: 1))); 44 | modules.Add(($"bnrm2d-{i}a", BatchNorm2d(channels[i]))); 45 | modules.Add(($"relu-{i}b", ReLU(inplace: true))); 46 | in_channels = channels[i]; 47 | } 48 | } 49 | modules.Add(("avgpool2d", AvgPool2d(kernel_size: 1, stride: 1))); 50 | modules.Add(("flatten", Flatten())); 51 | modules.Add(("linear", Linear(512, numClasses))); 52 | 53 | layers = Sequential(modules); 54 | 55 | RegisterComponents(); 56 | 57 | if (device != null && device.type != DeviceType.CPU) 58 | this.to(device); 59 | } 60 | 61 | public override Tensor forward(Tensor input) 62 | { 63 | return layers.forward(input); 64 | } 65 | } 66 | } 67 | 
-------------------------------------------------------------------------------- /src/CSharp/Models/MobileNet.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections.Generic; 4 | 5 | using TorchSharp; 6 | using static TorchSharp.torch; 7 | using static TorchSharp.torch.nn; 8 | 9 | namespace TorchSharp.Examples 10 | { 11 | /// 12 | /// Modified version of MobileNet to classify CIFAR10 32x32 images. 13 | /// 14 | /// 15 | /// With an unaugmented CIFAR-10 data set, the author of this saw training converge 16 | /// at roughly 75% accuracy on the test set, over the course of 1500 epochs. 17 | /// 18 | public class MobileNet : Module 19 | { 20 | // The code here is is loosely based on https://github.com/kuangliu/pytorch-cifar/blob/master/models/mobilenet.py 21 | // Licence and copypright notice at: https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE 22 | 23 | private readonly long[] planes = new long[] { 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024 }; 24 | private readonly long[] strides = new long[] { 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1 }; 25 | 26 | private readonly Module layers; 27 | 28 | public MobileNet(string name, int numClasses, Device device = null) : base(name) 29 | { 30 | if (planes.Length != strides.Length) throw new ArgumentException("'planes' and 'strides' must have the same length."); 31 | 32 | var modules = new List<(string, Module)>(); 33 | 34 | modules.Add(($"conv2d-first", Conv2d(3, 32, kernel_size:3, stride: 1, padding: 1, bias: false))); 35 | modules.Add(($"bnrm2d-first", BatchNorm2d(32))); 36 | modules.Add(($"relu-first", ReLU())); 37 | MakeLayers(modules, 32); 38 | modules.Add(("avgpool", AvgPool2d(new long[] { 2, 2 }))); 39 | modules.Add(("flatten", Flatten())); 40 | modules.Add(($"linear", Linear(planes[^1], 
numClasses))); 41 | 42 | layers = Sequential(modules); 43 | 44 | RegisterComponents(); 45 | 46 | if (device != null && device.type != DeviceType.CPU) 47 | this.to(device); 48 | } 49 | 50 | private void MakeLayers(List<(string, Module)> modules, long in_planes) 51 | { 52 | 53 | for (var i = 0; i < strides.Length; i++) { 54 | var out_planes = planes[i]; 55 | var stride = strides[i]; 56 | 57 | modules.Add(($"conv2d-{i}a", Conv2d(in_planes, in_planes, kernel_size:3, stride: stride, padding: 1, groups: in_planes, bias: false))); 58 | modules.Add(($"bnrm2d-{i}a", BatchNorm2d(in_planes))); 59 | modules.Add(($"relu-{i}a", ReLU())); 60 | modules.Add(($"conv2d-{i}b", Conv2d(in_planes, out_planes, kernel_size:1L, stride: 1L, padding: 0L, bias: false))); 61 | modules.Add(($"bnrm2d-{i}b", BatchNorm2d(out_planes))); 62 | modules.Add(($"relu-{i}b", ReLU())); 63 | 64 | in_planes = out_planes; 65 | } 66 | } 67 | 68 | public override Tensor forward(Tensor input) 69 | { 70 | return layers.forward(input); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/Utils/Vocab.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections; 4 | using System.Collections.Generic; 5 | using System.Diagnostics.CodeAnalysis; 6 | using System.Linq; 7 | using TorchSharp; 8 | 9 | using static TorchSharp.torch.nn; 10 | 11 | namespace TorchText.Vocab 12 | { 13 | /// 14 | /// This needs a permanent place. 15 | /// The functionality is based on the Python 'Counter' class. 
16 | /// 17 | /// 18 | public class Counter : IEnumerable> 19 | { 20 | private Dictionary _dict = new Dictionary(); 21 | 22 | public void update(T key) 23 | { 24 | if (_dict.TryGetValue(key, out int count)) { 25 | _dict[key] = count + 1; 26 | } else { 27 | _dict[key] = 1; 28 | } 29 | } 30 | public void update(IEnumerable keys) 31 | { 32 | foreach (T key in keys) { 33 | update(key); 34 | } 35 | } 36 | public int this[T key] { get => _dict[key]; } 37 | 38 | public IEnumerator> GetEnumerator() 39 | { 40 | return _dict.GetEnumerator(); 41 | } 42 | 43 | IEnumerator IEnumerable.GetEnumerator() 44 | { 45 | return GetEnumerator(); 46 | } 47 | } 48 | 49 | /// 50 | /// This belongs in its own package, 'TorchText'. 51 | /// For now, it's useful to keep it with the examples that use it. 52 | /// 53 | public class Vocab 54 | { 55 | public Vocab(Counter counter, int? maxSize = null, int minFreq = 1, string[] specials = null, Func unkInit = null, bool specialsFirst = true) 56 | { 57 | if (specials == null) specials = new string[] { "", "" }; 58 | if (unkInit == null) unkInit = (t => init.zeros_(t.clone())); 59 | if (specialsFirst) { 60 | foreach (var sp in specials) { 61 | _dict.Add(sp, _last++); 62 | } 63 | } 64 | foreach (var kv in counter.Where(kv => kv.Value >= minFreq)) { 65 | if (!specials.Contains(kv.Key)) { 66 | _dict.Add(kv.Key, _last++); 67 | } 68 | if (_last > (maxSize ?? int.MaxValue)) 69 | break; 70 | } 71 | if (!specialsFirst) { 72 | foreach (var sp in specials) { 73 | _dict.Add(sp, _last++); 74 | } 75 | } 76 | } 77 | 78 | public int this[string key] { get => _dict.TryGetValue(key, out int value) ? 
value : _dict[""]; } 79 | 80 | public int Count => _dict.Count; 81 | 82 | public void Add(string key, int value) 83 | { 84 | _dict.Add(key, value); 85 | } 86 | 87 | public void Add(KeyValuePair item) 88 | { 89 | Add(item.Key, item.Value); 90 | } 91 | 92 | public bool TryGetValue(string key, [MaybeNullWhen(false)] out int value) 93 | { 94 | return _dict.TryGetValue(key, out value); 95 | } 96 | 97 | private Dictionary _dict = new Dictionary(); 98 | private int _last = 0; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/CSharp/Models/SequenceToSequence.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Collections.Generic; 6 | using System.Diagnostics; 7 | 8 | using static TorchSharp.torch; 9 | using static TorchSharp.torch.nn; 10 | using static TorchSharp.torch.nn.functional; 11 | 12 | namespace TorchSharp.Examples 13 | { 14 | /// 15 | /// This example is based on the PyTorch tutorial at: 16 | /// 17 | /// https://pytorch.org/tutorials/beginner/transformer_tutorial.html 18 | /// 19 | /// 20 | 21 | public class TransformerModel : Module 22 | { 23 | private Modules.TransformerEncoder transformer_encoder; 24 | private PositionalEncoding pos_encoder; 25 | private Modules.Embedding encoder; 26 | private Modules.Linear decoder; 27 | 28 | private long ninputs; 29 | private Device device; 30 | 31 | public TransformerModel(long ntokens, long ninputs, long nheads, long nhidden, long nlayers, double dropout = 0.5) : base("Transformer") 32 | { 33 | this.ninputs = ninputs; 34 | 35 | pos_encoder = new PositionalEncoding(ninputs, dropout); 36 | var encoder_layers = TransformerEncoderLayer(ninputs, nheads, nhidden, dropout); 37 | transformer_encoder = 
TransformerEncoder(encoder_layers, nlayers); 38 | encoder = Embedding(ntokens, ninputs); 39 | decoder = Linear(ninputs, ntokens); 40 | InitWeights(); 41 | 42 | RegisterComponents(); 43 | } 44 | 45 | public Tensor GenerateSquareSubsequentMask(long size) 46 | { 47 | var mask = (torch.ones(new long[] { size, size }) == 1).triu().transpose(0, 1); 48 | return mask.to_type(ScalarType.Float32) 49 | .masked_fill(mask == 0, float.NegativeInfinity) 50 | .masked_fill(mask == 1, 0.0f).to(device); 51 | } 52 | 53 | private void InitWeights() 54 | { 55 | var initrange = 0.1; 56 | 57 | init.uniform_(encoder.weight, -initrange, initrange); 58 | init.zeros_(decoder.bias); 59 | init.uniform_(decoder.weight, -initrange, initrange); 60 | } 61 | 62 | public override Tensor forward(Tensor t, Tensor mask) 63 | { 64 | using var src = pos_encoder.forward(encoder.forward(t) * MathF.Sqrt(ninputs)); 65 | using var enc = transformer_encoder.call(src, mask); 66 | return decoder.forward(enc); 67 | } 68 | 69 | public TransformerModel to(Device device) 70 | { 71 | this.to(device); 72 | this.device = device; 73 | return this; 74 | } 75 | } 76 | 77 | class PositionalEncoding : Module 78 | { 79 | private Module dropout; 80 | private Tensor pe; 81 | 82 | public PositionalEncoding(long dmodel, double dropout, int maxLen = 5000) : base("PositionalEncoding") 83 | { 84 | this.dropout = Dropout(dropout); 85 | var pe = torch.zeros(new long[] { maxLen, dmodel }); 86 | var position = torch.arange(0, maxLen, 1).unsqueeze(1); 87 | var divTerm = (torch.arange(0, dmodel, 2) * (-Math.Log(10000.0) / dmodel)).exp(); 88 | pe[TensorIndex.Ellipsis, TensorIndex.Slice(0, null, 2)] = (position * divTerm).sin(); 89 | pe[TensorIndex.Ellipsis, TensorIndex.Slice(1, null, 2)] = (position * divTerm).cos(); 90 | this.pe = pe.unsqueeze(0).transpose(0, 1); 91 | 92 | RegisterComponents(); 93 | } 94 | 95 | public override Tensor forward(Tensor t) 96 | { 97 | var x = t + pe[TensorIndex.Slice(null, t.shape[0]), TensorIndex.Slice()]; 98 
| return dropout.forward(x); 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/Utils/AG_NEWSReader.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Linq; 6 | using TorchSharp; 7 | using static TorchSharp.torch; 8 | 9 | namespace TorchText.Data 10 | { 11 | public class AG_NEWSReader : IDisposable 12 | { 13 | public static AG_NEWSReader AG_NEWS(string split, Device device, string root = ".data") 14 | { 15 | var dataPath = Path.Combine(root, $"{split}.csv"); 16 | return new AG_NEWSReader(dataPath, device); 17 | } 18 | 19 | private AG_NEWSReader(string path, Device device) 20 | { 21 | _path = path; 22 | _device = device; 23 | } 24 | 25 | private string _path; 26 | private Device _device; 27 | 28 | public IEnumerable<(int, string)> Enumerate() 29 | { 30 | return File.ReadLines(_path).Select(line => ParseLine(line)); 31 | } 32 | 33 | public IEnumerable<(Tensor, Tensor, Tensor)> GetBatches(Func> tokenizer, Vocab.Vocab vocab, long batch_size) 34 | { 35 | // This data set fits in memory, so we will simply load it all and cache it between epochs. 
36 | 37 | var inputs = new List<(int, string)>(); 38 | 39 | if (_data == null) { 40 | 41 | _data = new List<(Tensor, Tensor, Tensor)>(); 42 | 43 | var counter = 0; 44 | var lines = Enumerate().ToList(); 45 | var left = lines.Count; 46 | 47 | foreach (var line in lines) { 48 | 49 | inputs.Add(line); 50 | left -= 1; 51 | 52 | if (++counter == batch_size || left == 0) { 53 | _data.Add(Batchifier(inputs, tokenizer, vocab)); 54 | inputs.Clear(); 55 | counter = 0; 56 | } 57 | } 58 | } 59 | 60 | return _data; 61 | } 62 | 63 | private List<(Tensor, Tensor, Tensor)> _data; 64 | private bool disposedValue; 65 | 66 | private (Tensor, Tensor, Tensor) Batchifier(IEnumerable<(int, string)> input, Func> tokenizer, Vocab.Vocab vocab) 67 | { 68 | var label_list = new List(); 69 | var text_list = new List(); 70 | var offsets = new List(); 71 | offsets.Add(0); 72 | 73 | long last = 0; 74 | 75 | foreach (var (label, text) in input) { 76 | label_list.Add(label); 77 | var processed_text = torch.tensor(tokenizer(text).Select(t => (long)vocab[t]).ToArray(),dtype:torch.int64); 78 | text_list.Add(processed_text); 79 | last += processed_text.size(0); 80 | offsets.Add(last); 81 | } 82 | 83 | var labels = torch.tensor(label_list.ToArray(), dtype: torch.int64).to(_device); 84 | var texts = torch.cat(text_list.ToArray(), 0).to(_device); 85 | var offs = torch.tensor(offsets.Take(label_list.Count).ToArray(), dtype:torch.int64).to(_device); 86 | 87 | return (labels, texts, offs); 88 | } 89 | 90 | public (int, string) ParseLine(string line) 91 | { 92 | int label = 0; 93 | string text = ""; 94 | 95 | int firstComma = line.IndexOf("\",\""); 96 | label = int.Parse(line.Substring(1, firstComma - 1)); 97 | text = line.Substring(firstComma + 2, line.Length - firstComma - 2); 98 | int secondComma = text.IndexOf("\",\""); 99 | text = text.Substring(secondComma + 2, text.Length - secondComma - 2); 100 | int thirdComma = text.IndexOf("\",\""); 101 | 102 | text = text.Substring(thirdComma + 2, text.Length - 
thirdComma - 3); 103 | 104 | return (label-1, text); 105 | } 106 | 107 | protected virtual void Dispose(bool disposing) 108 | { 109 | if (!disposedValue) { 110 | if (disposing && _data != null) { 111 | foreach (var (l, t, o) in _data) { 112 | l.Dispose(); 113 | t.Dispose(); 114 | o.Dispose(); 115 | } 116 | } 117 | 118 | disposedValue = true; 119 | } 120 | } 121 | 122 | public void Dispose() 123 | { 124 | // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method 125 | Dispose(disposing: true); 126 | GC.SuppressFinalize(this); 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/AdversarialExampleGeneration.fs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | module FSharpExamples.AdversarialExampleGeneration 3 | 4 | open System 5 | open System.IO 6 | open System.Diagnostics 7 | 8 | open TorchSharp 9 | open type TorchSharp.torch.nn 10 | open type TorchSharp.torch.optim 11 | open type TorchSharp.Scalar 12 | 13 | open FSharpExamples 14 | open TorchSharp.Examples 15 | 16 | // FGSM Attack 17 | // 18 | // Based on : https://pytorch.org/tutorials/beginner/fgsm_tutorial.html 19 | // 20 | // There are at least two interesting data sets to use with this example: 21 | // 22 | // 1. The classic MNIST set of 60000 images of handwritten digits. 23 | // 24 | // It is available at: http://yann.lecun.com/exdb/mnist/ 25 | // 26 | // 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder 27 | // data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test 28 | // data. 
29 | // It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion 30 | // 31 | // In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation' 32 | // constant below at the folder location. 33 | // 34 | // The example is based on the PyTorch tutorial, but the results from attacking the model are very different from 35 | // what the tutorial article notes, at least on the machine where it was developed. There is an order-of-magnitude lower 36 | // drop-off in accuracy in this version. That said, when running the PyTorch tutorial on the same machine, the 37 | // accuracy trajectories are the same between .NET and Python. If the base convulutational model is trained 38 | // using Python, and then used for the FGSM attack in both .NET and Python, the drop-off trajectories are extremenly 39 | // close. 40 | 41 | let mutable trainBatchSize = 64 42 | let mutable testBatchSize = 128 43 | 44 | let logInterval = 100 45 | 46 | let cmdArgs = Environment.GetCommandLineArgs() 47 | let dataset = if cmdArgs.Length = 2 then cmdArgs.[1] else "mnist" 48 | 49 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset) 50 | 51 | torch.random.manual_seed(1L) |> ignore 52 | 53 | let hasCUDA = TorchText.Datasets.cuda_is_available() 54 | 55 | let device = if hasCUDA then torch.CUDA else torch.CPU 56 | 57 | let criterion x y = functional.nll_loss(x,y) 58 | 59 | let attack (image:torch.Tensor) (eps:Scalar) (data_grad:torch.Tensor) = 60 | use sign = data_grad.sign() 61 | (image + eps * sign).clamp(0.0.ToScalar(), 1.0.ToScalar()) 62 | 63 | let test (model:MNIST.Model) (eps:float) (dataLoader:MNISTReader) size = 64 | 65 | let mutable correct = 0 66 | 67 | for (input,labels) in dataLoader do 68 | 69 | use d = torch.NewDisposeScope() 70 | 71 | input.requires_grad <- true 72 | 73 | begin // This is introduced in order to let a few tensors go out 
of scope before GC 74 | use estimate = input --> model 75 | use loss = criterion estimate labels 76 | 77 | model.zero_grad() 78 | loss.backward() 79 | 80 | use perturbed = attack input (eps.ToScalar()) (input.grad) 81 | use final = perturbed --> model 82 | correct <- correct + final.argmax(1L).eq(labels).sum().ToInt32() 83 | end 84 | 85 | float correct / size 86 | 87 | let run epochs = 88 | 89 | printfn $"Running AdversarialExampleGeneration on {device.``type``.ToString()}" 90 | printfn $"Dataset: {dataset}" 91 | 92 | let targetDir = Path.Combine(datasetPath, "test_data") 93 | 94 | MNIST.getDataFiles datasetPath targetDir 95 | 96 | if device.``type`` = DeviceType.CUDA then 97 | trainBatchSize <- trainBatchSize * 4 98 | testBatchSize <- testBatchSize * 4 99 | 100 | let normImage = torchvision.transforms.Normalize( [|0.1307|], [|0.3081|], device=device) 101 | use testData = new MNISTReader(targetDir, "t10k", testBatchSize, device=device, transform=normImage) 102 | 103 | let modelFile = dataset + ".model.bin" 104 | 105 | let model = 106 | if not (File.Exists(modelFile)) then 107 | printfn $"\n Running MNIST on {device.``type``.ToString()} in order to pre-train the model." 
108 | 109 | let model = new MNIST.Model("model",device) 110 | 111 | use train = new MNISTReader(targetDir, "train", trainBatchSize, device=device, shuffle=true, transform=normImage) 112 | MNIST.trainingLoop model epochs dataset train testData |> ignore 113 | 114 | printfn "Moving on to the Adversarial model.\n" 115 | 116 | model 117 | 118 | else 119 | let model = new MNIST.Model("model", torch.CPU) 120 | model.load(modelFile) |> ignore 121 | model 122 | 123 | model.``to``(device) |> ignore 124 | 125 | model.eval() 126 | 127 | let epsilons = [| 0.0; 0.05; 0.1; 0.15; 0.20; 0.25; 0.30; 0.35; 0.40; 0.45; 0.50|] 128 | 129 | for eps in epsilons do 130 | let attacked = test model eps testData (float testData.Size) 131 | printfn $"Epsilon: {eps:F2}, accuracy: {attacked:P2}" 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Gitter](https://badges.gitter.im/dotnet/TorchSharp.svg)](https://gitter.im/dotnet/TorchSharp?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 2 |
3 | # TorchSharp Examples 4 | 5 | This repo holds examples and tutorials related to [TorchSharp](https://github.com/dotnet/TorchSharp), .NET-only bindings to libtorch, the engine behind PyTorch. If you are trying to familiarize yourself with TorchSharp, rather than contributing to it, this is the place to go. 6 | 7 | Currently, the examples are the same that are also found in the TorchSharp repo. Unlike the setup in that repo, where the examples are part of the overall VS solution file and use project references to pick up the TorchSharp dependencies, in this repo, the example solution is using the publically available TorchSharp packages form NuGet. 8 | 9 | The examples and tutorials assume that you are on the latest version of TorchSharp, which currently is 0.97.5. 10 | 11 | ### System / Environment Requirements 12 | 13 | In order to use TorchSharp, you will need both the most recent TorchSharp package, as well as one of the several libtorch-* packages that are available. The most basic one, which is used in this repository, is the libtorch-cpu package. As the name suggests, it uses a CPU backend to do training and inference. 14 | 15 | There is also support for CUDA 11.3 on both Windows and Linux, and each of these combinations has its own NuGet package. If you want to train on CUDA, you need to replace references to libtorch-cpu in the solution and projects. 16 | 17 | __Note__: Starting with NuGet release 0.93.4, we have simplified the package structure, so you only need to select one of these three packages, and it will include the others: 18 | 19 | TorchSharp-cpu 20 | TorchSharp-cuda-windows 21 | TorchSharp-cuda-linux 22 | 23 | The examples solution should build without any modifications, either with Visual Studio, or using `dotnet build'. All of the examples build on an Nvidia GPU with 8GB of memory, while only a subset build on a GPU with 6GB. 
Running more than a few epochs while training on a CPU will take a very long time, especially on the CIFAR10 examples. MNIST is the most reasonable example to train on a CPU. 24 | 25 | ## Structure 26 | 27 | There are variants of all models in both C# and F#. For C#, there is a 'Models' library, and a 'XXXExamples' console app, which is what is used for batch training of the model. For F#, the models are bundled with the training code (we may restructure this in the future). There is also a utility library that is written in C# only, and used from both C# and F#. 28 | 29 | The console apps are, as mentioned, meant to be used for batch training. The command line must specify the model to be used. In the case of MNIST, there are two data sets -- the original 'MNIST' as well as the harder 'Fashion MNIST'. 30 | 31 | The repo contains no actual data sets. You have to download them manually and, in some cases, extract the data from archives. 32 | 33 | ## Data Sets 34 | 35 | The MNIST model uses either: 36 | 37 | * [MNIST](http://yann.lecun.com/exdb/mnist/) 38 | 39 | * [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion) 40 | 41 | Both sets are 28x28 grayscale images, archived in .gz files. 42 | 43 | The AlexNet, ResNet*, MobileNet, and VGG* models use the [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) data set. Instructions on how to download it is available in the CIFAR10 source files. 44 | 45 | SequenceToSequence uses the [WikiText2](https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip) dataset. It's kept in a regular .zip file. 46 | 47 | TextClassification uses the [AG_NEWS](https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv) dataset, a CSV file. 48 | 49 | # Tutorials 50 | 51 | We have started work on tutorials, but they are not ready yet. They will mostly be based on .NET Interactive notebooks. 
If you haven't tried that environment yet, it's worth playing around with it inside VS Code. 52 | 53 | # Contributing 54 | 55 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 56 | 57 | There are two main things we would like help with: 58 | 59 | 1. Adding completely new examples. File an issue and assign it to yourself, so we can track it. 60 | 61 | 2. Picking up an issue from the 'Issues' list. For example, the examples are currently set up to run on Windows, picking up data from under the 'Downloads' folder. If you have thoughts on the best way to do this on MacOS or Linux, please help with that. 62 | 63 | If you add a new example, please adjust it to work on a mainstream CUDA processor. This means making sure that it builds on an 8GB processor, with sufficient invocations of the garbage collector. 64 | 65 | ## A Useful Tip for Contributors 66 | 67 | A useful tip from the Tensorflow.NET repo: 68 | 69 | After you fork, add dotnet/TorchSharp as 'upstream' to your local repo ... 70 | 71 | ```git 72 | git remote add upstream https://github.com/dotnet/TorchSharpExamples.git 73 | ``` 74 | 75 | This makes it easy to keep your fork up to date by regularly pulling and merging from upstream. 
76 | 77 | Assuming that you do all your development off your main branch, keep your main updated 78 | with these commands: 79 | 80 | ```git 81 | git checkout main 82 | git pull upstream main 83 | git push origin main 84 | ``` 85 | 86 | Then, you merge onto your dev branch: 87 | 88 | ```git 89 | git checkout <> 90 | git merge main 91 | ``` 92 | -------------------------------------------------------------------------------- /src/Utils/CIFARReader .cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections; 4 | using System.Collections.Generic; 5 | using System.IO; 6 | using System.Linq; 7 | using static TorchSharp.torch; 8 | 9 | namespace TorchSharp.Examples 10 | { 11 | /// 12 | /// Data reader utility for datasets that follow the MNIST data set's layout: 13 | /// 14 | /// A number of single-channel (grayscale) images are laid out in a flat file with four 32-bit integers at the head. 15 | /// The format is documented at the bottom of the page at: http://yann.lecun.com/exdb/mnist/ 16 | /// 17 | public sealed class CIFARReader : IDisposable 18 | { 19 | /// 20 | /// Constructor 21 | /// 22 | /// Path to the folder containing the image files. 23 | /// True if this is a test set, otherwise false. 24 | /// The batch size 25 | /// Randomly shuffle the images. 26 | /// The device, i.e. CPU or GPU to place the output tensors on. 27 | /// A list of image transformations, helpful for data augmentation. 28 | public CIFARReader(string path, bool test, int batch_size = 32, bool shuffle = false, Device device = null, IList transforms = null) 29 | { 30 | _transforms = transforms == null ? new List() : transforms; 31 | 32 | // The MNIST data set is small enough to fit in memory, so let's load it there. 
33 | 34 | var dataPath = Path.Combine(path, "cifar-10-batches-bin"); 35 | 36 | if (test) { 37 | _size = ReadSingleFile(Path.Combine(dataPath, "test_batch.bin"), batch_size, shuffle, device); 38 | } else { 39 | _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_1.bin"), batch_size, shuffle, device); 40 | _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_2.bin"), batch_size, shuffle, device); 41 | _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_3.bin"), batch_size, shuffle, device); 42 | _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_4.bin"), batch_size, shuffle, device); 43 | _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_5.bin"), batch_size, shuffle, device); 44 | } 45 | } 46 | 47 | private int ReadSingleFile(string path, int batch_size, bool shuffle, Device device) 48 | { 49 | const int height = 32; 50 | const int width = 32; 51 | const int channels = 3; 52 | const int count = 10000; 53 | 54 | byte[] dataBytes = File.ReadAllBytes(path); 55 | 56 | if (dataBytes.Length != (1 + channels * height * width) * count) 57 | throw new InvalidDataException($"Not a proper CIFAR10 file: {path}"); 58 | 59 | // Set up the indices array. 60 | Random rnd = new Random(); 61 | var indices = !shuffle ? 
62 | Enumerable.Range(0, count).ToArray() : 63 | Enumerable.Range(0, count).OrderBy(c => rnd.Next()).ToArray(); 64 | 65 | var imgSize = channels * height * width; 66 | 67 | // Go through the data and create tensors 68 | for (var i = 0; i < count;) { 69 | 70 | var take = Math.Min(batch_size, Math.Max(0, count - i)); 71 | 72 | if (take < 1) break; 73 | 74 | var dataTensor = torch.zeros(new long[] { take, imgSize }, device: device); 75 | var lablTensor = torch.zeros(new long[] { take }, torch.int64, device: device); 76 | 77 | // Take 78 | for (var j = 0; j < take; j++) { 79 | var idx = indices[i++]; 80 | var lblStart = idx * (1 + imgSize); 81 | var imgStart = lblStart + 1; 82 | 83 | lablTensor[j] = torch.tensor(dataBytes[lblStart], torch.int64); 84 | 85 | var floats = dataBytes[imgStart..(imgStart + imgSize)].Select(b => (float)b).ToArray(); 86 | using (var inputTensor = torch.tensor(floats)) 87 | dataTensor.index_put_(inputTensor, TensorIndex.Single(j)); 88 | } 89 | 90 | data.Add(dataTensor.reshape(take, channels, height, width)); 91 | dataTensor.Dispose(); 92 | labels.Add(lablTensor); 93 | } 94 | 95 | return count; 96 | } 97 | 98 | public int Size { get { 99 | return _size * (_transforms.Count + 1); 100 | } } 101 | private int _size = 0; 102 | 103 | private List data = new List(); 104 | private List labels = new List(); 105 | 106 | private IList _transforms; 107 | 108 | public IEnumerable<(Tensor, Tensor)> Data() 109 | { 110 | for (var i = 0; i < data.Count; i++) { 111 | yield return (data[i], labels[i]); 112 | 113 | foreach (var tfrm in _transforms) { 114 | yield return (tfrm.call(data[i]), labels[i]); 115 | } 116 | } 117 | } 118 | 119 | public void Dispose() 120 | { 121 | data.ForEach(d => d.Dispose()); 122 | labels.ForEach(d => d.Dispose()); 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/TextClassification.fs: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | module FSharpExamples.TextClassification 3 | 4 | open System 5 | open System.IO 6 | open System.Linq 7 | open System.Diagnostics 8 | open System.Collections.Generic 9 | 10 | open TorchSharp 11 | open type TorchSharp.torch.nn 12 | 13 | open TorchSharp.Examples 14 | 15 | // This example is based on the PyTorch tutorial at: 16 | // 17 | // https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html 18 | // 19 | // It relies on the AG_NEWS dataset, which can be downloaded in CSV form at: 20 | // 21 | // https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv 22 | // 23 | // Download the two files, and place them in a folder called "AG_NEWS" in 24 | // accordance with the file path below (Windows only). 25 | 26 | let emsize = 200L 27 | 28 | let batch_size = 64L 29 | let eval_batch_size = 256L 30 | 31 | let epochs = 16 32 | 33 | let lr = 5.0 34 | 35 | let logInterval = 200 36 | 37 | let cmdArgs = Environment.GetCommandLineArgs() 38 | 39 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS") 40 | 41 | torch.random.manual_seed(1L) |> ignore 42 | 43 | let hasCUDA = TorchText.Datasets.cuda_is_available() 44 | 45 | let device = if hasCUDA then torch.CUDA else torch.CPU 46 | 47 | let criterion x y = torch.nn.functional.cross_entropy(x,y) 48 | 49 | type TextClassificationModel(vocabSize, embedDim, nClasses, device:torch.Device) as this = 50 | inherit Module("Transformer") 51 | 52 | let embedding = EmbeddingBag(vocabSize, embedDim, sparse=false) 53 | let fc = Linear(embedDim, nClasses) 54 | 55 | do 56 | let initrange = 0.5 57 | 58 | init.uniform_(embedding.weight, -initrange, initrange) |> ignore 59 | init.uniform_(fc.weight, -initrange, initrange) |> ignore 
60 | init.zeros_(fc.bias) |> ignore 61 | 62 | this.RegisterComponents() 63 | 64 | if device.``type`` = DeviceType.CUDA then 65 | this.``to``(device) |> ignore 66 | 67 | override _.forward(input, offsets) = 68 | embedding.call(input, offsets) --> fc 69 | 70 | let train epoch (trainData:IEnumerable) (model:TextClassificationModel) (optimizer:torch.optim.Optimizer) = 71 | 72 | model.train() 73 | 74 | let mutable total_acc = 0.0 75 | let mutable total_count = 0L 76 | let mutable batch = 0 77 | 78 | let batch_count = trainData.Count() 79 | 80 | for labels,texts,offsets in trainData do 81 | 82 | use d = torch.NewDisposeScope() 83 | 84 | optimizer.zero_grad() 85 | 86 | let predicted_labels = model.forward(texts, offsets) 87 | let loss = criterion predicted_labels labels 88 | 89 | loss.backward() 90 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) |> ignore 91 | optimizer.step() |> ignore 92 | 93 | total_acc <- total_acc + float ((predicted_labels.argmax(1L).eq(labels)).sum().cpu().item()) 94 | total_count <- total_count + labels.size(0) 95 | 96 | if (batch % logInterval = 0) && (batch > 0) then 97 | let accuracy = (total_acc / (float total_count)).ToString("0.00") 98 | printfn $"epoch: {epoch} | batch: {batch} / {batch_count} | accuracy: {accuracy}" 99 | 100 | batch <- batch + 1 101 | 102 | let evaluate (testData:IEnumerable) (model:TextClassificationModel) = 103 | 104 | model.eval() 105 | 106 | let mutable total_acc = 0.0 107 | let mutable total_count = 0L 108 | 109 | for labels,texts,offsets in testData do 110 | 111 | let predicted_labels = model.forward(texts, offsets) 112 | let loss = criterion predicted_labels labels 113 | 114 | total_acc <- total_acc + float ((predicted_labels.argmax(1L).eq(labels)).sum().cpu().item()) 115 | total_count <- total_count + labels.size(0) 116 | 117 | total_acc / (float total_count) 118 | 119 | let run epochs = 120 | 121 | printfn $"Running TextClassification on {device.``type``.ToString()} for {epochs} epochs." 
122 | 123 | use reader = TorchText.Data.AG_NEWSReader.AG_NEWS("train", device, datasetPath) 124 | let dataloader = reader.Enumerate() 125 | 126 | let tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english") 127 | let counter = new TorchText.Vocab.Counter() 128 | 129 | for label,text in dataloader do 130 | counter.update(tokenizer.Invoke(text)) 131 | 132 | let vocab = TorchText.Vocab.Vocab(counter) 133 | 134 | let model = new TextClassificationModel((int64 vocab.Count), emsize, 4L, device) 135 | 136 | let optimizer = torch.optim.SGD(model.parameters(), lr) 137 | let scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.2, last_epoch=5) 138 | 139 | let sw = Stopwatch() 140 | 141 | for epoch = 1 to epochs do 142 | 143 | sw.Restart() 144 | 145 | let batches = [| for b in reader.GetBatches(tokenizer, vocab, batch_size) -> b.ToTuple() |] 146 | train epoch batches model optimizer 147 | 148 | sw.Stop() 149 | 150 | let lrStr = optimizer.ParamGroups.First().LearningRate.ToString("0.0000") 151 | let tsStr = sw.Elapsed.TotalSeconds.ToString("0.0") 152 | printfn $"\nEnd of epoch: {epoch} | lr: {lrStr} | time: {tsStr}s\n" 153 | scheduler.step() |> ignore 154 | 155 | use test_reader = TorchText.Data.AG_NEWSReader.AG_NEWS("test", device, datasetPath) 156 | 157 | sw.Restart() 158 | 159 | let batches = [| for b in test_reader.GetBatches(tokenizer, vocab, batch_size) -> b.ToTuple() |] 160 | let accuracy = evaluate batches model 161 | 162 | let accStr = accuracy.ToString("0.00") 163 | let tsStr = sw.Elapsed.TotalSeconds.ToString("0.0") 164 | printf $"\nEnd of training: test accuracy: {accStr} | eval time: {tsStr}s\n" 165 | 166 | sw.Stop() 167 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/MNIST.fs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
2 | module FSharpExamples.MNIST 3 | 4 | open System 5 | open System.IO 6 | open System.Diagnostics 7 | 8 | open TorchSharp 9 | open type TorchSharp.torch 10 | open type TorchSharp.torch.nn 11 | open type TorchSharp.torch.optim 12 | open type TorchSharp.Scalar 13 | 14 | open TorchSharp.Examples 15 | 16 | // Simple MNIST Convolutional model. 17 | // 18 | // There are at least two interesting data sets to use with this example: 19 | // 20 | // 1. The classic MNIST set of 60000 images of handwritten digits. 21 | // 22 | // It is available at: http://yann.lecun.com/exdb/mnist/ 23 | // 24 | // 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder 25 | // data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test 26 | // data. 27 | // It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion 28 | // 29 | // In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation' 30 | // constant below at the folder location. 
31 | 32 | let mutable trainBatchSize = 64 33 | let mutable testBatchSize = 128 34 | 35 | let logInterval = 100 36 | 37 | let cmdArgs = Environment.GetCommandLineArgs() 38 | let dataset = if cmdArgs.Length = 2 then cmdArgs.[1] else "mnist" 39 | 40 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset) 41 | 42 | torch.random.manual_seed(1L) |> ignore 43 | 44 | let hasCUDA = TorchText.Datasets.cuda_is_available() 45 | 46 | let device = if hasCUDA then torch.CUDA else torch.CPU 47 | 48 | let getDataFiles sourceDir targetDir = 49 | 50 | if not (Directory.Exists(targetDir)) then 51 | Directory.CreateDirectory(targetDir) |> ignore 52 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-images-idx3-ubyte.gz"), targetDir) 53 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-labels-idx1-ubyte.gz"), targetDir) 54 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-images-idx3-ubyte.gz"), targetDir) 55 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-labels-idx1-ubyte.gz"), targetDir) 56 | 57 | type Model(name,device:torch.Device) as this = 58 | inherit Module(name) 59 | 60 | let conv1 = Conv2d(1L, 32L, 3L) 61 | let conv2 = Conv2d(32L, 64L, 3L) 62 | let fc1 = Linear(9216L, 128L) 63 | let fc2 = Linear(128L, 10L) 64 | 65 | let pool1 = MaxPool2d(kernel_size=[|2L; 2L|]) 66 | 67 | let relu = ReLU() 68 | 69 | let dropout1 = Dropout(0.25) 70 | let dropout2 = Dropout(0.5) 71 | let flatten = Flatten() 72 | 73 | let logsm = LogSoftmax(1L) 74 | 75 | do 76 | this.RegisterComponents() 77 | 78 | if device.``type`` = DeviceType.CUDA then 79 | this.``to``(device) |> ignore 80 | 81 | override _.forward(input) = 82 | input 83 | --> conv1 --> relu --> conv2 --> relu --> pool1 --> dropout1 84 | --> flatten 85 | --> fc1 --> relu --> dropout2 --> fc2 86 | --> logsm 87 | 88 | let loss x y = functional.nll_loss(x,y,reduction=Reduction.Mean) 89 | 90 | let 
train (model:Module) (optimizer:Optimizer) (dataLoader: MNISTReader) epoch = 91 | model.train() 92 | 93 | let size = dataLoader.Size 94 | let batchSize = dataLoader.BatchSize 95 | 96 | let mutable batchID = 1 97 | 98 | printfn $"Epoch: {epoch}..." 99 | 100 | for (input,labels) in dataLoader do 101 | 102 | use d = torch.NewDisposeScope() 103 | 104 | optimizer.zero_grad() 105 | 106 | begin // This is introduced in order to let a few tensors go out of scope before GC 107 | use estimate = input --> model 108 | use output = loss estimate labels 109 | 110 | output.backward() 111 | optimizer.step() |> ignore 112 | 113 | if batchID % logInterval = 0 then 114 | printfn $"\rTrain: epoch {epoch} [{batchID * batchSize} / {size}] Loss: {output.ToSingle():F4}" 115 | 116 | batchID <- batchID + 1 117 | end 118 | 119 | let test (model:Model) (dataLoader:MNISTReader) = 120 | model.eval() 121 | 122 | let sz = single dataLoader.Size 123 | 124 | let mutable testLoss = 0.0f 125 | let mutable correct = 0 126 | 127 | for (input,labels) in dataLoader do 128 | 129 | use d = torch.NewDisposeScope() 130 | 131 | begin // This is introduced in order to let a few tensors go out of scope before GC 132 | use estimate = input --> model 133 | use output = loss estimate labels 134 | testLoss <- testLoss + output.ToSingle() 135 | 136 | let pred = estimate.argmax(1L) 137 | correct <- correct + pred.eq(labels).sum().ToInt32() 138 | end 139 | 140 | printfn $"Size: {sz}, Total: {sz}" 141 | printfn $"\rTest set: Average loss {(testLoss / sz):F4} | Accuracy {(single correct / sz):P2}" 142 | 143 | let trainingLoop (model:Model) epochs dataset trainData testData = 144 | 145 | let epochs = if device.``type`` = DeviceType.CUDA then epochs * 4 else epochs 146 | 147 | let optimizer = Adam(model.parameters()) 148 | lr_scheduler.StepLR(optimizer, 1, 0.7, last_epoch=5) |> ignore 149 | 150 | let sw = Stopwatch() 151 | sw.Start() 152 | 153 | for epoch = 1 to epochs do 154 | train model optimizer trainData epoch 155 | 
test model testData 156 | 157 | sw.Stop() 158 | 159 | printfn $"Elapsed time: {sw.Elapsed.TotalSeconds:F1} s." 160 | printfn $"Saving model to '{dataset}'.model.bin" 161 | 162 | model.save(dataset + ".model.bin") |> ignore 163 | 164 | let run epochs = 165 | printfn $"Running MNIST on {device.``type``.ToString()}" 166 | printfn $"Dataset: {dataset}" 167 | 168 | let targetDir = Path.Combine(datasetPath, "test_data") 169 | 170 | getDataFiles datasetPath targetDir 171 | 172 | if device.``type`` = DeviceType.CUDA then 173 | trainBatchSize <- trainBatchSize * 4 174 | testBatchSize <- testBatchSize * 4 175 | 176 | let normImage = torchvision.transforms.Normalize( [|0.1307|], [|0.3081|], device=device) 177 | use train = new MNISTReader(targetDir, "train", trainBatchSize, device=device, shuffle=true, transform=normImage) 178 | use test = new MNISTReader(targetDir, "t10k", testBatchSize, device=device, transform=normImage) 179 | 180 | let model = new Model("model", device) 181 | 182 | trainingLoop model epochs dataset train test 183 | -------------------------------------------------------------------------------- /src/Utils/MNISTReader.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections; 4 | using System.Collections.Generic; 5 | using System.IO; 6 | using System.Linq; 7 | using static TorchSharp.torchvision; 8 | using static TorchSharp.torch; 9 | 10 | namespace TorchSharp.Examples 11 | { 12 | /// 13 | /// Data reader utility for datasets that follow the MNIST data set's layout: 14 | /// 15 | /// A number of single-channel (grayscale) images are laid out in a flat file with four 32-bit integers at the head. 
16 | /// The format is documented at the bottom of the page at: http://yann.lecun.com/exdb/mnist/ 17 | /// 18 | public sealed class MNISTReader : IEnumerable<(Tensor, Tensor)>, IDisposable 19 | { 20 | /// 21 | /// Constructor 22 | /// 23 | /// Path to the folder containing the image files. 24 | /// The file name prefix, either 'train' or 't10k' (the latter being the test data set). 25 | /// The batch size 26 | /// Randomly shuffle the images. 27 | /// The device, i.e. CPU or GPU to place the output tensors on. 28 | /// 29 | public MNISTReader(string path, string prefix, int batch_size = 32, bool shuffle = false, torch.Device device = null, ITransform transform = null) 30 | { 31 | // The MNIST data set is small enough to fit in memory, so let's load it there. 32 | 33 | BatchSize = batch_size; 34 | 35 | var dataPath = Path.Combine(path, prefix + "-images-idx3-ubyte"); 36 | var labelPath = Path.Combine(path, prefix + "-labels-idx1-ubyte"); 37 | 38 | var count = -1; 39 | var height = 0; 40 | var width = 0; 41 | 42 | byte[] dataBytes = null; 43 | byte[] labelBytes = null; 44 | 45 | using (var file = File.Open(dataPath, FileMode.Open, FileAccess.Read, FileShare.Read)) 46 | using (var rdr = new System.IO.BinaryReader(file)) { 47 | 48 | var reader = new Utils.BigEndianReader(rdr); 49 | var x = reader.ReadInt32(); // Magic number 50 | count = reader.ReadInt32(); 51 | 52 | height = reader.ReadInt32(); 53 | width = reader.ReadInt32(); 54 | 55 | // Read all the data into memory. 
56 | dataBytes = reader.ReadBytes(height * width * count); 57 | } 58 | 59 | using (var file = File.Open(labelPath, FileMode.Open, FileAccess.Read, FileShare.Read)) 60 | using (var rdr = new System.IO.BinaryReader(file)) { 61 | 62 | var reader = new Utils.BigEndianReader(rdr); 63 | var x = reader.ReadInt32(); // Magic number 64 | var lblcnt = reader.ReadInt32(); 65 | 66 | if (lblcnt != count) throw new InvalidDataException("Image data and label counts are different."); 67 | 68 | // Read all the data into memory. 69 | labelBytes = reader.ReadBytes(lblcnt); 70 | } 71 | 72 | // Set up the indices array. 73 | Random rnd = new Random(); 74 | var indices = !shuffle ? 75 | Enumerable.Range(0, count).ToArray() : 76 | Enumerable.Range(0, count).OrderBy(c => rnd.Next()).ToArray(); 77 | 78 | var imgSize = height * width; 79 | 80 | // Go through the data and create tensors 81 | for (var i = 0; i < count;) { 82 | 83 | var take = Math.Min(batch_size, Math.Max(0, count - i)); 84 | 85 | if (take < 1) break; 86 | 87 | var dataTensor = torch.zeros(new long[] { take, imgSize}, device: device); 88 | var lablTensor = torch.zeros(new long[] { take }, torch.int64, device: device); 89 | 90 | // Take 91 | for (var j = 0; j < take; j++) { 92 | var idx = indices[i++]; 93 | var imgStart = idx * imgSize; 94 | 95 | var floats = dataBytes[imgStart.. 
(imgStart+imgSize)].Select(b => b/256.0f).ToArray(); 96 | using (var inputTensor = torch.tensor(floats)) 97 | dataTensor.index_put_(inputTensor, TensorIndex.Single(j)); 98 | lablTensor[j] = torch.tensor(labelBytes[idx], torch.int64); 99 | } 100 | 101 | var batch = dataTensor.reshape(take, 1, height, width); 102 | 103 | if (transform != null) { 104 | // Carefully dispose the original 105 | using(var batch_copy = batch) 106 | batch = transform.call(batch); 107 | } 108 | 109 | data.Add(batch); 110 | dataTensor.Dispose(); 111 | labels.Add(lablTensor); 112 | } 113 | 114 | Size = count; 115 | } 116 | 117 | public int Size { get; set; } 118 | 119 | public int BatchSize { get; private set; } 120 | 121 | private List data = new List(); 122 | private List labels = new List(); 123 | 124 | public IEnumerator<(Tensor, Tensor)> GetEnumerator() 125 | { 126 | return new MNISTEnumerator(data, labels); 127 | } 128 | 129 | IEnumerator IEnumerable.GetEnumerator() 130 | { 131 | return GetEnumerator(); 132 | } 133 | 134 | public void Dispose() 135 | { 136 | data.ForEach(d => d.Dispose()); 137 | labels.ForEach(d => d.Dispose()); 138 | } 139 | 140 | private class MNISTEnumerator : IEnumerator<(Tensor, Tensor)> 141 | { 142 | public MNISTEnumerator(List data, List labels) 143 | { 144 | this.data = data; 145 | this.labels = labels; 146 | } 147 | 148 | public (Tensor, Tensor) Current { 149 | get { 150 | if (curIdx == -1) throw new InvalidOperationException("Calling 'Current' before 'MoveNext()'"); 151 | return (data[curIdx], labels[curIdx]); 152 | } 153 | } 154 | 155 | object IEnumerator.Current => Current; 156 | 157 | public void Dispose() 158 | { 159 | } 160 | 161 | public bool MoveNext() 162 | { 163 | curIdx += 1; 164 | return curIdx < data.Count; 165 | } 166 | 167 | public void Reset() 168 | { 169 | curIdx = -1; 170 | } 171 | 172 | private int curIdx = -1; 173 | private List data = null; 174 | private List labels = null; 175 | } 176 | } 177 | } 178 | 
-------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/TextClassification.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Collections.Generic; 6 | using System.Diagnostics; 7 | 8 | using TorchSharp; 9 | 10 | using TorchSharp.Examples; 11 | using TorchSharp.Examples.Utils; 12 | 13 | using static TorchSharp.torch; 14 | 15 | using static TorchSharp.torch.nn; 16 | using static TorchSharp.torch.nn.functional; 17 | 18 | namespace CSharpExamples 19 | { 20 | /// 21 | /// This example is based on the PyTorch tutorial at: 22 | /// 23 | /// https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html 24 | /// 25 | /// It relies on the AG_NEWS dataset, which can be downloaded in CSV form at: 26 | /// 27 | /// https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv 28 | /// 29 | /// Download the two files, and place them in a folder called "AG_NEWS" in 30 | /// accordance with the file path below (Windows only). 31 | /// 32 | /// 33 | public class TextClassification 34 | { 35 | private const long emsize = 200; 36 | 37 | private const long batch_size = 128; 38 | private const long eval_batch_size = 128; 39 | 40 | private const int epochs = 15; 41 | 42 | // This path assumes that you're running this on Windows. 43 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); 44 | 45 | internal static void Run(int epochs, int timeout, string logdir) 46 | { 47 | torch.random.manual_seed(1); 48 | 49 | var cwd = Environment.CurrentDirectory; 50 | 51 | var device = 52 | torch.cuda.is_available() ? torch.CUDA : 53 | torch.mps_is_available() ? 
torch.MPS : 54 | torch.CPU; 55 | 56 | Console.WriteLine(); 57 | Console.WriteLine($"\tRunning TextClassification on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}."); 58 | Console.WriteLine(); 59 | 60 | Console.WriteLine($"\tPreparing training and test data..."); 61 | 62 | using (var reader = TorchText.Data.AG_NEWSReader.AG_NEWS("train", (Device)device, _dataLocation)) 63 | { 64 | 65 | var dataloader = reader.Enumerate(); 66 | 67 | var tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english"); 68 | 69 | var counter = new TorchText.Vocab.Counter(); 70 | foreach (var (label, text) in dataloader) 71 | { 72 | counter.update(tokenizer(text)); 73 | } 74 | 75 | var vocab = new TorchText.Vocab.Vocab(counter); 76 | 77 | 78 | Console.WriteLine($"\tCreating the model..."); 79 | Console.WriteLine(); 80 | 81 | var model = new TextClassificationModel(vocab.Count, emsize, 4).to((Device)device); 82 | 83 | var loss = CrossEntropyLoss(); 84 | var lr = 5.0; 85 | var optimizer = torch.optim.SGD(model.parameters(), lr); 86 | var scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.2, last_epoch: 5); 87 | 88 | var totalTime = new Stopwatch(); 89 | totalTime.Start(); 90 | 91 | foreach (var epoch in Enumerable.Range(1, epochs)) 92 | { 93 | 94 | var sw = new Stopwatch(); 95 | sw.Start(); 96 | 97 | train(epoch, reader.GetBatches(tokenizer, vocab, batch_size), model, loss, optimizer); 98 | 99 | sw.Stop(); 100 | 101 | Console.WriteLine($"\nEnd of epoch: {epoch} | lr: {optimizer.ParamGroups.First().LearningRate:0.0000} | time: {sw.Elapsed.TotalSeconds:0.0}s\n"); 102 | scheduler.step(); 103 | 104 | if (totalTime.Elapsed.TotalSeconds > timeout) break; 105 | } 106 | 107 | totalTime.Stop(); 108 | 109 | using (var test_reader = TorchText.Data.AG_NEWSReader.AG_NEWS("test", (Device)device, _dataLocation)) 110 | { 111 | 112 | var sw = new Stopwatch(); 113 | sw.Start(); 114 | 115 | var accuracy = evaluate(test_reader.GetBatches(tokenizer, 
vocab, eval_batch_size), model, loss); 116 | 117 | sw.Stop(); 118 | 119 | Console.WriteLine($"\nEnd of training: test accuracy: {accuracy:0.00} | eval time: {sw.Elapsed.TotalSeconds:0.0}s\n"); 120 | scheduler.step(); 121 | } 122 | } 123 | 124 | } 125 | 126 | static void train(int epoch, IEnumerable<(Tensor, Tensor, Tensor)> train_data, TextClassificationModel model, Loss criterion, torch.optim.Optimizer optimizer) 127 | { 128 | model.train(); 129 | 130 | double total_acc = 0.0; 131 | long total_count = 0; 132 | long log_interval = 250; 133 | 134 | var batch = 0; 135 | 136 | var batch_count = train_data.Count(); 137 | 138 | using (var d = torch.NewDisposeScope()) 139 | { 140 | foreach (var (labels, texts, offsets) in train_data) 141 | { 142 | 143 | optimizer.zero_grad(); 144 | 145 | using (var predicted_labels = model.forward(texts, offsets)) 146 | { 147 | 148 | var loss = criterion.forward(predicted_labels, labels); 149 | loss.backward(); 150 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5); 151 | optimizer.step(); 152 | 153 | total_acc += (predicted_labels.argmax(1) == labels).sum().to(torch.CPU).item(); 154 | total_count += labels.size(0); 155 | } 156 | 157 | if (batch % log_interval == 0 && batch > 0) 158 | { 159 | var accuracy = total_acc / total_count; 160 | Console.WriteLine($"epoch: {epoch} | batch: {batch} / {batch_count} | accuracy: {accuracy:0.00}"); 161 | } 162 | batch += 1; 163 | } 164 | } 165 | } 166 | 167 | static double evaluate(IEnumerable<(Tensor, Tensor, Tensor)> test_data, TextClassificationModel model, Loss criterion) 168 | { 169 | model.eval(); 170 | 171 | double total_acc = 0.0; 172 | long total_count = 0; 173 | 174 | using (var d = torch.NewDisposeScope()) 175 | { 176 | foreach (var (labels, texts, offsets) in test_data) 177 | { 178 | 179 | using (var predicted_labels = model.forward(texts, offsets)) 180 | { 181 | var loss = criterion.forward(predicted_labels, labels); 182 | 183 | total_acc += (predicted_labels.argmax(1) == 
labels).sum().to(torch.CPU).item(); 184 | total_count += labels.size(0); 185 | } 186 | } 187 | 188 | return total_acc / total_count; 189 | } 190 | } 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /tutorials/CSharp/tutorial1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Setting Things Up\n", 9 | "\n", 10 | "To use TorchSharp, you need some packages from NuGet.\n", 11 | "\n", 12 | "First and foremost, you need to download the most recent version of the `TorchSharp` package at [https://www.nuget.org/packages/TorchSharp/](https://www.nuget.org/packages/TorchSharp/). That's the .NET bindings to libtorch, and it contains the .NET API. However, you also need one of several packages containing distributions of libtorch itself, the highly capable native-code engine behind PyTorch.\n", 13 | "\n", 14 | "Starting with version 0.93.4, there are bundled versions of these packages, which is what these tutorials rely on.\n", 15 | "\n", 16 | "The basic backend supports training and inference on CPUs, but there is also support for CUDA on Windows and Linux, for use on machines with compatible hardware. 
Using CUDA for training can speed things up by orders of magnitude, so it's important to use the right backend.\n", 17 | "\n", 18 | "These are the various libtorch packages:\n", 19 | "\n", 20 | "|Name|URL|Description|\n", 21 | "|-----|-----------------|--------------|\n", 22 | "|TorchSharp-cpu|https://www.nuget.org/packages/TorchSharp-cpu/|TorchSharp with only the CPU backend, which works on Windows, Linux, and MacOS|\n", 23 | "|TorchSharp-cuda-windows|https://www.nuget.org/packages/TorchSharp-cuda-windows/|A CUDA backend with only Windows binaries|\n", 24 | "|TorchSharp-cuda-linux|https://www.nuget.org/packages/TorchSharp-cuda-linux/|A CUDA backend with only Linux binaries|" 25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "All the tutorial notebooks (with the exception of the one that covers CUDA) will rely on the CPU package, since that takes up the least amount of disk space and works everywhere. If you have access to a CUDA processor, replace the package name with the applicable Windows or Linux package from NuGet (TorchSharp-cuda-windows and TorchSharp-cuda-linux, respectively)." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "dotnet_interactive": { 40 | "language": "csharp" 41 | }, 42 | "vscode": { 43 | "languageId": "polyglot-notebook" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "#r \"nuget: TorchSharp-cpu\"\n" 49 | ] 50 | }, 51 | { 52 | "attachments": {}, 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "# Usings\n", 57 | "\n", 58 | "Once you have the right NuGet packages, the next thing is to get the right usings directives at the top of your source files. TorchSharp consists of a lot of namespaces and static classes, and to make programming TorchSharp convenient, you usually need to include several of them."
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "dotnet_interactive": { 66 | "language": "csharp" 67 | }, 68 | "vscode": { 69 | "languageId": "polyglot-notebook" 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "using TorchSharp;\n", 75 | "\n", 76 | "using static TorchSharp.torch.nn;\n", 77 | "using static TorchSharp.torch.nn.functional;\n", 78 | "using static TorchSharp.TensorExtensionMethods;" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "However, for these tutorials, it would obscure the API to have too many usings. It's better, for pedagocial reasons, to explicitly qualify names until their scope becomes well known. So, the tutorials will generally use a minimal set of usings." 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# Formatting\n", 94 | "\n", 95 | ".NET Interactive will, by default, display the fields and properties of the objects that are found at the end of each cell. \n", 96 | "\n", 97 | "Before v0.100.3, you had to add the following code to the top of each notebook:" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "dotnet_interactive": { 105 | "language": "csharp" 106 | }, 107 | "vscode": { 108 | "languageId": "polyglot-notebook" 109 | } 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "using Microsoft.DotNet.Interactive.Formatting;\n", 114 | "\n", 115 | "Formatter.SetPreferredMimeTypesFor(typeof(torch.Tensor), \"text/plain\");\n", 116 | "Formatter.Register((torch.Tensor x) => x.ToString(TorchSharp.TensorStringStyle.Default, fltFormat:\"G7\"));" 117 | ] 118 | }, 119 | { 120 | "attachments": {}, 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "But that is no longer necessary. 
You can still do so if you want to keep code as it is, but TorchSharp is now better integrated with .NET Interactive and will do it automatically." 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Note that there are now three styles that may be used to format tensor output: C#-style, NumPy-style and Julia-style. The default is 'Julia,' but if you continue to use the top-of-notebook formatting, whatever you set it to in the cell at the top will be used to format tensors automatically." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "dotnet_interactive": { 139 | "language": "csharp" 140 | }, 141 | "vscode": { 142 | "languageId": "polyglot-notebook" 143 | } 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "torch.ones(2,3,3)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | " The 'Default' style means that the actual format is picked up at runtime from the global style, which is 'Julia' unless you set it to 'NumPy' or 'CSharp': " 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "dotnet_interactive": { 162 | "language": "csharp" 163 | }, 164 | "vscode": { 165 | "languageId": "polyglot-notebook" 166 | } 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "torch.TensorStringStyle = torch.numpy;\n", 171 | "\n", 172 | "torch.ones(2,3,3)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "vscode": { 180 | "languageId": "polyglot-notebook" 181 | } 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "torch.TensorStringStyle = torch.csharp;\n", 186 | "\n", 187 | "torch.rand(2,3,3)" 188 | ] 189 | }, 190 | { 191 | "attachments": {}, 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "For a more in-depth description of tensor formatting, see: [TorchSharp Wiki - 
Tensor String Formatting](https://github.com/dotnet/TorchSharp/wiki/Tensor-String-Formatting)" 196 | ] 197 | }, 198 | { 199 | "attachments": {}, 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [] 203 | } 204 | ], 205 | "metadata": { 206 | "kernelspec": { 207 | "display_name": ".NET (C#)", 208 | "language": "C#", 209 | "name": ".net-csharp" 210 | }, 211 | "language_info": { 212 | "name": "C#" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 2 217 | } 218 | 
-------------------------------------------------------------------------------- /tutorials/FSharp/tutorial1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Setting Things Up\n", 9 | "\n", 10 | "To use TorchSharp, you need some packages from NuGet.\n", 11 | "\n", 12 | "First and foremost, you need to download the most recent version of the `TorchSharp` package at [https://www.nuget.org/packages/TorchSharp/](https://www.nuget.org/packages/TorchSharp/). That's the .NET bindings to libtorch, and it contains the .NET API. However, you also need one of several packages containing distributions of libtorch itself, the highly capable native-code engine behind PyTorch.\n", 13 | "\n", 14 | "Starting with version 0.93.4, there are bundled versions of these packages, which is what these tutorials rely on.\n", 15 | "\n", 16 | "The basic backend supports training and inference on CPUs, but there is also support for CUDA on Windows and Linux, for use on machines with compatible hardware. 
Using CUDA for training can speed things up by orders of magnitude, so it's important to use the right backend.\n", 17 | "\n", 18 | "These are the various libtorch packages:\n", 19 | "\n", 20 | "|Name|URL|Description|\n", 21 | "|-----|-----------------|--------------|\n", 22 | "|TorchSharp-cpu|https://www.nuget.org/packages/TorchSharp-cpu/|TorchSharp with only the CPU backend, which works on Windows, Linux, and MacOS|\n", 23 | "|TorchSharp-cuda-windows|https://www.nuget.org/packages/TorchSharp-cuda-windows/|A CUDA backend with only Windows binaries|\n", 24 | "|TorchSharp-cuda-linux|https://www.nuget.org/packages/TorchSharp-cuda-linux/|A CUDA backend with only Linux binaries|" 25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "All the tutorial notebooks (with the exception of the one that covers CUDA) will rely on the CPU package, since that takes up the least amount of disk space and works everywhere. If you have access to a CUDA processor, replace the package name with the applicable Windows or Linux package." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "dotnet_interactive": { 40 | "language": "fsharp" 41 | }, 42 | "vscode": { 43 | "languageId": "polyglot-notebook" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "#r \"nuget:TorchSharp-cpu\"" 49 | ] 50 | }, 51 | { 52 | "attachments": {}, 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "# Opening\n", 57 | "\n", 58 | "Once you have the right NuGet packages, the next thing is to get the right open directives at the top of your source files. TorchSharp consists of a lot of namespaces and static classes, and to make programming TorchSharp convenient, you usually need to include a several of them." 
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "dotnet_interactive": { 66 | "language": "fsharp" 67 | }, 68 | "vscode": { 69 | "languageId": "polyglot-notebook" 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "open TorchSharp\n", 75 | "\n", 76 | "open type TorchSharp.torch\n", 77 | "open type TorchSharp.torch.nn\n", 78 | "open type TorchSharp.torch.nn.functional\n", 79 | "open type TorchSharp.TensorExtensionMethods" 80 | ] 81 | }, 82 | { 83 | "attachments": {}, 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "However, for these tutorials, it would obscure the API to have too many `open` directives. It's better, for pedagocial reasons, to explicitly qualify names until their scope becomes well known. So, the tutorials will generally use a minimal set of `open` directives." 88 | ] 89 | }, 90 | { 91 | "attachments": {}, 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# Formatting\n", 96 | "\n", 97 | ".NET Interactive will, by default, display the fields and properties of the objects that are found at the end of each cell. \n", 98 | "\n", 99 | "Before v0.100.3, you had to add the following code to the top of each notebook:" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "dotnet_interactive": { 107 | "language": "fsharp" 108 | }, 109 | "vscode": { 110 | "languageId": "polyglot-notebook" 111 | } 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "open Microsoft.DotNet.Interactive.Formatting\n", 116 | "\n", 117 | "Formatter.SetPreferredMimeTypesFor(typeof, \"text/plain\")\n", 118 | "Formatter.Register(fun (x:torch.Tensor) -> x.ToString(TorchSharp.TensorStringStyle.Default))" 119 | ] 120 | }, 121 | { 122 | "attachments": {}, 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "But that is no longer necessary. 
You can still do so if you want to keep code as it is, but TorchSharp is now better integrated with .NET Interactive and will do it automatically." 127 | ] 128 | }, 129 | { 130 | "attachments": {}, 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Note that there are now three styles that may be used to format tensor output: C#-style, NumPy-style and Julia-style. The default is 'Julia,' but if you continue to use he top-of-notebook formatting, whatever you set it to in the cell at the top will be used to format tensors automatically." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "dotnet_interactive": { 142 | "language": "fsharp" 143 | }, 144 | "vscode": { 145 | "languageId": "polyglot-notebook" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "torch.ones(2,3,3)" 151 | ] 152 | }, 153 | { 154 | "attachments": {}, 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | " The 'Default' style means that the actual format is picked up at runtime from the global style, which is 'Julia' unless you set it to 'NumPy' or 'CSharp' (sorry, no F# styling yet.):" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "dotnet_interactive": { 166 | "language": "fsharp" 167 | }, 168 | "vscode": { 169 | "languageId": "polyglot-notebook" 170 | } 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "torch.TensorStringStyle <- torch.numpy;\n", 175 | "\n", 176 | "torch.ones(2,3,3)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "dotnet_interactive": { 184 | "language": "fsharp" 185 | }, 186 | "vscode": { 187 | "languageId": "polyglot-notebook" 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "torch.TensorStringStyle <- torch.csharp;\n", 193 | "\n", 194 | "torch.rand(2,3,3)" 195 | ] 196 | }, 197 | { 198 | "attachments": {}, 199 | 
"cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "For a more in-depth description of tensor formatting, see: [TorchSharp Wiki - Tensor String Frmatting](https://github.com/dotnet/TorchSharp/wiki/Tensor-String-Formatting)" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": ".NET (C#)", 209 | "language": "C#", 210 | "name": ".net-csharp" 211 | }, 212 | "language_info": { 213 | "file_extension": ".cs", 214 | "mimetype": "text/x-csharp", 215 | "name": "C#", 216 | "pygments_lexer": "csharp", 217 | "version": "9.0" 218 | }, 219 | "orig_nbformat": 4 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # TorchSharp serialized model files from tutorials 14 | *.model.bin 15 | 16 | # User-specific files (MonoDevelop/Xamarin Studio) 17 | *.userprefs 18 | 19 | # Mono auto generated files 20 | mono_crash.* 21 | 22 | # Build results 23 | [Dd]ebug/ 24 | [Dd]ebugPublic/ 25 | [Rr]elease/ 26 | [Rr]eleases/ 27 | x64/ 28 | x86/ 29 | [Aa][Rr][Mm]/ 30 | [Aa][Rr][Mm]64/ 31 | bld/ 32 | [Bb]in/ 33 | [Oo]bj/ 34 | [Ll]og/ 35 | [Ll]ogs/ 36 | 37 | # Visual Studio 2015/2017 cache/options directory 38 | .vs/ 39 | # Uncomment if you have tasks that create the project's static files in wwwroot 40 | #wwwroot/ 41 | 42 | # Visual Studio 2017 auto generated files 43 | Generated\ Files/ 44 | 45 | # MSTest test Results 46 | [Tt]est[Rr]esult*/ 47 | [Bb]uild[Ll]og.* 48 | 49 | # NUnit 50 | *.VisualState.xml 51 | TestResult.xml 52 
| nunit-*.xml 53 | 54 | # Build Results of an ATL Project 55 | [Dd]ebugPS/ 56 | [Rr]eleasePS/ 57 | dlldata.c 58 | 59 | # Benchmark Results 60 | BenchmarkDotNet.Artifacts/ 61 | 62 | # .NET Core 63 | project.lock.json 64 | project.fragment.lock.json 65 | artifacts/ 66 | 67 | # StyleCop 68 | StyleCopReport.xml 69 | 70 | # Files built by Visual Studio 71 | *_i.c 72 | *_p.c 73 | *_h.h 74 | *.ilk 75 | *.meta 76 | *.obj 77 | *.iobj 78 | *.pch 79 | *.pdb 80 | *.ipdb 81 | *.pgc 82 | *.pgd 83 | *.rsp 84 | *.sbr 85 | *.tlb 86 | *.tli 87 | *.tlh 88 | *.tmp 89 | *.tmp_proj 90 | *_wpftmp.csproj 91 | *.log 92 | *.vspscc 93 | *.vssscc 94 | .builds 95 | *.pidb 96 | *.svclog 97 | *.scc 98 | 99 | # Chutzpah Test files 100 | _Chutzpah* 101 | 102 | # Visual C++ cache files 103 | ipch/ 104 | *.aps 105 | *.ncb 106 | *.opendb 107 | *.opensdf 108 | *.sdf 109 | *.cachefile 110 | *.VC.db 111 | *.VC.VC.opendb 112 | 113 | # Visual Studio profiler 114 | *.psess 115 | *.vsp 116 | *.vspx 117 | *.sap 118 | 119 | # Visual Studio Trace Files 120 | *.e2e 121 | 122 | # TFS 2012 Local Workspace 123 | $tf/ 124 | 125 | # Guidance Automation Toolkit 126 | *.gpState 127 | 128 | # ReSharper is a .NET coding add-in 129 | _ReSharper*/ 130 | *.[Rr]e[Ss]harper 131 | *.DotSettings.user 132 | 133 | # TeamCity is a build add-in 134 | _TeamCity* 135 | 136 | # DotCover is a Code Coverage Tool 137 | *.dotCover 138 | 139 | # AxoCover is a Code Coverage Tool 140 | .axoCover/* 141 | !.axoCover/settings.json 142 | 143 | # Visual Studio code coverage results 144 | *.coverage 145 | *.coveragexml 146 | 147 | # NCrunch 148 | _NCrunch_* 149 | .*crunch*.local.xml 150 | nCrunchTemp_* 151 | 152 | # MightyMoose 153 | *.mm.* 154 | AutoTest.Net/ 155 | 156 | # Web workbench (sass) 157 | .sass-cache/ 158 | 159 | # Installshield output folder 160 | [Ee]xpress/ 161 | 162 | # DocProject is a documentation generator add-in 163 | DocProject/buildhelp/ 164 | DocProject/Help/*.HxT 165 | DocProject/Help/*.HxC 166 | DocProject/Help/*.hhc 167 
| DocProject/Help/*.hhk 168 | DocProject/Help/*.hhp 169 | DocProject/Help/Html2 170 | DocProject/Help/html 171 | 172 | # Click-Once directory 173 | publish/ 174 | 175 | # Publish Web Output 176 | *.[Pp]ublish.xml 177 | *.azurePubxml 178 | # Note: Comment the next line if you want to checkin your web deploy settings, 179 | # but database connection strings (with potential passwords) will be unencrypted 180 | *.pubxml 181 | *.publishproj 182 | 183 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 184 | # checkin your Azure Web App publish settings, but sensitive information contained 185 | # in these scripts will be unencrypted 186 | PublishScripts/ 187 | 188 | # NuGet Packages 189 | *.nupkg 190 | # NuGet Symbol Packages 191 | *.snupkg 192 | # The packages folder can be ignored because of Package Restore 193 | **/[Pp]ackages/* 194 | # except build/, which is used as an MSBuild target. 195 | !**/[Pp]ackages/build/ 196 | # Uncomment if necessary however generally it will be regenerated when needed 197 | #!**/[Pp]ackages/repositories.config 198 | # NuGet v3's project.json files produces more ignorable files 199 | *.nuget.props 200 | *.nuget.targets 201 | 202 | # Microsoft Azure Build Output 203 | csx/ 204 | *.build.csdef 205 | 206 | # Microsoft Azure Emulator 207 | ecf/ 208 | rcf/ 209 | 210 | # Windows Store app package directories and files 211 | AppPackages/ 212 | BundleArtifacts/ 213 | Package.StoreAssociation.xml 214 | _pkginfo.txt 215 | *.appx 216 | *.appxbundle 217 | *.appxupload 218 | 219 | # Visual Studio cache files 220 | # files ending in .cache can be ignored 221 | *.[Cc]ache 222 | # but keep track of directories ending in .cache 223 | !?*.[Cc]ache/ 224 | 225 | # Others 226 | ClientBin/ 227 | ~$* 228 | *~ 229 | *.dbmdl 230 | *.dbproj.schemaview 231 | *.jfm 232 | *.pfx 233 | *.publishsettings 234 | orleans.codegen.cs 235 | 236 | # Including strong name files can present a security risk 237 | # 
(https://github.com/github/gitignore/pull/2483#issue-259490424) 238 | #*.snk 239 | 240 | # Since there are multiple workflows, uncomment next line to ignore bower_components 241 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 242 | #bower_components/ 243 | 244 | # RIA/Silverlight projects 245 | Generated_Code/ 246 | 247 | # Backup & report files from converting an old project file 248 | # to a newer Visual Studio version. Backup files are not needed, 249 | # because we have git ;-) 250 | _UpgradeReport_Files/ 251 | Backup*/ 252 | UpgradeLog*.XML 253 | UpgradeLog*.htm 254 | ServiceFabricBackup/ 255 | *.rptproj.bak 256 | 257 | # SQL Server files 258 | *.mdf 259 | *.ldf 260 | *.ndf 261 | 262 | # Business Intelligence projects 263 | *.rdl.data 264 | *.bim.layout 265 | *.bim_*.settings 266 | *.rptproj.rsuser 267 | *- [Bb]ackup.rdl 268 | *- [Bb]ackup ([0-9]).rdl 269 | *- [Bb]ackup ([0-9][0-9]).rdl 270 | 271 | # Microsoft Fakes 272 | FakesAssemblies/ 273 | 274 | # GhostDoc plugin setting file 275 | *.GhostDoc.xml 276 | 277 | # Node.js Tools for Visual Studio 278 | .ntvs_analysis.dat 279 | node_modules/ 280 | 281 | # Visual Studio 6 build log 282 | *.plg 283 | 284 | # Visual Studio 6 workspace options file 285 | *.opt 286 | 287 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
288 | *.vbw 289 | 290 | # Visual Studio LightSwitch build output 291 | **/*.HTMLClient/GeneratedArtifacts 292 | **/*.DesktopClient/GeneratedArtifacts 293 | **/*.DesktopClient/ModelManifest.xml 294 | **/*.Server/GeneratedArtifacts 295 | **/*.Server/ModelManifest.xml 296 | _Pvt_Extensions 297 | 298 | # Paket dependency manager 299 | .paket/paket.exe 300 | paket-files/ 301 | 302 | # FAKE - F# Make 303 | .fake/ 304 | 305 | # CodeRush personal settings 306 | .cr/personal 307 | 308 | # Python Tools for Visual Studio (PTVS) 309 | __pycache__/ 310 | *.pyc 311 | 312 | # Cake - Uncomment if you are using it 313 | # tools/** 314 | # !tools/packages.config 315 | 316 | # Tabs Studio 317 | *.tss 318 | 319 | # Telerik's JustMock configuration file 320 | *.jmconfig 321 | 322 | # BizTalk build output 323 | *.btp.cs 324 | *.btm.cs 325 | *.odx.cs 326 | *.xsd.cs 327 | 328 | # OpenCover UI analysis results 329 | OpenCover/ 330 | 331 | # Azure Stream Analytics local run output 332 | ASALocalRun/ 333 | 334 | # MSBuild Binary and Structured Log 335 | *.binlog 336 | 337 | # NVidia Nsight GPU debugger configuration file 338 | *.nvuser 339 | 340 | # MFractors (Xamarin productivity tool) working folder 341 | .mfractor/ 342 | 343 | # Local History for Visual Studio 344 | .localhistory/ 345 | 346 | # BeatPulse healthcheck temp database 347 | healthchecksdb 348 | 349 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 350 | MigrationBackup/ 351 | 352 | Downloads/ 353 | runs/ 354 | 355 | # Ionide (cross platform F# VS Code tools) working folder 356 | .ionide/ 357 | 358 | *.dat.x 359 | *.dat.y 360 | 361 | nuget.config 362 | -------------------------------------------------------------------------------- /tutorials/CSharp/tutorial3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "dotnet_interactive": { 8 | "language": "csharp" 9 | }, 
10 | "vscode": { 11 | "languageId": "polyglot-notebook" 12 | } 13 | }, 14 | "outputs": [], 15 | "source": [ 16 | "#r \"nuget:TorchSharp-cpu\"\n", 17 | "\n", 18 | "using TorchSharp;\n", 19 | "using static TorchSharp.TensorExtensionMethods;\n", 20 | "using Microsoft.DotNet.Interactive.Formatting;" 21 | ] 22 | }, 23 | { 24 | "attachments": {}, 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Basic Numerics\n", 29 | "\n", 30 | "Arithmetic is what TorchSharp is all about, and the capabilities are rich. It's all about tensor arithmetic, though -- that's where GPU acceleration makes sense." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "dotnet_interactive": { 38 | "language": "csharp" 39 | }, 40 | "vscode": { 41 | "languageId": "polyglot-notebook" 42 | } 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "var a = torch.ones(3,4);\n", 47 | "var b = torch.zeros(3,4);\n", 48 | "var c = torch.tensor(5);\n", 49 | "a * c + b" 50 | ] 51 | }, 52 | { 53 | "attachments": {}, 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "It's often the case that you can reuse the storage for one of the operands, so TorchSharp defines a number of 'in place' operators. These will only work if the operand has the same shape and layout as the result, of course. To use the in-place operators, you can't use the nice math syntax, you have to use functions. TorchSharp follows the PyTorch convention of appending a '_' to the name of in-place operators. It's very similar to the '*=', '+=', etc. operators in C#, except that they can be chained together.\n", 58 | "\n", 59 | "In the expression below, the storage for 'a' is used to hold first the result of multiplying with c, and then adding b." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "dotnet_interactive": { 67 | "language": "csharp" 68 | }, 69 | "vscode": { 70 | "languageId": "polyglot-notebook" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "a.mul_(c).add_(b)" 76 | ] 77 | }, 78 | { 79 | "attachments": {}, 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "After this, 'a' is no longer holding ones, since it's been overwritten. The impact on performance that using in-place operators has is significant, if used consistently, but it's important to know what you're overwriting and not to over-use in-place operators. Think of it as a performance optimization." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "dotnet_interactive": { 91 | "language": "csharp" 92 | }, 93 | "vscode": { 94 | "languageId": "polyglot-notebook" 95 | } 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "a" 100 | ] 101 | }, 102 | { 103 | "attachments": {}, 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Broadcasting" 108 | ] 109 | }, 110 | { 111 | "attachments": {}, 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "In the simple example above, you saw that 'c' was defined from a single value. If we look at it, we can see that it's a singleton tensor. That is, it has no shape." 
116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "dotnet_interactive": { 123 | "language": "csharp" 124 | }, 125 | "vscode": { 126 | "languageId": "polyglot-notebook" 127 | } 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "c.shape" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "dotnet_interactive": { 139 | "language": "csharp" 140 | }, 141 | "vscode": { 142 | "languageId": "polyglot-notebook" 143 | } 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "c" 148 | ] 149 | }, 150 | { 151 | "attachments": {}, 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "Even though its shape differed from that of 'a,' we were able to use it in the computation. How come?\n", 156 | "\n", 157 | "TorchSharp will adjust the shape, without allocating new memory, of a tensor to be compatible with another tensor in situations like this. This is called 'broadcasting' and is found in most every numerics and deep learning library around. It's not just singletons that can be broadcast -- any tensor that is compatible will have it work." 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "dotnet_interactive": { 165 | "language": "csharp" 166 | }, 167 | "vscode": { 168 | "languageId": "polyglot-notebook" 169 | } 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "a = torch.ones(3,4);\n", 174 | "(a + torch.ones(4)).print();\n", 175 | "a + torch.ones(1,4)" 176 | ] 177 | }, 178 | { 179 | "attachments": {}, 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "# Numerics Library\n", 184 | "\n", 185 | "The collection of numerical operators that are available is too large to go through here, but suffice it to say that all the usual suspects are available. Most of the operate on an element-wise basis, i.e. 
the operator is applied to each element of the operands, possibly with broadcasting getting involved.\n", 186 | "\n", 187 | "One notable and __very__ significant exception is matrix multiplication, which is vector dot product generalized to matrices. The '*' operator denotes element-wise multiplication, while matrix multiplication is performed by the 'mm' method:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "dotnet_interactive": { 195 | "language": "csharp" 196 | }, 197 | "vscode": { 198 | "languageId": "polyglot-notebook" 199 | } 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "a = torch.full(4,4, 17);\n", 204 | "b = torch.full(4,4, 12);\n", 205 | "\n", 206 | "(a * b).print();\n", 207 | "(a.mm(b)).str()" 208 | ] 209 | }, 210 | { 211 | "attachments": {}, 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "There are some very specialized operators doing more than one thing at a time avoiding creating temporaries. Some of them are there because the absence of temporaries can lead to more numerical stability (such as avoiding rounding error propagation), or because you don't have to go back and forth between the CPU and GPU as often. It is almost always the right choice to use these special composite operators when they are a match for your computation.\n", 216 | "\n", 217 | "An example is xlogy(), which performs x * log(y) all in one operation." 
218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "dotnet_interactive": { 225 | "language": "csharp" 226 | }, 227 | "vscode": { 228 | "languageId": "polyglot-notebook" 229 | } 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "var x = torch.rand(5);\n", 234 | "var y = torch.rand(5);\n", 235 | "(x * torch.log(y)).print();\n", 236 | "x.xlogy(y)" 237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": ".NET (C#)", 243 | "language": "C#", 244 | "name": ".net-csharp" 245 | }, 246 | "language_info": { 247 | "name": "C#" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /tutorials/FSharp/tutorial3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "dotnet_interactive": { 8 | "language": "fsharp" 9 | }, 10 | "vscode": { 11 | "languageId": "polyglot-notebook" 12 | } 13 | }, 14 | "outputs": [], 15 | "source": [ 16 | "#r \"nuget: TorchSharp-cpu\"\n", 17 | "\n", 18 | "open TorchSharp\n", 19 | "open type TorchSharp.torch\n", 20 | "open type TorchSharp.TensorExtensionMethods" 21 | ] 22 | }, 23 | { 24 | "attachments": {}, 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Basic Numerics\n", 29 | "\n", 30 | "Arithmetic is what TorchSharp is all about, and the capabilities are rich. It's all about tensor arithmetic, though -- that's where GPU acceleration makes sense." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "dotnet_interactive": { 38 | "language": "fsharp" 39 | }, 40 | "vscode": { 41 | "languageId": "polyglot-notebook" 42 | } 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "let a = torch.ones(3,4)\n", 47 | "let b = torch.zeros(3,4)\n", 48 | "let c = torch.tensor(5)\n", 49 | "a * c + b" 50 | ] 51 | }, 52 | { 53 | "attachments": {}, 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "It's often the case that you can reuse the storage for one of the operands, so TorchSharp defines a number of 'in place' operators. These will only work if the operand has the same shape and layout as the result, of course. To use the in-place operators, you can't use the nice math syntax, you have to use functions. TorchSharp follows the PyTorch convention of appending a '_' to the name of in-place operators. It's very similar to the '*=', '+=', etc. operators in C#, except that they can be chained together.\n", 58 | "\n", 59 | "In the expression below, the storage for 'a' is used to hold first the result of multiplying with c, and then adding b." 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "dotnet_interactive": { 67 | "language": "fsharp" 68 | }, 69 | "vscode": { 70 | "languageId": "polyglot-notebook" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "a.mul_(c).add_(b)" 76 | ] 77 | }, 78 | { 79 | "attachments": {}, 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "After this, 'a' is no longer holding ones, since it's been overwritten. The impact on performance that using in-place operators has is significant, if used consistently, but it's important to know what you're overwriting and not to over-use in-place operators. Think of it as a performance optimization." 
84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "dotnet_interactive": { 91 | "language": "fsharp" 92 | }, 93 | "vscode": { 94 | "languageId": "polyglot-notebook" 95 | } 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "a" 100 | ] 101 | }, 102 | { 103 | "attachments": {}, 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Broadcasting" 108 | ] 109 | }, 110 | { 111 | "attachments": {}, 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "In the simple example above, you saw that 'c' was defined from a single value. If we look at it, we can see that it's a singleton tensor. That is, it has no shape." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "dotnet_interactive": { 123 | "language": "fsharp" 124 | }, 125 | "vscode": { 126 | "languageId": "polyglot-notebook" 127 | } 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "c.shape" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "dotnet_interactive": { 139 | "language": "fsharp" 140 | }, 141 | "vscode": { 142 | "languageId": "polyglot-notebook" 143 | } 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "c" 148 | ] 149 | }, 150 | { 151 | "attachments": {}, 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "Even though its shape differed from that of 'a,' we were able to use it in the computation. How come?\n", 156 | "\n", 157 | "TorchSharp will adjust the shape, without allocating new memory, of a tensor to be compatible with another tensor in situations like this. This is called 'broadcasting' and is found in most every numerics and deep learning library around. It's not just singletons that can be broadcast -- any tensor that is compatible will have it work." 
158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "dotnet_interactive": { 165 | "language": "fsharp" 166 | }, 167 | "vscode": { 168 | "languageId": "polyglot-notebook" 169 | } 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "let a = torch.ones(3,4)\n", 174 | "(a + torch.ones(4)).print()\n", 175 | "a + torch.ones(1,4)" 176 | ] 177 | }, 178 | { 179 | "attachments": {}, 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "# Numerics Library\n", 184 | "\n", 185 | "The collection of numerical operators that are available is too large to go through here, but suffice it to say that all the usual suspects are available. Most of the operate on an element-wise basis, i.e. the operator is applied to each element of the operands, possibly with broadcasting getting involved.\n", 186 | "\n", 187 | "One notable and __very__ significant exception is matrix multiplication, which is vector dot product generalized to matrices. The '*' operator denotes element-wise multiplication, while matrix multiplication is performed by the 'mm' method:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "dotnet_interactive": { 195 | "language": "fsharp" 196 | }, 197 | "vscode": { 198 | "languageId": "polyglot-notebook" 199 | } 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "let a = torch.full(4L,4L, (17).ToScalar())\n", 204 | "let b = torch.full(4L,4L, (12).ToScalar())\n", 205 | "\n", 206 | "(a * b).print()\n", 207 | "(a.mm(b))" 208 | ] 209 | }, 210 | { 211 | "attachments": {}, 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "There are some very specialized operators doing more than one thing at a time avoiding creating temporaries. 
Some of them are there because the absence of temporaries can lead to more numerical stability (such as avoiding rounding error propagation), or because you don't have to go back and forth between the CPU and GPU as often. It is almost always the right choice to use these special composite operators when they are a match for your computation.\n", 216 | "\n", 217 | "An example is xlogy(), which performs x * log(y) all in one operation." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "dotnet_interactive": { 225 | "language": "fsharp" 226 | }, 227 | "vscode": { 228 | "languageId": "polyglot-notebook" 229 | } 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "let x = torch.rand(5)\n", 234 | "let y = torch.rand(5)\n", 235 | "(x * torch.log(y)).print()\n", 236 | "x.xlogy(y)" 237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": ".NET (F#)", 243 | "language": "F#", 244 | "name": ".net-fsharp" 245 | }, 246 | "language_info": { 247 | "name": "F#" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/AdversarialExampleGeneration.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
2 | using System; 3 | using System.IO; 4 | using System.Collections.Generic; 5 | 6 | using TorchSharp; 7 | using static TorchSharp.torchvision; 8 | 9 | using TorchSharp.Examples; 10 | using TorchSharp.Examples.Utils; 11 | 12 | using static TorchSharp.torch; 13 | 14 | using static TorchSharp.torch.nn; 15 | using static TorchSharp.torch.nn.functional; 16 | 17 | 18 | namespace CSharpExamples 19 | { 20 | /// 21 | /// FGSM Attack 22 | /// 23 | /// Based on : https://pytorch.org/tutorials/beginner/fgsm_tutorial.html 24 | /// 25 | /// 26 | /// There are at least two interesting data sets to use with this example: 27 | /// 28 | /// 1. The classic MNIST set of 60000 images of handwritten digits. 29 | /// 30 | /// It is available at: http://yann.lecun.com/exdb/mnist/ 31 | /// 32 | /// 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder 33 | /// data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test 34 | /// data. 35 | /// It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion 36 | /// 37 | /// In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation' 38 | /// constant below at the folder location. 39 | /// 40 | /// The example is based on the PyTorch tutorial, but the results from attacking the model are very different from 41 | /// what the tutorial article notes, at least on the machine where it was developed. There is an order-of-magnitude lower 42 | /// drop-off in accuracy in this version. That said, when running the PyTorch tutorial on the same machine, the 43 | /// accuracy trajectories are the same between .NET and Python. If the base convulutational model is trained 44 | /// using Python, and then used for the FGSM attack in both .NET and Python, the drop-off trajectories are extremenly 45 | /// close. 
46 | /// 47 | public class AdversarialExampleGeneration 48 | { 49 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); 50 | 51 | private static int _epochs = 4; 52 | private static int _trainBatchSize = 64; 53 | private static int _testBatchSize = 128; 54 | 55 | static internal void Run(int epochs, int timeout, string logdir, string dataset) 56 | { 57 | _epochs = epochs; 58 | 59 | if (string.IsNullOrEmpty(dataset)) 60 | { 61 | dataset = "mnist"; 62 | } 63 | 64 | var cwd = Environment.CurrentDirectory; 65 | 66 | var datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset); 67 | 68 | var _ = torch.random.manual_seed(1); 69 | 70 | //var device = torch.CPU; 71 | var device = torch.cuda.is_available() ? torch.CUDA : torch.CPU; 72 | Console.WriteLine(); 73 | Console.WriteLine($"\tRunning FGSM attack with {dataset} on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}."); 74 | Console.WriteLine(); 75 | 76 | if (device.type == DeviceType.CUDA) { 77 | _trainBatchSize *= 4; 78 | _testBatchSize *= 4; 79 | _epochs *= 4; 80 | } 81 | 82 | Console.WriteLine($"\tPreparing training and test data..."); 83 | 84 | var sourceDir = _dataLocation; 85 | var targetDir = Path.Combine(_dataLocation, "test_data"); 86 | 87 | var writer = String.IsNullOrEmpty(logdir) ? 
null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName:true); 88 | 89 | if (!Directory.Exists(targetDir)) { 90 | Directory.CreateDirectory(targetDir); 91 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-images-idx3-ubyte.gz"), targetDir); 92 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-labels-idx1-ubyte.gz"), targetDir); 93 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-images-idx3-ubyte.gz"), targetDir); 94 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-labels-idx1-ubyte.gz"), targetDir); 95 | } 96 | 97 | TorchSharp.Examples.MNIST.Model model = null; 98 | 99 | var normImage = transforms.Normalize(new double[] { 0.1307 }, new double[] { 0.3081 }, device: (Device)device); 100 | 101 | using (var test = new MNISTReader(targetDir, "t10k", _testBatchSize, device: device, transform: normImage)) { 102 | 103 | var modelFile = dataset + ".model.bin"; 104 | 105 | if (!File.Exists(modelFile)) { 106 | // We need the model to be trained first, because we want to start with a trained model. 
107 | Console.WriteLine($"\n Running MNIST on {device.type.ToString()} in order to pre-train the model."); 108 | 109 | model = new TorchSharp.Examples.MNIST.Model("model", device); 110 | 111 | using (var train = new MNISTReader(targetDir, "train", _trainBatchSize, device: device, shuffle: true, transform: normImage)) { 112 | MNIST.TrainingLoop(dataset, timeout, writer, (Device)device, model, train, test); 113 | } 114 | 115 | Console.WriteLine("Moving on to the Adversarial model.\n"); 116 | 117 | } else { 118 | model = new TorchSharp.Examples.MNIST.Model("model", torch.CPU); 119 | model.load(modelFile); 120 | } 121 | 122 | model.to((Device)device); 123 | model.eval(); 124 | 125 | var epsilons = new double[] { 0, 0.05, 0.1, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50 }; 126 | 127 | foreach (var ε in epsilons) { 128 | var attacked = Test(model, NLLLoss(), ε, test, test.Size); 129 | Console.WriteLine($"Epsilon: {ε:F2}, accuracy: {attacked:P2}"); 130 | } 131 | } 132 | } 133 | 134 | private static Tensor Attack(Tensor image, double ε, Tensor data_grad) 135 | { 136 | using (var sign = data_grad.sign()) { 137 | var perturbed = (image + ε * sign).clamp(0.0, 1.0); 138 | return perturbed; 139 | } 140 | } 141 | 142 | private static double Test( 143 | TorchSharp.Examples.MNIST.Model model, 144 | Loss criterion, 145 | double ε, 146 | IEnumerable<(Tensor, Tensor)> dataLoader, 147 | long size) 148 | { 149 | int correct = 0; 150 | 151 | foreach (var (data, target) in dataLoader) { 152 | 153 | using (var d = torch.NewDisposeScope()) 154 | { 155 | data.requires_grad = true; 156 | 157 | using (var output = model.forward(data)) 158 | using (var loss = criterion.forward(output, target)) 159 | { 160 | 161 | model.zero_grad(); 162 | loss.backward(); 163 | 164 | var perturbed = Attack(data, ε, data.grad); 165 | 166 | using (var final = model.forward(perturbed)) 167 | { 168 | 169 | correct += final.argmax(1).eq(target).sum().ToInt32(); 170 | } 171 | } 172 | } 173 | } 174 | 175 | return 
(double)correct / size; 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/AlexNet.fs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | module FSharpExamples.AlexNet 3 | 4 | open System 5 | open System.IO 6 | open System.Diagnostics 7 | 8 | open TorchSharp 9 | open TorchSharp.Examples 10 | 11 | open type TorchSharp.torch.nn 12 | open type TorchSharp.torch.optim 13 | open type TorchSharp.Scalar 14 | 15 | // Modified version of original AlexNet to fix CIFAR10 32x32 images. 16 | // 17 | // The dataset for this example can be found at: https://www.cs.toronto.edu/~kriz/cifar.html 18 | // Download the binary file, and place it in a dedicated folder, e.g. 'CIFAR10,' then edit 19 | // the '_dataLocation' definition below to point at the right folder. 20 | // 21 | // Note: so far, CIFAR10 is supported, but not CIFAR100. 
22 | 23 | let mutable trainBatchSize = 64 24 | let mutable testBatchSize = 128 25 | 26 | let logInterval = 25 27 | let numClasses = 10L 28 | 29 | let cmdArgs = Environment.GetCommandLineArgs() 30 | let dataset = "CIFAR10" 31 | 32 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset) 33 | 34 | torch.random.manual_seed(1L) |> ignore 35 | 36 | let hasCUDA = torch.cuda.is_available() 37 | 38 | let device = if hasCUDA then torch.CUDA else torch.CPU 39 | 40 | let getDataFiles sourceDir targetDir = 41 | 42 | if not (Directory.Exists(targetDir)) then 43 | Directory.CreateDirectory(targetDir) |> ignore 44 | Utils.Decompress.ExtractTGZ(Path.Combine(sourceDir, "cifar-10-binary.tar.gz"), targetDir) 45 | 46 | type Model(name,device:torch.Device) as this = 47 | inherit Module(name) 48 | 49 | let features = Sequential(("c1", Conv2d(3L, 64L, kernel_size=3L, stride=2L, padding=1L) :> Module), 50 | ("r1", ReLU(inplace=true) :> Module), 51 | ("mp1", MaxPool2d(kernel_size=[|2L; 2L|]) :> Module), 52 | ("c2", Conv2d(64L, 192L, kernel_size=3L, padding=1L) :> Module), 53 | ("r2", ReLU(inplace=true) :> Module), 54 | ("mp2", MaxPool2d(kernel_size=[|2L; 2L|]) :> Module), 55 | ("c3", Conv2d(192L, 384L, kernel_size=3L, padding=1L) :> Module), 56 | ("r3", ReLU(inplace=true) :> Module), 57 | ("c4", Conv2d(384L, 256L, kernel_size=3L, padding=1L) :> Module), 58 | ("r4", ReLU(inplace=true) :> Module), 59 | ("c5", Conv2d(256L, 256L, kernel_size=3L, padding=1L) :> Module), 60 | ("r5", ReLU(inplace=true) :> Module), 61 | ("mp3", MaxPool2d(kernel_size=[|2L; 2L|]) :> Module), 62 | ("avg", AdaptiveAvgPool2d([|2L; 2L|]) :> Module)) 63 | 64 | let classifier = Sequential(("d1", Dropout() :> Module), 65 | ("l1", Linear(256L * 2L * 2L, 4096L) :> Module), 66 | ("r6", ReLU(inplace=true) :> Module), 67 | ("d2", Dropout() :> Module), 68 | ("l2", Linear(4096L, 4096L) :> Module), 69 | ("r7", ReLU(inplace=true) :> Module), 70 | 
("d3", Dropout() :> Module), 71 | ("l3", Linear(4096L, numClasses) :> Module), 72 | ("logsm", LogSoftmax(1L) :> Module)) 73 | 74 | do 75 | this.RegisterComponents() 76 | 77 | if device.``type`` = DeviceType.CUDA then 78 | this.``to``(device) |> ignore 79 | 80 | override _.forward(input) = 81 | 82 | let avg = features.forward(input) 83 | let x = avg.view([|avg.shape.[0]; 256L*2L*2L|]) 84 | 85 | classifier.forward(x) 86 | 87 | let loss x y = functional.nll_loss(x,y) 88 | 89 | let train (model:Model) (optimizer:Optimizer) (dataLoader: CIFARReader) epoch = 90 | 91 | model.train() 92 | 93 | let size = dataLoader.Size 94 | 95 | let mutable batchID = 1 96 | let mutable total = 0L 97 | let mutable correct = 0L 98 | 99 | printfn $"Epoch: {epoch}..." 100 | 101 | for (input,labels) in dataLoader.Data() do 102 | 103 | use d = torch.NewDisposeScope() 104 | 105 | optimizer.zero_grad() 106 | 107 | begin 108 | use estimate = input --> model 109 | use output = loss estimate labels 110 | 111 | output.backward() 112 | optimizer.step() |> ignore 113 | 114 | total <- total + labels.shape.[0] 115 | 116 | use sum = estimate.argmax(1L).eq(labels).sum() 117 | correct <- correct + sum.ToInt64() 118 | 119 | if batchID % logInterval = 0 then 120 | let count = min (batchID * trainBatchSize) size 121 | let outputString = output.ToSingle().ToString("0.0000") 122 | let accString = ((float correct) / (float total)).ToString("0.0000") 123 | printfn $"\rTrain: epoch {epoch} [{count} / {size}] Loss: {outputString} Acc: {accString}" 124 | 125 | batchID <- batchID + 1 126 | end 127 | 128 | let test (model:Model) (dataLoader:CIFARReader) = 129 | model.eval() 130 | 131 | let sz = single dataLoader.Size 132 | 133 | let mutable testLoss = 0.0f 134 | let mutable correct = 0L 135 | let mutable batchCount = 0L 136 | 137 | for (input,labels) in dataLoader.Data() do 138 | 139 | use d = torch.NewDisposeScope() 140 | 141 | use estimate = input --> model 142 | use output = loss estimate labels 143 | testLoss <- 
testLoss + output.ToSingle() 144 | batchCount <- batchCount + 1L 145 | 146 | use sum = estimate.argmax(1L).eq(labels).sum() 147 | correct <- correct + sum.ToInt64() 148 | 149 | let avgLossString = (testLoss / (single batchCount)).ToString("0.0000") 150 | let accString = ((single correct) / sz).ToString("0.0000") 151 | 152 | printfn $"\rTest set: Average loss {avgLossString} | Accuracy {accString}" 153 | 154 | 155 | let trainingLoop (model:Model) epochs trainData testData = 156 | 157 | use optimizer = Adam(model.parameters(), 0.001) 158 | //NN.Optimizer.StepLR(optimizer, 1u, 0.7, last_epoch=5) |> ignore 159 | 160 | let sw = Stopwatch() 161 | sw.Start() 162 | 163 | for epoch = 1 to epochs do 164 | train model optimizer trainData epoch 165 | test model testData 166 | 167 | sw.Stop() 168 | 169 | printfn $"Elapsed time: {sw.Elapsed.TotalSeconds:F1} s." 170 | 171 | let run epochs = 172 | 173 | if device.``type`` = DeviceType.CUDA then 174 | trainBatchSize <- trainBatchSize * 8 175 | testBatchSize <- testBatchSize * 8 176 | 177 | printfn "" 178 | printfn $"\tRunning AlexNet with {dataset} on {device.``type``.ToString()} for {epochs} epochs" 179 | printfn "" 180 | 181 | let targetDir = Path.Combine(datasetPath, "test_data") 182 | 183 | getDataFiles datasetPath targetDir 184 | 185 | use trainData = new CIFARReader(targetDir, false, trainBatchSize, shuffle=true, device=device) 186 | use testData = new CIFARReader(targetDir, true, testBatchSize, device=device) 187 | 188 | use model = new Model("model", device) 189 | 190 | trainingLoop model epochs trainData testData 191 | 192 | () -------------------------------------------------------------------------------- /tutorials/FSharp/tutorial7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "dotnet_interactive": { 8 | "language": "fsharp" 9 | }, 10 | "vscode": { 11 | "languageId": 
"polyglot-notebook" 12 | } 13 | }, 14 | "outputs": [], 15 | "source": [ 16 | "#r \"nuget: TorchSharp-cpu\"\n", 17 | "\n", 18 | "open TorchSharp\n", 19 | "open type TorchSharp.torch\n", 20 | "open type TorchSharp.TensorExtensionMethods\n", 21 | "open type TorchSharp.torch.distributions" 22 | ] 23 | }, 24 | { 25 | "attachments": {}, 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Training with a Learning Rate Scheduler\n", 30 | "\n", 31 | "In Tutorial 6, we saw how the optimizers took an argument called the 'learning rate,' but didn't spend much time on it except to say that it could have a great impact on how quickly training would converge toward a solution. In fact, you can choose the learning rate (LR) so poorly, that the training doesn't converge at all.\n", 32 | "\n", 33 | "If the LR is too small, training will go very slowly, wasting compute resources. If it is too large, training could result in numeric overflow, or NaNs. Either way, you're in trouble." 34 | ] 35 | }, 36 | { 37 | "attachments": {}, 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "To further complicate matters, it turns out that the learning rate shouldn't necessarily be constant. Training can go much better if the learning rate starts out relatively large and gets smaller as you get closer to the end.\n", 42 | "\n", 43 | "There's a solution for this, called a Learning Rate Scheduler. An LRS instance has access to the internal state of the optimizer, and can modify the LR as it goes along. There are several algorithms for scheduling, of which TorchSharp currently implements a significant subset." 44 | ] 45 | }, 46 | { 47 | "attachments": {}, 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "Before demonstrating, let's have a model and a baseline training loop." 
52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "dotnet_interactive": { 59 | "language": "fsharp" 60 | }, 61 | "vscode": { 62 | "languageId": "polyglot-notebook" 63 | } 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "type Trivial() as this = \n", 68 | " inherit nn.Module(\"Trivial\")\n", 69 | "\n", 70 | " let lin1 = nn.Linear(1000L, 100L)\n", 71 | " let lin2 = nn.Linear(100L, 10L)\n", 72 | "\n", 73 | " do\n", 74 | " this.RegisterComponents()\n", 75 | "\n", 76 | " override _.forward(input) = \n", 77 | " \n", 78 | " use x = lin1.forward(input)\n", 79 | " use y = nn.functional.relu(x)\n", 80 | " lin2.forward(y)" 81 | ] 82 | }, 83 | { 84 | "attachments": {}, 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "To demonstrate how to correctly use an LR scheduler, our training data needs to look more like real training data, that is, it needs to be divided into batches." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "dotnet_interactive": { 96 | "language": "fsharp" 97 | }, 98 | "vscode": { 99 | "languageId": "polyglot-notebook" 100 | } 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "let learning_rate = 0.01\n", 105 | "let model = Trivial()\n", 106 | "\n", 107 | "let data = [for i = 1 to 16 do rand(32,1000)] // Our pretend input data\n", 108 | "let result = [for i = 1 to 16 do rand(32,10)] // Our pretend ground truth.\n", 109 | "\n", 110 | "let loss x y = nn.functional.mse_loss(x,y)\n", 111 | "\n", 112 | "let optimizer = torch.optim.SGD(model.parameters(), learning_rate)\n", 113 | "\n", 114 | "for epoch = 1 to 300 do\n", 115 | "\n", 116 | " for idx = 0 to data.Length-1 do\n", 117 | " // Compute the loss\n", 118 | " let pred = model.forward(data.[idx])\n", 119 | " let output = loss pred result.[idx]\n", 120 | "\n", 121 | " // Clear the gradients before doing the back-propagation\n", 122 | " model.zero_grad()\n", 123 | "\n", 124 | " // 
Do back-progatation, which computes all the gradients.\n", 125 | " output.backward()\n", 126 | "\n", 127 | " optimizer.step() |> ignore\n", 128 | "\n", 129 | "let pred = model.forward(data.[0])\n", 130 | "(loss pred result.[0]).item()" 131 | ] 132 | }, 133 | { 134 | "attachments": {}, 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "When I ran this, the loss was down to 0.051 after 3 seconds. (It took longer the first time around.)" 139 | ] 140 | }, 141 | { 142 | "attachments": {}, 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "## StepLR\n", 147 | "\n", 148 | "StepLR uses subtraction to adjust the learning rate every so often. The difference it makes to the training loop is that you wrap the optimizer, and then call `step` on the scheduler (once per epoch) as well as the optimizer (once per batch)." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "dotnet_interactive": { 156 | "language": "fsharp" 157 | }, 158 | "vscode": { 159 | "languageId": "polyglot-notebook" 160 | } 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "let learning_rate = 0.01\n", 165 | "let model = Trivial()\n", 166 | "\n", 167 | "let data = [for i = 1 to 16 do rand(32,1000)] // Our pretend input data\n", 168 | "let result = [for i = 1 to 16 do rand(32,10)] // Our pretend ground truth.\n", 169 | "\n", 170 | "let loss x y = nn.functional.mse_loss(x,y)\n", 171 | "\n", 172 | "let optimizer = torch.optim.SGD(model.parameters(), learning_rate)\n", 173 | "let scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 25, 0.95, verbose=true)\n", 174 | "\n", 175 | "for epoch = 1 to 300 do\n", 176 | "\n", 177 | " for idx = 0 to data.Length-1 do\n", 178 | " // Compute the loss\n", 179 | " let pred = model.forward(data.[idx])\n", 180 | " let output = loss pred result.[idx]\n", 181 | "\n", 182 | " // Clear the gradients before doing the back-propagation\n", 183 | " model.zero_grad()\n", 
184 | "\n", 185 | " // Do back-propagation, which computes all the gradients.\n", 186 | " output.backward()\n", 187 | "\n", 188 | " optimizer.step() |> ignore\n", 189 | "\n", 190 | " scheduler.step() |> ignore\n", 191 | "\n", 192 | "let pred = model.forward(data.[0])\n", 193 | "(loss pred result.[0]).item()" 194 | ] 195 | }, 196 | { 197 | "attachments": {}, 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "Well, that was underwhelming. The loss (in my case) went up a bit, so that's nothing to get excited about. For this trivial model, using a scheduler isn't going to make a huge difference, and it may not make much of a difference even for complex models. It's very hard to know until you try it, but now you know how to try it out. If you try this trivial example over and over, you will see that the results vary quite a bit. It's simply too simple.\n", 202 | "\n", 203 | "Regardless, you can see from the verbose output that the learning rate is adjusted as the epochs proceed."
204 | ] 205 | }, 206 | { 207 | "attachments": {}, 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [] 211 | } 212 | ], 213 | "metadata": { 214 | "kernelspec": { 215 | "display_name": ".NET (F#)", 216 | "language": "F#", 217 | "name": ".net-fsharp" 218 | }, 219 | "language_info": { 220 | "name": "F#" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /tutorials/CSharp/tutorial7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "dotnet_interactive": { 8 | "language": "csharp" 9 | }, 10 | "vscode": { 11 | "languageId": "polyglot-notebook" 12 | } 13 | }, 14 | "outputs": [], 15 | "source": [ 16 | "#r \"nuget: TorchSharp-cpu\"\n", 17 | "\n", 18 | "using TorchSharp;\n", 19 | "using static TorchSharp.torch;\n", 20 | "using static TorchSharp.TensorExtensionMethods;\n", 21 | "using static TorchSharp.torch.distributions;" 22 | ] 23 | }, 24 | { 25 | "attachments": {}, 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Training with a Learning Rate Scheduler\n", 30 | "\n", 31 | "In Tutorial 6, we saw how the optimizers took an argument called the 'learning rate,' but didn't spend much time on it except to say that it could have a great impact on how quickly training would converge toward a solution. In fact, you can choose the learning rate (LR) so poorly, that the training doesn't converge at all.\n", 32 | "\n", 33 | "If the LR is too small, training will go very slowly, wasting compute resources. If it is too large, training could result in numeric overflow, or NaNs. Either way, you're in trouble." 
34 | ] 35 | }, 36 | { 37 | "attachments": {}, 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "To further complicate matters, it turns out that the learning rate shouldn't necessarily be constant. Training can go much better if the learning rate starts out relatively large and gets smaller as you get closer to the end.\n", 42 | "\n", 43 | "There's a solution for this, called a Learning Rate Scheduler. An LRS instance has access to the internal state of the optimizer, and can modify the LR as it goes along. Some schedulers modify other optimizer state, too, such as the momentum (for optimizers that use momentum).\n", 44 | "\n", 45 | "There are several algorithms for scheduling, and TorchSharp implements a number of them." 46 | ] 47 | }, 48 | { 49 | "attachments": {}, 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "Before demonstrating, let's have a model and a baseline training loop." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "dotnet_interactive": { 61 | "language": "csharp" 62 | }, 63 | "vscode": { 64 | "languageId": "polyglot-notebook" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "private class Trivial : nn.Module\n", 70 | "{\n", 71 | " public Trivial()\n", 72 | " : base(nameof(Trivial))\n", 73 | " {\n", 74 | " RegisterComponents();\n", 75 | " }\n", 76 | "\n", 77 | " public override Tensor forward(Tensor input)\n", 78 | " {\n", 79 | " using var x = lin1.forward(input);\n", 80 | " using var y = nn.functional.relu(x);\n", 81 | " return lin2.forward(y);\n", 82 | " }\n", 83 | "\n", 84 | " private nn.Module lin1 = nn.Linear(1000, 100);\n", 85 | " private nn.Module lin2 = nn.Linear(100, 10);\n", 86 | "}" 87 | ] 88 | }, 89 | { 90 | "attachments": {}, 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "To demonstrate how to correctly use an LR scheduler, our training data needs to look more like real training data, that is, it 
needs to be divided into batches." 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "dotnet_interactive": { 102 | "language": "csharp" 103 | }, 104 | "vscode": { 105 | "languageId": "polyglot-notebook" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "var learning_rate = 0.01f;\n", 111 | "var model = new Trivial();\n", 112 | "var loss = nn.MSELoss();\n", 113 | "\n", 114 | "var data = Enumerable.Range(0,16).Select(_ => rand(32,1000)).ToList(); // Our pretend input data\n", 115 | "var results = Enumerable.Range(0,16).Select(_ => rand(32,10)).ToList(); // Our pretend ground truth.\n", 116 | "\n", 117 | "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);\n", 118 | "\n", 119 | "for (int i = 0; i < 300; i++) {\n", 120 | "\n", 121 | " for (int idx = 0; idx < data.Count; idx++) {\n", 122 | " // Compute the loss\n", 123 | " using var output = loss.forward(model.forward(data[idx]), results[idx]);\n", 124 | "\n", 125 | " // Clear the gradients before doing the back-propagation\n", 126 | " model.zero_grad();\n", 127 | "\n", 128 | " // Do back-propagation, which computes all the gradients.\n", 129 | " output.backward();\n", 130 | "\n", 131 | " optimizer.step();\n", 132 | " }\n", 133 | "}\n", 134 | "\n", 135 | "loss.forward(model.forward(data[0]), results[0]).item()" 136 | ] 137 | }, 138 | { 139 | "attachments": {}, 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "When I ran this, the loss was down to 0.095 after 1 second. (It took longer the first time around.)" 144 | ] 145 | }, 146 | { 147 | "attachments": {}, 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## StepLR\n", 152 | "\n", 153 | "StepLR multiplies the learning rate by a constant factor (gamma) every so often. The difference it makes to the training loop is that you wrap the optimizer, and then call `step` on the scheduler (once per epoch) as well as the optimizer (once per batch)."
154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "dotnet_interactive": { 161 | "language": "csharp" 162 | }, 163 | "vscode": { 164 | "languageId": "polyglot-notebook" 165 | } 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "var learning_rate = 0.01f;\n", 170 | "var model = new Trivial();\n", 171 | "var loss = nn.MSELoss();\n", 172 | "\n", 173 | "var data = Enumerable.Range(0,16).Select(_ => rand(32,1000)).ToList(); // Our pretend input data\n", 174 | "var results = Enumerable.Range(0,16).Select(_ => rand(32,10)).ToList(); // Our pretend ground truth.\n", 175 | "\n", 176 | "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);\n", 177 | "var scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 25, 0.95);\n", 178 | "\n", 179 | "for (int i = 0; i < 300; i++) {\n", 180 | "\n", 181 | " for (int idx = 0; idx < data.Count; idx++) {\n", 182 | " // Compute the loss\n", 183 | " using var output = loss.forward(model.forward(data[idx]), results[idx]);\n", 184 | "\n", 185 | " // Clear the gradients before doing the back-propagation\n", 186 | " model.zero_grad();\n", 187 | "\n", 188 | " // Do back-propagation, which computes all the gradients.\n", 189 | " output.backward();\n", 190 | "\n", 191 | " optimizer.step();\n", 192 | " }\n", 193 | "\n", 194 | " scheduler.step();\n", 195 | "}\n", 196 | "\n", 197 | "loss.forward(model.forward(data[0]), results[0]).item()" 198 | ] 199 | }, 200 | { 201 | "attachments": {}, 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "Well, that was underwhelming. The loss (in my case) went up a bit, so that's nothing to get excited about. For this trivial model, using a scheduler isn't going to make a huge difference, and it may not make much of a difference even for complex models. It's very hard to know until you try it, but now you know how to try it out. 
If you try this trivial example over and over, you will see that the results vary quite a bit. It's simply too simple.\n", 206 | "\n", 207 | "Regardless, you can see from the verbose output that the learning rate is adjusted as the epochs proceed. \n", 208 | "\n", 209 | "Note: If you're using 0.93.9 and you see odd dips in the learning rate, that's a bug in the verbose printout logic, not the learning rate scheduler itself." 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "kernelspec": { 215 | "display_name": ".NET (C#)", 216 | "language": "C#", 217 | "name": ".net-csharp" 218 | }, 219 | "language_info": { 220 | "name": "C#" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/MNIST.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Collections.Generic; 5 | using System.Diagnostics; 6 | 7 | using TorchSharp; 8 | using static TorchSharp.torchvision; 9 | 10 | using TorchSharp.Examples; 11 | using TorchSharp.Examples.Utils; 12 | 13 | using static TorchSharp.torch; 14 | 15 | using static TorchSharp.torch.nn; 16 | using static TorchSharp.torch.nn.functional; 17 | 18 | namespace CSharpExamples 19 | { 20 | /// 21 | /// Simple MNIST Convolutional model. 22 | /// 23 | /// 24 | /// There are at least two interesting data sets to use with this example: 25 | /// 26 | /// 1. The classic MNIST set of 60000 images of handwritten digits. 27 | /// 28 | /// It is available at: http://yann.lecun.com/exdb/mnist/ 29 | /// 30 | /// 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder 31 | /// data set to train on. 
It's just as large as MNIST, and has the same 60/10 split of training and test 32 | /// data. 33 | /// It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion 34 | /// 35 | /// In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation' 36 | /// constant below at the folder location. 37 | /// 38 | public class MNIST 39 | { 40 | private static int _epochs = 4; 41 | private static int _trainBatchSize = 64; 42 | private static int _testBatchSize = 128; 43 | 44 | private readonly static int _logInterval = 100; 45 | 46 | internal static void Run(int epochs, int timeout, string logdir, string dataset) 47 | { 48 | _epochs = epochs; 49 | 50 | if (string.IsNullOrEmpty(dataset)) 51 | { 52 | dataset = "mnist"; 53 | } 54 | 55 | var device = 56 | torch.cuda.is_available() ? torch.CUDA : 57 | torch.mps_is_available() ? torch.MPS : 58 | torch.CPU; 59 | 60 | Console.WriteLine(); 61 | Console.WriteLine($"\tRunning MNIST with {dataset} on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}."); 62 | Console.WriteLine(); 63 | 64 | var datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset); 65 | 66 | random.manual_seed(1); 67 | 68 | var cwd = Environment.CurrentDirectory; 69 | 70 | var writer = String.IsNullOrEmpty(logdir) ? 
null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName: true); 71 | 72 | var sourceDir = datasetPath; 73 | var targetDir = Path.Combine(datasetPath, "test_data"); 74 | 75 | if (!Directory.Exists(targetDir)) 76 | { 77 | Directory.CreateDirectory(targetDir); 78 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-images-idx3-ubyte.gz"), targetDir); 79 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-labels-idx1-ubyte.gz"), targetDir); 80 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-images-idx3-ubyte.gz"), targetDir); 81 | Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-labels-idx1-ubyte.gz"), targetDir); 82 | } 83 | 84 | if (device.type == DeviceType.CUDA) 85 | { 86 | _trainBatchSize *= 4; 87 | _testBatchSize *= 4; 88 | } 89 | 90 | Console.WriteLine($"\tCreating the model..."); 91 | 92 | var model = new TorchSharp.Examples.MNIST.Model("model", device); 93 | 94 | var normImage = transforms.Normalize(new double[] { 0.1307 }, new double[] { 0.3081 }, device: (Device)device); 95 | 96 | Console.WriteLine($"\tPreparing training and test data..."); 97 | Console.WriteLine(); 98 | 99 | using (MNISTReader train = new MNISTReader(targetDir, "train", _trainBatchSize, device: device, shuffle: true, transform: normImage), 100 | test = new MNISTReader(targetDir, "t10k", _testBatchSize, device: device, transform: normImage)) 101 | { 102 | 103 | TrainingLoop(dataset, timeout, writer, device, model, train, test); 104 | } 105 | } 106 | 107 | internal static void TrainingLoop(string dataset, int timeout, TorchSharp.Modules.SummaryWriter writer, Device device, Module model, MNISTReader train, MNISTReader test) 108 | { 109 | var optimizer = optim.Adam(model.parameters()); 110 | 111 | var scheduler = optim.lr_scheduler.StepLR(optimizer, 1, 0.7); 112 | 113 | Stopwatch totalTime = new Stopwatch(); 114 | totalTime.Start(); 115 | 116 | for (var epoch = 1; epoch <= _epochs; epoch++) 117 | { 118 | 119 | Train(model, optimizer, 
NLLLoss(reduction: Reduction.Mean), device, train, epoch, train.BatchSize, train.Size); 120 | Test(model, NLLLoss(reduction: nn.Reduction.Sum), writer, device, test, epoch, test.Size); 121 | 122 | Console.WriteLine($"End-of-epoch memory use: {GC.GetTotalMemory(false)}"); 123 | 124 | if (totalTime.Elapsed.TotalSeconds > timeout) break; 125 | } 126 | 127 | totalTime.Stop(); 128 | Console.WriteLine($"Elapsed time: {totalTime.Elapsed.TotalSeconds:F1} s."); 129 | 130 | Console.WriteLine("Saving model to '{0}'", dataset + ".model.bin"); 131 | model.save(dataset + ".model.bin"); 132 | } 133 | 134 | private static void Train( 135 | Module model, 136 | optim.Optimizer optimizer, 137 | Loss loss, 138 | Device device, 139 | IEnumerable<(Tensor, Tensor)> dataLoader, 140 | int epoch, 141 | long batchSize, 142 | int size) 143 | { 144 | model.train(); 145 | 146 | int batchId = 1; 147 | 148 | Console.WriteLine($"Epoch: {epoch}..."); 149 | 150 | foreach (var (data, target) in dataLoader) 151 | { 152 | using (var d = torch.NewDisposeScope()) 153 | { 154 | optimizer.zero_grad(); 155 | 156 | var prediction = model.forward(data); 157 | var output = loss.forward(prediction, target); 158 | 159 | output.backward(); 160 | 161 | optimizer.step(); 162 | 163 | if (batchId % _logInterval == 0) 164 | { 165 | Console.WriteLine($"\rTrain: epoch {epoch} [{batchId * batchSize} / {size}] Loss: {output.ToSingle():F4}"); 166 | } 167 | 168 | batchId++; 169 | } 170 | 171 | } 172 | } 173 | 174 | private static void Test( 175 | Module model, 176 | Loss loss, 177 | TorchSharp.Modules.SummaryWriter writer, 178 | Device device, 179 | IEnumerable<(Tensor, Tensor)> dataLoader, 180 | int epoch, 181 | int size) 182 | { 183 | model.eval(); 184 | 185 | double testLoss = 0; 186 | int correct = 0; 187 | 188 | foreach (var (data, target) in dataLoader) 189 | { 190 | using (var d = torch.NewDisposeScope()) 191 | { 192 | var prediction = model.forward(data); 193 | var output = loss.forward(prediction, target); 194 | 
testLoss += output.ToSingle(); 195 | 196 | correct += prediction.argmax(1).eq(target).sum().ToInt32(); 197 | } 198 | } 199 | 200 | Console.WriteLine($"Size: {size}, Total: {size}"); 201 | 202 | Console.WriteLine($"\rTest set: Average loss {(testLoss / size):F4} | Accuracy {((double)correct / size):P2}"); 203 | 204 | if (writer != null) 205 | { 206 | writer.add_scalar("MNIST/loss", (float)(testLoss / size), epoch); 207 | writer.add_scalar("MNIST/accuracy", (float)correct / size, epoch); 208 | } 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/Utils/ArgumentParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.IO; 5 | using System.Threading.Tasks; 6 | 7 | using Newtonsoft.Json; 8 | 9 | namespace TorchSharp.Examples.Utils 10 | { 11 | /// 12 | /// Yet another argument parser. 13 | /// 14 | public sealed class ArgumentParser 15 | { 16 | public ArgumentParser(FileInfo argumentConfiguration, IList args) 17 | { 18 | Initialize(File.ReadAllText(argumentConfiguration.FullName), args); 19 | } 20 | 21 | public ArgumentParser(string argumentConfiguration, IList args) 22 | { 23 | Initialize(argumentConfiguration, args); 24 | } 25 | 26 | public void UsingMessage(string name, string positionals) 27 | { 28 | Console.Error.WriteLine("using:"); 29 | Console.Error.Write($"{name} "); 30 | foreach (var desc in descriptors) 31 | { 32 | Console.Error.Write($"[--{desc.LongName} | -{desc.ShortName}] {desc.ArgType.ToString().ToLower()} "); 33 | } 34 | 35 | Console.Error.WriteLine($"{positionals}..."); 36 | 37 | foreach (var desc in descriptors) 38 | { 39 | Console.Error.WriteLine($"--{desc.LongName} | -{desc.ShortName}: {desc.ArgType.ToString().ToLower()}, {desc.Explanation} "); 40 | } 41 | } 42 | public int Count => positionalArguments.Count; 43 | 44 | public string this[int index] 45 | { 46 | 
get { return positionalArguments[index]; } 47 | } 48 | 49 | private void Initialize(string argumentConfiguration, IList args) 50 | { 51 | try 52 | { 53 | descriptors = JsonConvert.DeserializeObject>(argumentConfiguration); 54 | 55 | for (int idx = 0; idx < args.Count; ++idx) 56 | { 57 | var arg = args[idx]; 58 | 59 | if (arg.StartsWith("--")) 60 | { 61 | // Long form argument, --name=value, --name:value, or --name value 62 | string[] kv = null; 63 | 64 | if (arg.Contains(':')) 65 | { 66 | kv = arg.Substring(2).Split(':'); 67 | } 68 | else if (arg.Contains('=')) 69 | { 70 | kv = arg.Substring(2).Split('='); 71 | } 72 | else 73 | { 74 | kv = new string[] { arg.Substring(2) }; 75 | } 76 | 77 | ProcessArgument(kv, args, descriptors, false, ref idx); 78 | } 79 | else if (arg.StartsWith("-")) 80 | { 81 | // Short form argument, -v value 82 | var key = arg.Substring(1); 83 | 84 | if (key.Length == 1) 85 | { 86 | ProcessArgument(new string[] { key }, args, descriptors, true, ref idx); 87 | } 88 | else 89 | { 90 | ProcessFlags(key, args, descriptors); 91 | } 92 | } 93 | else 94 | { 95 | // Positional argument, always interpreted as a string 96 | positionalArguments.Add(arg); 97 | } 98 | } 99 | } 100 | catch (Exception e) 101 | { 102 | Console.Error.WriteLine($"Internal error reading command arguments definition file: {e.Message}"); 103 | } 104 | } 105 | 106 | private void ProcessFlags(string key, IList args, List arguments) 107 | { 108 | foreach (var ch in key) 109 | { 110 | var name = ch.ToString(); 111 | 112 | foreach (var argDesc in arguments) 113 | { 114 | if (name.Equals(argDesc.ShortName)) 115 | { 116 | if (argDesc.ArgType != ArgumentDescriptor.ArgumentType.Flag) 117 | { 118 | Console.Error.WriteLine("Multiple short-form arguments are only valid if they do not take a value."); 119 | continue; 120 | } 121 | namedArguments.Add(argDesc.LongName, true); 122 | break; 123 | } 124 | } 125 | } 126 | } 127 | 128 | private void ProcessArgument(string[] kv, IList args, List
arguments, bool shortForm, ref int idx) 129 | { 130 | var name = kv[0]; 131 | 132 | var argType = ArgumentDescriptor.ArgumentType.Flag; 133 | 134 | foreach (var argDesc in arguments) 135 | { 136 | if (!shortForm && name.ToLowerInvariant().Equals(argDesc.LongName.ToLowerInvariant()) || 137 | shortForm && name.Equals(argDesc.ShortName)) 138 | { 139 | argType = argDesc.ArgType; 140 | name = argDesc.LongName; 141 | break; 142 | } 143 | } 144 | 145 | try 146 | { 147 | switch (argType) 148 | { 149 | case ArgumentDescriptor.ArgumentType.Flag: 150 | namedArguments.Add(name, true); 151 | break; 152 | case ArgumentDescriptor.ArgumentType.Boolean: 153 | { 154 | if (bool.TryParse((kv.Length == 1) ? args[++idx] : kv[1], out bool value)) 155 | { 156 | namedArguments.Add(name, value); 157 | } 158 | break; 159 | } 160 | case ArgumentDescriptor.ArgumentType.Integer: 161 | { 162 | if (int.TryParse((kv.Length == 1) ? args[++idx] : kv[1], out int value)) 163 | { 164 | namedArguments.Add(name, value); 165 | } 166 | break; 167 | } 168 | case ArgumentDescriptor.ArgumentType.String: 169 | { 170 | var value = (kv.Length == 1) ? args[++idx] : kv[1]; 171 | namedArguments.Add(name, value); 172 | break; 173 | } 174 | case ArgumentDescriptor.ArgumentType.List: 175 | { 176 | var value = ((kv.Length == 1) ? 
args[++idx] : kv[1]).Split(','); 177 | namedArguments.Add(name, value); 178 | break; 179 | } 180 | } 181 | } 182 | catch(ArgumentOutOfRangeException) 183 | { 184 | } 185 | } 186 | 187 | public bool TryGetValueBool(string name, out bool value) 188 | { 189 | return TryGetValue(name, out value); 190 | } 191 | 192 | public bool TryGetValueInt(string name, out int value) 193 | { 194 | return TryGetValue(name, out value); 195 | } 196 | 197 | public bool TryGetValueString(string name, out string value) 198 | { 199 | return TryGetValue(name, out value); 200 | } 201 | 202 | public bool TryGetValueStrings(string name, out string[] value) 203 | { 204 | return TryGetValue(name, out value); 205 | } 206 | 207 | public bool TryGetValue(string name, out T value, T @default = default(T)) 208 | { 209 | if (namedArguments.TryGetValue(name, out var obj) && obj is T) 210 | { 211 | value = (T)obj; 212 | return true; 213 | } 214 | value = @default; 215 | return false; 216 | } 217 | 218 | private List descriptors = null; 219 | 220 | private Dictionary namedArguments = new Dictionary(); 221 | private List positionalArguments = new List(); 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /src/CSharp/Models/ResNet.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.Collections.Generic; 4 | using TorchSharp; 5 | using static TorchSharp.torch; 6 | using static TorchSharp.torch.nn; 7 | 8 | namespace TorchSharp.Examples 9 | { 10 | /// 11 | /// Modified version of ResNet to classify CIFAR10 32x32 images. 
12 | /// 13 | public class ResNet : Module 14 | { 15 | // The code here is loosely based on https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py 16 | // Licence and copyright notice at: https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE 17 | 18 | private readonly long[] planes = new long[] { 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024 }; 19 | private readonly long[] strides = new long[] { 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1 }; 20 | 21 | private readonly Module layers; 22 | private int in_planes = 64; 23 | 24 | public static ResNet ResNet18(int numClasses, Device device = null) 25 | { 26 | return new ResNet( 27 | "ResNet18", 28 | (name, in_planes, planes, stride) => new BasicBlock(name, in_planes, planes, stride), 29 | BasicBlock.expansion, new int[] { 2, 2, 2, 2 }, 30 | numClasses, 31 | device); 32 | } 33 | 34 | public static ResNet ResNet34(int numClasses, Device device = null) 35 | { 36 | return new ResNet( 37 | "ResNet34", 38 | (name, in_planes, planes, stride) => new BasicBlock(name, in_planes, planes, stride), 39 | BasicBlock.expansion, new int[] { 3, 4, 6, 3 }, 40 | numClasses, 41 | device); 42 | } 43 | 44 | public static ResNet ResNet50(int numClasses, Device device = null) 45 | { 46 | return new ResNet( 47 | "ResNet50", 48 | (name, in_planes, planes, stride) => new Bottleneck(name, in_planes, planes, stride), 49 | Bottleneck.expansion, new int[] { 3, 4, 6, 3 }, 50 | numClasses, 51 | device); 52 | } 53 | 54 | public static ResNet ResNet101(int numClasses, Device device = null) 55 | { 56 | return new ResNet( 57 | "ResNet101", 58 | (name, in_planes, planes, stride) => new Bottleneck(name, in_planes, planes, stride), 59 | Bottleneck.expansion, new int[] { 3, 4, 23, 3 }, 60 | numClasses, 61 | device); 62 | } 63 | 64 | public static ResNet ResNet152(int numClasses, Device device = null) 65 | { 66 | return new ResNet( 67 | "ResNet152", 68 | (name, in_planes, planes, stride) => new Bottleneck(name, in_planes,
planes, stride), 69 | Bottleneck.expansion, new int[] { 3, 4, 36, 3 }, 70 | numClasses, 71 | device); 72 | } 73 | 74 | public ResNet(string name, Func> block, int expansion, IList num_blocks, int numClasses, Device device = null) : base(name) 75 | { 76 | if (planes.Length != strides.Length) throw new ArgumentException("'planes' and 'strides' must have the same length."); 77 | 78 | var modules = new List<(string, Module)>(); 79 | 80 | modules.Add(($"conv2d-first", Conv2d(3, 64, kernel_size:3, stride: 1, padding: 1, bias: false))); 81 | modules.Add(($"bnrm2d-first", BatchNorm2d(64))); 82 | modules.Add(($"relu-first", ReLU(inplace:true))); 83 | MakeLayer(modules, block, expansion, 64, num_blocks[0], 1); 84 | MakeLayer(modules, block, expansion, 128, num_blocks[1], 2); 85 | MakeLayer(modules, block, expansion, 256, num_blocks[2], 2); 86 | MakeLayer(modules, block, expansion, 512, num_blocks[3], 2); 87 | modules.Add(("avgpool", AvgPool2d(new long[] { 4, 4 }))); 88 | modules.Add(("flatten", Flatten())); 89 | modules.Add(($"linear", Linear(512 * expansion, numClasses))); 90 | 91 | layers = Sequential(modules); 92 | 93 | RegisterComponents(); 94 | 95 | if (device != null && device.type != DeviceType.CPU) 96 | this.to(device); 97 | } 98 | 99 | private void MakeLayer(List<(string, Module)> modules, Func> block, int expansion, int planes, int num_blocks, int stride) 100 | { 101 | var strides = new List(); 102 | strides.Add(stride); 103 | for (var i = 0; i < num_blocks-1; i++) { strides.Add(1); } 104 | 105 | for (var i = 0; i < strides.Count; i++) { 106 | var s = strides[i]; 107 | modules.Add(($"blck-{planes}-{i}", block($"blck-{planes}-{i}", in_planes, planes, s))); 108 | in_planes = planes * expansion; 109 | } 110 | } 111 | 112 | public override Tensor forward(Tensor input) 113 | { 114 | return layers.forward(input); 115 | } 116 | 117 | class BasicBlock : Module 118 | { 119 | public BasicBlock (string name, int in_planes, int planes, int stride) : base(name) 120 | { 121 | 
var modules = new List<(string, Module)>(); 122 | 123 | modules.Add(($"{name}-conv2d-1", Conv2d(in_planes, planes, kernel_size:3, stride: stride, padding: 1, bias: false))); 124 | modules.Add(($"{name}-bnrm2d-1", BatchNorm2d(planes))); 125 | modules.Add(($"{name}-relu-1", ReLU(inplace: true))); 126 | modules.Add(($"{name}-conv2d-2", Conv2d(planes, planes, kernel_size:3, stride: 1, padding: 1, bias: false))); 127 | modules.Add(($"{name}-bnrm2d-2", BatchNorm2d(planes))); 128 | 129 | layers = Sequential(modules); 130 | 131 | if (stride != 1 || in_planes != expansion*planes) { 132 | shortcut = Sequential( 133 | ($"{name}-conv2d-3", Conv2d(in_planes, expansion * planes, kernel_size:1, stride: stride, bias: false)), 134 | ($"{name}-bnrm2d-3", BatchNorm2d(expansion * planes))); 135 | } 136 | else { 137 | shortcut = Sequential(); 138 | } 139 | 140 | modules.Add(($"{name}-relu-2", ReLU(inplace: true))); 141 | 142 | RegisterComponents(); 143 | } 144 | 145 | public override Tensor forward(Tensor t) 146 | { 147 | var x = layers.forward(t); 148 | var y = shortcut.forward(t); 149 | return x.add_(y).relu_(); 150 | } 151 | 152 | public static int expansion = 1; 153 | 154 | private readonly Module layers; 155 | private readonly Module shortcut; 156 | } 157 | 158 | class Bottleneck : Module 159 | { 160 | public Bottleneck(string name, int in_planes, int planes, int stride) : base(name) 161 | { 162 | var modules = new List<(string, Module)>(); 163 | 164 | modules.Add(($"{name}-conv2d-1", Conv2d(in_planes, planes, kernel_size:1, bias: false))); 165 | modules.Add(($"{name}-bnrm2d-1", BatchNorm2d(planes))); 166 | modules.Add(($"{name}relu-1", ReLU(inplace:true))); 167 | modules.Add(($"{name}-conv2d-2", Conv2d(planes, planes, kernel_size:3, stride: stride, padding: 1, bias: false))); 168 | modules.Add(($"{name}-bnrm2d-2", BatchNorm2d(planes))); 169 | modules.Add(($"{name}relu-2", ReLU(inplace: true))); 170 | modules.Add(($"{name}-conv2d-3", Conv2d(planes, expansion * planes, 
kernel_size:1, bias: false))); 171 | modules.Add(($"{name}-bnrm2d-3", BatchNorm2d(expansion * planes))); 172 | 173 | layers = Sequential(modules); 174 | 175 | if (stride != 1 || in_planes != expansion * planes) { 176 | shortcut = Sequential( 177 | ($"{name}-conv2d-4", Conv2d(in_planes, expansion * planes, kernel_size:1, stride: stride, bias: false)), 178 | ($"{name}-bnrm2d-4", BatchNorm2d(expansion * planes))); 179 | } else { 180 | shortcut = Sequential(); 181 | } 182 | 183 | RegisterComponents(); 184 | } 185 | 186 | public override Tensor forward(Tensor t) 187 | { 188 | var x = layers.forward(t); 189 | var y = shortcut.forward(t); 190 | return x.add_(y).relu_(); 191 | } 192 | 193 | public static int expansion = 4; 194 | 195 | private readonly Module layers; 196 | private readonly Module shortcut; 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/CIFAR10.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Collections.Generic; 6 | using System.Diagnostics; 7 | using System.Runtime.InteropServices; 8 | 9 | using TorchSharp; 10 | using static TorchSharp.torchvision; 11 | 12 | using TorchSharp.Examples; 13 | using TorchSharp.Examples.Utils; 14 | 15 | using static TorchSharp.torch; 16 | 17 | using static TorchSharp.torch.nn; 18 | using static TorchSharp.torch.nn.functional; 19 | 20 | namespace CSharpExamples 21 | { 22 | /// 23 | /// Driver for various models trained and evaluated on the CIFAR10 small (32x32) color image data set. 24 | /// 25 | /// 26 | /// The dataset for this example can be found at: https://www.cs.toronto.edu/~kriz/cifar.html 27 | /// Download the binary file, and place it in a dedicated folder, e.g. 
'CIFAR10,' then edit 28 | /// the '_dataLocation' definition below to point at the right folder. 29 | /// 30 | /// Note: so far, CIFAR10 is supported, but not CIFAR100. 31 | /// 32 | class CIFAR10 33 | { 34 | private readonly static string _dataset = "CIFAR10"; 35 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", _dataset); 36 | 37 | private static int _trainBatchSize = 64; 38 | private static int _testBatchSize = 128; 39 | 40 | private readonly static int _logInterval = 25; 41 | private readonly static int _numClasses = 10; 42 | 43 | internal static void Run(int epochs, int timeout, string logdir, string modelName) 44 | { 45 | torch.random.manual_seed(1); 46 | 47 | var device = 48 | // This worked on a GeForce RTX 2080 SUPER with 8GB, for all the available network architectures. 49 | // It may not fit with less memory than that, but it's worth modifying the batch size to fit in memory. 50 | torch.cuda.is_available() ? torch.CUDA : 51 | torch.mps_is_available() ? torch.MPS : 52 | torch.CPU; 53 | 54 | if (device.type != DeviceType.CPU) 55 | { 56 | _trainBatchSize *= 8; 57 | _testBatchSize *= 8; 58 | } 59 | 60 | Console.WriteLine(); 61 | Console.WriteLine($"\tRunning {modelName} with {_dataset} on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}."); 62 | Console.WriteLine(); 63 | 64 | var writer = String.IsNullOrEmpty(logdir) ? 
null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName: true); 65 | 66 | var sourceDir = _dataLocation; 67 | var targetDir = Path.Combine(_dataLocation, "test_data"); 68 | 69 | if (!Directory.Exists(targetDir)) 70 | { 71 | Directory.CreateDirectory(targetDir); 72 | Decompress.ExtractTGZ(Path.Combine(sourceDir, "cifar-10-binary.tar.gz"), targetDir); 73 | } 74 | 75 | Console.WriteLine($"\tCreating the model..."); 76 | 77 | Module model = null; 78 | 79 | switch (modelName.ToLower()) 80 | { 81 | case "alexnet": 82 | model = new AlexNet(modelName, _numClasses, device); 83 | break; 84 | case "mobilenet": 85 | model = new MobileNet(modelName, _numClasses, device); 86 | break; 87 | case "vgg11": 88 | case "vgg13": 89 | case "vgg16": 90 | case "vgg19": 91 | model = new VGG(modelName, _numClasses, device); 92 | break; 93 | case "resnet18": 94 | model = ResNet.ResNet18(_numClasses, device); 95 | break; 96 | case "resnet34": 97 | _testBatchSize /= 4; 98 | model = ResNet.ResNet34(_numClasses, device); 99 | break; 100 | case "resnet50": 101 | _trainBatchSize /= 6; 102 | _testBatchSize /= 8; 103 | model = ResNet.ResNet50(_numClasses, device); 104 | break; 105 | case "resnet101": 106 | _trainBatchSize /= 6; 107 | _testBatchSize /= 8; 108 | model = ResNet.ResNet101(_numClasses, device); 109 | break; 110 | case "resnet152": 111 | _testBatchSize /= 4; 112 | model = ResNet.ResNet152(_numClasses, device); 113 | break; 114 | } 115 | 116 | var hflip = transforms.HorizontalFlip(); 117 | var gray = transforms.Grayscale(3); 118 | var rotate = transforms.Rotate(90); 119 | var contrast = transforms.AdjustContrast(1.25); 120 | 121 | Console.WriteLine($"\tPreparing training and test data..."); 122 | Console.WriteLine(); 123 | 124 | using (var train = new CIFARReader(targetDir, false, _trainBatchSize, shuffle: true, device: device, transforms: new ITransform[] { })) 125 | using (var test = new CIFARReader(targetDir, true, _testBatchSize, device: device)) 126 | using (var optimizer = 
torch.optim.Adam(model.parameters(), 0.001)) 127 | { 128 | 129 | Stopwatch totalSW = new Stopwatch(); 130 | totalSW.Start(); 131 | 132 | for (var epoch = 1; epoch <= epochs; epoch++) 133 | { 134 | 135 | Stopwatch epchSW = new Stopwatch(); 136 | epchSW.Start(); 137 | 138 | var loss = NLLLoss(); 139 | 140 | Train(model, optimizer, loss, train.Data(), epoch, _trainBatchSize, train.Size); 141 | Test(model, loss, writer, modelName.ToLower(), test.Data(), epoch, test.Size); 142 | 143 | epchSW.Stop(); 144 | Console.WriteLine($"Elapsed time for this epoch: {epchSW.Elapsed.TotalSeconds} s."); 145 | 146 | if (totalSW.Elapsed.TotalSeconds > timeout) break; 147 | } 148 | 149 | totalSW.Stop(); 150 | Console.WriteLine($"Elapsed training time: {totalSW.Elapsed} s."); 151 | } 152 | 153 | model.Dispose(); 154 | } 155 | 156 | private static void Train( 157 | Module model, 158 | torch.optim.Optimizer optimizer, 159 | Loss loss, 160 | IEnumerable<(Tensor, Tensor)> dataLoader, 161 | int epoch, 162 | long batchSize, 163 | long size) 164 | { 165 | model.train(); 166 | 167 | int batchId = 1; 168 | long total = 0; 169 | long correct = 0; 170 | 171 | Console.WriteLine($"Epoch: {epoch}..."); 172 | 173 | foreach (var (data, target) in dataLoader) 174 | { 175 | 176 | using (var d = torch.NewDisposeScope()) 177 | { 178 | optimizer.zero_grad(); 179 | 180 | var prediction = model.forward(data); 181 | var lsm = log_softmax(prediction, 1); 182 | var output = loss.forward(lsm, target); 183 | 184 | output.backward(); 185 | 186 | optimizer.step(); 187 | 188 | total += target.shape[0]; 189 | 190 | correct += prediction.argmax(1).eq(target).sum().ToInt64(); 191 | 192 | if (batchId % _logInterval == 0) 193 | { 194 | var count = Math.Min(batchId * batchSize, size); 195 | Console.WriteLine($"\rTrain: epoch {epoch} [{count} / {size}] Loss: {output.ToSingle().ToString("0.000000")} | Accuracy: { ((float)correct / total).ToString("0.000000") }"); 196 | } 197 | 198 | batchId++; 199 | } 200 | } 201 | } 202 | 203 
| private static void Test( 204 | Module model, 205 | Loss loss, 206 | TorchSharp.Modules.SummaryWriter writer, 207 | string modelName, 208 | IEnumerable<(Tensor, Tensor)> dataLoader, 209 | int epoch, 210 | long size) 211 | { 212 | model.eval(); 213 | 214 | double testLoss = 0; 215 | long correct = 0; 216 | int batchCount = 0; 217 | 218 | foreach (var (data, target) in dataLoader) 219 | { 220 | 221 | using (var d = torch.NewDisposeScope()) 222 | { 223 | var prediction = model.forward(data); 224 | var lsm = log_softmax(prediction, 1); 225 | var output = loss.forward(lsm, target); 226 | 227 | testLoss += output.ToSingle(); 228 | batchCount += 1; 229 | 230 | correct += prediction.argmax(1).eq(target).sum().ToInt64(); 231 | } 232 | } 233 | 234 | Console.WriteLine($"\rTest set: Average loss {(testLoss / batchCount).ToString("0.0000")} | Accuracy {((float)correct / size).ToString("0.0000")}"); 235 | 236 | if (writer != null) 237 | { 238 | writer.add_scalar($"{modelName}/loss", (float)(testLoss / batchCount), epoch); 239 | writer.add_scalar($"{modelName}/accuracy", (float)correct / size, epoch); 240 | } 241 | } 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /src/CSharp/CSharpExamples/SequenceToSequence.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
2 | using System; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Collections.Generic; 6 | using System.Diagnostics; 7 | 8 | using TorchSharp; 9 | using static TorchSharp.torchvision; 10 | 11 | using TorchSharp.Examples; 12 | using TorchSharp.Examples.Utils; 13 | 14 | using static TorchSharp.torch; 15 | 16 | using static TorchSharp.torch.nn; 17 | using static TorchSharp.torch.nn.functional; 18 | 19 | namespace CSharpExamples 20 | { 21 | 22 | /// 23 | /// This example is based on the PyTorch tutorial at: 24 | /// 25 | /// https://pytorch.org/tutorials/beginner/transformer_tutorial.html 26 | /// 27 | /// It relies on the WikiText2 dataset, which can be downloaded at: 28 | /// 29 | /// https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 30 | /// 31 | /// After downloading, extract the files using the defaults (Windows only). 32 | /// 33 | public class SequenceToSequence 34 | { 35 | // This path assumes that you're running this on Windows. 36 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); 37 | 38 | private const long emsize = 200; 39 | private const long nhid = 200; 40 | private const long nlayers = 2; 41 | private const long nhead = 2; 42 | private const double dropout = 0.2; 43 | 44 | private const int batch_size = 64; 45 | private const int eval_batch_size = 32; 46 | 47 | internal static void Run(int epochs, int timeout, string logdir) 48 | 49 | { 50 | torch.random.manual_seed(1); 51 | 52 | var cwd = Environment.CurrentDirectory; 53 | 54 | var device = 55 | torch.cuda.is_available() ? torch.CUDA : 56 | torch.mps_is_available() ? 
torch.MPS : 57 | torch.CPU; 58 | 59 | Console.WriteLine(); 60 | Console.WriteLine($"\tRunning SequenceToSequence on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}."); 61 | Console.WriteLine(); 62 | 63 | Console.WriteLine($"\tPreparing training and test data..."); 64 | 65 | var vocab_iter = TorchText.Datasets.WikiText2("train", _dataLocation); 66 | var tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english"); 67 | 68 | var counter = new TorchText.Vocab.Counter(); 69 | foreach (var item in vocab_iter) 70 | { 71 | counter.update(tokenizer(item)); 72 | } 73 | 74 | var vocab = new TorchText.Vocab.Vocab(counter); 75 | 76 | var (train_iter, valid_iter, test_iter) = TorchText.Datasets.WikiText2(_dataLocation); 77 | 78 | var train_data = Batchify(ProcessInput(train_iter, tokenizer, vocab), batch_size).to((Device)device); 79 | var valid_data = Batchify(ProcessInput(valid_iter, tokenizer, vocab), eval_batch_size).to((Device)device); 80 | var test_data = Batchify(ProcessInput(test_iter, tokenizer, vocab), eval_batch_size).to((Device)device); 81 | 82 | var bptt = 32; 83 | 84 | var ntokens = vocab.Count; 85 | 86 | Console.WriteLine($"\tCreating the model..."); 87 | Console.WriteLine(); 88 | 89 | var model = new TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to((Device)device); 90 | var loss = CrossEntropyLoss(); 91 | var lr = 2.50; 92 | var optimizer = torch.optim.SGD(model.parameters(), lr); 93 | var scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.95, last_epoch: 15); 94 | 95 | var writer = String.IsNullOrEmpty(logdir) ? 
null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName: true); 96 | 97 | var totalTime = new Stopwatch(); 98 | totalTime.Start(); 99 | 100 | foreach (var epoch in Enumerable.Range(1, epochs)) 101 | { 102 | 103 | var sw = new Stopwatch(); 104 | sw.Start(); 105 | 106 | train(epoch, train_data, model, loss, bptt, ntokens, optimizer); 107 | 108 | var val_loss = evaluate(valid_data, model, loss, bptt, ntokens, optimizer); 109 | sw.Stop(); 110 | 111 | Console.WriteLine($"\nEnd of epoch: {epoch} | lr: {optimizer.ParamGroups.First().LearningRate:0.00} | time: {sw.Elapsed.TotalSeconds:0.0}s | loss: {val_loss:0.00}\n"); 112 | scheduler.step(); 113 | 114 | if (writer != null) 115 | { 116 | writer.add_scalar("seq2seq/loss", (float)val_loss, epoch); 117 | } 118 | 119 | if (totalTime.Elapsed.TotalSeconds > timeout) break; 120 | } 121 | 122 | var tst_loss = evaluate(test_data, model, loss, bptt, ntokens, optimizer); 123 | totalTime.Stop(); 124 | 125 | Console.WriteLine($"\nEnd of training | time: {totalTime.Elapsed.TotalSeconds:0.0}s | loss: {tst_loss:0.00}\n"); 126 | } 127 | 128 | private static void train(int epoch, Tensor train_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer) 129 | { 130 | model.train(); 131 | 132 | var total_loss = 0.0f; 133 | 134 | using (var d = torch.NewDisposeScope()) 135 | { 136 | var batch = 0; 137 | var log_interval = 200; 138 | 139 | var src_mask = model.GenerateSquareSubsequentMask(bptt); 140 | 141 | var tdlen = train_data.shape[0]; 142 | 143 | 144 | for (int i = 0; i < tdlen - 1; batch++, i += bptt) 145 | { 146 | 147 | var (data, targets) = GetBatch(train_data, i, bptt); 148 | optimizer.zero_grad(); 149 | 150 | if (data.shape[0] != bptt) 151 | { 152 | src_mask = model.GenerateSquareSubsequentMask(data.shape[0]); 153 | } 154 | 155 | using (var output = model.forward(data, src_mask)) 156 | { 157 | var loss = criterion.forward(output.view(-1, ntokens), targets); 158 | loss.backward(); 159 
| torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5); 160 | optimizer.step(); 161 | 162 | total_loss += loss.to(torch.CPU).item(); 163 | } 164 | 165 | if (batch % log_interval == 0 && batch > 0) 166 | { 167 | var cur_loss = total_loss / log_interval; 168 | Console.WriteLine($"epoch: {epoch} | batch: {batch} / {tdlen / bptt} | loss: {cur_loss:0.00}"); 169 | total_loss = 0; 170 | } 171 | 172 | d.DisposeEverythingBut(src_mask); 173 | } 174 | } 175 | } 176 | 177 | private static double evaluate(Tensor eval_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer) 178 | { 179 | model.eval(); 180 | 181 | using (var d = torch.NewDisposeScope()) 182 | { 183 | 184 | var src_mask = model.GenerateSquareSubsequentMask(bptt); 185 | 186 | var total_loss = 0.0f; 187 | var batch = 0; 188 | 189 | 190 | for (int i = 0; i < eval_data.shape[0] - 1; batch++, i += bptt) 191 | { 192 | 193 | var (data, targets) = GetBatch(eval_data, i, bptt); 194 | if (data.shape[0] != bptt) 195 | { 196 | src_mask = model.GenerateSquareSubsequentMask(data.shape[0]); 197 | } 198 | using (var output = model.forward(data, src_mask)) 199 | { 200 | var loss = criterion.forward(output.view(-1, ntokens), targets); 201 | total_loss += data.shape[0] * loss.to(torch.CPU).item(); 202 | } 203 | 204 | data.Dispose(); 205 | targets.Dispose(); 206 | 207 | d.DisposeEverythingBut(src_mask); 208 | } 209 | 210 | return total_loss / eval_data.shape[0]; 211 | } 212 | } 213 | 214 | static Tensor ProcessInput(IEnumerable iter, Func> tokenizer, TorchText.Vocab.Vocab vocab) 215 | { 216 | List data = new List(); 217 | foreach (var item in iter) 218 | { 219 | List itemData = new List(); 220 | foreach (var token in tokenizer(item)) 221 | { 222 | itemData.Add(vocab[token]); 223 | } 224 | data.Add(torch.tensor(itemData.ToArray(), torch.int64)); 225 | } 226 | 227 | var result = torch.cat(data.Where(t => t.NumberOfElements > 0).ToList(), 0); 228 | return result; 229 | } 230 | 231 | static 
Tensor Batchify(Tensor data, int batch_size) 232 | { 233 | var nbatch = data.shape[0] / batch_size; 234 | using var d2 = data.narrow(0, 0, nbatch * batch_size).view(batch_size, -1).t(); 235 | return d2.contiguous(); 236 | } 237 | 238 | static (Tensor, Tensor) GetBatch(Tensor source, int index, int bptt) 239 | { 240 | var len = Math.Min(bptt, source.shape[0] - 1 - index); 241 | var data = source[TensorIndex.Slice(index, index + len)]; 242 | var target = source[TensorIndex.Slice(index + 1, index + 1 + len)].reshape(-1); 243 | return (data, target); 244 | } 245 | 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/FSharp/FSharpExamples/SequenceToSequence.fs: -------------------------------------------------------------------------------- 1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 2 | module FSharpExamples.SequenceToSequence 3 | 4 | open System 5 | open System.IO 6 | open System.Linq 7 | open System.Diagnostics 8 | open System.Collections.Generic 9 | 10 | open TorchSharp 11 | open type TorchSharp.torch.nn 12 | open type TorchSharp.torch.optim 13 | 14 | open TorchSharp.Examples 15 | 16 | // This example is based on the PyTorch tutorial at: 17 | // 18 | // https://pytorch.org/tutorials/beginner/transformer_tutorial.html 19 | // 20 | // It relies on the WikiText2 dataset, which can be downloaded at: 21 | // 22 | // https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 23 | // 24 | // After downloading, extract the files using the defaults (Windows only). 
25 | // 26 | 27 | let emsize = 200L 28 | let nhidden = 200L 29 | let nlayers = 2L 30 | let nheads = 2L 31 | let dropout = 0.2 32 | let bptt = 32L 33 | 34 | let batch_size = 64L 35 | let eval_batch_size = 256L 36 | 37 | let epochs = 50 38 | 39 | let logInterval = 200 40 | 41 | let cmdArgs = Environment.GetCommandLineArgs() 42 | 43 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1") 44 | 45 | torch.random.manual_seed(1L) |> ignore 46 | 47 | let hasCUDA = torch.cuda.is_available() 48 | 49 | let device = if hasCUDA then torch.CUDA else torch.CPU 50 | 51 | let criterion x y = torch.nn.functional.cross_entropy(x,y,reduction=Reduction.Mean) 52 | 53 | type PositionalEncoding(dmodel, maxLen) as this = 54 | inherit Module("PositionalEncoding") 55 | 56 | let dropout = Dropout(dropout) 57 | let mutable pe = torch.zeros([| maxLen; dmodel|]) 58 | 59 | do 60 | let position = torch.arange(0L.ToScalar(), maxLen.ToScalar(), 1L.ToScalar()).unsqueeze(1L) 61 | let divTerm = (torch.arange(0L.ToScalar(), dmodel.ToScalar(), 2L.ToScalar()) * (-Math.Log(10000.0) / (float dmodel)).ToScalar()).exp() 62 | 63 | let NULL = System.Nullable() 64 | 65 | // See: https://github.com/dotnet/fsharp/issues/9369 -- for now we have to use an explicit array within the index 66 | // 67 | pe.[ [| torch.TensorIndex.Ellipsis; torch.TensorIndex.Slice(0L, NULL, 2L) |] ] <- (position * divTerm).sin() 68 | pe.[ [| torch.TensorIndex.Ellipsis; torch.TensorIndex.Slice(1L, NULL, 2L) |] ] <- (position * divTerm).cos() 69 | 70 | pe <- pe.unsqueeze(0L).transpose(0L,1L) 71 | 72 | this.RegisterComponents() 73 | 74 | override _.forward(t) = 75 | let NULL = System.Nullable() 76 | use x = t + pe.[torch.TensorIndex.Slice(NULL, t.shape.[0]), torch.TensorIndex.Slice()] 77 | dropout.forward(x) 78 | 79 | type TransformerModel(ntokens, device:torch.Device) as this = 80 | inherit Module("Transformer") 81 | 82 | let pos_encoder = new
PositionalEncoding(emsize, 5000L) 83 | let encoder_layers = TransformerEncoderLayer(emsize, nheads, nhidden, dropout) 84 | let transformer_encoder = TransformerEncoder(encoder_layers, nlayers) 85 | let encoder = Embedding(ntokens, emsize) 86 | let decoder = Linear(emsize, ntokens) 87 | 88 | let sqrEmSz = MathF.Sqrt(float32 emsize).ToScalar() 89 | 90 | do 91 | let initrange = 0.1 92 | 93 | init.uniform_(encoder.weight, -initrange, initrange) |> ignore 94 | init.zeros_(decoder.bias) |> ignore 95 | init.uniform_(decoder.weight, -initrange, initrange) |> ignore 96 | 97 | this.RegisterComponents() 98 | 99 | if device.``type`` = DeviceType.CUDA then 100 | this.``to``(device) |> ignore 101 | 102 | override _.forward(t, mask) = 103 | let src = pos_encoder.forward(encoder.forward(t) * sqrEmSz) 104 | let enc = transformer_encoder.call(src, mask) 105 | decoder.forward(enc) 106 | 107 | member _.GenerateSquareSubsequentMask(size:int64) = 108 | use mask = torch.ones([|size;size|]).eq(torch.tensor(1.0f)).triu().transpose(0L,1L) 109 | use maskIsZero = mask.eq(torch.tensor(0.0f)) 110 | use maskIsOne = mask.eq(torch.tensor(1.0f)) 111 | mask.to_type(torch.float32) 112 | .masked_fill(maskIsZero, Single.NegativeInfinity.ToScalar()) 113 | .masked_fill(maskIsOne, 0.0f.ToScalar()).``to``(device) 114 | 115 | let process_input (iter:string seq) (tokenizer:string->string seq) (vocab:TorchText.Vocab.Vocab) = 116 | torch.cat( 117 | [| 118 | for item in iter do 119 | let itemData = [| for token in tokenizer(item) do (int64 vocab.[token]) |] 120 | let t = torch.tensor(itemData) 121 | if t.NumberOfElements > 0L then 122 | t 123 | |], 0L) 124 | 125 | let batchify (data:torch.Tensor) batchSize (device:torch.Device) = 126 | let nbatch = data.shape.[0] / batchSize 127 | let d2 = data.narrow(0L, 0L, nbatch * batchSize).view(batchSize, -1L).t() 128 | d2.contiguous().``to``(device) 129 | 130 | let get_batch (source:torch.Tensor) (index:int64) = 131 | 132 | let len = min bptt (source.shape.[0]-1L-index) 
133 | let data = source.[torch.TensorIndex.Slice(index, index + len)] 134 | let target = source.[torch.TensorIndex.Slice(index + 1L, index + 1L + len)].reshape(-1L) 135 | data,target 136 | 137 | let train epoch (model:TransformerModel) (optimizer:Optimizer) (trainData:torch.Tensor) ntokens = 138 | 139 | model.train() 140 | 141 | let mutable total_loss = 0.0f 142 | let mutable src_mask = model.GenerateSquareSubsequentMask(bptt) 143 | 144 | let mutable batch = 0 145 | 146 | let tdlen = trainData.shape.[0] 147 | 148 | let mutable i = 0L 149 | 150 | while i < tdlen - 2L do 151 | 152 | use d = torch.NewDisposeScope() 153 | 154 | begin 155 | let data,targets = get_batch trainData i 156 | use data = data 157 | use targets = targets 158 | 159 | if data.shape.[0] <> bptt then 160 | src_mask.Dispose() 161 | src_mask <- model.GenerateSquareSubsequentMask(data.shape.[0]) 162 | 163 | optimizer.zero_grad() 164 | 165 | use output = model.forward(data, src_mask) 166 | use loss = criterion (output.view(-1L, ntokens)) targets 167 | loss.backward() 168 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) |> ignore 169 | optimizer.step() |> ignore 170 | 171 | total_loss <- total_loss + loss.cpu().item() 172 | end 173 | 174 | if (batch % logInterval = 0) && (batch > 0) then 175 | let cur_loss = (total_loss / (float32 logInterval)).ToString("0.00") 176 | printfn $"epoch: {epoch} | batch: {batch} / {tdlen/bptt} | loss: {cur_loss}" 177 | total_loss <- 0.0f 178 | 179 | batch <- batch + 1 180 | i <- i + bptt 181 | 182 | 183 | let evaluate (model:TransformerModel) (evalData:torch.Tensor) ntokens = 184 | 185 | model.eval() 186 | 187 | let mutable total_loss = 0.0f 188 | let mutable src_mask = model.GenerateSquareSubsequentMask(bptt) 189 | 190 | let mutable batch = 0L 191 | 192 | let tdlen = evalData.shape.[0] 193 | 194 | let mutable i = 0L 195 | 196 | while i < tdlen - 2L do 197 | 198 | use d = torch.NewDisposeScope() 199 | 200 | begin 201 | let data,targets = get_batch evalData i 202 | 
use data = data 203 | use targets = targets 204 | 205 | if data.shape.[0] <> bptt then 206 | src_mask.Dispose() 207 | src_mask <- model.GenerateSquareSubsequentMask(data.shape.[0]) 208 | 209 | use output = model.forward(data, src_mask) 210 | use loss = criterion (output.view(-1L, ntokens)) targets 211 | total_loss <- total_loss + (float32 data.shape.[0]) * loss.cpu().item() 212 | end 213 | 214 | batch <- batch + 1L 215 | i <- i + bptt 216 | 217 | total_loss / (float32 evalData.shape.[0]) 218 | 219 | let run epochs = 220 | 221 | printfn $"Running SequenceToSequence on {device.``type``.ToString()} for {epochs} epochs." 222 | 223 | let vocabIter = TorchText.Datasets.WikiText2("train", datasetPath) 224 | let tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english") 225 | let tokenizer str = tokenizer.Invoke(str) 226 | 227 | let counter = new TorchText.Vocab.Counter() 228 | 229 | for item in vocabIter do 230 | counter.update(tokenizer(item)) 231 | 232 | let vocab = TorchText.Vocab.Vocab(counter) 233 | 234 | let trainIter,validIter,testIter = TorchText.Datasets.WikiText2(datasetPath).ToTuple() 235 | 236 | let train_data = batchify (process_input trainIter tokenizer vocab) batch_size device 237 | let valid_data = batchify (process_input validIter tokenizer vocab) eval_batch_size device 238 | let test_data = batchify (process_input testIter tokenizer vocab) eval_batch_size device 239 | 240 | let ntokens = int64 vocab.Count 241 | 242 | use model = new TransformerModel(ntokens, device) 243 | let lr = 2.50 244 | let optimizer = SGD(model.parameters(), lr) 245 | let scheduler = lr_scheduler.StepLR(optimizer, 1, 0.95, last_epoch=15) 246 | 247 | let totalTime = Stopwatch() 248 | totalTime.Start() 249 | 250 | 251 | for epoch = 1 to epochs do 252 | let sw = Stopwatch() 253 | sw.Start() 254 | 255 | train epoch model optimizer train_data ntokens 256 | 257 | let val_loss = evaluate model valid_data ntokens 258 | sw.Stop() 259 | 260 | let lrStr = 
optimizer.ParamGroups.First().LearningRate.ToString("0.00") 261 | let elapsed = sw.Elapsed.TotalSeconds.ToString("0.0") 262 | let lossStr = val_loss.ToString("0.00") 263 | 264 | printfn $"\nEnd of epoch: {epoch} | lr: {lrStr} | time: {elapsed}s | loss: {lossStr}\n" 265 | 266 | scheduler.step() |> ignore 267 | 268 | let tst_loss = evaluate model test_data ntokens 269 | 270 | totalTime.Stop() 271 | 272 | let elapsed = totalTime.Elapsed.TotalSeconds.ToString("0.0") 273 | let lossStr = tst_loss.ToString("0.00") 274 | printfn $"\nEnd of training | time: {elapsed} s | loss: {lossStr}\n" --------------------------------------------------------------------------------