├── src
├── FSharp
│ └── FSharpExamples
│ │ ├── Properties
│ │ │ └── launchSettings.json
│ │ ├── FSharpExamples.fsproj
│ │ ├── Program.fs
│ │ ├── AdversarialExampleGeneration.fs
│ │ ├── TextClassification.fs
│ │ ├── MNIST.fs
│ │ ├── AlexNet.fs
│ │ └── SequenceToSequence.fs
├── CSharp
│ ├── CSharpExamples
│ │ ├── Properties
│ │ │ └── launchSettings.json
│ │ ├── arguments.json
│ │ ├── CSharpExamples.csproj
│ │ ├── Program.cs
│ │ ├── TextClassification.cs
│ │ ├── AdversarialExampleGeneration.cs
│ │ ├── MNIST.cs
│ │ ├── CIFAR10.cs
│ │ └── SequenceToSequence.cs
│ └── Models
│ │ ├── Models.csproj
│ │ ├── TextClassification.cs
│ │ ├── MNIST.cs
│ │ ├── AlexNet.cs
│ │ ├── VGG.cs
│ │ ├── MobileNet.cs
│ │ ├── SequenceToSequence.cs
│ │ └── ResNet.cs
├── Utils
│ ├── Examples.Utils.csproj
│ ├── BigEndianReader.cs
│ ├── TorchText.Data.Utils.cs
│ ├── Datasets.cs
│ ├── Decompress.cs
│ ├── Arguments.cs
│ ├── Vocab.cs
│ ├── AG_NEWSReader.cs
│ ├── CIFARReader.cs
│ ├── MNISTReader.cs
│ └── ArgumentParser.cs
└── TorchSharpExamples.sln
├── CODE_OF_CONDUCT.md
├── tutorials
├── CSharp
│ ├── README.md
│ ├── tutorial1.ipynb
│ ├── tutorial3.ipynb
│ └── tutorial7.ipynb
├── FSharp
│ ├── README.md
│ ├── tutorial1.ipynb
│ ├── tutorial3.ipynb
│ └── tutorial7.ipynb
└── README.md
├── LICENSE
├── SECURITY.md
├── README.md
└── .gitignore
/src/FSharp/FSharpExamples/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "profiles": {
3 | "FSharpExamples": {
4 | "commandName": "Project",
5 | "commandLineArgs": "-e 2 alexnet"
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "profiles": {
3 | "CSharpExamples": {
4 | "commandName": "Project",
5 | "commandLineArgs": "-e 10 alexnet -l tb_runs"
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | This project has adopted the code of conduct defined by the Contributor Covenant
4 | to clarify expected behavior in our community.
5 |
6 | For more information, see the [.NET Foundation Code of Conduct](https://dotnetfoundation.org/code-of-conduct).
7 |
--------------------------------------------------------------------------------
/src/CSharp/Models/Models.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net6.0
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/tutorials/CSharp/README.md:
--------------------------------------------------------------------------------
1 | ## C# Tutorials
2 |
3 |
4 |
5 | [Tutorial 1](tutorial1.ipynb): Setting Things Up
6 |
7 | [Tutorial 2](tutorial2.ipynb): Tensors
8 |
9 | [Tutorial 3](tutorial3.ipynb): Basic Numerics
10 |
11 | [Tutorial 4](tutorial4.ipynb): Random Numbers and Distributions
12 |
13 | [Tutorial 5](tutorial5.ipynb): CUDA
14 |
15 | [Tutorial 6](tutorial6.ipynb): Models
16 |
17 | [Tutorial 7](tutorial7.ipynb): Learning Rate Schedulers
18 |
19 | Tutorial 8: TorchVision
20 |
--------------------------------------------------------------------------------
/tutorials/FSharp/README.md:
--------------------------------------------------------------------------------
1 | ## F# Tutorials
2 |
3 |
4 |
5 | [Tutorial 1](tutorial1.ipynb): Setting Things Up
6 |
7 | [Tutorial 2](tutorial2.ipynb): Tensors
8 |
9 | [Tutorial 3](tutorial3.ipynb): Basic Numerics
10 |
11 | [Tutorial 4](tutorial4.ipynb): Random Numbers and Distributions
12 |
13 | [Tutorial 5](tutorial5.ipynb): CUDA
14 |
15 | [Tutorial 6](tutorial6.ipynb): Models
16 |
17 | [Tutorial 7](tutorial7.ipynb): Learning Rate Schedulers
18 |
19 | Tutorial 8: TorchVision
20 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/arguments.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "longName": "epochs",
4 | "shortName": "e",
5 | "argType": "integer",
6 | "explanation": "The maximum number of epochs to use for training."
7 | },
8 | {
9 | "longName": "timeout",
10 | "shortName": "t",
11 | "argType": "integer",
12 | "explanation": "The maximum time, measured in seconds, to use for training."
13 | },
14 | {
15 | "longName": "logdir",
16 | "shortName": "l",
17 | "argType": "string",
18 | "explanation": "A directory for Tensorboard logging."
19 | }
20 | ]
21 |
--------------------------------------------------------------------------------
/src/Utils/Examples.Utils.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net6.0
5 | AnyCPU
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/CSharpExamples.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | CSharpExamples.Program
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | PreserveNewest
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/FSharpExamples.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | 3390;$(WarnOn)
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | PreserveNewest
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) .NET Foundation. All rights reserved.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/Utils/BigEndianReader.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 |
5 | namespace TorchSharp.Examples.Utils
6 | {
7 | public class BigEndianReader
8 | {
9 | public BigEndianReader(BinaryReader baseReader)
10 | {
11 | mBaseReader = baseReader;
12 | }
13 |
14 | public int ReadInt32()
15 | {
16 | return BitConverter.ToInt32(ReadBigEndianBytes(4), 0);
17 | }
18 |
19 | public byte[] ReadBigEndianBytes(int count)
20 | {
21 | byte[] bytes = new byte[count];
22 | for (int i = count - 1; i >= 0; i--)
23 | bytes[i] = mBaseReader.ReadByte();
24 |
25 | return bytes;
26 | }
27 |
28 | public byte[] ReadBytes(int count)
29 | {
30 | return mBaseReader.ReadBytes(count);
31 | }
32 |
33 | public void Close()
34 | {
35 | mBaseReader.Close();
36 | }
37 |
38 | public Stream BaseStream {
39 | get { return mBaseReader.BaseStream; }
40 | }
41 |
42 | private BinaryReader mBaseReader;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/Program.fs:
--------------------------------------------------------------------------------
1 | // Learn more about F# at http://docs.microsoft.com/dotnet/fsharp
2 |
3 | open System
4 | open System.IO
5 | open System.Reflection
6 |
7 | open TorchSharp.Examples
8 | open TorchSharp.Examples.Utils
9 |
10 |
11 | [<EntryPoint>]
12 | let main args =
13 |
14 | let argumentsPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), "arguments.json")
15 | let argumentParser = new ArgumentParser(new FileInfo(argumentsPath), args)
16 |
17 | if argumentParser.Count = 0 then
18 | argumentParser.UsingMessage("FSharpExamples", "")
19 | 1 // return an integer exit code
20 | else
21 |
22 | let epochs =
23 | match argumentParser.TryGetValueInt "epochs" with
24 | | true,e -> e
25 | | false,_ -> 16
26 |
27 | let timeout =
28 | match argumentParser.TryGetValueInt "timeout" with
29 | | true,t -> t
30 | | false,_ -> 3600
31 |
32 | for idx = 0 to argumentParser.Count-1 do
33 |
34 | let modelName = argumentParser.[idx]
35 |
36 | match modelName.ToLowerInvariant() with
37 | | "mnist" -> FSharpExamples.MNIST.run epochs
38 | | "fgsm" -> FSharpExamples.AdversarialExampleGeneration.run epochs
39 | | "alexnet" -> FSharpExamples.AlexNet.run epochs
40 | | "seq2seq" -> FSharpExamples.SequenceToSequence.run epochs
41 | | "text" -> FSharpExamples.TextClassification.run epochs
42 | | _ -> eprintf "Unknown model name"
43 |
44 | 0 // return an integer exit code
--------------------------------------------------------------------------------
/src/CSharp/Models/TextClassification.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Collections.Generic;
6 | using System.Diagnostics;
7 |
8 | using static TorchSharp.torch;
9 | using static TorchSharp.torch.nn;
10 | using static TorchSharp.torch.nn.functional;
11 |
12 | namespace TorchSharp.Examples
13 | {
14 | ///
15 | /// This example is based on the PyTorch tutorial at:
16 | ///
17 | /// https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html
18 | ///
19 | ///
20 | public class TextClassificationModel : Module
21 | {
22 | private Modules.EmbeddingBag embedding;
23 | private Modules.Linear fc;
24 |
25 | public TextClassificationModel(long vocab_size, long embed_dim, long num_class) : base("TextClassification")
26 | {
27 | embedding = EmbeddingBag(vocab_size, embed_dim, sparse: false);
28 | fc = Linear(embed_dim, num_class);
29 | InitWeights();
30 |
31 | RegisterComponents();
32 | }
33 |
34 | private void InitWeights()
35 | {
36 | var initrange = 0.5;
37 |
38 | init.uniform_(embedding.weight, -initrange, initrange);
39 | init.uniform_(fc.weight, -initrange, initrange);
40 | init.zeros_(fc.bias);
41 | }
42 |
43 | public override Tensor forward(Tensor input, Tensor offsets)
44 | {
45 | var t = embedding.call(input, offsets);
46 | return fc.forward(t);
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/tutorials/README.md:
--------------------------------------------------------------------------------
1 | # TorchSharp Tutorials
2 |
3 | Like its Python-based cousin, TorchSharp is a rich and capable numerics library, especially well suited for machine learning using neural networks, that is, deep learning.
4 |
5 | You can dive into TorchSharp by studying the examples provided, which provides an end-to-end view. On the other hand, if you would like a more methodical, step-by-step introduction to the many concepts and capabilities it offers, these tutorials aim to bring you one step at a time toward a comprehensive understanding of what it can do, and how.
6 |
7 | The tutorials are organized to start with the very basics, creating and using tensors, which are generalized vectors and matrices, and the fundamental data type of all deep learning. Once we have treated tensors in depth, we will move on to using tensors to compute things. There are a ton of numerical operators available, and you can express just about anything using TorchSharp.
8 |
9 | Once we have looked at numerics, it is time to move on to constructing models from numerics, and then to train them using optimizers and learning rate schedulers.
10 |
11 | Most tutorials are presented in the form of an interactive notebook, which is intended to be executed under .NET Interactive. These notebooks have been developed and tested using Visual Studio Code with the .NET Interactive extension installed.
12 |
13 | For more information on installing the .NET Interactive extension, see:
14 |
15 | [Installing .NET Interactive Notebooks](https://marketplace.visualstudio.com/items?itemName=ms-dotnettools.dotnet-interactive-vscode)
16 |
17 | or
18 |
19 | [Installing .NET Interactive](https://github.com/dotnet/interactive/blob/main/docs/install-dotnet-interactive.md)
20 |
21 |
22 | For your tutorials, please choose one of: [C#](./CSharp/README.md) or [F#](./FSharp/README.md).
23 |
24 | __More tutorials are coming.__
25 |
--------------------------------------------------------------------------------
/src/Utils/TorchText.Data.Utils.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Linq;
5 | using System.Text;
6 | using System.Text.RegularExpressions;
7 | using System.Threading.Tasks;
8 |
9 | namespace TorchText.Data
10 | {
11 | public static partial class Utils
12 | {
13 | public static Func<string, IEnumerable<string>> get_tokenizer(string name)
14 | {
15 | if (name == "basic_english") return BasicEnglish;
16 | throw new NotImplementedException($"The '{name}' text tokenizer is not implemented.");
17 | }
18 |
19 | private static string[] _patterns = new string []{
20 | "\'",
21 | "\"",
22 | "\\.",
23 | "<br \\/>",
24 | ",",
25 | "\\(",
26 | "\\)",
27 | "\\!",
28 | "\\?",
29 | "\\;",
30 | "\\:",
31 | "\\\\",
32 | "\\s+",
33 | };
34 | private static string[] _replacements = new string[] {
35 | " \\' ",
36 | "",
37 | " . ",
38 | " ",
39 | " , ",
40 | " ( ",
41 | " ) ",
42 | " ! ",
43 | " ? ",
44 | " ",
45 | " ",
46 | " ",
47 | " "
48 | };
49 |
50 | private static IEnumerable<string> BasicEnglish(string input)
51 | {
52 | if (_patterns.Length != _replacements.Length)
53 | throw new InvalidProgramException("internal error: patterns and replacements are not the same length");
54 |
55 | input = input.Trim().ToLowerInvariant();
56 |
57 | for (var i = 0; i < _patterns.Length; ++i) {
58 | input = Regex.Replace(input, _patterns[i], _replacements[i]);
59 | }
60 | return input.Split(' ');
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/Utils/Datasets.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections.Generic;
4 | using System.IO;
5 | using System.Linq;
6 |
7 | namespace TorchText
8 | {
9 | ///
10 | /// This belongs in its own package, 'TorchText'.
11 | /// For now, it's useful to keep it with the examples that use it.
12 | ///
13 | public static class Datasets
14 | {
15 | ///
16 | /// WikiText2
17 | ///
18 | /// One of 'train', 'valid', or 'test'
19 | /// The folder where the WikiText2 data set was downloaded and extracted.
20 | /// An enumeration of lines from the text.
21 | ///
22 | /// Download the data set at:
23 | /// https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
24 | ///
25 | public static IEnumerable<string> WikiText2(string split, string root = ".data")
26 | {
27 | var dataPath = Path.Combine(root, "wikitext-2", $"wiki.{split}.tokens");
28 | return File.ReadLines(dataPath).Select(line => line.Trim()).Where(line => line.Length > 0);
29 | }
30 |
31 | ///
32 | /// WikiText2
33 | ///
34 | /// The folder where the WikiText2 data set was downloaded and extracted.
35 | /// An enumeration of lines from the text for each of the data sets (training, validation, and test).
36 | ///
37 | /// Download the data set at:
38 | /// https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
39 | ///
40 | public static (IEnumerable<string>, IEnumerable<string>, IEnumerable<string>) WikiText2(string root = ".data")
41 | {
42 | return (WikiText2("train", root), WikiText2("valid", root), WikiText2("test", root));
43 | }
44 |
45 | ///
46 | /// Hack to get around F# issue.
47 | ///
48 | ///
49 | public static bool cuda_is_available() => TorchSharp.torch.cuda.is_available();
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/CSharp/Models/MNIST.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Collections.Generic;
5 | using System.Diagnostics;
6 | using static TorchSharp.torch;
7 |
8 | using static TorchSharp.torch.nn;
9 | using static TorchSharp.torch.nn.functional;
10 |
11 | namespace TorchSharp.Examples.MNIST
12 | {
13 | public class Model : Module
14 | {
15 | private Module conv1 = Conv2d(1, 32, 3);
16 | private Module conv2 = Conv2d(32, 64, 3);
17 | private Module fc1 = Linear(9216, 128);
18 | private Module fc2 = Linear(128, 10);
19 |
20 | // These don't have any parameters, so the only reason to instantiate
21 | // them is performance, since they will be used over and over.
22 | private Module pool1 = MaxPool2d(kernel_size:new long[] { 2, 2 });
23 |
24 | private Module relu1 = ReLU();
25 | private Module relu2 = ReLU();
26 | private Module relu3 = ReLU();
27 |
28 | private Module dropout1 = Dropout(0.25);
29 | private Module dropout2 = Dropout(0.5);
30 |
31 | private Module flatten = Flatten();
32 | private Module logsm = LogSoftmax(1);
33 |
34 | public Model(string name, torch.Device device = null) : base(name)
35 | {
36 | RegisterComponents();
37 |
38 | if (device != null && device.type != DeviceType.CPU)
39 | this.to(device);
40 | }
41 |
42 | public override Tensor forward(Tensor input)
43 | {
44 | var l11 = conv1.forward(input);
45 | var l12 = relu1.forward(l11);
46 |
47 | var l21 = conv2.forward(l12);
48 | var l22 = relu2.forward(l21);
49 | var l23 = pool1.forward(l22);
50 | var l24 = dropout1.forward(l23);
51 |
52 | var x = flatten.forward(l24);
53 |
54 | var l31 = fc1.forward(x);
55 | var l32 = relu3.forward(l31);
56 | var l33 = dropout2.forward(l32);
57 |
58 | var l41 = fc2.forward(l33);
59 |
60 | return logsm.forward(l41);
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/Utils/Decompress.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Threading;
6 | using System.Threading.Tasks;
7 | using ICSharpCode.SharpZipLib.Core;
8 | using ICSharpCode.SharpZipLib.GZip;
9 | using ICSharpCode.SharpZipLib.Tar;
10 |
11 | //NOTE: This code was inspired by code found in the SciSharp-Stack-Examples repository.
12 | // https://github.com/SciSharp/SciSharp-Stack-Examples
13 |
14 | namespace TorchSharp.Examples.Utils
15 | {
16 | public static class Decompress
17 | {
18 | public static void DecompressGZipFile(string gzipFileName, string targetDir)
19 | {
20 | byte[] buf = new byte[4096];
21 |
22 | using (var fs = File.OpenRead(gzipFileName))
23 | using (var gzipStream = new GZipInputStream(fs)) {
24 |
25 | string fnOut = Path.Combine(targetDir, Path.GetFileNameWithoutExtension(gzipFileName));
26 |
27 | using (var fsOut = File.Create(fnOut)) {
28 | StreamUtils.Copy(gzipStream, fsOut, buf);
29 | }
30 | }
31 | }
32 | public static void ExtractTGZ(string gzArchiveName, string destFolder)
33 | {
34 | var flag = gzArchiveName.Split(Path.DirectorySeparatorChar).Last().Split('.').First() + ".bin";
35 | if (File.Exists(Path.Combine(destFolder, flag))) return;
36 |
37 | Console.WriteLine($"Extracting.");
38 | var task = Task.Run(() => {
39 | using (var inStream = File.OpenRead(gzArchiveName)) {
40 | using (var gzipStream = new GZipInputStream(inStream)) {
41 | #pragma warning disable CS0618 // Type or member is obsolete
42 | using (TarArchive tarArchive = TarArchive.CreateInputTarArchive(gzipStream))
43 | #pragma warning restore CS0618 // Type or member is obsolete
44 | tarArchive.ExtractContents(destFolder);
45 | }
46 | }
47 | });
48 |
49 | while (!task.IsCompleted) {
50 | Thread.Sleep(200);
51 | Console.Write(".");
52 | }
53 |
54 | File.Create(Path.Combine(destFolder, flag));
55 | Console.WriteLine("");
56 | Console.WriteLine("Extraction completed.");
57 | }
58 |
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/TorchSharpExamples.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.0.32112.339
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Examples.Utils", "Utils\Examples.Utils.csproj", "{9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Models", "CSharp\Models\Models.csproj", "{AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}"
9 | EndProject
10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSharpExamples", "CSharp\CSharpExamples\CSharpExamples.csproj", "{E56038AD-B99F-4333-BA8C-3F65C95C638E}"
11 | EndProject
12 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FSharpExamples", "FSharp\FSharpExamples\FSharpExamples.fsproj", "{E9B24578-E02C-4B9B-B4A4-2458E876E8C1}"
13 | EndProject
14 | Global
15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
16 | Debug|Any CPU = Debug|Any CPU
17 | Release|Any CPU = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
20 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {9B26E338-1AAD-4E64-B9EA-CE6D1C10A9E9}.Release|Any CPU.Build.0 = Release|Any CPU
24 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
25 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Debug|Any CPU.Build.0 = Debug|Any CPU
26 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Release|Any CPU.ActiveCfg = Release|Any CPU
27 | {AF43A5E5-DBAE-46CE-9B06-69F2F34140FD}.Release|Any CPU.Build.0 = Release|Any CPU
28 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
29 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Debug|Any CPU.Build.0 = Debug|Any CPU
30 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Release|Any CPU.ActiveCfg = Release|Any CPU
31 | {E56038AD-B99F-4333-BA8C-3F65C95C638E}.Release|Any CPU.Build.0 = Release|Any CPU
32 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
33 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
34 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Release|Any CPU.ActiveCfg = Release|Any CPU
35 | {E9B24578-E02C-4B9B-B4A4-2458E876E8C1}.Release|Any CPU.Build.0 = Release|Any CPU
36 | EndGlobalSection
37 | GlobalSection(SolutionProperties) = preSolution
38 | HideSolutionNode = FALSE
39 | EndGlobalSection
40 | GlobalSection(ExtensibilityGlobals) = postSolution
41 | SolutionGuid = {FC1609FE-9105-4B47-BA8D-5EBF6D388046}
42 | EndGlobalSection
43 | EndGlobal
44 |
--------------------------------------------------------------------------------
/src/Utils/Arguments.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | using Newtonsoft.Json;
8 |
9 | namespace TorchSharp.Examples.Utils
10 | {
11 | [JsonObject]
12 | public sealed class ArgumentDescriptor
13 | {
14 | ///
15 | /// Long names are used with '--' and can be any one word using letters and numbers.
16 | /// The long name spelling are not case-sensitive.
17 | ///
18 | [JsonProperty(Required = Required.Always)]
19 | public string LongName { get; set; }
20 |
21 | ///
22 | /// Short names must be a single character, and are sensitive to case.
23 | ///
24 | [JsonProperty(Required = Required.Default)]
25 | public string ShortName { get; set; }
26 |
27 | ///
28 | /// If true, the parser should allow multiple values.
29 | ///
30 | [JsonProperty(Required = Required.Default)]
31 | public bool AllowMultiple { get; set; }
32 |
33 | ///
34 | /// The kind of argument.
35 | ///
36 | [JsonProperty(Required = Required.Always)]
37 | public ArgumentType ArgType { get; set; }
38 |
39 | ///
40 | /// An explanation of the argument, intended for human consumption as part of a 'using' message.
41 | ///
42 | public String Explanation { get; set; }
43 |
44 | public enum ArgumentType
45 | {
46 | ///
47 | /// A string argument.
48 | ///
49 | ///
50 | /// --name=foobar
51 | ///
52 | String,
53 | ///
54 | /// An integer argument.
55 | ///
56 | ///
57 | /// --count=10
58 | ///
59 | Integer,
60 | ///
61 | /// An comma-separated list of strings.
62 | ///
63 | ///
64 | /// --options=a,b,c
65 | ///
66 | List,
67 | ///
68 | /// A boolean argument, for example
69 | ///
70 | ///
71 | /// --doit=true
72 | ///
73 | Boolean,
74 | ///
75 | /// A Boolean flag that requires no value. Absence is 'false'
76 | ///
77 | ///
78 | /// --doit
79 | ///
80 | Flag,
81 | }
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Reflection;
4 | using TorchSharp.Examples.Utils;
5 |
6 | namespace CSharpExamples
7 | {
8 | public class Program
9 | {
10 | static void Main(string[] args)
11 | {
12 | var argumentsPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), "arguments.json");
13 | var argumentParser = new ArgumentParser(new FileInfo(argumentsPath), args);
14 |
15 | if (argumentParser.Count == 0)
16 | {
17 | argumentParser.UsingMessage("CSharpExamples", "");
18 | return;
19 | }
20 |
21 | argumentParser.TryGetValue("epochs", out int epochs, 16);
22 | argumentParser.TryGetValue("timeout", out int timeout, 3600);
23 | argumentParser.TryGetValue("logdir", out string logdir, null);
24 |
25 | for (var idx = 0; idx < argumentParser.Count; idx++)
26 | {
27 | switch(argumentParser[idx].ToLower())
28 | {
29 | case "mnist":
30 | case "fashion-mnist":
31 | MNIST.Run(epochs, timeout, logdir, argumentParser[idx].ToLower());
32 | break;
33 |
34 | case "fgsm":
35 | case "fashion-fgsm":
36 | AdversarialExampleGeneration.Run(epochs, timeout, logdir, argumentParser[idx].ToLower());
37 | break;
38 |
39 | case "alexnet":
40 | case "resnet":
41 | case "mobilenet":
42 | case "resnet18":
43 | case "resnet34":
44 | case "resnet50":
45 | #if false
46 | // The following are disabled, because they require big CUDA processors in order to run.
47 | case "resnet101":
48 | case "resnet152":
49 | #endif
50 | case "vgg11":
51 | case "vgg13":
52 | case "vgg16":
53 | case "vgg19":
54 | CIFAR10.Run(epochs, timeout, logdir, argumentParser[idx]);
55 | break;
56 |
57 | case "text":
58 | TextClassification.Run(epochs, timeout, logdir);
59 | break;
60 |
61 | case "seq2seq":
62 | SequenceToSequence.Run(epochs, timeout, logdir);
63 | break;
64 |
65 | default:
66 | Console.Error.WriteLine($"Unknown model name: {argumentParser[idx]}");
67 | break;
68 | }
69 | }
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/CSharp/Models/AlexNet.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Collections.Generic;
6 | using System.Diagnostics;
7 |
8 | using TorchSharp;
9 | using static TorchSharp.torch;
10 | using static TorchSharp.torch.nn;
11 | using static TorchSharp.torch.nn.functional;
12 |
13 | namespace TorchSharp.Examples
14 | {
15 | ///
16 | /// Modified version of original AlexNet to fit CIFAR10 32x32 images.
17 | ///
18 | public class AlexNet : Module
19 | {
20 | private readonly Module features;
21 | private readonly Module avgPool;
22 | private readonly Module classifier;
23 |
24 | public AlexNet(string name, int numClasses, Device device = null) : base(name)
25 | {
26 | features = Sequential(
27 | ("c1", Conv2d(3, 64, kernel_size:3, stride: 2, padding: 1)),
28 | ("r1", ReLU(inplace: true)),
29 | ("mp1", MaxPool2d(kernel_size:new long[] { 2, 2 })),
30 | ("c2", Conv2d(64, 192, kernel_size:3, padding: 1)),
31 | ("r2", ReLU(inplace: true)),
32 | ("mp2", MaxPool2d(kernel_size:new long[] { 2, 2 })),
33 | ("c3", Conv2d(192, 384, kernel_size:3, padding: 1)),
34 | ("r3", ReLU(inplace: true)),
35 | ("c4", Conv2d(384, 256, kernel_size:3, padding: 1)),
36 | ("r4", ReLU(inplace: true)),
37 | ("c5", Conv2d(256, 256, kernel_size:3, padding: 1)),
38 | ("r5", ReLU(inplace: true)),
39 | ("mp3", MaxPool2d(kernel_size:new long[] { 2, 2 })));
40 |
41 | avgPool = AdaptiveAvgPool2d(new long[] { 2, 2 });
42 |
43 | classifier = Sequential(
44 | ("d1", Dropout()),
45 | ("l1", Linear(256 * 2 * 2, 4096)),
46 | ("r1", ReLU(inplace: true)),
47 | ("d2", Dropout()),
48 | ("l2", Linear(4096, 4096)),
49 | ("r3", ReLU(inplace: true)),
50 | ("d3", Dropout()),
51 | ("l3", Linear(4096, numClasses))
52 | );
53 |
54 | RegisterComponents();
55 |
56 | if (device != null && device.type != DeviceType.CPU)
57 | this.to(device);
58 | }
59 |
60 | public override Tensor forward(Tensor input)
61 | {
62 | var f = features.forward(input);
63 | var avg = avgPool.forward(f);
64 |
65 | var x = avg.view(new long[] { avg.shape[0], 256 * 2 * 2 });
66 |
67 | return classifier.forward(x);
68 | }
69 | }
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
6 |
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
8 |
9 | ## Reporting Security Issues
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 |
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 |
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 |
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 |
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 |
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 |
29 | This information will help us triage your report more quickly.
30 |
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 |
33 | ## Preferred Languages
34 |
35 | We prefer all communications to be in English.
36 |
37 | ## Policy
38 |
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 |
41 |
--------------------------------------------------------------------------------
/src/CSharp/Models/VGG.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System.Collections.Generic;
3 | using TorchSharp;
4 | using static TorchSharp.torch;
5 | using static TorchSharp.torch.nn;
6 |
7 | namespace TorchSharp.Examples
8 | {
    /// <summary>
    /// Modified version of VGG to classify CIFAR10 32x32 images.
    /// </summary>
    /// <remarks>
    /// With an unaugmented CIFAR-10 data set, the author of this saw training converge
    /// at roughly 85% accuracy on the test set, after 50 epochs using VGG-16.
    /// </remarks>
    public class VGG : Module
    {
        // The code here is loosely based on https://github.com/kuangliu/pytorch-cifar/blob/master/models/vgg.py
        // License and copyright notice at: https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE

        // Layer plan for each VGG variant: a non-zero entry is the output channel
        // count of a 3x3 convolution block; a zero entry denotes a 2x2 max-pool.
        private readonly Dictionary _channels = new Dictionary() {
            { "vgg11", new long[] { 64, 0, 128, 0, 256, 256, 0, 512, 512, 0, 512, 512, 0 } },
            { "vgg13", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 0, 512, 512, 0, 512, 512, 0 } },
            { "vgg16", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 256, 0, 512, 512, 512, 0, 512, 512, 512, 0 } },
            { "vgg19", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 256, 256, 0, 512, 512, 512, 512, 0, 512, 512, 512, 512, 0 } }
        };

        private readonly Module layers;

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="name">Variant selector -- one of 'vgg11', 'vgg13', 'vgg16', 'vgg19' (matched case-insensitively).</param>
        /// <param name="numClasses">Number of classes for the final linear layer.</param>
        /// <param name="device">Optional target device; the model is moved there when it is not the CPU.</param>
        public VGG(string name, int numClasses, Device device = null) : base(name)
        {
            var modules = new List<(string, Module)>();

            var channels = _channels[name.ToLower()];

            long in_channels = 3;   // CIFAR10 images are RGB.

            for (var i = 0; i < channels.Length; i++) {

                if (channels[i] == 0) {
                    modules.Add(($"MaxPool2d-{i}a", MaxPool2d(kernel_size:2, stride: 2)));
                } else {
                    // Conv -> BatchNorm -> ReLU; 3x3 kernel with padding 1 keeps the spatial size.
                    modules.Add(($"conv2d-{i}a", Conv2d(in_channels, channels[i], kernel_size:3, padding: 1)));
                    modules.Add(($"bnrm2d-{i}a", BatchNorm2d(channels[i])));
                    modules.Add(($"relu-{i}b", ReLU(inplace: true)));
                    in_channels = channels[i];
                }
            }
            // Final 512-channel feature map is flattened into the class projection.
            modules.Add(("avgpool2d", AvgPool2d(kernel_size: 1, stride: 1)));
            modules.Add(("flatten", Flatten()));
            modules.Add(("linear", Linear(512, numClasses)));

            layers = Sequential(modules);

            RegisterComponents();

            if (device != null && device.type != DeviceType.CPU)
                this.to(device);
        }

        /// <summary>
        /// Runs the input through the stacked layers.
        /// </summary>
        /// <param name="input">Input image batch; presumably [N, 3, 32, 32] -- TODO confirm against caller.</param>
        /// <returns>Per-class logits.</returns>
        public override Tensor forward(Tensor input)
        {
            return layers.forward(input);
        }
    }
66 | }
67 |
--------------------------------------------------------------------------------
/src/CSharp/Models/MobileNet.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections.Generic;
4 |
5 | using TorchSharp;
6 | using static TorchSharp.torch;
7 | using static TorchSharp.torch.nn;
8 |
9 | namespace TorchSharp.Examples
10 | {
    /// <summary>
    /// Modified version of MobileNet to classify CIFAR10 32x32 images.
    /// </summary>
    /// <remarks>
    /// With an unaugmented CIFAR-10 data set, the author of this saw training converge
    /// at roughly 75% accuracy on the test set, over the course of 1500 epochs.
    /// </remarks>
    public class MobileNet : Module
    {
        // The code here is loosely based on https://github.com/kuangliu/pytorch-cifar/blob/master/models/mobilenet.py
        // License and copyright notice at: https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE

        // Output channels and strides for the 13 depthwise-separable blocks.
        // The two arrays are indexed in lock-step and must have the same length
        // (checked in the constructor).
        private readonly long[] planes = new long[] { 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024 };
        private readonly long[] strides = new long[] { 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1 };

        private readonly Module layers;

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="name">Module name passed through to the base class.</param>
        /// <param name="numClasses">Number of classes for the final linear layer.</param>
        /// <param name="device">Optional target device; the model is moved there when it is not the CPU.</param>
        public MobileNet(string name, int numClasses, Device device = null) : base(name)
        {
            if (planes.Length != strides.Length) throw new ArgumentException("'planes' and 'strides' must have the same length.");

            var modules = new List<(string, Module)>();

            // Stem: a full 3x3 convolution lifting the 3 input channels to 32.
            modules.Add(($"conv2d-first", Conv2d(3, 32, kernel_size:3, stride: 1, padding: 1, bias: false)));
            modules.Add(($"bnrm2d-first", BatchNorm2d(32)));
            modules.Add(($"relu-first", ReLU()));
            MakeLayers(modules, 32);
            modules.Add(("avgpool", AvgPool2d(new long[] { 2, 2 })));
            modules.Add(("flatten", Flatten()));
            modules.Add(($"linear", Linear(planes[^1], numClasses)));

            layers = Sequential(modules);

            RegisterComponents();

            if (device != null && device.type != DeviceType.CPU)
                this.to(device);
        }

        /// <summary>
        /// Appends the depthwise-separable blocks: for each (planes, strides) entry,
        /// a depthwise 3x3 convolution (groups == channels) followed by a pointwise
        /// 1x1 convolution, each with batch norm and ReLU.
        /// </summary>
        /// <param name="modules">List the named submodules are appended to.</param>
        /// <param name="in_planes">Channel count produced by the stem.</param>
        private void MakeLayers(List<(string, Module)> modules, long in_planes)
        {

            for (var i = 0; i < strides.Length; i++) {
                var out_planes = planes[i];
                var stride = strides[i];

                // Depthwise: one filter per input channel (groups: in_planes).
                modules.Add(($"conv2d-{i}a", Conv2d(in_planes, in_planes, kernel_size:3, stride: stride, padding: 1, groups: in_planes, bias: false)));
                modules.Add(($"bnrm2d-{i}a", BatchNorm2d(in_planes)));
                modules.Add(($"relu-{i}a", ReLU()));
                // Pointwise: 1x1 convolution mixing channels up to out_planes.
                modules.Add(($"conv2d-{i}b", Conv2d(in_planes, out_planes, kernel_size:1L, stride: 1L, padding: 0L, bias: false)));
                modules.Add(($"bnrm2d-{i}b", BatchNorm2d(out_planes)));
                modules.Add(($"relu-{i}b", ReLU()));

                in_planes = out_planes;
            }
        }

        /// <summary>
        /// Runs the input through the stacked layers.
        /// </summary>
        /// <param name="input">Input image batch; presumably [N, 3, 32, 32] -- TODO confirm against caller.</param>
        /// <returns>Per-class logits.</returns>
        public override Tensor forward(Tensor input)
        {
            return layers.forward(input);
        }
    }
73 | }
74 |
--------------------------------------------------------------------------------
/src/Utils/Vocab.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections;
4 | using System.Collections.Generic;
5 | using System.Diagnostics.CodeAnalysis;
6 | using System.Linq;
7 | using TorchSharp;
8 |
9 | using static TorchSharp.torch.nn;
10 |
11 | namespace TorchText.Vocab
12 | {
    /// <summary>
    /// This needs a permanent place.
    /// The functionality is based on the Python 'Counter' class:
    /// it records how many times each key has been seen.
    /// </summary>
    public class Counter : IEnumerable>
    {
        private Dictionary _dict = new Dictionary();

        // Increment the count for 'key'; unseen keys start at 1.
        public void update(T key)
        {
            if (_dict.TryGetValue(key, out int count)) {
                _dict[key] = count + 1;
            } else {
                _dict[key] = 1;
            }
        }
        // Count every element of 'keys' in turn.
        public void update(IEnumerable keys)
        {
            foreach (T key in keys) {
                update(key);
            }
        }
        // Read-only indexer: returns the recorded count.
        // Throws KeyNotFoundException for keys that were never counted.
        public int this[T key] { get => _dict[key]; }

        public IEnumerator> GetEnumerator()
        {
            return _dict.GetEnumerator();
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
    }
48 |
    /// <summary>
    /// This belongs in its own package, 'TorchText'.
    /// For now, it's useful to keep it with the examples that use it.
    /// Maps tokens to consecutive integer ids, built from frequency counts.
    /// </summary>
    public class Vocab
    {
        /// <summary>
        /// Builds the vocabulary from token frequencies.
        /// </summary>
        /// <param name="counter">Token frequency counts.</param>
        /// <param name="maxSize">Upper bound on the vocabulary size; null means unbounded.</param>
        /// <param name="minFreq">Tokens occurring fewer times than this are dropped.</param>
        /// <param name="specials">Special tokens always added; defaults are used when null.</param>
        /// <param name="unkInit">NOTE(review): given a default below but never used anywhere in this class -- confirm whether it can be removed.</param>
        /// <param name="specialsFirst">Whether specials get the lowest indices (true) or the highest (false).</param>
        public Vocab(Counter counter, int? maxSize = null, int minFreq = 1, string[] specials = null, Func unkInit = null, bool specialsFirst = true)
        {
            if (specials == null) specials = new string[] { "", "" };
            if (unkInit == null) unkInit = (t => init.zeros_(t.clone()));
            if (specialsFirst) {
                foreach (var sp in specials) {
                    _dict.Add(sp, _last++);
                }
            }
            // Assign consecutive ids to frequent-enough non-special tokens. The size
            // cap is checked after each insertion, so the loop stops once _last
            // exceeds maxSize.
            foreach (var kv in counter.Where(kv => kv.Value >= minFreq)) {
                if (!specials.Contains(kv.Key)) {
                    _dict.Add(kv.Key, _last++);
                }
                if (_last > (maxSize ?? int.MaxValue))
                    break;
            }
            if (!specialsFirst) {
                foreach (var sp in specials) {
                    _dict.Add(sp, _last++);
                }
            }
        }

        // Indexer: unknown tokens fall back to the special-token entry.
        // NOTE(review): the fallback key looks like a markup-stripped "<unk>" literal -- confirm against the original source.
        public int this[string key] { get => _dict.TryGetValue(key, out int value) ? value : _dict[""]; }

        public int Count => _dict.Count;

        public void Add(string key, int value)
        {
            _dict.Add(key, value);
        }

        public void Add(KeyValuePair item)
        {
            Add(item.Key, item.Value);
        }

        public bool TryGetValue(string key, [MaybeNullWhen(false)] out int value)
        {
            return _dict.TryGetValue(key, out value);
        }

        private Dictionary _dict = new Dictionary();
        private int _last = 0;  // Next id to hand out.
    }
100 | }
101 |
--------------------------------------------------------------------------------
/src/CSharp/Models/SequenceToSequence.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Collections.Generic;
6 | using System.Diagnostics;
7 |
8 | using static TorchSharp.torch;
9 | using static TorchSharp.torch.nn;
10 | using static TorchSharp.torch.nn.functional;
11 |
12 | namespace TorchSharp.Examples
13 | {
    /// <summary>
    /// Transformer-encoder language model. This example is based on the PyTorch tutorial at:
    ///
    /// https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    /// </summary>

    public class TransformerModel : Module
    {
        private Modules.TransformerEncoder transformer_encoder;
        private PositionalEncoding pos_encoder;
        private Modules.Embedding encoder;
        private Modules.Linear decoder;

        private long ninputs;   // Embedding / model dimension (d_model).
        private Device device;  // Set by the custom to(Device) below; used for mask placement.

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="ntokens">Vocabulary size (embedding rows and decoder outputs).</param>
        /// <param name="ninputs">Embedding dimension (d_model).</param>
        /// <param name="nheads">Number of attention heads per encoder layer.</param>
        /// <param name="nhidden">Feed-forward hidden dimension inside each encoder layer.</param>
        /// <param name="nlayers">Number of stacked encoder layers.</param>
        /// <param name="dropout">Dropout probability.</param>
        public TransformerModel(long ntokens, long ninputs, long nheads, long nhidden, long nlayers, double dropout = 0.5) : base("Transformer")
        {
            this.ninputs = ninputs;

            pos_encoder = new PositionalEncoding(ninputs, dropout);
            var encoder_layers = TransformerEncoderLayer(ninputs, nheads, nhidden, dropout);
            transformer_encoder = TransformerEncoder(encoder_layers, nlayers);
            encoder = Embedding(ntokens, ninputs);
            decoder = Linear(ninputs, ntokens);
            InitWeights();

            RegisterComponents();
        }

        /// <summary>
        /// Builds a causal attention mask: a position may attend to itself and
        /// earlier positions (0.0) but not to later ones (-inf).
        /// </summary>
        public Tensor GenerateSquareSubsequentMask(long size)
        {
            var mask = (torch.ones(new long[] { size, size }) == 1).triu().transpose(0, 1);
            // NOTE(review): 'device' is only assigned by the custom to(Device) below
            // and is null until then -- confirm callers always move the model first.
            return mask.to_type(ScalarType.Float32)
                .masked_fill(mask == 0, float.NegativeInfinity)
                .masked_fill(mask == 1, 0.0f).to(device);
        }

        // Uniform-initialize embedding and decoder weights in [-0.1, 0.1]; zero the decoder bias.
        private void InitWeights()
        {
            var initrange = 0.1;

            init.uniform_(encoder.weight, -initrange, initrange);
            init.zeros_(decoder.bias);
            init.uniform_(decoder.weight, -initrange, initrange);
        }

        /// <summary>
        /// Embeds the token batch (scaled by sqrt(d_model)), adds positional
        /// encoding, runs the transformer encoder with the given attention mask,
        /// and projects back to logits over the vocabulary.
        /// </summary>
        public override Tensor forward(Tensor t, Tensor mask)
        {
            using var src = pos_encoder.forward(encoder.forward(t) * MathF.Sqrt(ninputs));
            using var enc = transformer_encoder.call(src, mask);
            return decoder.forward(enc);
        }

        // Moves the module to 'device' and remembers it for mask construction.
        // NOTE(review): 'this.to(device)' appears to resolve to this very overload
        // (identical signature), which would recurse -- confirm whether
        // base.to(device) was intended.
        public TransformerModel to(Device device)
        {
            this.to(device);
            this.device = device;
            return this;
        }
    }
76 |
    class PositionalEncoding : Module
    {
        private Module dropout;
        private Tensor pe;  // Precomputed sinusoidal table, shape [maxLen, 1, dmodel].

        /// <summary>
        /// Precomputes the classic sine/cosine positional-encoding table:
        /// sin on even feature indices, cos on odd ones.
        /// </summary>
        /// <param name="dmodel">Embedding dimension.</param>
        /// <param name="dropout">Dropout probability applied to the encoded output.</param>
        /// <param name="maxLen">Longest sequence length supported by the table.</param>
        public PositionalEncoding(long dmodel, double dropout, int maxLen = 5000) : base("PositionalEncoding")
        {
            this.dropout = Dropout(dropout);
            var pe = torch.zeros(new long[] { maxLen, dmodel });
            var position = torch.arange(0, maxLen, 1).unsqueeze(1);
            // Frequency term exp(-ln(10000) * 2i / dmodel), one entry per feature pair.
            var divTerm = (torch.arange(0, dmodel, 2) * (-Math.Log(10000.0) / dmodel)).exp();
            pe[TensorIndex.Ellipsis, TensorIndex.Slice(0, null, 2)] = (position * divTerm).sin();
            pe[TensorIndex.Ellipsis, TensorIndex.Slice(1, null, 2)] = (position * divTerm).cos();
            // unsqueeze + transpose yields [maxLen, 1, dmodel], so it broadcasts
            // over the batch axis in forward().
            this.pe = pe.unsqueeze(0).transpose(0, 1);

            RegisterComponents();
        }

        /// <summary>
        /// Adds the positional table (truncated to the sequence length, t.shape[0])
        /// to the input and applies dropout.
        /// </summary>
        public override Tensor forward(Tensor t)
        {
            var x = t + pe[TensorIndex.Slice(null, t.shape[0]), TensorIndex.Slice()];
            return dropout.forward(x);
        }
    }
101 | }
102 |
--------------------------------------------------------------------------------
/src/Utils/AG_NEWSReader.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections.Generic;
4 | using System.IO;
5 | using System.Linq;
6 | using TorchSharp;
7 | using static TorchSharp.torch;
8 |
9 | namespace TorchText.Data
10 | {
    public class AG_NEWSReader : IDisposable
    {
        /// <summary>
        /// Factory: opens the AG_NEWS split (e.g. 'train' or 'test') found as
        /// '{split}.csv' under 'root'.
        /// </summary>
        public static AG_NEWSReader AG_NEWS(string split, Device device, string root = ".data")
        {
            var dataPath = Path.Combine(root, $"{split}.csv");
            return new AG_NEWSReader(dataPath, device);
        }

        private AG_NEWSReader(string path, Device device)
        {
            _path = path;
            _device = device;
        }

        private string _path;
        private Device _device;

        /// <summary>
        /// Lazily enumerates the file as (label, text) pairs, one per CSV line.
        /// </summary>
        public IEnumerable<(int, string)> Enumerate()
        {
            return File.ReadLines(_path).Select(line => ParseLine(line));
        }

        /// <summary>
        /// Tokenizes and batches the whole data set. Batches are cached after the
        /// first call, so the tokenizer/vocab arguments only matter the first time.
        /// </summary>
        public IEnumerable<(Tensor, Tensor, Tensor)> GetBatches(Func> tokenizer, Vocab.Vocab vocab, long batch_size)
        {
            // This data set fits in memory, so we will simply load it all and cache it between epochs.

            var inputs = new List<(int, string)>();

            if (_data == null) {

                _data = new List<(Tensor, Tensor, Tensor)>();

                var counter = 0;
                var lines = Enumerate().ToList();
                var left = lines.Count;

                foreach (var line in lines) {

                    inputs.Add(line);
                    left -= 1;

                    // Emit a batch when full, or when only a trailing partial batch remains.
                    if (++counter == batch_size || left == 0) {
                        _data.Add(Batchifier(inputs, tokenizer, vocab));
                        inputs.Clear();
                        counter = 0;
                    }
                }
            }

            return _data;
        }

        private List<(Tensor, Tensor, Tensor)> _data;
        private bool disposedValue;   // Guards against double-dispose.

        /// <summary>
        /// Converts a batch of (label, text) pairs into three tensors: the labels,
        /// all token ids concatenated into one flat tensor, and the start offset of
        /// each sequence within it (EmbeddingBag-style input -- presumably; confirm
        /// against the consuming model).
        /// </summary>
        private (Tensor, Tensor, Tensor) Batchifier(IEnumerable<(int, string)> input, Func> tokenizer, Vocab.Vocab vocab)
        {
            var label_list = new List();
            var text_list = new List();
            var offsets = new List();
            offsets.Add(0);

            long last = 0;

            foreach (var (label, text) in input) {
                label_list.Add(label);
                var processed_text = torch.tensor(tokenizer(text).Select(t => (long)vocab[t]).ToArray(),dtype:torch.int64);
                text_list.Add(processed_text);
                last += processed_text.size(0);
                offsets.Add(last);
            }

            var labels = torch.tensor(label_list.ToArray(), dtype: torch.int64).to(_device);
            var texts = torch.cat(text_list.ToArray(), 0).to(_device);
            // Take() drops the final cumulative total, leaving one start offset per sequence.
            var offs = torch.tensor(offsets.Take(label_list.Count).ToArray(), dtype:torch.int64).to(_device);

            return (labels, texts, offs);
        }

        /// <summary>
        /// Parses one CSV line of the form "label","title","description".
        /// Returns the zero-based label and the description text (the title column is skipped).
        /// </summary>
        public (int, string) ParseLine(string line)
        {
            int label = 0;
            string text = "";

            int firstComma = line.IndexOf("\",\"");
            label = int.Parse(line.Substring(1, firstComma - 1));
            text = line.Substring(firstComma + 2, line.Length - firstComma - 2);
            int secondComma = text.IndexOf("\",\"");
            text = text.Substring(secondComma + 2, text.Length - secondComma - 2);
            int thirdComma = text.IndexOf("\",\"");

            // NOTE(review): for the last field IndexOf returns -1, so this substring
            // starts at index 1 and strips the surrounding quotes. Fragile, but
            // correct for three-column AG_NEWS lines -- confirm the column count.
            text = text.Substring(thirdComma + 2, text.Length - thirdComma - 3);

            return (label-1, text);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (!disposedValue) {
                if (disposing && _data != null) {
                    // Release all cached batch tensors.
                    foreach (var (l, t, o) in _data) {
                        l.Dispose();
                        t.Dispose();
                        o.Dispose();
                    }
                }

                disposedValue = true;
            }
        }

        public void Dispose()
        {
            // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
129 | }
130 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/AdversarialExampleGeneration.fs:
--------------------------------------------------------------------------------
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
module FSharpExamples.AdversarialExampleGeneration

open System
open System.IO
open System.Diagnostics

open TorchSharp
open type TorchSharp.torch.nn
open type TorchSharp.torch.optim
open type TorchSharp.Scalar

open FSharpExamples
open TorchSharp.Examples

// FGSM Attack
//
// Based on : https://pytorch.org/tutorials/beginner/fgsm_tutorial.html
//
// There are at least two interesting data sets to use with this example:
//
// 1. The classic MNIST set of 60000 images of handwritten digits.
//
//    It is available at: http://yann.lecun.com/exdb/mnist/
//
// 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder
//    data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test
//    data.
//    It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion
//
// In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation'
// constant below at the folder location.
//
// The example is based on the PyTorch tutorial, but the results from attacking the model are very different from
// what the tutorial article notes, at least on the machine where it was developed. There is an order-of-magnitude lower
// drop-off in accuracy in this version. That said, when running the PyTorch tutorial on the same machine, the
// accuracy trajectories are the same between .NET and Python. If the base convolutional model is trained
// using Python, and then used for the FGSM attack in both .NET and Python, the drop-off trajectories are extremely
// close.

// Mutable because both are scaled up 4x in 'run' when training on CUDA.
let mutable trainBatchSize = 64
let mutable testBatchSize = 128

let logInterval = 100

// Optional single command-line argument selects the dataset ('mnist' by default).
let cmdArgs = Environment.GetCommandLineArgs()
let dataset = if cmdArgs.Length = 2 then cmdArgs.[1] else "mnist"

// Data is expected under the user's Downloads folder, in a directory named after the dataset.
let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset)

torch.random.manual_seed(1L) |> ignore

// NOTE(review): CUDA availability is queried through TorchText.Datasets here;
// confirm this is the intended API (torch.cuda.is_available() is the usual one).
let hasCUDA = TorchText.Datasets.cuda_is_available()

let device = if hasCUDA then torch.CUDA else torch.CPU

// Negative log-likelihood loss -- presumably paired with a log-softmax model
// output; confirm against MNIST.Model.
let criterion x y = functional.nll_loss(x,y)

// FGSM step: perturb the image by eps in the direction of the loss gradient's
// sign, then clamp the result back into the valid [0, 1] pixel range.
let attack (image:torch.Tensor) (eps:Scalar) (data_grad:torch.Tensor) =
    use sign = data_grad.sign()
    (image + eps * sign).clamp(0.0.ToScalar(), 1.0.ToScalar())
62 |
// Measures classification accuracy over 'dataLoader' after attacking every
// batch with FGSM at strength 'eps'. Returns the fraction of perturbed images
// the model still classifies correctly.
let test (model:MNIST.Model) (eps:float) (dataLoader:MNISTReader) size =

    let mutable correct = 0

    for (input,labels) in dataLoader do

        use d = torch.NewDisposeScope()

        // Gradients w.r.t. the *input* are needed to build the perturbation.
        input.requires_grad <- true

        begin // This is introduced in order to let a few tensors go out of scope before GC
            use estimate = input --> model
            use loss = criterion estimate labels

            // Backprop populates input.grad, which drives the FGSM attack below.
            model.zero_grad()
            loss.backward()

            use perturbed = attack input (eps.ToScalar()) (input.grad)
            use final = perturbed --> model
            correct <- correct + final.argmax(1L).eq(labels).sum().ToInt32()
        end

    float correct / size
86 |
// Entry point: loads a previously saved MNIST model (or pre-trains one when no
// saved model exists), then evaluates its accuracy under FGSM attacks of
// increasing strength.
let run epochs =

    printfn $"Running AdversarialExampleGeneration on {device.``type``.ToString()}"
    printfn $"Dataset: {dataset}"

    let targetDir = Path.Combine(datasetPath, "test_data")

    MNIST.getDataFiles datasetPath targetDir

    // Larger batches are affordable on the GPU.
    if device.``type`` = DeviceType.CUDA then
        trainBatchSize <- trainBatchSize * 4
        testBatchSize <- testBatchSize * 4

    // Standard MNIST normalization constants (mean 0.1307, std 0.3081).
    let normImage = torchvision.transforms.Normalize( [|0.1307|], [|0.3081|], device=device)
    use testData = new MNISTReader(targetDir, "t10k", testBatchSize, device=device, transform=normImage)

    let modelFile = dataset + ".model.bin"

    // Re-use a saved model when one exists; otherwise train one from scratch first.
    let model =
        if not (File.Exists(modelFile)) then
            printfn $"\n Running MNIST on {device.``type``.ToString()} in order to pre-train the model."

            let model = new MNIST.Model("model",device)

            use train = new MNISTReader(targetDir, "train", trainBatchSize, device=device, shuffle=true, transform=normImage)
            MNIST.trainingLoop model epochs dataset train testData |> ignore

            printfn "Moving on to the Adversarial model.\n"

            model

        else
            let model = new MNIST.Model("model", torch.CPU)
            model.load(modelFile) |> ignore
            model

    model.``to``(device) |> ignore

    // Attacks are evaluated in inference mode.
    model.eval()

    // eps = 0.0 is the unattacked baseline accuracy.
    let epsilons = [| 0.0; 0.05; 0.1; 0.15; 0.20; 0.25; 0.30; 0.35; 0.40; 0.45; 0.50|]

    for eps in epsilons do
        let attacked = test model eps testData (float testData.Size)
        printfn $"Epsilon: {eps:F2}, accuracy: {attacked:P2}"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://gitter.im/dotnet/TorchSharp?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
2 |
3 | # TorchSharp Examples
4 |
5 | This repo holds examples and tutorials related to [TorchSharp](https://github.com/dotnet/TorchSharp), .NET-only bindings to libtorch, the engine behind PyTorch. If you are trying to familiarize yourself with TorchSharp, rather than contributing to it, this is the place to go.
6 |
Currently, the examples are the same as those found in the TorchSharp repo. Unlike the setup in that repo, where the examples are part of the overall VS solution file and use project references to pick up the TorchSharp dependencies, in this repo, the example solution uses the publicly available TorchSharp packages from NuGet.
8 |
9 | The examples and tutorials assume that you are on the latest version of TorchSharp, which currently is 0.97.5.
10 |
11 | ### System / Environment Requirements
12 |
13 | In order to use TorchSharp, you will need both the most recent TorchSharp package, as well as one of the several libtorch-* packages that are available. The most basic one, which is used in this repository, is the libtorch-cpu package. As the name suggests, it uses a CPU backend to do training and inference.
14 |
15 | There is also support for CUDA 11.3 on both Windows and Linux, and each of these combinations has its own NuGet package. If you want to train on CUDA, you need to replace references to libtorch-cpu in the solution and projects.
16 |
17 | __Note__: Starting with NuGet release 0.93.4, we have simplified the package structure, so you only need to select one of these three packages, and it will include the others:
18 |
19 | TorchSharp-cpu
20 | TorchSharp-cuda-windows
21 | TorchSharp-cuda-linux
22 |
The examples solution should build without any modifications, either with Visual Studio, or using `dotnet build`. All of the examples run on an Nvidia GPU with 8GB of memory, while only a subset runs on a GPU with 6GB. Running more than a few epochs while training on a CPU will take a very long time, especially on the CIFAR10 examples. MNIST is the most reasonable example to train on a CPU.
24 |
25 | ## Structure
26 |
27 | There are variants of all models in both C# and F#. For C#, there is a 'Models' library, and a 'XXXExamples' console app, which is what is used for batch training of the model. For F#, the models are bundled with the training code (we may restructure this in the future). There is also a utility library that is written in C# only, and used from both C# and F#.
28 |
29 | The console apps are, as mentioned, meant to be used for batch training. The command line must specify the model to be used. In the case of MNIST, there are two data sets -- the original 'MNIST' as well as the harder 'Fashion MNIST'.
30 |
31 | The repo contains no actual data sets. You have to download them manually and, in some cases, extract the data from archives.
32 |
33 | ## Data Sets
34 |
35 | The MNIST model uses either:
36 |
37 | * [MNIST](http://yann.lecun.com/exdb/mnist/)
38 |
39 | * [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion)
40 |
41 | Both sets are 28x28 grayscale images, archived in .gz files.
42 |
The AlexNet, ResNet*, MobileNet, and VGG* models use the [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) data set. Instructions on how to download it are available in the CIFAR10 source files.
44 |
45 | SequenceToSequence uses the [WikiText2](https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip) dataset. It's kept in a regular .zip file.
46 |
47 | TextClassification uses the [AG_NEWS](https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv) dataset, a CSV file.
48 |
49 | # Tutorials
50 |
51 | We have started work on tutorials, but they are not ready yet. They will mostly be based on .NET Interactive notebooks. If you haven't tried that environment yet, it's worth playing around with it inside VS Code.
52 |
53 | # Contributing
54 |
55 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
56 |
57 | There are two main things we would like help with:
58 |
59 | 1. Adding completely new examples. File an issue and assign it to yourself, so we can track it.
60 |
61 | 2. Picking up an issue from the 'Issues' list. For example, the examples are currently set up to run on Windows, picking up data from under the 'Downloads' folder. If you have thoughts on the best way to do this on MacOS or Linux, please help with that.
62 |
If you add a new example, please adjust it to work on a mainstream CUDA GPU. This means making sure that it runs on a GPU with 8GB of memory, with sufficient invocations of the garbage collector.
64 |
65 | ## A Useful Tip for Contributors
66 |
67 | A useful tip from the Tensorflow.NET repo:
68 |
69 | After you fork, add dotnet/TorchSharp as 'upstream' to your local repo ...
70 |
71 | ```git
72 | git remote add upstream https://github.com/dotnet/TorchSharpExamples.git
73 | ```
74 |
75 | This makes it easy to keep your fork up to date by regularly pulling and merging from upstream.
76 |
77 | Assuming that you do all your development off your main branch, keep your main updated
78 | with these commands:
79 |
80 | ```git
81 | git checkout main
82 | git pull upstream main
83 | git push origin main
84 | ```
85 |
86 | Then, you merge onto your dev branch:
87 |
88 | ```git
git checkout <your-dev-branch>
90 | git merge main
91 | ```
92 |
--------------------------------------------------------------------------------
/src/Utils/CIFARReader .cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections;
4 | using System.Collections.Generic;
5 | using System.IO;
6 | using System.Linq;
7 | using static TorchSharp.torch;
8 |
9 | namespace TorchSharp.Examples
10 | {
    /// <summary>
    /// Data reader utility for the binary CIFAR-10 data set.
    ///
    /// Each batch file holds 10000 records: one label byte followed by a 3x32x32 RGB image.
    /// The format is documented at: https://www.cs.toronto.edu/~kriz/cifar.html
    /// (NOTE(review): the original comment described the MNIST layout -- a copy-paste
    /// from MNISTReader; the code below clearly reads CIFAR-10 batch files.)
    /// </summary>
    public sealed class CIFARReader : IDisposable
    {
        /// <summary>
        /// Constructor. Loads the entire data set eagerly.
        /// </summary>
        /// <param name="path">Path to the folder containing the 'cifar-10-batches-bin' directory.</param>
        /// <param name="test">True to load the test batch, false to load the five training batches.</param>
        /// <param name="batch_size">The batch size</param>
        /// <param name="shuffle">Randomly shuffle the images.</param>
        /// <param name="device">The device, i.e. CPU or GPU to place the output tensors on.</param>
        /// <param name="transforms">A list of image transformations, helpful for data augmentation.</param>
        public CIFARReader(string path, bool test, int batch_size = 32, bool shuffle = false, Device device = null, IList transforms = null)
        {
            _transforms = transforms == null ? new List() : transforms;

            // The CIFAR-10 data set is small enough to fit in memory, so let's load it there.

            var dataPath = Path.Combine(path, "cifar-10-batches-bin");

            if (test) {
                _size = ReadSingleFile(Path.Combine(dataPath, "test_batch.bin"), batch_size, shuffle, device);
            } else {
                _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_1.bin"), batch_size, shuffle, device);
                _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_2.bin"), batch_size, shuffle, device);
                _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_3.bin"), batch_size, shuffle, device);
                _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_4.bin"), batch_size, shuffle, device);
                _size += ReadSingleFile(Path.Combine(dataPath, "data_batch_5.bin"), batch_size, shuffle, device);
            }
        }

        /// <summary>
        /// Reads one CIFAR-10 batch file into batched image/label tensors,
        /// appending them to 'data' and 'labels'.
        /// </summary>
        /// <returns>The number of records in the file (always 10000).</returns>
        private int ReadSingleFile(string path, int batch_size, bool shuffle, Device device)
        {
            const int height = 32;
            const int width = 32;
            const int channels = 3;
            const int count = 10000;

            byte[] dataBytes = File.ReadAllBytes(path);

            // Each record is 1 label byte + channels*height*width image bytes.
            if (dataBytes.Length != (1 + channels * height * width) * count)
                throw new InvalidDataException($"Not a proper CIFAR10 file: {path}");

            // Set up the indices array.
            Random rnd = new Random();
            var indices = !shuffle ?
                Enumerable.Range(0, count).ToArray() :
                Enumerable.Range(0, count).OrderBy(c => rnd.Next()).ToArray();

            var imgSize = channels * height * width;

            // Go through the data and create tensors
            for (var i = 0; i < count;) {

                // The final batch may be smaller than batch_size.
                var take = Math.Min(batch_size, Math.Max(0, count - i));

                if (take < 1) break;

                var dataTensor = torch.zeros(new long[] { take, imgSize }, device: device);
                var lablTensor = torch.zeros(new long[] { take }, torch.int64, device: device);

                // Take
                for (var j = 0; j < take; j++) {
                    var idx = indices[i++];
                    var lblStart = idx * (1 + imgSize);
                    var imgStart = lblStart + 1;

                    lablTensor[j] = torch.tensor(dataBytes[lblStart], torch.int64);

                    // Raw bytes become floats in [0, 255]; no scaling to [0, 1] here --
                    // NOTE(review): confirm downstream normalization expects this range.
                    var floats = dataBytes[imgStart..(imgStart + imgSize)].Select(b => (float)b).ToArray();
                    using (var inputTensor = torch.tensor(floats))
                        dataTensor.index_put_(inputTensor, TensorIndex.Single(j));
                }

                data.Add(dataTensor.reshape(take, channels, height, width));
                dataTensor.Dispose();
                labels.Add(lablTensor);
            }

            return count;
        }

        // Total image count, counting each transform as an extra pass over the set.
        public int Size { get {
            return _size * (_transforms.Count + 1);
        } }
        private int _size = 0;

        private List data = new List();
        private List labels = new List();

        private IList _transforms;

        /// <summary>
        /// Enumerates (image batch, label batch) pairs: each raw batch first,
        /// followed by one transformed copy per registered transform.
        /// </summary>
        public IEnumerable<(Tensor, Tensor)> Data()
        {
            for (var i = 0; i < data.Count; i++) {
                yield return (data[i], labels[i]);

                foreach (var tfrm in _transforms) {
                    yield return (tfrm.call(data[i]), labels[i]);
                }
            }
        }

        /// <summary>
        /// Disposes all cached image and label tensors.
        /// </summary>
        public void Dispose()
        {
            data.ForEach(d => d.Dispose());
            labels.ForEach(d => d.Dispose());
        }
    }
125 | }
126 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/TextClassification.fs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | module FSharpExamples.TextClassification
3 |
4 | open System
5 | open System.IO
6 | open System.Linq
7 | open System.Diagnostics
8 | open System.Collections.Generic
9 |
10 | open TorchSharp
11 | open type TorchSharp.torch.nn
12 |
13 | open TorchSharp.Examples
14 |
15 | // This example is based on the PyTorch tutorial at:
16 | //
17 | // https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html
18 | //
19 | // It relies on the AG_NEWS dataset, which can be downloaded in CSV form at:
20 | //
21 | // https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv
22 | //
23 | // Download the two files, and place them in a folder called "AG_NEWS" in
24 | // accordance with the file path below (Windows only).
25 |
26 | let emsize = 200L // embedding dimension fed to EmbeddingBag
27 |
28 | let batch_size = 64L // training batch size
29 | let eval_batch_size = 256L // evaluation batch size
30 |
31 | let epochs = 16
32 |
33 | let lr = 5.0 // initial SGD learning rate
34 |
35 | let logInterval = 200 // batches between progress reports
36 |
37 | let cmdArgs = Environment.GetCommandLineArgs()
38 |
39 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS")
40 |
41 | torch.random.manual_seed(1L) |> ignore // fixed seed for reproducible runs
42 |
43 | let hasCUDA = torch.cuda.is_available() // FIX: probe CUDA via the TorchSharp API; 'TorchText.Datasets' has no cuda_is_available member
44 |
45 | let device = if hasCUDA then torch.CUDA else torch.CPU
46 |
47 | let criterion x y = torch.nn.functional.cross_entropy(x,y) // multi-class classification loss
48 |
49 | type TextClassificationModel(vocabSize, embedDim, nClasses, device:torch.Device) as this = // EmbeddingBag + linear head, per the PyTorch text-classification tutorial
50 | inherit Module("Transformer")
51 |
52 | let embedding = EmbeddingBag(vocabSize, embedDim, sparse=false)
53 | let fc = Linear(embedDim, nClasses)
54 |
55 | do
56 | let initrange = 0.5
57 |
58 | init.uniform_(embedding.weight, -initrange, initrange) |> ignore // uniform init in [-0.5, 0.5]
59 | init.uniform_(fc.weight, -initrange, initrange) |> ignore
60 | init.zeros_(fc.bias) |> ignore
61 |
62 | this.RegisterComponents() // register children so parameters() can find them
63 |
64 | if device.``type`` = DeviceType.CUDA then
65 | this.``to``(device) |> ignore
66 |
67 | override _.forward(input, offsets) = // input: flattened token ids; offsets: start index of each sample in the bag
68 | embedding.call(input, offsets) --> fc
69 |
70 | let train epoch (trainData:IEnumerable) (model:TextClassificationModel) (optimizer:torch.optim.Optimizer) = // one training epoch over pre-batched (labels, texts, offsets) tuples
71 |
72 | model.train()
73 |
74 | let mutable total_acc = 0.0 // running count of correct predictions
75 | let mutable total_count = 0L // running count of examples seen
76 | let mutable batch = 0
77 |
78 | let batch_count = trainData.Count()
79 |
80 | for labels,texts,offsets in trainData do
81 |
82 | use d = torch.NewDisposeScope() // dispose all tensors created in this iteration when the scope closes
83 |
84 | optimizer.zero_grad()
85 |
86 | let predicted_labels = model.forward(texts, offsets)
87 | let loss = criterion predicted_labels labels
88 |
89 | loss.backward()
90 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) |> ignore // gradient clipping, as in the PyTorch tutorial
91 | optimizer.step() |> ignore
92 |
93 | total_acc <- total_acc + float ((predicted_labels.argmax(1L).eq(labels)).sum().cpu().item())
94 | total_count <- total_count + labels.size(0)
95 |
96 | if (batch % logInterval = 0) && (batch > 0) then
97 | let accuracy = (total_acc / (float total_count)).ToString("0.00")
98 | printfn $"epoch: {epoch} | batch: {batch} / {batch_count} | accuracy: {accuracy}"
99 |
100 | batch <- batch + 1
101 |
102 | let evaluate (testData:IEnumerable) (model:TextClassificationModel) = // returns mean accuracy over all test examples
103 |
104 | model.eval()
105 |
106 | let mutable total_acc = 0.0
107 | let mutable total_count = 0L
108 |
109 | for labels,texts,offsets in testData do
110 |
111 | let predicted_labels = model.forward(texts, offsets)
112 | let loss = criterion predicted_labels labels // NOTE(review): 'loss' is computed but never used here
113 |
114 | total_acc <- total_acc + float ((predicted_labels.argmax(1L).eq(labels)).sum().cpu().item())
115 | total_count <- total_count + labels.size(0)
116 |
117 | total_acc / (float total_count)
118 |
119 | let run epochs = // entry point: builds the vocab, trains for 'epochs' epochs, then evaluates on the test split
120 |
121 | printfn $"Running TextClassification on {device.``type``.ToString()} for {epochs} epochs."
122 |
123 | use reader = TorchText.Data.AG_NEWSReader.AG_NEWS("train", device, datasetPath)
124 | let dataloader = reader.Enumerate()
125 |
126 | let tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english")
127 | let counter = new TorchText.Vocab.Counter()
128 |
129 | for label,text in dataloader do // first pass over the data builds the vocabulary
130 | counter.update(tokenizer.Invoke(text))
131 |
132 | let vocab = TorchText.Vocab.Vocab(counter)
133 |
134 | let model = new TextClassificationModel((int64 vocab.Count), emsize, 4L, device) // AG_NEWS has 4 classes
135 |
136 | let optimizer = torch.optim.SGD(model.parameters(), lr)
137 | let scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.2, last_epoch=5) // decay lr 5x each epoch
138 |
139 | let sw = Stopwatch()
140 |
141 | for epoch = 1 to epochs do
142 |
143 | sw.Restart()
144 |
145 | let batches = [| for b in reader.GetBatches(tokenizer, vocab, batch_size) -> b.ToTuple() |]
146 | train epoch batches model optimizer
147 |
148 | sw.Stop()
149 |
150 | let lrStr = optimizer.ParamGroups.First().LearningRate.ToString("0.0000")
151 | let tsStr = sw.Elapsed.TotalSeconds.ToString("0.0")
152 | printfn $"\nEnd of epoch: {epoch} | lr: {lrStr} | time: {tsStr}s\n"
153 | scheduler.step() |> ignore
154 |
155 | use test_reader = TorchText.Data.AG_NEWSReader.AG_NEWS("test", device, datasetPath)
156 |
157 | sw.Restart()
158 |
159 | let batches = [| for b in test_reader.GetBatches(tokenizer, vocab, eval_batch_size) -> b.ToTuple() |] // FIX: use the previously-unused eval_batch_size, matching the C# example
160 | let accuracy = evaluate batches model
161 |
162 | sw.Stop() // FIX: stop the clock before formatting/printing results
163 |
164 | let accStr = accuracy.ToString("0.00")
165 | let tsStr = sw.Elapsed.TotalSeconds.ToString("0.0")
166 | printfn $"\nEnd of training: test accuracy: {accStr} | eval time: {tsStr}s" // FIX: printfn for consistency with the rest of the module
167 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/MNIST.fs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | module FSharpExamples.MNIST
3 |
4 | open System
5 | open System.IO
6 | open System.Diagnostics
7 |
8 | open TorchSharp
9 | open type TorchSharp.torch
10 | open type TorchSharp.torch.nn
11 | open type TorchSharp.torch.optim
12 | open type TorchSharp.Scalar
13 |
14 | open TorchSharp.Examples
15 |
16 | // Simple MNIST Convolutional model.
17 | //
18 | // There are at least two interesting data sets to use with this example:
19 | //
20 | // 1. The classic MNIST set of 60000 images of handwritten digits.
21 | //
22 | // It is available at: http://yann.lecun.com/exdb/mnist/
23 | //
24 | // 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder
25 | // data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test
26 | // data.
27 | // It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion
28 | //
29 | // In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation'
30 | // constant below at the folder location.
31 |
32 | let mutable trainBatchSize = 64 // mutable: scaled up 4x in 'run' when CUDA is present
33 | let mutable testBatchSize = 128
34 |
35 | let logInterval = 100 // batches between progress reports
36 |
37 | let cmdArgs = Environment.GetCommandLineArgs()
38 | let dataset = if cmdArgs.Length = 2 then cmdArgs.[1] else "mnist" // optional arg selects e.g. fashion-mnist
39 |
40 | let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset)
41 |
42 | torch.random.manual_seed(1L) |> ignore // fixed seed for reproducible runs
43 |
44 | let hasCUDA = torch.cuda.is_available() // FIX: probe CUDA via the TorchSharp API; 'TorchText.Datasets' has no cuda_is_available member
45 |
46 | let device = if hasCUDA then torch.CUDA else torch.CPU
47 |
48 | let getDataFiles sourceDir targetDir = // decompress the four MNIST .gz files into targetDir; skipped entirely if targetDir already exists
49 |
50 | if not (Directory.Exists(targetDir)) then
51 | Directory.CreateDirectory(targetDir) |> ignore
52 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-images-idx3-ubyte.gz"), targetDir)
53 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-labels-idx1-ubyte.gz"), targetDir)
54 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-images-idx3-ubyte.gz"), targetDir)
55 | Utils.Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-labels-idx1-ubyte.gz"), targetDir)
56 |
57 | type Model(name,device:torch.Device) as this = // two-conv CNN over 28x28 single-channel images; emits log-probabilities over 10 classes
58 | inherit Module(name)
59 |
60 | let conv1 = Conv2d(1L, 32L, 3L)
61 | let conv2 = Conv2d(32L, 64L, 3L)
62 | let fc1 = Linear(9216L, 128L) // 9216 = 64 channels x 12 x 12 after the two 3x3 convs and 2x2 pooling
63 | let fc2 = Linear(128L, 10L)
64 |
65 | let pool1 = MaxPool2d(kernel_size=[|2L; 2L|])
66 |
67 | let relu = ReLU()
68 |
69 | let dropout1 = Dropout(0.25)
70 | let dropout2 = Dropout(0.5)
71 | let flatten = Flatten()
72 |
73 | let logsm = LogSoftmax(1L)
74 |
75 | do
76 | this.RegisterComponents() // register children so parameters() can find them
77 |
78 | if device.``type`` = DeviceType.CUDA then
79 | this.``to``(device) |> ignore
80 |
81 | override _.forward(input) =
82 | input
83 | --> conv1 --> relu --> conv2 --> relu --> pool1 --> dropout1
84 | --> flatten
85 | --> fc1 --> relu --> dropout2 --> fc2
86 | --> logsm
87 |
88 | let loss x y = functional.nll_loss(x,y,reduction=Reduction.Mean) // NLL over the model's log-softmax output
89 |
90 | let train (model:Module) (optimizer:Optimizer) (dataLoader: MNISTReader) epoch = // one training epoch over the batched MNIST reader
91 | model.train()
92 |
93 | let size = dataLoader.Size
94 | let batchSize = dataLoader.BatchSize
95 |
96 | let mutable batchID = 1
97 |
98 | printfn $"Epoch: {epoch}..."
99 |
100 | for (input,labels) in dataLoader do
101 |
102 | use d = torch.NewDisposeScope() // dispose all tensors created in this iteration when the scope closes
103 |
104 | optimizer.zero_grad()
105 |
106 | begin // This is introduced in order to let a few tensors go out of scope before GC
107 | use estimate = input --> model
108 | use output = loss estimate labels
109 |
110 | output.backward()
111 | optimizer.step() |> ignore
112 |
113 | if batchID % logInterval = 0 then
114 | printfn $"\rTrain: epoch {epoch} [{batchID * batchSize} / {size}] Loss: {output.ToSingle():F4}"
115 |
116 | batchID <- batchID + 1
117 | end
118 |
119 | let test (model:Model) (dataLoader:MNISTReader) = // evaluates the model over the test reader, printing loss and accuracy
120 | model.eval()
121 |
122 | let sz = single dataLoader.Size
123 |
124 | let mutable testLoss = 0.0f
125 | let mutable correct = 0
126 |
127 | for (input,labels) in dataLoader do
128 |
129 | use d = torch.NewDisposeScope()
130 |
131 | begin // This is introduced in order to let a few tensors go out of scope before GC
132 | use estimate = input --> model
133 | use output = loss estimate labels
134 | testLoss <- testLoss + output.ToSingle() // accumulates per-batch mean losses
135 |
136 | let pred = estimate.argmax(1L)
137 | correct <- correct + pred.eq(labels).sum().ToInt32()
138 | end
139 |
140 | printfn $"Size: {sz}, Total: {sz}" // NOTE(review): prints the same value twice; 'Total' was presumably meant to be a separately tracked count -- confirm intent
141 | printfn $"\rTest set: Average loss {(testLoss / sz):F4} | Accuracy {(single correct / sz):P2}"
142 |
143 | let trainingLoop (model:Model) epochs dataset trainData testData = // trains for 'epochs' epochs (4x on CUDA), evaluating after each, then saves the weights
144 |
145 | let epochs = if device.``type`` = DeviceType.CUDA then epochs * 4 else epochs
146 |
147 | let optimizer = Adam(model.parameters())
148 | lr_scheduler.StepLR(optimizer, 1, 0.7, last_epoch=5) |> ignore // NOTE(review): the scheduler is discarded and never stepped, so the lr never decays -- confirm whether intended
149 |
150 | let sw = Stopwatch()
151 | sw.Start()
152 |
153 | for epoch = 1 to epochs do
154 | train model optimizer trainData epoch
155 | test model testData
156 |
157 | sw.Stop()
158 |
159 | printfn $"Elapsed time: {sw.Elapsed.TotalSeconds:F1} s."
160 | printfn $"Saving model to '{dataset}'.model.bin"
161 |
162 | model.save(dataset + ".model.bin") |> ignore
163 |
164 | let run epochs = // entry point: prepares the data files and readers, then runs the training loop
165 | printfn $"Running MNIST on {device.``type``.ToString()}"
166 | printfn $"Dataset: {dataset}"
167 |
168 | let targetDir = Path.Combine(datasetPath, "test_data")
169 |
170 | getDataFiles datasetPath targetDir
171 |
172 | if device.``type`` = DeviceType.CUDA then // larger batches are worthwhile on GPU
173 | trainBatchSize <- trainBatchSize * 4
174 | testBatchSize <- testBatchSize * 4
175 |
176 | let normImage = torchvision.transforms.Normalize( [|0.1307|], [|0.3081|], device=device) // standard MNIST mean/stddev normalization
177 | use train = new MNISTReader(targetDir, "train", trainBatchSize, device=device, shuffle=true, transform=normImage)
178 | use test = new MNISTReader(targetDir, "t10k", testBatchSize, device=device, transform=normImage)
179 |
180 | let model = new Model("model", device)
181 |
182 | trainingLoop model epochs dataset train test
183 |
--------------------------------------------------------------------------------
/src/Utils/MNISTReader.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections;
4 | using System.Collections.Generic;
5 | using System.IO;
6 | using System.Linq;
7 | using static TorchSharp.torchvision;
8 | using static TorchSharp.torch;
9 |
10 | namespace TorchSharp.Examples
11 | {
12 | ///
13 | /// Data reader utility for datasets that follow the MNIST data set's layout:
14 | ///
15 | /// A number of single-channel (grayscale) images are laid out in a flat file with four 32-bit integers at the head.
16 | /// The format is documented at the bottom of the page at: http://yann.lecun.com/exdb/mnist/
17 | ///
18 | public sealed class MNISTReader : IEnumerable<(Tensor, Tensor)>, IDisposable
19 | {
20 | ///
21 | /// Constructor
22 | ///
23 | /// Path to the folder containing the image files.
24 | /// The file name prefix, either 'train' or 't10k' (the latter being the test data set).
25 | /// The batch size
26 | /// Randomly shuffle the images.
27 | /// The device, i.e. CPU or GPU to place the output tensors on.
28 | ///
29 | public MNISTReader(string path, string prefix, int batch_size = 32, bool shuffle = false, torch.Device device = null, ITransform transform = null)
30 | {
31 | // The MNIST data set is small enough to fit in memory, so let's load it there.
32 |
33 | BatchSize = batch_size;
34 |
35 | var dataPath = Path.Combine(path, prefix + "-images-idx3-ubyte");
36 | var labelPath = Path.Combine(path, prefix + "-labels-idx1-ubyte");
37 |
38 | var count = -1;
39 | var height = 0;
40 | var width = 0;
41 |
42 | byte[] dataBytes = null;
43 | byte[] labelBytes = null;
44 |
45 | using (var file = File.Open(dataPath, FileMode.Open, FileAccess.Read, FileShare.Read))
46 | using (var rdr = new System.IO.BinaryReader(file)) {
47 |
48 | var reader = new Utils.BigEndianReader(rdr); // MNIST headers are big-endian 32-bit ints
49 | var x = reader.ReadInt32(); // Magic number
50 | count = reader.ReadInt32();
51 |
52 | height = reader.ReadInt32();
53 | width = reader.ReadInt32();
54 |
55 | // Read all the data into memory.
56 | dataBytes = reader.ReadBytes(height * width * count);
57 | }
58 |
59 | using (var file = File.Open(labelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
60 | using (var rdr = new System.IO.BinaryReader(file)) {
61 |
62 | var reader = new Utils.BigEndianReader(rdr);
63 | var x = reader.ReadInt32(); // Magic number
64 | var lblcnt = reader.ReadInt32();
65 |
66 | if (lblcnt != count) throw new InvalidDataException("Image data and label counts are different.");
67 |
68 | // Read all the data into memory.
69 | labelBytes = reader.ReadBytes(lblcnt); // one label byte per image
70 | }
71 |
72 | // Set up the indices array.
73 | Random rnd = new Random();
74 | var indices = !shuffle ?
75 | Enumerable.Range(0, count).ToArray() :
76 | Enumerable.Range(0, count).OrderBy(c => rnd.Next()).ToArray(); // shuffle by ordering on random keys
77 |
78 | var imgSize = height * width;
79 |
80 | // Go through the data and create tensors
81 | for (var i = 0; i < count;) {
82 |
83 | var take = Math.Min(batch_size, Math.Max(0, count - i)); // final batch may be short
84 |
85 | if (take < 1) break;
86 |
87 | var dataTensor = torch.zeros(new long[] { take, imgSize}, device: device);
88 | var lablTensor = torch.zeros(new long[] { take }, torch.int64, device: device);
89 |
90 | // Take
91 | for (var j = 0; j < take; j++) {
92 | var idx = indices[i++];
93 | var imgStart = idx * imgSize;
94 |
95 | var floats = dataBytes[imgStart.. (imgStart+imgSize)].Select(b => b/256.0f).ToArray(); // scale raw bytes into [0, 1)
96 | using (var inputTensor = torch.tensor(floats))
97 | dataTensor.index_put_(inputTensor, TensorIndex.Single(j));
98 | lablTensor[j] = torch.tensor(labelBytes[idx], torch.int64);
99 | }
100 |
101 | var batch = dataTensor.reshape(take, 1, height, width); // single-channel [N, 1, H, W]
102 |
103 | if (transform != null) {
104 | // Carefully dispose the original
105 | using(var batch_copy = batch)
106 | batch = transform.call(batch);
107 | }
108 |
109 | data.Add(batch);
110 | dataTensor.Dispose(); // NOTE(review): assumes 'batch' owns its storage independently of this handle -- confirm against TorchSharp semantics
111 | labels.Add(lablTensor);
112 | }
113 |
114 | Size = count;
115 | }
116 |
117 | public int Size { get; set; } // NOTE(review): only assigned in the constructor; a 'private set' would better express the contract
118 |
119 | public int BatchSize { get; private set; }
120 |
121 | private List data = new List();
122 | private List labels = new List();
123 |
124 | public IEnumerator<(Tensor, Tensor)> GetEnumerator()
125 | {
126 | return new MNISTEnumerator(data, labels);
127 | }
128 |
129 | IEnumerator IEnumerable.GetEnumerator()
130 | {
131 | return GetEnumerator();
132 | }
133 |
134 | public void Dispose()
135 | {
136 | data.ForEach(d => d.Dispose());
137 | labels.ForEach(d => d.Dispose());
138 | }
139 |
140 | private class MNISTEnumerator : IEnumerator<(Tensor, Tensor)> // simple index-based cursor over the pre-built batches
141 | {
142 | public MNISTEnumerator(List data, List labels)
143 | {
144 | this.data = data;
145 | this.labels = labels;
146 | }
147 |
148 | public (Tensor, Tensor) Current {
149 | get {
150 | if (curIdx == -1) throw new InvalidOperationException("Calling 'Current' before 'MoveNext()'");
151 | return (data[curIdx], labels[curIdx]);
152 | }
153 | }
154 |
155 | object IEnumerator.Current => Current;
156 |
157 | public void Dispose()
158 | {
159 | }
160 |
161 | public bool MoveNext()
162 | {
163 | curIdx += 1;
164 | return curIdx < data.Count;
165 | }
166 |
167 | public void Reset()
168 | {
169 | curIdx = -1;
170 | }
171 |
172 | private int curIdx = -1;
173 | private List data = null;
174 | private List labels = null;
175 | }
176 | }
177 | }
178 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/TextClassification.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Collections.Generic;
6 | using System.Diagnostics;
7 |
8 | using TorchSharp;
9 |
10 | using TorchSharp.Examples;
11 | using TorchSharp.Examples.Utils;
12 |
13 | using static TorchSharp.torch;
14 |
15 | using static TorchSharp.torch.nn;
16 | using static TorchSharp.torch.nn.functional;
17 |
18 | namespace CSharpExamples
19 | {
20 | ///
21 | /// This example is based on the PyTorch tutorial at:
22 | ///
23 | /// https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html
24 | ///
25 | /// It relies on the AG_NEWS dataset, which can be downloaded in CSV form at:
26 | ///
27 | /// https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv
28 | ///
29 | /// Download the two files, and place them in a folder called "AG_NEWS" in
30 | /// accordance with the file path below (Windows only).
31 | ///
32 | ///
33 | public class TextClassification
34 | {
35 | private const long emsize = 200; // embedding dimension
36 |
37 | private const long batch_size = 128; // training batch size
38 | private const long eval_batch_size = 128; // evaluation batch size
39 |
40 | private const int epochs = 15;
41 |
42 | // This path assumes that you're running this on Windows.
43 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS");
44 |
45 | internal static void Run(int epochs, int timeout, string logdir) // entry point: builds the vocab, trains with a timeout, then evaluates on the test split
46 | {
47 | torch.random.manual_seed(1); // fixed seed for reproducible runs
48 |
49 | var cwd = Environment.CurrentDirectory;
50 |
51 | var device =
52 | torch.cuda.is_available() ? torch.CUDA :
53 | torch.mps_is_available() ? torch.MPS :
54 | torch.CPU;
55 |
56 | Console.WriteLine();
57 | Console.WriteLine($"\tRunning TextClassification on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}.");
58 | Console.WriteLine();
59 |
60 | Console.WriteLine($"\tPreparing training and test data...");
61 |
62 | using (var reader = TorchText.Data.AG_NEWSReader.AG_NEWS("train", (Device)device, _dataLocation))
63 | {
64 |
65 | var dataloader = reader.Enumerate();
66 |
67 | var tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english");
68 |
69 | var counter = new TorchText.Vocab.Counter();
70 | foreach (var (label, text) in dataloader) // first pass over the data builds the vocabulary
71 | {
72 | counter.update(tokenizer(text));
73 | }
74 |
75 | var vocab = new TorchText.Vocab.Vocab(counter);
76 |
77 |
78 | Console.WriteLine($"\tCreating the model...");
79 | Console.WriteLine();
80 |
81 | var model = new TextClassificationModel(vocab.Count, emsize, 4).to((Device)device); // AG_NEWS has 4 classes
82 |
83 | var loss = CrossEntropyLoss();
84 | var lr = 5.0;
85 | var optimizer = torch.optim.SGD(model.parameters(), lr);
86 | var scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.2, last_epoch: 5); // decay lr 5x each epoch
87 |
88 | var totalTime = new Stopwatch();
89 | totalTime.Start();
90 |
91 | foreach (var epoch in Enumerable.Range(1, epochs))
92 | {
93 |
94 | var sw = new Stopwatch();
95 | sw.Start();
96 |
97 | train(epoch, reader.GetBatches(tokenizer, vocab, batch_size), model, loss, optimizer);
98 |
99 | sw.Stop();
100 |
101 | Console.WriteLine($"\nEnd of epoch: {epoch} | lr: {optimizer.ParamGroups.First().LearningRate:0.0000} | time: {sw.Elapsed.TotalSeconds:0.0}s\n");
102 | scheduler.step();
103 |
104 | if (totalTime.Elapsed.TotalSeconds > timeout) break; // honor the overall time budget
105 | }
106 |
107 | totalTime.Stop();
108 |
109 | using (var test_reader = TorchText.Data.AG_NEWSReader.AG_NEWS("test", (Device)device, _dataLocation))
110 | {
111 |
112 | var sw = new Stopwatch();
113 | sw.Start();
114 |
115 | var accuracy = evaluate(test_reader.GetBatches(tokenizer, vocab, eval_batch_size), model, loss);
116 |
117 | sw.Stop();
118 |
119 | Console.WriteLine($"\nEnd of training: test accuracy: {accuracy:0.00} | eval time: {sw.Elapsed.TotalSeconds:0.0}s\n");
120 | scheduler.step(); // NOTE(review): stepping the scheduler after evaluation affects nothing; likely copy-paste from the training loop
121 | }
122 | }
123 |
124 | }
125 |
126 | static void train(int epoch, IEnumerable<(Tensor, Tensor, Tensor)> train_data, TextClassificationModel model, Loss criterion, torch.optim.Optimizer optimizer) // one training epoch over (labels, texts, offsets) batches
127 | {
128 | model.train();
129 |
130 | double total_acc = 0.0; // running count of correct predictions
131 | long total_count = 0; // running count of examples seen
132 | long log_interval = 250;
133 |
134 | var batch = 0;
135 |
136 | var batch_count = train_data.Count(); // NOTE(review): Count() enumerates the sequence; if GetBatches is lazy this does the batching work an extra time
137 |
138 | using (var d = torch.NewDisposeScope())
139 | {
140 | foreach (var (labels, texts, offsets) in train_data)
141 | {
142 |
143 | optimizer.zero_grad();
144 |
145 | using (var predicted_labels = model.forward(texts, offsets))
146 | {
147 |
148 | var loss = criterion.forward(predicted_labels, labels);
149 | loss.backward();
150 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5); // gradient clipping, as in the PyTorch tutorial
151 | optimizer.step();
152 |
153 | total_acc += (predicted_labels.argmax(1) == labels).sum().to(torch.CPU).item();
154 | total_count += labels.size(0);
155 | }
156 |
157 | if (batch % log_interval == 0 && batch > 0)
158 | {
159 | var accuracy = total_acc / total_count;
160 | Console.WriteLine($"epoch: {epoch} | batch: {batch} / {batch_count} | accuracy: {accuracy:0.00}");
161 | }
162 | batch += 1;
163 | }
164 | }
165 | }
166 |
167 | static double evaluate(IEnumerable<(Tensor, Tensor, Tensor)> test_data, TextClassificationModel model, Loss criterion) // returns mean accuracy over all test examples
168 | {
169 | model.eval();
170 |
171 | double total_acc = 0.0;
172 | long total_count = 0;
173 |
174 | using (var d = torch.NewDisposeScope())
175 | {
176 | foreach (var (labels, texts, offsets) in test_data)
177 | {
178 |
179 | using (var predicted_labels = model.forward(texts, offsets))
180 | {
181 | var loss = criterion.forward(predicted_labels, labels); // NOTE(review): 'loss' is computed but never used here
182 |
183 | total_acc += (predicted_labels.argmax(1) == labels).sum().to(torch.CPU).item();
184 | total_count += labels.size(0);
185 | }
186 | }
187 |
188 | return total_acc / total_count;
189 | }
190 | }
191 | }
192 | }
193 |
--------------------------------------------------------------------------------
/tutorials/CSharp/tutorial1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Setting Things Up\n",
9 | "\n",
10 | "To use TorchSharp, you need some packages from NuGet.\n",
11 | "\n",
12 | "First and foremost, you need to download the most recent version of the `TorchSharp` package at [https://www.nuget.org/packages/TorchSharp/](https://www.nuget.org/packages/TorchSharp/). That's the .NET bindings to libtorch, and it contains the .NET API. However, you also need one of several packages containing distributions of libtorch itself, the highly capable native-code engine behind PyTorch.\n",
13 | "\n",
14 | "Starting with version 0.93.4, there are bundled versions of these packages, which is what these tutorials rely on.\n",
15 | "\n",
16 | "The basic backend supports training and inference on CPUs, but there is also support for CUDA on Windows and Linux, for use on machines with compatible hardware. Using CUDA for training can speed things up by orders of magnitude, so it's important to use the right backend.\n",
17 | "\n",
18 | "These are the various libtorch packages:\n",
19 | "\n",
20 | "|Name|URL|Description|\n",
21 | "|-----|-----------------|--------------|\n",
22 | "|TorchSharp-cpu|https://www.nuget.org/packages/TorchSharp-cpu/|TorchSharp with only the CPU backend, which works on Windows, Linux, and MacOS|\n",
23 | "|TorchSharp-cuda-windows|https://www.nuget.org/packages/TorchSharp-cuda-windows/|A CUDA backend with only Windows binaries|\n",
24 | "|TorchSharp-cuda-linux|https://www.nuget.org/packages/TorchSharp-cuda-linux/|A CUDA backend with only Linux binaries|"
25 | ]
26 | },
27 | {
28 | "attachments": {},
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "All the tutorial notebooks (with the exception of the one that covers CUDA) will rely on the CPU package, since that takes up the least amount of disk space and works everywhere. If you have access to a CUDA processor, replace the package name with the applicable Windows or Linux package from NuGet (TorchSharp-cuda-windows and TorchSharp-cuda-linux, respectively)."
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "dotnet_interactive": {
40 | "language": "csharp"
41 | },
42 | "vscode": {
43 | "languageId": "polyglot-notebook"
44 | }
45 | },
46 | "outputs": [],
47 | "source": [
48 | "#r \"nuget: TorchSharp-cpu\"\n"
49 | ]
50 | },
51 | {
52 | "attachments": {},
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "# Usings\n",
57 | "\n",
58 |     "Once you have the right NuGet packages, the next thing is to get the right usings directives at the top of your source files. TorchSharp consists of a lot of namespaces and static classes, and to make programming TorchSharp convenient, you usually need to include several of them."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "dotnet_interactive": {
66 | "language": "csharp"
67 | },
68 | "vscode": {
69 | "languageId": "polyglot-notebook"
70 | }
71 | },
72 | "outputs": [],
73 | "source": [
74 | "using TorchSharp;\n",
75 | "\n",
76 | "using static TorchSharp.torch.nn;\n",
77 | "using static TorchSharp.torch.nn.functional;\n",
78 | "using static TorchSharp.TensorExtensionMethods;"
79 | ]
80 | },
81 | {
82 | "attachments": {},
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 |     "However, for these tutorials, it would obscure the API to have too many usings. It's better, for pedagogical reasons, to explicitly qualify names until their scope becomes well known. So, the tutorials will generally use a minimal set of usings."
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "# Formatting\n",
94 | "\n",
95 | ".NET Interactive will, by default, display the fields and properties of the objects that are found at the end of each cell. \n",
96 | "\n",
97 | "Before v0.100.3, you had to add the following code to the top of each notebook:"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "dotnet_interactive": {
105 | "language": "csharp"
106 | },
107 | "vscode": {
108 | "languageId": "polyglot-notebook"
109 | }
110 | },
111 | "outputs": [],
112 | "source": [
113 | "using Microsoft.DotNet.Interactive.Formatting;\n",
114 | "\n",
115 | "Formatter.SetPreferredMimeTypesFor(typeof(torch.Tensor), \"text/plain\");\n",
116 | "Formatter.Register((torch.Tensor x) => x.ToString(TorchSharp.TensorStringStyle.Default, fltFormat:\"G7\"));"
117 | ]
118 | },
119 | {
120 | "attachments": {},
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "But that is no longer necessary. You can still do so if you want to keep code as it is, but TorchSharp is now better integrated with .NET Interactive and will do it automatically."
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 |     "Note that there are now three styles that may be used to format tensor output: C#-style, NumPy-style and Julia-style. The default is 'Julia,' but if you continue to use the top-of-notebook formatting, whatever you set it to in the cell at the top will be used to format tensors automatically."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "dotnet_interactive": {
139 | "language": "csharp"
140 | },
141 | "vscode": {
142 | "languageId": "polyglot-notebook"
143 | }
144 | },
145 | "outputs": [],
146 | "source": [
147 | "torch.ones(2,3,3)"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | " The 'Default' style means that the actual format is picked up at runtime from the global style, which is 'Julia' unless you set it to 'NumPy' or 'CSharp': "
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "dotnet_interactive": {
162 | "language": "csharp"
163 | },
164 | "vscode": {
165 | "languageId": "polyglot-notebook"
166 | }
167 | },
168 | "outputs": [],
169 | "source": [
170 | "torch.TensorStringStyle = torch.numpy;\n",
171 | "\n",
172 | "torch.ones(2,3,3)"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {
179 | "vscode": {
180 | "languageId": "polyglot-notebook"
181 | }
182 | },
183 | "outputs": [],
184 | "source": [
185 | "torch.TensorStringStyle = torch.csharp;\n",
186 | "\n",
187 | "torch.rand(2,3,3)"
188 | ]
189 | },
190 | {
191 | "attachments": {},
192 | "cell_type": "markdown",
193 | "metadata": {},
194 | "source": [
195 |     "For a more in-depth description of tensor formatting, see: [TorchSharp Wiki - Tensor String Formatting](https://github.com/dotnet/TorchSharp/wiki/Tensor-String-Formatting)"
196 | ]
197 | },
198 | {
199 | "attachments": {},
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": []
203 | }
204 | ],
205 | "metadata": {
206 | "kernelspec": {
207 | "display_name": ".NET (C#)",
208 | "language": "C#",
209 | "name": ".net-csharp"
210 | },
211 | "language_info": {
212 | "name": "C#"
213 | }
214 | },
215 | "nbformat": 4,
216 | "nbformat_minor": 2
217 | }
218 |
--------------------------------------------------------------------------------
/tutorials/FSharp/tutorial1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Setting Things Up\n",
9 | "\n",
10 | "To use TorchSharp, you need some packages from NuGet.\n",
11 | "\n",
12 | "First and foremost, you need to download the most recent version of the `TorchSharp` package at [https://www.nuget.org/packages/TorchSharp/](https://www.nuget.org/packages/TorchSharp/). That's the .NET bindings to libtorch, and it contains the .NET API. However, you also need one of several packages containing distributions of libtorch itself, the highly capable native-code engine behind PyTorch.\n",
13 | "\n",
14 | "Starting with version 0.93.4, there are bundled versions of these packages, which is what these tutorials rely on.\n",
15 | "\n",
16 | "The basic backend supports training and inference on CPUs, but there is also support for CUDA on Windows and Linux, for use on machines with compatible hardware. Using CUDA for training can speed things up by orders of magnitude, so it's important to use the right backend.\n",
17 | "\n",
18 | "These are the various libtorch packages:\n",
19 | "\n",
20 | "|Name|URL|Description|\n",
21 | "|-----|-----------------|--------------|\n",
22 | "|TorchSharp-cpu|https://www.nuget.org/packages/TorchSharp-cpu/|TorchSharp with only the CPU backend, which works on Windows, Linux, and MacOS|\n",
23 | "|TorchSharp-cuda-windows|https://www.nuget.org/packages/TorchSharp-cuda-windows/|A CUDA backend with only Windows binaries|\n",
24 | "|TorchSharp-cuda-linux|https://www.nuget.org/packages/TorchSharp-cuda-linux/|A CUDA backend with only Linux binaries|"
25 | ]
26 | },
27 | {
28 | "attachments": {},
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "All the tutorial notebooks (with the exception of the one that covers CUDA) will rely on the CPU package, since that takes up the least amount of disk space and works everywhere. If you have access to a CUDA processor, replace the package name with the applicable Windows or Linux package."
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "dotnet_interactive": {
40 | "language": "fsharp"
41 | },
42 | "vscode": {
43 | "languageId": "polyglot-notebook"
44 | }
45 | },
46 | "outputs": [],
47 | "source": [
48 | "#r \"nuget:TorchSharp-cpu\""
49 | ]
50 | },
51 | {
52 | "attachments": {},
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "# Opening\n",
57 | "\n",
 58 |     "Once you have the right NuGet packages, the next thing is to get the right open directives at the top of your source files. TorchSharp consists of a lot of namespaces and static classes, and to make programming TorchSharp convenient, you usually need to include several of them."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "dotnet_interactive": {
66 | "language": "fsharp"
67 | },
68 | "vscode": {
69 | "languageId": "polyglot-notebook"
70 | }
71 | },
72 | "outputs": [],
73 | "source": [
74 | "open TorchSharp\n",
75 | "\n",
76 | "open type TorchSharp.torch\n",
77 | "open type TorchSharp.torch.nn\n",
78 | "open type TorchSharp.torch.nn.functional\n",
79 | "open type TorchSharp.TensorExtensionMethods"
80 | ]
81 | },
82 | {
83 | "attachments": {},
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
 87 |     "However, for these tutorials, it would obscure the API to have too many `open` directives. It's better, for pedagogical reasons, to explicitly qualify names until their scope becomes well known. So, the tutorials will generally use a minimal set of `open` directives."
88 | ]
89 | },
90 | {
91 | "attachments": {},
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "# Formatting\n",
96 | "\n",
97 | ".NET Interactive will, by default, display the fields and properties of the objects that are found at the end of each cell. \n",
98 | "\n",
99 | "Before v0.100.3, you had to add the following code to the top of each notebook:"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {
106 | "dotnet_interactive": {
107 | "language": "fsharp"
108 | },
109 | "vscode": {
110 | "languageId": "polyglot-notebook"
111 | }
112 | },
113 | "outputs": [],
114 | "source": [
115 | "open Microsoft.DotNet.Interactive.Formatting\n",
116 | "\n",
117 |     "Formatter.SetPreferredMimeTypesFor(typeof<torch.Tensor>, \"text/plain\")\n",
118 | "Formatter.Register(fun (x:torch.Tensor) -> x.ToString(TorchSharp.TensorStringStyle.Default))"
119 | ]
120 | },
121 | {
122 | "attachments": {},
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "But that is no longer necessary. You can still do so if you want to keep code as it is, but TorchSharp is now better integrated with .NET Interactive and will do it automatically."
127 | ]
128 | },
129 | {
130 | "attachments": {},
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 |     "Note that there are now three styles that may be used to format tensor output: C#-style, NumPy-style and Julia-style. The default is 'Julia,' but if you continue to use the top-of-notebook formatting, whatever you set it to in the cell at the top will be used to format tensors automatically."
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "dotnet_interactive": {
142 | "language": "fsharp"
143 | },
144 | "vscode": {
145 | "languageId": "polyglot-notebook"
146 | }
147 | },
148 | "outputs": [],
149 | "source": [
150 | "torch.ones(2,3,3)"
151 | ]
152 | },
153 | {
154 | "attachments": {},
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 |     " The 'Default' style means that the actual format is picked up at runtime from the global style, which is 'Julia' unless you set it to 'NumPy' or 'CSharp' (sorry, no F# styling yet):"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {
165 | "dotnet_interactive": {
166 | "language": "fsharp"
167 | },
168 | "vscode": {
169 | "languageId": "polyglot-notebook"
170 | }
171 | },
172 | "outputs": [],
173 | "source": [
174 | "torch.TensorStringStyle <- torch.numpy;\n",
175 | "\n",
176 | "torch.ones(2,3,3)"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "metadata": {
183 | "dotnet_interactive": {
184 | "language": "fsharp"
185 | },
186 | "vscode": {
187 | "languageId": "polyglot-notebook"
188 | }
189 | },
190 | "outputs": [],
191 | "source": [
192 | "torch.TensorStringStyle <- torch.csharp;\n",
193 | "\n",
194 | "torch.rand(2,3,3)"
195 | ]
196 | },
197 | {
198 | "attachments": {},
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 |     "For a more in-depth description of tensor formatting, see: [TorchSharp Wiki - Tensor String Formatting](https://github.com/dotnet/TorchSharp/wiki/Tensor-String-Formatting)"
203 | ]
204 | }
205 | ],
206 | "metadata": {
207 | "kernelspec": {
208 | "display_name": ".NET (C#)",
209 | "language": "C#",
210 | "name": ".net-csharp"
211 | },
212 | "language_info": {
213 | "file_extension": ".cs",
214 | "mimetype": "text/x-csharp",
215 | "name": "C#",
216 | "pygments_lexer": "csharp",
217 | "version": "9.0"
218 | },
219 | "orig_nbformat": 4
220 | },
221 | "nbformat": 4,
222 | "nbformat_minor": 2
223 | }
224 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # TorchSharp serialized model files from tutorials
14 | *.model.bin
15 |
16 | # User-specific files (MonoDevelop/Xamarin Studio)
17 | *.userprefs
18 |
19 | # Mono auto generated files
20 | mono_crash.*
21 |
22 | # Build results
23 | [Dd]ebug/
24 | [Dd]ebugPublic/
25 | [Rr]elease/
26 | [Rr]eleases/
27 | x64/
28 | x86/
29 | [Aa][Rr][Mm]/
30 | [Aa][Rr][Mm]64/
31 | bld/
32 | [Bb]in/
33 | [Oo]bj/
34 | [Ll]og/
35 | [Ll]ogs/
36 |
37 | # Visual Studio 2015/2017 cache/options directory
38 | .vs/
39 | # Uncomment if you have tasks that create the project's static files in wwwroot
40 | #wwwroot/
41 |
42 | # Visual Studio 2017 auto generated files
43 | Generated\ Files/
44 |
45 | # MSTest test Results
46 | [Tt]est[Rr]esult*/
47 | [Bb]uild[Ll]og.*
48 |
49 | # NUnit
50 | *.VisualState.xml
51 | TestResult.xml
52 | nunit-*.xml
53 |
54 | # Build Results of an ATL Project
55 | [Dd]ebugPS/
56 | [Rr]eleasePS/
57 | dlldata.c
58 |
59 | # Benchmark Results
60 | BenchmarkDotNet.Artifacts/
61 |
62 | # .NET Core
63 | project.lock.json
64 | project.fragment.lock.json
65 | artifacts/
66 |
67 | # StyleCop
68 | StyleCopReport.xml
69 |
70 | # Files built by Visual Studio
71 | *_i.c
72 | *_p.c
73 | *_h.h
74 | *.ilk
75 | *.meta
76 | *.obj
77 | *.iobj
78 | *.pch
79 | *.pdb
80 | *.ipdb
81 | *.pgc
82 | *.pgd
83 | *.rsp
84 | *.sbr
85 | *.tlb
86 | *.tli
87 | *.tlh
88 | *.tmp
89 | *.tmp_proj
90 | *_wpftmp.csproj
91 | *.log
92 | *.vspscc
93 | *.vssscc
94 | .builds
95 | *.pidb
96 | *.svclog
97 | *.scc
98 |
99 | # Chutzpah Test files
100 | _Chutzpah*
101 |
102 | # Visual C++ cache files
103 | ipch/
104 | *.aps
105 | *.ncb
106 | *.opendb
107 | *.opensdf
108 | *.sdf
109 | *.cachefile
110 | *.VC.db
111 | *.VC.VC.opendb
112 |
113 | # Visual Studio profiler
114 | *.psess
115 | *.vsp
116 | *.vspx
117 | *.sap
118 |
119 | # Visual Studio Trace Files
120 | *.e2e
121 |
122 | # TFS 2012 Local Workspace
123 | $tf/
124 |
125 | # Guidance Automation Toolkit
126 | *.gpState
127 |
128 | # ReSharper is a .NET coding add-in
129 | _ReSharper*/
130 | *.[Rr]e[Ss]harper
131 | *.DotSettings.user
132 |
133 | # TeamCity is a build add-in
134 | _TeamCity*
135 |
136 | # DotCover is a Code Coverage Tool
137 | *.dotCover
138 |
139 | # AxoCover is a Code Coverage Tool
140 | .axoCover/*
141 | !.axoCover/settings.json
142 |
143 | # Visual Studio code coverage results
144 | *.coverage
145 | *.coveragexml
146 |
147 | # NCrunch
148 | _NCrunch_*
149 | .*crunch*.local.xml
150 | nCrunchTemp_*
151 |
152 | # MightyMoose
153 | *.mm.*
154 | AutoTest.Net/
155 |
156 | # Web workbench (sass)
157 | .sass-cache/
158 |
159 | # Installshield output folder
160 | [Ee]xpress/
161 |
162 | # DocProject is a documentation generator add-in
163 | DocProject/buildhelp/
164 | DocProject/Help/*.HxT
165 | DocProject/Help/*.HxC
166 | DocProject/Help/*.hhc
167 | DocProject/Help/*.hhk
168 | DocProject/Help/*.hhp
169 | DocProject/Help/Html2
170 | DocProject/Help/html
171 |
172 | # Click-Once directory
173 | publish/
174 |
175 | # Publish Web Output
176 | *.[Pp]ublish.xml
177 | *.azurePubxml
178 | # Note: Comment the next line if you want to checkin your web deploy settings,
179 | # but database connection strings (with potential passwords) will be unencrypted
180 | *.pubxml
181 | *.publishproj
182 |
183 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
184 | # checkin your Azure Web App publish settings, but sensitive information contained
185 | # in these scripts will be unencrypted
186 | PublishScripts/
187 |
188 | # NuGet Packages
189 | *.nupkg
190 | # NuGet Symbol Packages
191 | *.snupkg
192 | # The packages folder can be ignored because of Package Restore
193 | **/[Pp]ackages/*
194 | # except build/, which is used as an MSBuild target.
195 | !**/[Pp]ackages/build/
196 | # Uncomment if necessary however generally it will be regenerated when needed
197 | #!**/[Pp]ackages/repositories.config
198 | # NuGet v3's project.json files produces more ignorable files
199 | *.nuget.props
200 | *.nuget.targets
201 |
202 | # Microsoft Azure Build Output
203 | csx/
204 | *.build.csdef
205 |
206 | # Microsoft Azure Emulator
207 | ecf/
208 | rcf/
209 |
210 | # Windows Store app package directories and files
211 | AppPackages/
212 | BundleArtifacts/
213 | Package.StoreAssociation.xml
214 | _pkginfo.txt
215 | *.appx
216 | *.appxbundle
217 | *.appxupload
218 |
219 | # Visual Studio cache files
220 | # files ending in .cache can be ignored
221 | *.[Cc]ache
222 | # but keep track of directories ending in .cache
223 | !?*.[Cc]ache/
224 |
225 | # Others
226 | ClientBin/
227 | ~$*
228 | *~
229 | *.dbmdl
230 | *.dbproj.schemaview
231 | *.jfm
232 | *.pfx
233 | *.publishsettings
234 | orleans.codegen.cs
235 |
236 | # Including strong name files can present a security risk
237 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
238 | #*.snk
239 |
240 | # Since there are multiple workflows, uncomment next line to ignore bower_components
241 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
242 | #bower_components/
243 |
244 | # RIA/Silverlight projects
245 | Generated_Code/
246 |
247 | # Backup & report files from converting an old project file
248 | # to a newer Visual Studio version. Backup files are not needed,
249 | # because we have git ;-)
250 | _UpgradeReport_Files/
251 | Backup*/
252 | UpgradeLog*.XML
253 | UpgradeLog*.htm
254 | ServiceFabricBackup/
255 | *.rptproj.bak
256 |
257 | # SQL Server files
258 | *.mdf
259 | *.ldf
260 | *.ndf
261 |
262 | # Business Intelligence projects
263 | *.rdl.data
264 | *.bim.layout
265 | *.bim_*.settings
266 | *.rptproj.rsuser
267 | *- [Bb]ackup.rdl
268 | *- [Bb]ackup ([0-9]).rdl
269 | *- [Bb]ackup ([0-9][0-9]).rdl
270 |
271 | # Microsoft Fakes
272 | FakesAssemblies/
273 |
274 | # GhostDoc plugin setting file
275 | *.GhostDoc.xml
276 |
277 | # Node.js Tools for Visual Studio
278 | .ntvs_analysis.dat
279 | node_modules/
280 |
281 | # Visual Studio 6 build log
282 | *.plg
283 |
284 | # Visual Studio 6 workspace options file
285 | *.opt
286 |
287 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
288 | *.vbw
289 |
290 | # Visual Studio LightSwitch build output
291 | **/*.HTMLClient/GeneratedArtifacts
292 | **/*.DesktopClient/GeneratedArtifacts
293 | **/*.DesktopClient/ModelManifest.xml
294 | **/*.Server/GeneratedArtifacts
295 | **/*.Server/ModelManifest.xml
296 | _Pvt_Extensions
297 |
298 | # Paket dependency manager
299 | .paket/paket.exe
300 | paket-files/
301 |
302 | # FAKE - F# Make
303 | .fake/
304 |
305 | # CodeRush personal settings
306 | .cr/personal
307 |
308 | # Python Tools for Visual Studio (PTVS)
309 | __pycache__/
310 | *.pyc
311 |
312 | # Cake - Uncomment if you are using it
313 | # tools/**
314 | # !tools/packages.config
315 |
316 | # Tabs Studio
317 | *.tss
318 |
319 | # Telerik's JustMock configuration file
320 | *.jmconfig
321 |
322 | # BizTalk build output
323 | *.btp.cs
324 | *.btm.cs
325 | *.odx.cs
326 | *.xsd.cs
327 |
328 | # OpenCover UI analysis results
329 | OpenCover/
330 |
331 | # Azure Stream Analytics local run output
332 | ASALocalRun/
333 |
334 | # MSBuild Binary and Structured Log
335 | *.binlog
336 |
337 | # NVidia Nsight GPU debugger configuration file
338 | *.nvuser
339 |
340 | # MFractors (Xamarin productivity tool) working folder
341 | .mfractor/
342 |
343 | # Local History for Visual Studio
344 | .localhistory/
345 |
346 | # BeatPulse healthcheck temp database
347 | healthchecksdb
348 |
349 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
350 | MigrationBackup/
351 |
352 | Downloads/
353 | runs/
354 |
355 | # Ionide (cross platform F# VS Code tools) working folder
356 | .ionide/
357 |
358 | *.dat.x
359 | *.dat.y
360 |
361 | nuget.config
362 |
--------------------------------------------------------------------------------
/tutorials/CSharp/tutorial3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "dotnet_interactive": {
8 | "language": "csharp"
9 | },
10 | "vscode": {
11 | "languageId": "polyglot-notebook"
12 | }
13 | },
14 | "outputs": [],
15 | "source": [
16 | "#r \"nuget:TorchSharp-cpu\"\n",
17 | "\n",
18 | "using TorchSharp;\n",
19 | "using static TorchSharp.TensorExtensionMethods;\n",
20 | "using Microsoft.DotNet.Interactive.Formatting;"
21 | ]
22 | },
23 | {
24 | "attachments": {},
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "# Basic Numerics\n",
29 | "\n",
30 | "Arithmetic is what TorchSharp is all about, and the capabilities are rich. It's all about tensor arithmetic, though -- that's where GPU acceleration makes sense."
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {
37 | "dotnet_interactive": {
38 | "language": "csharp"
39 | },
40 | "vscode": {
41 | "languageId": "polyglot-notebook"
42 | }
43 | },
44 | "outputs": [],
45 | "source": [
46 | "var a = torch.ones(3,4);\n",
47 | "var b = torch.zeros(3,4);\n",
48 | "var c = torch.tensor(5);\n",
49 | "a * c + b"
50 | ]
51 | },
52 | {
53 | "attachments": {},
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "It's often the case that you can reuse the storage for one of the operands, so TorchSharp defines a number of 'in place' operators. These will only work if the operand has the same shape and layout as the result, of course. To use the in-place operators, you can't use the nice math syntax, you have to use functions. TorchSharp follows the PyTorch convention of appending a '_' to the name of in-place operators. It's very similar to the '*=', '+=', etc. operators in C#, except that they can be chained together.\n",
58 | "\n",
59 | "In the expression below, the storage for 'a' is used to hold first the result of multiplying with c, and then adding b."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "dotnet_interactive": {
67 | "language": "csharp"
68 | },
69 | "vscode": {
70 | "languageId": "polyglot-notebook"
71 | }
72 | },
73 | "outputs": [],
74 | "source": [
75 | "a.mul_(c).add_(b)"
76 | ]
77 | },
78 | {
79 | "attachments": {},
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
 83 |     "After this, 'a' is no longer holding ones, since it's been overwritten. The impact on performance that using in-place operators has is significant, if used consistently, but it's important to know what you're overwriting and not to over-use in-place operators. Think of it as a performance optimization."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "dotnet_interactive": {
91 | "language": "csharp"
92 | },
93 | "vscode": {
94 | "languageId": "polyglot-notebook"
95 | }
96 | },
97 | "outputs": [],
98 | "source": [
99 | "a"
100 | ]
101 | },
102 | {
103 | "attachments": {},
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "## Broadcasting"
108 | ]
109 | },
110 | {
111 | "attachments": {},
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "In the simple example above, you saw that 'c' was defined from a single value. If we look at it, we can see that it's a singleton tensor. That is, it has no shape."
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "dotnet_interactive": {
123 | "language": "csharp"
124 | },
125 | "vscode": {
126 | "languageId": "polyglot-notebook"
127 | }
128 | },
129 | "outputs": [],
130 | "source": [
131 | "c.shape"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "dotnet_interactive": {
139 | "language": "csharp"
140 | },
141 | "vscode": {
142 | "languageId": "polyglot-notebook"
143 | }
144 | },
145 | "outputs": [],
146 | "source": [
147 | "c"
148 | ]
149 | },
150 | {
151 | "attachments": {},
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "Even though its shape differed from that of 'a,' we were able to use it in the computation. How come?\n",
156 | "\n",
157 | "TorchSharp will adjust the shape, without allocating new memory, of a tensor to be compatible with another tensor in situations like this. This is called 'broadcasting' and is found in most every numerics and deep learning library around. It's not just singletons that can be broadcast -- any tensor that is compatible will have it work."
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {
164 | "dotnet_interactive": {
165 | "language": "csharp"
166 | },
167 | "vscode": {
168 | "languageId": "polyglot-notebook"
169 | }
170 | },
171 | "outputs": [],
172 | "source": [
173 | "a = torch.ones(3,4);\n",
174 | "(a + torch.ones(4)).print();\n",
175 | "a + torch.ones(1,4)"
176 | ]
177 | },
178 | {
179 | "attachments": {},
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "# Numerics Library\n",
184 | "\n",
185 |     "The collection of numerical operators that are available is too large to go through here, but suffice it to say that all the usual suspects are available. Most of them operate on an element-wise basis, i.e. the operator is applied to each element of the operands, possibly with broadcasting getting involved.\n",
186 | "\n",
187 | "One notable and __very__ significant exception is matrix multiplication, which is vector dot product generalized to matrices. The '*' operator denotes element-wise multiplication, while matrix multiplication is performed by the 'mm' method:"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {
194 | "dotnet_interactive": {
195 | "language": "csharp"
196 | },
197 | "vscode": {
198 | "languageId": "polyglot-notebook"
199 | }
200 | },
201 | "outputs": [],
202 | "source": [
203 | "a = torch.full(4,4, 17);\n",
204 | "b = torch.full(4,4, 12);\n",
205 | "\n",
206 | "(a * b).print();\n",
207 | "(a.mm(b)).str()"
208 | ]
209 | },
210 | {
211 | "attachments": {},
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "There are some very specialized operators doing more than one thing at a time avoiding creating temporaries. Some of them are there because the absence of temporaries can lead to more numerical stability (such as avoiding rounding error propagation), or because you don't have to go back and forth between the CPU and GPU as often. It is almost always the right choice to use these special composite operators when they are a match for your computation.\n",
216 | "\n",
217 | "An example is xlogy(), which performs x * log(y) all in one operation."
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {
224 | "dotnet_interactive": {
225 | "language": "csharp"
226 | },
227 | "vscode": {
228 | "languageId": "polyglot-notebook"
229 | }
230 | },
231 | "outputs": [],
232 | "source": [
233 | "var x = torch.rand(5);\n",
234 | "var y = torch.rand(5);\n",
235 | "(x * torch.log(y)).print();\n",
236 | "x.xlogy(y)"
237 | ]
238 | }
239 | ],
240 | "metadata": {
241 | "kernelspec": {
242 | "display_name": ".NET (C#)",
243 | "language": "C#",
244 | "name": ".net-csharp"
245 | },
246 | "language_info": {
247 | "name": "C#"
248 | }
249 | },
250 | "nbformat": 4,
251 | "nbformat_minor": 2
252 | }
253 |
--------------------------------------------------------------------------------
/tutorials/FSharp/tutorial3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "dotnet_interactive": {
8 | "language": "fsharp"
9 | },
10 | "vscode": {
11 | "languageId": "polyglot-notebook"
12 | }
13 | },
14 | "outputs": [],
15 | "source": [
16 | "#r \"nuget: TorchSharp-cpu\"\n",
17 | "\n",
18 | "open TorchSharp\n",
19 | "open type TorchSharp.torch\n",
20 | "open type TorchSharp.TensorExtensionMethods"
21 | ]
22 | },
23 | {
24 | "attachments": {},
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "# Basic Numerics\n",
29 | "\n",
30 | "Arithmetic is what TorchSharp is all about, and the capabilities are rich. It's all about tensor arithmetic, though -- that's where GPU acceleration makes sense."
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {
37 | "dotnet_interactive": {
38 | "language": "fsharp"
39 | },
40 | "vscode": {
41 | "languageId": "polyglot-notebook"
42 | }
43 | },
44 | "outputs": [],
45 | "source": [
46 | "let a = torch.ones(3,4)\n",
47 | "let b = torch.zeros(3,4)\n",
48 | "let c = torch.tensor(5)\n",
49 | "a * c + b"
50 | ]
51 | },
52 | {
53 | "attachments": {},
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "It's often the case that you can reuse the storage for one of the operands, so TorchSharp defines a number of 'in place' operators. These will only work if the operand has the same shape and layout as the result, of course. To use the in-place operators, you can't use the nice math syntax, you have to use functions. TorchSharp follows the PyTorch convention of appending a '_' to the name of in-place operators. It's very similar to the '*=', '+=', etc. operators in C#, except that they can be chained together.\n",
58 | "\n",
59 | "In the expression below, the storage for 'a' is used to hold first the result of multiplying with c, and then adding b."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "dotnet_interactive": {
67 | "language": "fsharp"
68 | },
69 | "vscode": {
70 | "languageId": "polyglot-notebook"
71 | }
72 | },
73 | "outputs": [],
74 | "source": [
75 | "a.mul_(c).add_(b)"
76 | ]
77 | },
78 | {
79 | "attachments": {},
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
 83 |     "After this, 'a' is no longer holding ones, since it's been overwritten. The impact on performance that using in-place operators has is significant, if used consistently, but it's important to know what you're overwriting and not to over-use in-place operators. Think of it as a performance optimization."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "dotnet_interactive": {
91 | "language": "fsharp"
92 | },
93 | "vscode": {
94 | "languageId": "polyglot-notebook"
95 | }
96 | },
97 | "outputs": [],
98 | "source": [
99 | "a"
100 | ]
101 | },
102 | {
103 | "attachments": {},
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "## Broadcasting"
108 | ]
109 | },
110 | {
111 | "attachments": {},
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "In the simple example above, you saw that 'c' was defined from a single value. If we look at it, we can see that it's a singleton tensor. That is, it has no shape."
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "dotnet_interactive": {
123 | "language": "fsharp"
124 | },
125 | "vscode": {
126 | "languageId": "polyglot-notebook"
127 | }
128 | },
129 | "outputs": [],
130 | "source": [
131 | "c.shape"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "dotnet_interactive": {
139 | "language": "fsharp"
140 | },
141 | "vscode": {
142 | "languageId": "polyglot-notebook"
143 | }
144 | },
145 | "outputs": [],
146 | "source": [
147 | "c"
148 | ]
149 | },
150 | {
151 | "attachments": {},
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "Even though its shape differed from that of 'a,' we were able to use it in the computation. How come?\n",
156 | "\n",
157 | "TorchSharp will adjust the shape, without allocating new memory, of a tensor to be compatible with another tensor in situations like this. This is called 'broadcasting' and is found in most every numerics and deep learning library around. It's not just singletons that can be broadcast -- any tensor that is compatible will have it work."
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {
164 | "dotnet_interactive": {
165 | "language": "fsharp"
166 | },
167 | "vscode": {
168 | "languageId": "polyglot-notebook"
169 | }
170 | },
171 | "outputs": [],
172 | "source": [
173 | "let a = torch.ones(3,4)\n",
174 | "(a + torch.ones(4)).print()\n",
175 | "a + torch.ones(1,4)"
176 | ]
177 | },
178 | {
179 | "attachments": {},
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "# Numerics Library\n",
184 | "\n",
185 |     "The collection of numerical operators that are available is too large to go through here, but suffice it to say that all the usual suspects are available. Most of them operate on an element-wise basis, i.e. the operator is applied to each element of the operands, possibly with broadcasting getting involved.\n",
186 | "\n",
187 | "One notable and __very__ significant exception is matrix multiplication, which is vector dot product generalized to matrices. The '*' operator denotes element-wise multiplication, while matrix multiplication is performed by the 'mm' method:"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {
194 | "dotnet_interactive": {
195 | "language": "fsharp"
196 | },
197 | "vscode": {
198 | "languageId": "polyglot-notebook"
199 | }
200 | },
201 | "outputs": [],
202 | "source": [
203 | "let a = torch.full(4L,4L, (17).ToScalar())\n",
204 | "let b = torch.full(4L,4L, (12).ToScalar())\n",
205 | "\n",
206 | "(a * b).print()\n",
207 | "(a.mm(b))"
208 | ]
209 | },
210 | {
211 | "attachments": {},
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "There are some very specialized operators doing more than one thing at a time avoiding creating temporaries. Some of them are there because the absence of temporaries can lead to more numerical stability (such as avoiding rounding error propagation), or because you don't have to go back and forth between the CPU and GPU as often. It is almost always the right choice to use these special composite operators when they are a match for your computation.\n",
216 | "\n",
217 | "An example is xlogy(), which performs x * log(y) all in one operation."
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {
224 | "dotnet_interactive": {
225 | "language": "fsharp"
226 | },
227 | "vscode": {
228 | "languageId": "polyglot-notebook"
229 | }
230 | },
231 | "outputs": [],
232 | "source": [
233 | "let x = torch.rand(5)\n",
234 | "let y = torch.rand(5)\n",
235 | "(x * torch.log(y)).print()\n",
236 | "x.xlogy(y)"
237 | ]
238 | }
239 | ],
240 | "metadata": {
241 | "kernelspec": {
242 | "display_name": ".NET (F#)",
243 | "language": "F#",
244 | "name": ".net-fsharp"
245 | },
246 | "language_info": {
247 | "name": "F#"
248 | }
249 | },
250 | "nbformat": 4,
251 | "nbformat_minor": 2
252 | }
253 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/AdversarialExampleGeneration.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Collections.Generic;
5 |
6 | using TorchSharp;
7 | using static TorchSharp.torchvision;
8 |
9 | using TorchSharp.Examples;
10 | using TorchSharp.Examples.Utils;
11 |
12 | using static TorchSharp.torch;
13 |
14 | using static TorchSharp.torch.nn;
15 | using static TorchSharp.torch.nn.functional;
16 |
17 |
18 | namespace CSharpExamples
19 | {
20 | ///
21 | /// FGSM Attack
22 | ///
23 | /// Based on : https://pytorch.org/tutorials/beginner/fgsm_tutorial.html
24 | ///
25 | ///
26 | /// There are at least two interesting data sets to use with this example:
27 | ///
28 | /// 1. The classic MNIST set of 60000 images of handwritten digits.
29 | ///
30 | /// It is available at: http://yann.lecun.com/exdb/mnist/
31 | ///
32 | /// 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder
33 | /// data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test
34 | /// data.
35 | /// It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion
36 | ///
37 | /// In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation'
38 | /// constant below at the folder location.
39 | ///
40 | /// The example is based on the PyTorch tutorial, but the results from attacking the model are very different from
41 | /// what the tutorial article notes, at least on the machine where it was developed. There is an order-of-magnitude lower
42 | /// drop-off in accuracy in this version. That said, when running the PyTorch tutorial on the same machine, the
43 | /// accuracy trajectories are the same between .NET and Python. If the base convolutional model is trained
44 | /// using Python, and then used for the FGSM attack in both .NET and Python, the drop-off trajectories are extremely
45 | /// close.
46 | ///
public class AdversarialExampleGeneration
{
    // Root folder where the four MNIST .gz files are expected (see the remarks above the class).
    private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist");

    // Defaults; scaled up by 4x in Run() when a CUDA device is available.
    private static int _epochs = 4;
    private static int _trainBatchSize = 64;
    private static int _testBatchSize = 128;

    // Entry point: trains (or loads) an MNIST classifier, then measures its accuracy
    // under an FGSM attack at a range of perturbation strengths (epsilons).
    //
    //   epochs  -- number of pre-training epochs (quadrupled on CUDA).
    //   timeout -- wall-clock budget in seconds, forwarded to MNIST.TrainingLoop.
    //   logdir  -- optional TensorBoard log directory; null/empty disables logging.
    //   dataset -- data set name; defaults to "mnist" when null or empty.
    static internal void Run(int epochs, int timeout, string logdir, string dataset)
    {
        _epochs = epochs;

        if (string.IsNullOrEmpty(dataset))
        {
            dataset = "mnist";
        }

        var cwd = Environment.CurrentDirectory;

        // NOTE(review): datasetPath is computed but never used below; the data is read from
        // _dataLocation instead, so a non-"mnist" dataset name still loads the mnist folder.
        // Confirm whether this is intentional.
        var datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset);

        // Fixed seed for reproducibility.
        var _ = torch.random.manual_seed(1);

        //var device = torch.CPU;
        var device = torch.cuda.is_available() ? torch.CUDA : torch.CPU;
        Console.WriteLine();
        Console.WriteLine($"\tRunning FGSM attack with {dataset} on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}.");
        Console.WriteLine();

        // Larger batches (and more epochs) are affordable on the GPU.
        if (device.type == DeviceType.CUDA) {
            _trainBatchSize *= 4;
            _testBatchSize *= 4;
            _epochs *= 4;
        }

        Console.WriteLine($"\tPreparing training and test data...");

        var sourceDir = _dataLocation;
        var targetDir = Path.Combine(_dataLocation, "test_data");

        var writer = String.IsNullOrEmpty(logdir) ? null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName:true);

        // Unpack the raw MNIST files once; an existing target directory is taken to mean it's done.
        if (!Directory.Exists(targetDir)) {
            Directory.CreateDirectory(targetDir);
            Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-images-idx3-ubyte.gz"), targetDir);
            Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-labels-idx1-ubyte.gz"), targetDir);
            Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-images-idx3-ubyte.gz"), targetDir);
            Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-labels-idx1-ubyte.gz"), targetDir);
        }

        TorchSharp.Examples.MNIST.Model model = null;

        // Standard MNIST normalization constants (mean 0.1307, std 0.3081).
        var normImage = transforms.Normalize(new double[] { 0.1307 }, new double[] { 0.3081 }, device: (Device)device);

        using (var test = new MNISTReader(targetDir, "t10k", _testBatchSize, device: device, transform: normImage)) {

            var modelFile = dataset + ".model.bin";

            if (!File.Exists(modelFile)) {
                // We need the model to be trained first, because we want to start with a trained model.
                Console.WriteLine($"\n Running MNIST on {device.type.ToString()} in order to pre-train the model.");

                model = new TorchSharp.Examples.MNIST.Model("model", device);

                using (var train = new MNISTReader(targetDir, "train", _trainBatchSize, device: device, shuffle: true, transform: normImage)) {
                    MNIST.TrainingLoop(dataset, timeout, writer, (Device)device, model, train, test);
                }

                Console.WriteLine("Moving on to the Adversarial model.\n");

            } else {
                // Load the saved weights on the CPU first, then move to the target device below.
                model = new TorchSharp.Examples.MNIST.Model("model", torch.CPU);
                model.load(modelFile);
            }

            model.to((Device)device);
            model.eval();

            // Attack strengths to sweep; ε = 0 is the unattacked baseline accuracy.
            var epsilons = new double[] { 0, 0.05, 0.1, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50 };

            foreach (var ε in epsilons) {
                var attacked = Test(model, NLLLoss(), ε, test, test.Size);
                Console.WriteLine($"Epsilon: {ε:F2}, accuracy: {attacked:P2}");
            }
        }
    }

    // FGSM perturbation: move each pixel by ε in the direction of the loss gradient's
    // sign, then clamp back into the valid [0,1] image range.
    private static Tensor Attack(Tensor image, double ε, Tensor data_grad)
    {
        using (var sign = data_grad.sign()) {
            var perturbed = (image + ε * sign).clamp(0.0, 1.0);
            return perturbed;
        }
    }

    // Evaluates the model on FGSM-perturbed versions of every batch in 'dataLoader'
    // and returns the resulting accuracy (correct / size).
    //
    //   model      -- trained classifier under attack.
    //   criterion  -- loss used to produce the input gradient (NLL here).
    //   ε          -- perturbation strength.
    //   dataLoader -- (input, target) batches.
    //   size       -- total number of examples, used as the accuracy denominator.
    private static double Test(
        TorchSharp.Examples.MNIST.Model model,
        Loss criterion,
        double ε,
        IEnumerable<(Tensor, Tensor)> dataLoader,
        long size)
    {
        int correct = 0;

        foreach (var (data, target) in dataLoader) {

            // The dispose scope reclaims all intermediate tensors created per batch.
            using (var d = torch.NewDisposeScope())
            {
                // Gradients w.r.t. the *input* are needed to craft the perturbation.
                data.requires_grad = true;

                using (var output = model.forward(data))
                using (var loss = criterion.forward(output, target))
                {

                    model.zero_grad();
                    loss.backward();

                    var perturbed = Attack(data, ε, data.grad);

                    // Re-classify the perturbed images and count surviving correct predictions.
                    using (var final = model.forward(perturbed))
                    {

                        correct += final.argmax(1).eq(target).sum().ToInt32();
                    }
                }
            }
        }

        return (double)correct / size;
    }
}
178 | }
179 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/AlexNet.fs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | module FSharpExamples.AlexNet
3 |
4 | open System
5 | open System.IO
6 | open System.Diagnostics
7 |
8 | open TorchSharp
9 | open TorchSharp.Examples
10 |
11 | open type TorchSharp.torch.nn
12 | open type TorchSharp.torch.optim
13 | open type TorchSharp.Scalar
14 |
15 | // Modified version of original AlexNet to fit CIFAR10 32x32 images.
16 | //
17 | // The dataset for this example can be found at: https://www.cs.toronto.edu/~kriz/cifar.html
18 | // Download the binary file, and place it in a dedicated folder, e.g. 'CIFAR10,' then edit
19 | // the '_dataLocation' definition below to point at the right folder.
20 | //
21 | // Note: so far, CIFAR10 is supported, but not CIFAR100.
22 |
// Batch sizes are mutable so 'run' can scale them up when CUDA is available.
let mutable trainBatchSize = 64
let mutable testBatchSize = 128

// Number of mini-batches between progress printouts during training.
let logInterval = 25
let numClasses = 10L

let cmdArgs = Environment.GetCommandLineArgs()
let dataset = "CIFAR10"

// The CIFAR-10 archive is expected under the user's Downloads folder.
let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset)

torch.random.manual_seed(1L) |> ignore

// BUG FIX: CUDA availability is reported by torch.cuda (as in the C# examples),
// not by TorchText.Datasets.
let hasCUDA = torch.cuda.is_available()

let device = if hasCUDA then torch.CUDA else torch.CPU
39 |
/// Ensures the CIFAR-10 binary files are unpacked into 'targetDir'.
/// Extraction happens at most once: an existing target directory is taken
/// to mean the data is already in place.
let getDataFiles sourceDir targetDir =

    if Directory.Exists(targetDir) |> not then
        targetDir |> Directory.CreateDirectory |> ignore
        Utils.Decompress.ExtractTGZ(Path.Combine(sourceDir, "cifar-10-binary.tar.gz"), targetDir)
45 |
/// AlexNet-style CNN adapted to CIFAR-10's 32x32 inputs: a convolutional
/// feature extractor followed by a fully-connected classifier that ends in
/// LogSoftmax (paired with nll_loss below).
type Model(name,device:torch.Device) as this =
    inherit Module(name)

    // Feature extractor. AdaptiveAvgPool2d pins the spatial output at 2x2
    // regardless of the preceding convolution/pooling arithmetic, which is
    // what makes the 256*2*2 flatten in 'forward' valid.
    let features = Sequential(("c1", Conv2d(3L, 64L, kernel_size=3L, stride=2L, padding=1L) :> Module),
                              ("r1", ReLU(inplace=true) :> Module),
                              ("mp1", MaxPool2d(kernel_size=[|2L; 2L|]) :> Module),
                              ("c2", Conv2d(64L, 192L, kernel_size=3L, padding=1L) :> Module),
                              ("r2", ReLU(inplace=true) :> Module),
                              ("mp2", MaxPool2d(kernel_size=[|2L; 2L|]) :> Module),
                              ("c3", Conv2d(192L, 384L, kernel_size=3L, padding=1L) :> Module),
                              ("r3", ReLU(inplace=true) :> Module),
                              ("c4", Conv2d(384L, 256L, kernel_size=3L, padding=1L) :> Module),
                              ("r4", ReLU(inplace=true) :> Module),
                              ("c5", Conv2d(256L, 256L, kernel_size=3L, padding=1L) :> Module),
                              ("r5", ReLU(inplace=true) :> Module),
                              ("mp3", MaxPool2d(kernel_size=[|2L; 2L|]) :> Module),
                              ("avg", AdaptiveAvgPool2d([|2L; 2L|]) :> Module))

    // Classifier head: dropout-regularized MLP mapping the flattened 1024
    // features to 'numClasses' log-probabilities.
    let classifier = Sequential(("d1", Dropout() :> Module),
                                ("l1", Linear(256L * 2L * 2L, 4096L) :> Module),
                                ("r6", ReLU(inplace=true) :> Module),
                                ("d2", Dropout() :> Module),
                                ("l2", Linear(4096L, 4096L) :> Module),
                                ("r7", ReLU(inplace=true) :> Module),
                                ("d3", Dropout() :> Module),
                                ("l3", Linear(4096L, numClasses) :> Module),
                                ("logsm", LogSoftmax(1L) :> Module))

    do
        // Registers submodules/parameters so save/load and .parameters() work.
        this.RegisterComponents()

        if device.``type`` = DeviceType.CUDA then
            this.``to``(device) |> ignore

    /// Runs the feature extractor, flattens to (batch, 1024), then classifies.
    /// Assumes 'input' is a batch of 3-channel images — TODO confirm with caller.
    override _.forward(input) =

        let avg = features.forward(input)
        let x = avg.view([|avg.shape.[0]; 256L*2L*2L|])

        classifier.forward(x)
86 |
// Negative log-likelihood loss; the model already emits log-probabilities (LogSoftmax).
let loss x y = functional.nll_loss(x,y)
88 |
/// Runs one training epoch over 'dataLoader', stepping 'optimizer' per batch
/// and printing loss/accuracy every 'logInterval' batches.
let train (model:Model) (optimizer:Optimizer) (dataLoader: CIFARReader) epoch =

    model.train()

    let size = dataLoader.Size

    // Running counts across the epoch for the accuracy printout.
    let mutable batchID = 1
    let mutable total = 0L
    let mutable correct = 0L

    printfn $"Epoch: {epoch}..."

    for (input,labels) in dataLoader.Data() do

        // Dispose scope reclaims all intermediate tensors created in this batch.
        use d = torch.NewDisposeScope()

        optimizer.zero_grad()

        begin
            use estimate = input --> model
            use output = loss estimate labels

            output.backward()
            optimizer.step() |> ignore

            total <- total + labels.shape.[0]

            // Count correct predictions in this batch (argmax over class dim).
            use sum = estimate.argmax(1L).eq(labels).sum()
            correct <- correct + sum.ToInt64()

            if batchID % logInterval = 0 then
                // 'count' caps at 'size' so the progress display never overshoots.
                let count = min (batchID * trainBatchSize) size
                let outputString = output.ToSingle().ToString("0.0000")
                let accString = ((float correct) / (float total)).ToString("0.0000")
                printfn $"\rTrain: epoch {epoch} [{count} / {size}] Loss: {outputString} Acc: {accString}"

            batchID <- batchID + 1
        end
127 |
/// Evaluates the model over the whole test set, printing the per-batch
/// average loss and overall accuracy.
let test (model:Model) (dataLoader:CIFARReader) =
    model.eval()

    let sz = single dataLoader.Size

    // testLoss accumulates per-batch losses; the printout divides by batch count.
    let mutable testLoss = 0.0f
    let mutable correct = 0L
    let mutable batchCount = 0L

    for (input,labels) in dataLoader.Data() do

        // Dispose scope reclaims all intermediate tensors created in this batch.
        use d = torch.NewDisposeScope()

        use estimate = input --> model
        use output = loss estimate labels
        testLoss <- testLoss + output.ToSingle()
        batchCount <- batchCount + 1L

        use sum = estimate.argmax(1L).eq(labels).sum()
        correct <- correct + sum.ToInt64()

    let avgLossString = (testLoss / (single batchCount)).ToString("0.0000")
    let accString = ((single correct) / sz).ToString("0.0000")

    printfn $"\rTest set: Average loss {avgLossString} | Accuracy {accString}"
153 |
154 |
/// Drives 'epochs' rounds of training followed by evaluation,
/// reporting the total wall-clock time at the end.
let trainingLoop (model:Model) epochs trainData testData =

    // Adam with the default 0.001 learning rate; no LR scheduler is attached.
    use optimizer = Adam(model.parameters(), 0.001)

    let timer = Stopwatch.StartNew()

    for epoch = 1 to epochs do
        train model optimizer trainData epoch
        test model testData

    timer.Stop()

    printfn $"Elapsed time: {timer.Elapsed.TotalSeconds:F1} s."
170 |
/// Entry point: scales batch sizes for the device, unpacks CIFAR-10 if
/// needed, builds the model, and runs the training loop for 'epochs' rounds.
let run epochs =

    // Much larger batches are affordable on the GPU.
    if device.``type`` = DeviceType.CUDA then
        trainBatchSize <- trainBatchSize * 8
        testBatchSize <- testBatchSize * 8

    printfn ""
    printfn $"\tRunning AlexNet with {dataset} on {device.``type``.ToString()} for {epochs} epochs"
    printfn ""

    let targetDir = Path.Combine(datasetPath, "test_data")

    getDataFiles datasetPath targetDir

    // 'false' selects the training split, 'true' the test split.
    use trainData = new CIFARReader(targetDir, false, trainBatchSize, shuffle=true, device=device)
    use testData = new CIFARReader(targetDir, true, testBatchSize, device=device)

    use model = new Model("model", device)

    trainingLoop model epochs trainData testData

    ()
--------------------------------------------------------------------------------
/tutorials/FSharp/tutorial7.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "dotnet_interactive": {
8 | "language": "fsharp"
9 | },
10 | "vscode": {
11 | "languageId": "polyglot-notebook"
12 | }
13 | },
14 | "outputs": [],
15 | "source": [
16 | "#r \"nuget: TorchSharp-cpu\"\n",
17 | "\n",
18 | "open TorchSharp\n",
19 | "open type TorchSharp.torch\n",
20 | "open type TorchSharp.TensorExtensionMethods\n",
21 | "open type TorchSharp.torch.distributions"
22 | ]
23 | },
24 | {
25 | "attachments": {},
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "# Training with a Learning Rate Scheduler\n",
30 | "\n",
31 | "In Tutorial 6, we saw how the optimizers took an argument called the 'learning rate,' but didn't spend much time on it except to say that it could have a great impact on how quickly training would converge toward a solution. In fact, you can choose the learning rate (LR) so poorly, that the training doesn't converge at all.\n",
32 | "\n",
33 | "If the LR is too small, training will go very slowly, wasting compute resources. If it is too large, training could result in numeric overflow, or NaNs. Either way, you're in trouble."
34 | ]
35 | },
36 | {
37 | "attachments": {},
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "To further complicate matters, it turns out that the learning rate shouldn't necessarily be constant. Training can go much better if the learning rate starts out relatively large and gets smaller as you get closer to the end.\n",
42 | "\n",
43 | "There's a solution for this, called a Learning Rate Scheduler. An LRS instance has access to the internal state of the optimizer, and can modify the LR as it goes along. There are several algorithms for scheduling, of which TorchSharp currently implements a significant subset."
44 | ]
45 | },
46 | {
47 | "attachments": {},
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "Before demonstrating, let's have a model and a baseline training loop."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "dotnet_interactive": {
59 | "language": "fsharp"
60 | },
61 | "vscode": {
62 | "languageId": "polyglot-notebook"
63 | }
64 | },
65 | "outputs": [],
66 | "source": [
67 | "type Trivial() as this = \n",
68 | " inherit nn.Module(\"Trivial\")\n",
69 | "\n",
70 | " let lin1 = nn.Linear(1000L, 100L)\n",
71 | " let lin2 = nn.Linear(100L, 10L)\n",
72 | "\n",
73 | " do\n",
74 | " this.RegisterComponents()\n",
75 | "\n",
76 | " override _.forward(input) = \n",
77 | " \n",
78 | " use x = lin1.forward(input)\n",
79 | " use y = nn.functional.relu(x)\n",
80 | " lin2.forward(y)"
81 | ]
82 | },
83 | {
84 | "attachments": {},
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "To demonstrate how to correctly use an LR scheduler, our training data needs to look more like real training data, that is, it needs to be divided into batches."
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {
95 | "dotnet_interactive": {
96 | "language": "fsharp"
97 | },
98 | "vscode": {
99 | "languageId": "polyglot-notebook"
100 | }
101 | },
102 | "outputs": [],
103 | "source": [
104 | "let learning_rate = 0.01\n",
105 | "let model = Trivial()\n",
106 | "\n",
107 | "let data = [for i = 1 to 16 do rand(32,1000)] // Our pretend input data\n",
108 | "let result = [for i = 1 to 16 do rand(32,10)] // Our pretend ground truth.\n",
109 | "\n",
110 | "let loss x y = nn.functional.mse_loss(x,y)\n",
111 | "\n",
112 | "let optimizer = torch.optim.SGD(model.parameters(), learning_rate)\n",
113 | "\n",
114 | "for epoch = 1 to 300 do\n",
115 | "\n",
116 | " for idx = 0 to data.Length-1 do\n",
117 | " // Compute the loss\n",
118 | " let pred = model.forward(data.[idx])\n",
119 | " let output = loss pred result.[idx]\n",
120 | "\n",
121 | " // Clear the gradients before doing the back-propagation\n",
122 | " model.zero_grad()\n",
123 | "\n",
124 | " // Do back-propagation, which computes all the gradients.\n",
125 | " output.backward()\n",
126 | "\n",
127 | " optimizer.step() |> ignore\n",
128 | "\n",
129 | "let pred = model.forward(data.[0])\n",
130 | "(loss pred result.[0]).item()"
131 | ]
132 | },
133 | {
134 | "attachments": {},
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "When I ran this, the loss was down to 0.051 after 3 seconds. (It took longer the first time around.)"
139 | ]
140 | },
141 | {
142 | "attachments": {},
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 | "## StepLR\n",
147 | "\n",
148 | "StepLR multiplies the learning rate by a decay factor (gamma) every so often. The difference it makes to the training loop is that you wrap the optimizer, and then call `step` on the scheduler (once per epoch) as well as the optimizer (once per batch)."
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {
155 | "dotnet_interactive": {
156 | "language": "fsharp"
157 | },
158 | "vscode": {
159 | "languageId": "polyglot-notebook"
160 | }
161 | },
162 | "outputs": [],
163 | "source": [
164 | "let learning_rate = 0.01\n",
165 | "let model = Trivial()\n",
166 | "\n",
167 | "let data = [for i = 1 to 16 do rand(32,1000)] // Our pretend input data\n",
168 | "let result = [for i = 1 to 16 do rand(32,10)] // Our pretend ground truth.\n",
169 | "\n",
170 | "let loss x y = nn.functional.mse_loss(x,y)\n",
171 | "\n",
172 | "let optimizer = torch.optim.SGD(model.parameters(), learning_rate)\n",
173 | "let scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 25, 0.95, verbose=true)\n",
174 | "\n",
175 | "for epoch = 1 to 300 do\n",
176 | "\n",
177 | " for idx = 0 to data.Length-1 do\n",
178 | " // Compute the loss\n",
179 | " let pred = model.forward(data.[idx])\n",
180 | " let output = loss pred result.[idx]\n",
181 | "\n",
182 | " // Clear the gradients before doing the back-propagation\n",
183 | " model.zero_grad()\n",
184 | "\n",
185 | " // Do back-propagation, which computes all the gradients.\n",
186 | " output.backward()\n",
187 | "\n",
188 | " optimizer.step() |> ignore\n",
189 | "\n",
190 | " scheduler.step() |> ignore\n",
191 | "\n",
192 | "let pred = model.forward(data.[0])\n",
193 | "(loss pred result.[0]).item()"
194 | ]
195 | },
196 | {
197 | "attachments": {},
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "Well, that was underwhelming. The loss (in my case) went up a bit, so that's nothing to get excited about. For this trivial model, using a scheduler isn't going to make a huge difference, and it may not make much of a difference even for complex models. It's very hard to know until you try it, but now you know how to try it out. If you try this trivial example over and over, you will see that the results vary quite a bit. It's simply too simple.\n",
202 | "\n",
203 | "Regardless, you can see from the verbose output that the learning rate is adjusted as the epochs proceed."
204 | ]
205 | },
206 | {
207 | "attachments": {},
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": []
211 | }
212 | ],
213 | "metadata": {
214 | "kernelspec": {
215 | "display_name": ".NET (F#)",
216 | "language": "F#",
217 | "name": ".net-fsharp"
218 | },
219 | "language_info": {
220 | "name": "F#"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 2
225 | }
226 |
--------------------------------------------------------------------------------
/tutorials/CSharp/tutorial7.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "dotnet_interactive": {
8 | "language": "csharp"
9 | },
10 | "vscode": {
11 | "languageId": "polyglot-notebook"
12 | }
13 | },
14 | "outputs": [],
15 | "source": [
16 | "#r \"nuget: TorchSharp-cpu\"\n",
17 | "\n",
18 | "using TorchSharp;\n",
19 | "using static TorchSharp.torch;\n",
20 | "using static TorchSharp.TensorExtensionMethods;\n",
21 | "using static TorchSharp.torch.distributions;"
22 | ]
23 | },
24 | {
25 | "attachments": {},
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "# Training with a Learning Rate Scheduler\n",
30 | "\n",
31 | "In Tutorial 6, we saw how the optimizers took an argument called the 'learning rate,' but didn't spend much time on it except to say that it could have a great impact on how quickly training would converge toward a solution. In fact, you can choose the learning rate (LR) so poorly, that the training doesn't converge at all.\n",
32 | "\n",
33 | "If the LR is too small, training will go very slowly, wasting compute resources. If it is too large, training could result in numeric overflow, or NaNs. Either way, you're in trouble."
34 | ]
35 | },
36 | {
37 | "attachments": {},
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "To further complicate matters, it turns out that the learning rate shouldn't necessarily be constant. Training can go much better if the learning rate starts out relatively large and gets smaller as you get closer to the end.\n",
42 | "\n",
43 | "There's a solution for this, called a Learning Rate Scheduler. An LRS instance has access to the internal state of the optimizer, and can modify the LR as it goes along. Some schedulers modify other optimizer state, too, such as the momentum (for optimizers that use momentum).\n",
44 | "\n",
45 | "There are several algorithms for scheduling, and TorchSharp implements a number of them."
46 | ]
47 | },
48 | {
49 | "attachments": {},
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "Before demonstrating, let's have a model and a baseline training loop."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {
60 | "dotnet_interactive": {
61 | "language": "csharp"
62 | },
63 | "vscode": {
64 | "languageId": "polyglot-notebook"
65 | }
66 | },
67 | "outputs": [],
68 | "source": [
69 | "private class Trivial : nn.Module\n",
70 | "{\n",
71 | " public Trivial()\n",
72 | " : base(nameof(Trivial))\n",
73 | " {\n",
74 | " RegisterComponents();\n",
75 | " }\n",
76 | "\n",
77 | " public override Tensor forward(Tensor input)\n",
78 | " {\n",
79 | " using var x = lin1.forward(input);\n",
80 | " using var y = nn.functional.relu(x);\n",
81 | " return lin2.forward(y);\n",
82 | " }\n",
83 | "\n",
84 | " private nn.Module lin1 = nn.Linear(1000, 100);\n",
85 | " private nn.Module lin2 = nn.Linear(100, 10);\n",
86 | "}"
87 | ]
88 | },
89 | {
90 | "attachments": {},
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "To demonstrate how to correctly use an LR scheduler, our training data needs to look more like real training data, that is, it needs to be divided into batches."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {
101 | "dotnet_interactive": {
102 | "language": "csharp"
103 | },
104 | "vscode": {
105 | "languageId": "polyglot-notebook"
106 | }
107 | },
108 | "outputs": [],
109 | "source": [
110 | "var learning_rate = 0.01f;\n",
111 | "var model = new Trivial();\n",
112 | "var loss = nn.MSELoss();\n",
113 | "\n",
114 | "var data = Enumerable.Range(0,16).Select(_ => rand(32,1000)).ToList(); // Our pretend input data\n",
115 | "var results = Enumerable.Range(0,16).Select(_ => rand(32,10)).ToList(); // Our pretend ground truth.\n",
116 | "\n",
117 | "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);\n",
118 | "\n",
119 | "for (int i = 0; i < 300; i++) {\n",
120 | "\n",
121 | " for (int idx = 0; idx < data.Count; idx++) {\n",
122 | " // Compute the loss\n",
123 | " using var output = loss.forward(model.forward(data[idx]), results[idx]);\n",
124 | "\n",
125 | " // Clear the gradients before doing the back-propagation\n",
126 | " model.zero_grad();\n",
127 | "\n",
128 | " // Do back-propagation, which computes all the gradients.\n",
129 | " output.backward();\n",
130 | "\n",
131 | " optimizer.step();\n",
132 | " }\n",
133 | "}\n",
134 | "\n",
135 | "loss.forward(model.forward(data[0]), results[0]).item()"
136 | ]
137 | },
138 | {
139 | "attachments": {},
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "When I ran this, the loss was down to 0.095 after 1 second. (It took longer the first time around.)"
144 | ]
145 | },
146 | {
147 | "attachments": {},
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "## StepLR\n",
152 | "\n",
153 | "StepLR multiplies the learning rate by a decay factor (gamma) every so often. The difference it makes to the training loop is that you wrap the optimizer, and then call `step` on the scheduler (once per epoch) as well as the optimizer (once per batch)."
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {
160 | "dotnet_interactive": {
161 | "language": "csharp"
162 | },
163 | "vscode": {
164 | "languageId": "polyglot-notebook"
165 | }
166 | },
167 | "outputs": [],
168 | "source": [
169 | "var learning_rate = 0.01f;\n",
170 | "var model = new Trivial();\n",
171 | "var loss = nn.MSELoss();\n",
172 | "\n",
173 | "var data = Enumerable.Range(0,16).Select(_ => rand(32,1000)).ToList(); // Our pretend input data\n",
174 | "var results = Enumerable.Range(0,16).Select(_ => rand(32,10)).ToList(); // Our pretend ground truth.\n",
175 | "\n",
176 | "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);\n",
177 | "var scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 25, 0.95);\n",
178 | "\n",
179 | "for (int i = 0; i < 300; i++) {\n",
180 | "\n",
181 | " for (int idx = 0; idx < data.Count; idx++) {\n",
182 | " // Compute the loss\n",
183 | " using var output = loss.forward(model.forward(data[idx]), results[idx]);\n",
184 | "\n",
185 | " // Clear the gradients before doing the back-propagation\n",
186 | " model.zero_grad();\n",
187 | "\n",
189 | " // Do back-propagation, which computes all the gradients.\n",
189 | " output.backward();\n",
190 | "\n",
191 | " optimizer.step();\n",
192 | " }\n",
193 | "\n",
194 | " scheduler.step();\n",
195 | "}\n",
196 | "\n",
197 | "loss.forward(model.forward(data[0]), results[0]).item()"
198 | ]
199 | },
200 | {
201 | "attachments": {},
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "Well, that was underwhelming. The loss (in my case) went up a bit, so that's nothing to get excited about. For this trivial model, using a scheduler isn't going to make a huge difference, and it may not make much of a difference even for complex models. It's very hard to know until you try it, but now you know how to try it out. If you try this trivial example over and over, you will see that the results vary quite a bit. It's simply too simple.\n",
206 | "\n",
207 | "Regardless, you can see from the verbose output that the learning rate is adjusted as the epochs proceed. \n",
208 | "\n",
209 | "Note: If you're using 0.93.9 and you see odd dips in the learning rate, that's a bug in the verbose printout logic, not the learning rate scheduler itself."
210 | ]
211 | }
212 | ],
213 | "metadata": {
214 | "kernelspec": {
215 | "display_name": ".NET (C#)",
216 | "language": "C#",
217 | "name": ".net-csharp"
218 | },
219 | "language_info": {
220 | "name": "C#"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 2
225 | }
226 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/MNIST.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Collections.Generic;
5 | using System.Diagnostics;
6 |
7 | using TorchSharp;
8 | using static TorchSharp.torchvision;
9 |
10 | using TorchSharp.Examples;
11 | using TorchSharp.Examples.Utils;
12 |
13 | using static TorchSharp.torch;
14 |
15 | using static TorchSharp.torch.nn;
16 | using static TorchSharp.torch.nn.functional;
17 |
18 | namespace CSharpExamples
19 | {
20 | ///
21 | /// Simple MNIST Convolutional model.
22 | ///
23 | ///
24 | /// There are at least two interesting data sets to use with this example:
25 | ///
26 | /// 1. The classic MNIST set of 60000 images of handwritten digits.
27 | ///
28 | /// It is available at: http://yann.lecun.com/exdb/mnist/
29 | ///
30 | /// 2. The 'fashion-mnist' data set, which has the exact same file names and format as MNIST, but is a harder
31 | /// data set to train on. It's just as large as MNIST, and has the same 60/10 split of training and test
32 | /// data.
33 | /// It is available at: https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion
34 | ///
35 | /// In each case, there are four .gz files to download. Place them in a folder and then point the '_dataLocation'
36 | /// constant below at the folder location.
37 | ///
38 | public class MNIST
39 | {
40 | private static int _epochs = 4;
41 | private static int _trainBatchSize = 64;
42 | private static int _testBatchSize = 128;
43 |
44 | private readonly static int _logInterval = 100;
45 |
// Entry point for the MNIST example: picks a device, prepares the data,
// builds the model, and hands everything to TrainingLoop.
//
//   epochs  -- number of training epochs.
//   timeout -- wall-clock budget in seconds, forwarded to TrainingLoop.
//   logdir  -- optional TensorBoard log directory; null/empty disables logging.
//   dataset -- data set name ("mnist" by default); also the folder name under Downloads.
internal static void Run(int epochs, int timeout, string logdir, string dataset)
{
    _epochs = epochs;

    if (string.IsNullOrEmpty(dataset))
    {
        dataset = "mnist";
    }

    // Prefer CUDA, then Apple's MPS backend, falling back to the CPU.
    var device =
        torch.cuda.is_available() ? torch.CUDA :
        torch.mps_is_available() ? torch.MPS :
        torch.CPU;

    Console.WriteLine();
    Console.WriteLine($"\tRunning MNIST with {dataset} on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}.");
    Console.WriteLine();

    var datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", dataset);

    // Fixed seed for reproducibility.
    random.manual_seed(1);

    var cwd = Environment.CurrentDirectory;

    var writer = String.IsNullOrEmpty(logdir) ? null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName: true);

    var sourceDir = datasetPath;
    var targetDir = Path.Combine(datasetPath, "test_data");

    // Unpack the four raw MNIST .gz files once; an existing target directory
    // is taken to mean the data is already in place.
    if (!Directory.Exists(targetDir))
    {
        Directory.CreateDirectory(targetDir);
        Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-images-idx3-ubyte.gz"), targetDir);
        Decompress.DecompressGZipFile(Path.Combine(sourceDir, "train-labels-idx1-ubyte.gz"), targetDir);
        Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-images-idx3-ubyte.gz"), targetDir);
        Decompress.DecompressGZipFile(Path.Combine(sourceDir, "t10k-labels-idx1-ubyte.gz"), targetDir);
    }

    // Larger batches are affordable on the GPU.
    if (device.type == DeviceType.CUDA)
    {
        _trainBatchSize *= 4;
        _testBatchSize *= 4;
    }

    Console.WriteLine($"\tCreating the model...");

    var model = new TorchSharp.Examples.MNIST.Model("model", device);

    // Standard MNIST normalization constants (mean 0.1307, std 0.3081).
    var normImage = transforms.Normalize(new double[] { 0.1307 }, new double[] { 0.3081 }, device: (Device)device);

    Console.WriteLine($"\tPreparing training and test data...");
    Console.WriteLine();

    using (MNISTReader train = new MNISTReader(targetDir, "train", _trainBatchSize, device: device, shuffle: true, transform: normImage),
                       test = new MNISTReader(targetDir, "t10k", _testBatchSize, device: device, transform: normImage))
    {

        TrainingLoop(dataset, timeout, writer, device, model, train, test);
    }
}
106 |
107 | internal static void TrainingLoop(string dataset, int timeout, TorchSharp.Modules.SummaryWriter writer, Device device, Module model, MNISTReader train, MNISTReader test)
108 | {
109 | var optimizer = optim.Adam(model.parameters());
110 |
111 | var scheduler = optim.lr_scheduler.StepLR(optimizer, 1, 0.7);
112 |
113 | Stopwatch totalTime = new Stopwatch();
114 | totalTime.Start();
115 |
116 | for (var epoch = 1; epoch <= _epochs; epoch++)
117 | {
118 |
119 | Train(model, optimizer, NLLLoss(reduction: Reduction.Mean), device, train, epoch, train.BatchSize, train.Size);
120 | Test(model, NLLLoss(reduction: nn.Reduction.Sum), writer, device, test, epoch, test.Size);
121 |
122 | Console.WriteLine($"End-of-epoch memory use: {GC.GetTotalMemory(false)}");
123 |
124 | if (totalTime.Elapsed.TotalSeconds > timeout) break;
125 | }
126 |
127 | totalTime.Stop();
128 | Console.WriteLine($"Elapsed time: {totalTime.Elapsed.TotalSeconds:F1} s.");
129 |
130 | Console.WriteLine("Saving model to '{0}'", dataset + ".model.bin");
131 | model.save(dataset + ".model.bin");
132 | }
133 |
134 | private static void Train(
135 | Module model,
136 | optim.Optimizer optimizer,
137 | Loss loss,
138 | Device device,
139 | IEnumerable<(Tensor, Tensor)> dataLoader,
140 | int epoch,
141 | long batchSize,
142 | int size)
143 | {
144 | model.train();
145 |
146 | int batchId = 1;
147 |
148 | Console.WriteLine($"Epoch: {epoch}...");
149 |
150 | foreach (var (data, target) in dataLoader)
151 | {
152 | using (var d = torch.NewDisposeScope())
153 | {
154 | optimizer.zero_grad();
155 |
156 | var prediction = model.forward(data);
157 | var output = loss.forward(prediction, target);
158 |
159 | output.backward();
160 |
161 | optimizer.step();
162 |
163 | if (batchId % _logInterval == 0)
164 | {
165 | Console.WriteLine($"\rTrain: epoch {epoch} [{batchId * batchSize} / {size}] Loss: {output.ToSingle():F4}");
166 | }
167 |
168 | batchId++;
169 | }
170 |
171 | }
172 | }
173 |
174 | private static void Test(
175 | Module model,
176 | Loss loss,
177 | TorchSharp.Modules.SummaryWriter writer,
178 | Device device,
179 | IEnumerable<(Tensor, Tensor)> dataLoader,
180 | int epoch,
181 | int size)
182 | {
183 | model.eval();
184 |
185 | double testLoss = 0;
186 | int correct = 0;
187 |
188 | foreach (var (data, target) in dataLoader)
189 | {
190 | using (var d = torch.NewDisposeScope())
191 | {
192 | var prediction = model.forward(data);
193 | var output = loss.forward(prediction, target);
194 | testLoss += output.ToSingle();
195 |
196 | correct += prediction.argmax(1).eq(target).sum().ToInt32();
197 | }
198 | }
199 |
200 | Console.WriteLine($"Size: {size}, Total: {size}");
201 |
202 | Console.WriteLine($"\rTest set: Average loss {(testLoss / size):F4} | Accuracy {((double)correct / size):P2}");
203 |
204 | if (writer != null)
205 | {
206 | writer.add_scalar("MNIST/loss", (float)(testLoss / size), epoch);
207 | writer.add_scalar("MNIST/accuracy", (float)correct / size, epoch);
208 | }
209 | }
210 | }
211 | }
212 |
--------------------------------------------------------------------------------
/src/Utils/ArgumentParser.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.IO;
5 | using System.Threading.Tasks;
6 |
7 | using Newtonsoft.Json;
8 |
9 | namespace TorchSharp.Examples.Utils
10 | {
    /// <summary>
    /// Yet another argument parser.
    /// </summary>
14 | public sealed class ArgumentParser
15 | {
16 | public ArgumentParser(FileInfo argumentConfiguration, IList args)
17 | {
18 | Initialize(File.ReadAllText(argumentConfiguration.FullName), args);
19 | }
20 |
21 | public ArgumentParser(string argumentConfiguration, IList args)
22 | {
23 | Initialize(argumentConfiguration, args);
24 | }
25 |
26 | public void UsingMessage(string name, string positionals)
27 | {
28 | Console.Error.WriteLine("using:");
29 | Console.Error.Write($"{name} ");
30 | foreach (var desc in descriptors)
31 | {
32 | Console.Error.Write($"[--{desc.LongName} | -{desc.ShortName}] {desc.ArgType.ToString().ToLower()} ");
33 | }
34 |
35 | Console.Error.WriteLine($"{positionals}...");
36 |
37 | foreach (var desc in descriptors)
38 | {
39 | Console.Error.WriteLine($"--{desc.LongName} | -{desc.ShortName}: {desc.ArgType.ToString().ToLower()}, {desc.Explanation} ");
40 | }
41 | }
42 | public int Count => positionalArguments.Count;
43 |
44 | public string this[int index]
45 | {
46 | get { return positionalArguments[index]; }
47 | }
48 |
49 | private void Initialize(string argumentConfiguration, IList args)
50 | {
51 | try
52 | {
53 | descriptors = JsonConvert.DeserializeObject>(argumentConfiguration);
54 |
55 | for (int idx = 0; idx < args.Count; ++idx)
56 | {
57 | var arg = args[idx];
58 |
59 | if (arg.StartsWith("--"))
60 | {
61 | // Long form argument, --name=value, --name:value, or --name value
62 | string[] kv = null;
63 |
64 | if (arg.Contains(':'))
65 | {
66 | kv = arg.Substring(2).Split(':');
67 | }
68 | else if (arg.Contains('='))
69 | {
70 | kv = arg.Substring(2).Split('=');
71 | }
72 | else
73 | {
74 | kv = new string[] { arg.Substring(2) };
75 | }
76 |
77 | ProcessArgument(kv, args, descriptors, false, ref idx);
78 | }
79 | else if (arg.StartsWith("-"))
80 | {
81 | // Short form argument, -v value
82 | var key = arg.Substring(1);
83 |
84 | if (key.Length == 1)
85 | {
86 | ProcessArgument(new string[] { key }, args, descriptors, true, ref idx);
87 | }
88 | else
89 | {
90 | ProcessFlags(key, args, descriptors);
91 | }
92 | }
93 | else
94 | {
95 | // Positional argument, always interpreted as a string
96 | positionalArguments.Add(arg);
97 | }
98 | }
99 | }
100 | catch (Exception e)
101 | {
102 | Console.Error.WriteLine($"Internal error reading command arguments definition file: {e.Message}");
103 | }
104 | }
105 |
106 | private void ProcessFlags(string key, IList args, List arguments)
107 | {
108 | foreach (var ch in key)
109 | {
110 | var name = ch.ToString();
111 |
112 | foreach (var argDesc in arguments)
113 | {
114 | if (name.Equals(argDesc.ShortName))
115 | {
116 | if (argDesc.ArgType != ArgumentDescriptor.ArgumentType.Flag)
117 | {
118 | Console.Error.WriteLine("Mulitple short-form arguments are only valid if they do not take a value.");
119 | continue;
120 | }
121 | namedArguments.Add(argDesc.LongName, true);
122 | break;
123 | }
124 | }
125 | }
126 | }
127 |
128 | private void ProcessArgument(string[] kv, IList args, List arguments, bool shortForm, ref int idx)
129 | {
130 | var name = kv[0];
131 |
132 | var argType = ArgumentDescriptor.ArgumentType.Flag;
133 |
134 | foreach (var argDesc in arguments)
135 | {
136 | if (!shortForm && name.ToLowerInvariant().Equals(argDesc.LongName.ToLowerInvariant()) ||
137 | shortForm && name.Equals(argDesc.ShortName))
138 | {
139 | argType = argDesc.ArgType;
140 | name = argDesc.LongName;
141 | break;
142 | }
143 | }
144 |
145 | try
146 | {
147 | switch (argType)
148 | {
149 | case ArgumentDescriptor.ArgumentType.Flag:
150 | namedArguments.Add(name, true);
151 | break;
152 | case ArgumentDescriptor.ArgumentType.Boolean:
153 | {
154 | if (bool.TryParse((kv.Length == 1) ? args[++idx] : kv[1], out bool value))
155 | {
156 | namedArguments.Add(name, value);
157 | }
158 | break;
159 | }
160 | case ArgumentDescriptor.ArgumentType.Integer:
161 | {
162 | if (int.TryParse((kv.Length == 1) ? args[++idx] : kv[1], out int value))
163 | {
164 | namedArguments.Add(name, value);
165 | }
166 | break;
167 | }
168 | case ArgumentDescriptor.ArgumentType.String:
169 | {
170 | var value = (kv.Length == 1) ? args[++idx] : kv[1];
171 | namedArguments.Add(name, value);
172 | break;
173 | }
174 | case ArgumentDescriptor.ArgumentType.List:
175 | {
176 | var value = ((kv.Length == 1) ? args[++idx] : kv[1]).Split(',');
177 | namedArguments.Add(name, value);
178 | break;
179 | }
180 | }
181 | }
182 | catch(ArgumentOutOfRangeException)
183 | {
184 | }
185 | }
186 |
187 | public bool TryGetValueBool(string name, out bool value)
188 | {
189 | return TryGetValue(name, out value);
190 | }
191 |
192 | public bool TryGetValueInt(string name, out int value)
193 | {
194 | return TryGetValue(name, out value);
195 | }
196 |
197 | public bool TryGetValueString(string name, out string value)
198 | {
199 | return TryGetValue(name, out value);
200 | }
201 |
202 | public bool TryGetValueStrings(string name, out string[] value)
203 | {
204 | return TryGetValue(name, out value);
205 | }
206 |
207 | public bool TryGetValue(string name, out T value, T @default = default(T))
208 | {
209 | if (namedArguments.TryGetValue(name, out var obj) && obj is T)
210 | {
211 | value = (T)obj;
212 | return true;
213 | }
214 | value = @default;
215 | return false;
216 | }
217 |
218 | private List descriptors = null;
219 |
220 | private Dictionary namedArguments = new Dictionary();
221 | private List positionalArguments = new List();
222 | }
223 | }
224 |
--------------------------------------------------------------------------------
/src/CSharp/Models/ResNet.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.Collections.Generic;
4 | using TorchSharp;
5 | using static TorchSharp.torch;
6 | using static TorchSharp.torch.nn;
7 |
8 | namespace TorchSharp.Examples
9 | {
    /// <summary>
    /// Modified version of ResNet to classify CIFAR10 32x32 images.
    /// </summary>
13 | public class ResNet : Module
14 | {
15 | // The code here is is loosely based on https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
16 | // Licence and copypright notice at: https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE
17 |
18 | private readonly long[] planes = new long[] { 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024 };
19 | private readonly long[] strides = new long[] { 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1 };
20 |
21 | private readonly Module layers;
22 | private int in_planes = 64;
23 |
24 | public static ResNet ResNet18(int numClasses, Device device = null)
25 | {
26 | return new ResNet(
27 | "ResNet18",
28 | (name, in_planes, planes, stride) => new BasicBlock(name, in_planes, planes, stride),
29 | BasicBlock.expansion, new int[] { 2, 2, 2, 2 },
30 | numClasses,
31 | device);
32 | }
33 |
34 | public static ResNet ResNet34(int numClasses, Device device = null)
35 | {
36 | return new ResNet(
37 | "ResNet34",
38 | (name, in_planes, planes, stride) => new BasicBlock(name, in_planes, planes, stride),
39 | BasicBlock.expansion, new int[] { 3, 4, 6, 3 },
40 | numClasses,
41 | device);
42 | }
43 |
44 | public static ResNet ResNet50(int numClasses, Device device = null)
45 | {
46 | return new ResNet(
47 | "ResNet50",
48 | (name, in_planes, planes, stride) => new Bottleneck(name, in_planes, planes, stride),
49 | Bottleneck.expansion, new int[] { 3, 4, 6, 3 },
50 | numClasses,
51 | device);
52 | }
53 |
54 | public static ResNet ResNet101(int numClasses, Device device = null)
55 | {
56 | return new ResNet(
57 | "ResNet101",
58 | (name, in_planes, planes, stride) => new Bottleneck(name, in_planes, planes, stride),
59 | Bottleneck.expansion, new int[] { 3, 4, 23, 3 },
60 | numClasses,
61 | device);
62 | }
63 |
64 | public static ResNet ResNet152(int numClasses, Device device = null)
65 | {
66 | return new ResNet(
67 | "ResNet101",
68 | (name, in_planes, planes, stride) => new Bottleneck(name, in_planes, planes, stride),
69 | Bottleneck.expansion, new int[] { 3, 4, 36, 3 },
70 | numClasses,
71 | device);
72 | }
73 |
74 | public ResNet(string name, Func> block, int expansion, IList num_blocks, int numClasses, Device device = null) : base(name)
75 | {
76 | if (planes.Length != strides.Length) throw new ArgumentException("'planes' and 'strides' must have the same length.");
77 |
78 | var modules = new List<(string, Module)>();
79 |
80 | modules.Add(($"conv2d-first", Conv2d(3, 64, kernel_size:3, stride: 1, padding: 1, bias: false)));
81 | modules.Add(($"bnrm2d-first", BatchNorm2d(64)));
82 | modules.Add(($"relu-first", ReLU(inplace:true)));
83 | MakeLayer(modules, block, expansion, 64, num_blocks[0], 1);
84 | MakeLayer(modules, block, expansion, 128, num_blocks[1], 2);
85 | MakeLayer(modules, block, expansion, 256, num_blocks[2], 2);
86 | MakeLayer(modules, block, expansion, 512, num_blocks[3], 2);
87 | modules.Add(("avgpool", AvgPool2d(new long[] { 4, 4 })));
88 | modules.Add(("flatten", Flatten()));
89 | modules.Add(($"linear", Linear(512 * expansion, numClasses)));
90 |
91 | layers = Sequential(modules);
92 |
93 | RegisterComponents();
94 |
95 | if (device != null && device.type != DeviceType.CPU)
96 | this.to(device);
97 | }
98 |
99 | private void MakeLayer(List<(string, Module)> modules, Func> block, int expansion, int planes, int num_blocks, int stride)
100 | {
101 | var strides = new List();
102 | strides.Add(stride);
103 | for (var i = 0; i < num_blocks-1; i++) { strides.Add(1); }
104 |
105 | for (var i = 0; i < strides.Count; i++) {
106 | var s = strides[i];
107 | modules.Add(($"blck-{planes}-{i}", block($"blck-{planes}-{i}", in_planes, planes, s)));
108 | in_planes = planes * expansion;
109 | }
110 | }
111 |
112 | public override Tensor forward(Tensor input)
113 | {
114 | return layers.forward(input);
115 | }
116 |
117 | class BasicBlock : Module
118 | {
119 | public BasicBlock (string name, int in_planes, int planes, int stride) : base(name)
120 | {
121 | var modules = new List<(string, Module)>();
122 |
123 | modules.Add(($"{name}-conv2d-1", Conv2d(in_planes, planes, kernel_size:3, stride: stride, padding: 1, bias: false)));
124 | modules.Add(($"{name}-bnrm2d-1", BatchNorm2d(planes)));
125 | modules.Add(($"{name}-relu-1", ReLU(inplace: true)));
126 | modules.Add(($"{name}-conv2d-2", Conv2d(planes, planes, kernel_size:3, stride: 1, padding: 1, bias: false)));
127 | modules.Add(($"{name}-bnrm2d-2", BatchNorm2d(planes)));
128 |
129 | layers = Sequential(modules);
130 |
131 | if (stride != 1 || in_planes != expansion*planes) {
132 | shortcut = Sequential(
133 | ($"{name}-conv2d-3", Conv2d(in_planes, expansion * planes, kernel_size:1, stride: stride, bias: false)),
134 | ($"{name}-bnrm2d-3", BatchNorm2d(expansion * planes)));
135 | }
136 | else {
137 | shortcut = Sequential();
138 | }
139 |
140 | modules.Add(($"{name}-relu-2", ReLU(inplace: true)));
141 |
142 | RegisterComponents();
143 | }
144 |
145 | public override Tensor forward(Tensor t)
146 | {
147 | var x = layers.forward(t);
148 | var y = shortcut.forward(t);
149 | return x.add_(y).relu_();
150 | }
151 |
152 | public static int expansion = 1;
153 |
154 | private readonly Module layers;
155 | private readonly Module shortcut;
156 | }
157 |
158 | class Bottleneck : Module
159 | {
160 | public Bottleneck(string name, int in_planes, int planes, int stride) : base(name)
161 | {
162 | var modules = new List<(string, Module)>();
163 |
164 | modules.Add(($"{name}-conv2d-1", Conv2d(in_planes, planes, kernel_size:1, bias: false)));
165 | modules.Add(($"{name}-bnrm2d-1", BatchNorm2d(planes)));
166 | modules.Add(($"{name}relu-1", ReLU(inplace:true)));
167 | modules.Add(($"{name}-conv2d-2", Conv2d(planes, planes, kernel_size:3, stride: stride, padding: 1, bias: false)));
168 | modules.Add(($"{name}-bnrm2d-2", BatchNorm2d(planes)));
169 | modules.Add(($"{name}relu-2", ReLU(inplace: true)));
170 | modules.Add(($"{name}-conv2d-3", Conv2d(planes, expansion * planes, kernel_size:1, bias: false)));
171 | modules.Add(($"{name}-bnrm2d-3", BatchNorm2d(expansion * planes)));
172 |
173 | layers = Sequential(modules);
174 |
175 | if (stride != 1 || in_planes != expansion * planes) {
176 | shortcut = Sequential(
177 | ($"{name}-conv2d-4", Conv2d(in_planes, expansion * planes, kernel_size:1, stride: stride, bias: false)),
178 | ($"{name}-bnrm2d-4", BatchNorm2d(expansion * planes)));
179 | } else {
180 | shortcut = Sequential();
181 | }
182 |
183 | RegisterComponents();
184 | }
185 |
186 | public override Tensor forward(Tensor t)
187 | {
188 | var x = layers.forward(t);
189 | var y = shortcut.forward(t);
190 | return x.add_(y).relu_();
191 | }
192 |
193 | public static int expansion = 4;
194 |
195 | private readonly Module layers;
196 | private readonly Module shortcut;
197 | }
198 | }
199 | }
200 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/CIFAR10.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Collections.Generic;
6 | using System.Diagnostics;
7 | using System.Runtime.InteropServices;
8 |
9 | using TorchSharp;
10 | using static TorchSharp.torchvision;
11 |
12 | using TorchSharp.Examples;
13 | using TorchSharp.Examples.Utils;
14 |
15 | using static TorchSharp.torch;
16 |
17 | using static TorchSharp.torch.nn;
18 | using static TorchSharp.torch.nn.functional;
19 |
20 | namespace CSharpExamples
21 | {
    /// <summary>
    /// Driver for various models trained and evaluated on the CIFAR10 small (32x32) color image data set.
    /// </summary>
    /// <remarks>
    /// The dataset for this example can be found at: https://www.cs.toronto.edu/~kriz/cifar.html
    /// Download the binary file, and place it in a dedicated folder, e.g. 'CIFAR10,' then edit
    /// the '_dataLocation' definition below to point at the right folder.
    ///
    /// Note: so far, CIFAR10 is supported, but not CIFAR100.
    /// </remarks>
32 | class CIFAR10
33 | {
34 | private readonly static string _dataset = "CIFAR10";
35 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", _dataset);
36 |
37 | private static int _trainBatchSize = 64;
38 | private static int _testBatchSize = 128;
39 |
40 | private readonly static int _logInterval = 25;
41 | private readonly static int _numClasses = 10;
42 |
43 | internal static void Run(int epochs, int timeout, string logdir, string modelName)
44 | {
45 | torch.random.manual_seed(1);
46 |
47 | var device =
48 | // This worked on a GeForce RTX 2080 SUPER with 8GB, for all the available network architectures.
49 | // It may not fit with less memory than that, but it's worth modifying the batch size to fit in memory.
50 | torch.cuda.is_available() ? torch.CUDA :
51 | torch.mps_is_available() ? torch.MPS :
52 | torch.CPU;
53 |
54 | if (device.type != DeviceType.CPU)
55 | {
56 | _trainBatchSize *= 8;
57 | _testBatchSize *= 8;
58 | }
59 |
60 | Console.WriteLine();
61 | Console.WriteLine($"\tRunning {modelName} with {_dataset} on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}.");
62 | Console.WriteLine();
63 |
64 | var writer = String.IsNullOrEmpty(logdir) ? null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName: true);
65 |
66 | var sourceDir = _dataLocation;
67 | var targetDir = Path.Combine(_dataLocation, "test_data");
68 |
69 | if (!Directory.Exists(targetDir))
70 | {
71 | Directory.CreateDirectory(targetDir);
72 | Decompress.ExtractTGZ(Path.Combine(sourceDir, "cifar-10-binary.tar.gz"), targetDir);
73 | }
74 |
75 | Console.WriteLine($"\tCreating the model...");
76 |
77 | Module model = null;
78 |
79 | switch (modelName.ToLower())
80 | {
81 | case "alexnet":
82 | model = new AlexNet(modelName, _numClasses, device);
83 | break;
84 | case "mobilenet":
85 | model = new MobileNet(modelName, _numClasses, device);
86 | break;
87 | case "vgg11":
88 | case "vgg13":
89 | case "vgg16":
90 | case "vgg19":
91 | model = new VGG(modelName, _numClasses, device);
92 | break;
93 | case "resnet18":
94 | model = ResNet.ResNet18(_numClasses, device);
95 | break;
96 | case "resnet34":
97 | _testBatchSize /= 4;
98 | model = ResNet.ResNet34(_numClasses, device);
99 | break;
100 | case "resnet50":
101 | _trainBatchSize /= 6;
102 | _testBatchSize /= 8;
103 | model = ResNet.ResNet50(_numClasses, device);
104 | break;
105 | case "resnet101":
106 | _trainBatchSize /= 6;
107 | _testBatchSize /= 8;
108 | model = ResNet.ResNet101(_numClasses, device);
109 | break;
110 | case "resnet152":
111 | _testBatchSize /= 4;
112 | model = ResNet.ResNet152(_numClasses, device);
113 | break;
114 | }
115 |
116 | var hflip = transforms.HorizontalFlip();
117 | var gray = transforms.Grayscale(3);
118 | var rotate = transforms.Rotate(90);
119 | var contrast = transforms.AdjustContrast(1.25);
120 |
121 | Console.WriteLine($"\tPreparing training and test data...");
122 | Console.WriteLine();
123 |
124 | using (var train = new CIFARReader(targetDir, false, _trainBatchSize, shuffle: true, device: device, transforms: new ITransform[] { }))
125 | using (var test = new CIFARReader(targetDir, true, _testBatchSize, device: device))
126 | using (var optimizer = torch.optim.Adam(model.parameters(), 0.001))
127 | {
128 |
129 | Stopwatch totalSW = new Stopwatch();
130 | totalSW.Start();
131 |
132 | for (var epoch = 1; epoch <= epochs; epoch++)
133 | {
134 |
135 | Stopwatch epchSW = new Stopwatch();
136 | epchSW.Start();
137 |
138 | var loss = NLLLoss();
139 |
140 | Train(model, optimizer, loss, train.Data(), epoch, _trainBatchSize, train.Size);
141 | Test(model, loss, writer, modelName.ToLower(), test.Data(), epoch, test.Size);
142 |
143 | epchSW.Stop();
144 | Console.WriteLine($"Elapsed time for this epoch: {epchSW.Elapsed.TotalSeconds} s.");
145 |
146 | if (totalSW.Elapsed.TotalSeconds > timeout) break;
147 | }
148 |
149 | totalSW.Stop();
150 | Console.WriteLine($"Elapsed training time: {totalSW.Elapsed} s.");
151 | }
152 |
153 | model.Dispose();
154 | }
155 |
156 | private static void Train(
157 | Module model,
158 | torch.optim.Optimizer optimizer,
159 | Loss loss,
160 | IEnumerable<(Tensor, Tensor)> dataLoader,
161 | int epoch,
162 | long batchSize,
163 | long size)
164 | {
165 | model.train();
166 |
167 | int batchId = 1;
168 | long total = 0;
169 | long correct = 0;
170 |
171 | Console.WriteLine($"Epoch: {epoch}...");
172 |
173 | foreach (var (data, target) in dataLoader)
174 | {
175 |
176 | using (var d = torch.NewDisposeScope())
177 | {
178 | optimizer.zero_grad();
179 |
180 | var prediction = model.forward(data);
181 | var lsm = log_softmax(prediction, 1);
182 | var output = loss.forward(lsm, target);
183 |
184 | output.backward();
185 |
186 | optimizer.step();
187 |
188 | total += target.shape[0];
189 |
190 | correct += prediction.argmax(1).eq(target).sum().ToInt64();
191 |
192 | if (batchId % _logInterval == 0)
193 | {
194 | var count = Math.Min(batchId * batchSize, size);
195 | Console.WriteLine($"\rTrain: epoch {epoch} [{count} / {size}] Loss: {output.ToSingle().ToString("0.000000")} | Accuracy: { ((float)correct / total).ToString("0.000000") }");
196 | }
197 |
198 | batchId++;
199 | }
200 | }
201 | }
202 |
203 | private static void Test(
204 | Module model,
205 | Loss loss,
206 | TorchSharp.Modules.SummaryWriter writer,
207 | string modelName,
208 | IEnumerable<(Tensor, Tensor)> dataLoader,
209 | int epoch,
210 | long size)
211 | {
212 | model.eval();
213 |
214 | double testLoss = 0;
215 | long correct = 0;
216 | int batchCount = 0;
217 |
218 | foreach (var (data, target) in dataLoader)
219 | {
220 |
221 | using (var d = torch.NewDisposeScope())
222 | {
223 | var prediction = model.forward(data);
224 | var lsm = log_softmax(prediction, 1);
225 | var output = loss.forward(lsm, target);
226 |
227 | testLoss += output.ToSingle();
228 | batchCount += 1;
229 |
230 | correct += prediction.argmax(1).eq(target).sum().ToInt64();
231 | }
232 | }
233 |
234 | Console.WriteLine($"\rTest set: Average loss {(testLoss / batchCount).ToString("0.0000")} | Accuracy {((float)correct / size).ToString("0.0000")}");
235 |
236 | if (writer != null)
237 | {
238 | writer.add_scalar($"{modelName}/loss", (float)(testLoss / batchCount), epoch);
239 | writer.add_scalar($"{modelName}/accuracy", (float)correct / size, epoch);
240 | }
241 | }
242 | }
243 | }
244 |
--------------------------------------------------------------------------------
/src/CSharp/CSharpExamples/SequenceToSequence.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | using System;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Collections.Generic;
6 | using System.Diagnostics;
7 |
8 | using TorchSharp;
9 | using static TorchSharp.torchvision;
10 |
11 | using TorchSharp.Examples;
12 | using TorchSharp.Examples.Utils;
13 |
14 | using static TorchSharp.torch;
15 |
16 | using static TorchSharp.torch.nn;
17 | using static TorchSharp.torch.nn.functional;
18 |
19 | namespace CSharpExamples
20 | {
21 |
    /// <summary>
    /// This example is based on the PyTorch tutorial at:
    ///
    /// https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    ///
    /// It relies on the WikiText2 dataset, which can be downloaded at:
    ///
    /// https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
    ///
    /// After downloading, extract the files using the defaults (Windows only).
    /// </summary>
33 | public class SequenceToSequence
34 | {
35 | // This path assumes that you're running this on Windows.
36 | private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1");
37 |
38 | private const long emsize = 200;
39 | private const long nhid = 200;
40 | private const long nlayers = 2;
41 | private const long nhead = 2;
42 | private const double dropout = 0.2;
43 |
44 | private const int batch_size = 64;
45 | private const int eval_batch_size = 32;
46 |
47 | internal static void Run(int epochs, int timeout, string logdir)
48 |
49 | {
50 | torch.random.manual_seed(1);
51 |
52 | var cwd = Environment.CurrentDirectory;
53 |
54 | var device =
55 | torch.cuda.is_available() ? torch.CUDA :
56 | torch.mps_is_available() ? torch.MPS :
57 | torch.CPU;
58 |
59 | Console.WriteLine();
60 | Console.WriteLine($"\tRunning SequenceToSequence on {device.type.ToString()} for {epochs} epochs, terminating after {TimeSpan.FromSeconds(timeout)}.");
61 | Console.WriteLine();
62 |
63 | Console.WriteLine($"\tPreparing training and test data...");
64 |
65 | var vocab_iter = TorchText.Datasets.WikiText2("train", _dataLocation);
66 | var tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english");
67 |
68 | var counter = new TorchText.Vocab.Counter();
69 | foreach (var item in vocab_iter)
70 | {
71 | counter.update(tokenizer(item));
72 | }
73 |
74 | var vocab = new TorchText.Vocab.Vocab(counter);
75 |
76 | var (train_iter, valid_iter, test_iter) = TorchText.Datasets.WikiText2(_dataLocation);
77 |
78 | var train_data = Batchify(ProcessInput(train_iter, tokenizer, vocab), batch_size).to((Device)device);
79 | var valid_data = Batchify(ProcessInput(valid_iter, tokenizer, vocab), eval_batch_size).to((Device)device);
80 | var test_data = Batchify(ProcessInput(test_iter, tokenizer, vocab), eval_batch_size).to((Device)device);
81 |
82 | var bptt = 32;
83 |
84 | var ntokens = vocab.Count;
85 |
86 | Console.WriteLine($"\tCreating the model...");
87 | Console.WriteLine();
88 |
89 | var model = new TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to((Device)device);
90 | var loss = CrossEntropyLoss();
91 | var lr = 2.50;
92 | var optimizer = torch.optim.SGD(model.parameters(), lr);
93 | var scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.95, last_epoch: 15);
94 |
95 | var writer = String.IsNullOrEmpty(logdir) ? null : torch.utils.tensorboard.SummaryWriter(logdir, createRunName: true);
96 |
97 | var totalTime = new Stopwatch();
98 | totalTime.Start();
99 |
100 | foreach (var epoch in Enumerable.Range(1, epochs))
101 | {
102 |
103 | var sw = new Stopwatch();
104 | sw.Start();
105 |
106 | train(epoch, train_data, model, loss, bptt, ntokens, optimizer);
107 |
108 | var val_loss = evaluate(valid_data, model, loss, bptt, ntokens, optimizer);
109 | sw.Stop();
110 |
111 | Console.WriteLine($"\nEnd of epoch: {epoch} | lr: {optimizer.ParamGroups.First().LearningRate:0.00} | time: {sw.Elapsed.TotalSeconds:0.0}s | loss: {val_loss:0.00}\n");
112 | scheduler.step();
113 |
114 | if (writer != null)
115 | {
116 | writer.add_scalar("seq2seq/loss", (float)val_loss, epoch);
117 | }
118 |
119 | if (totalTime.Elapsed.TotalSeconds > timeout) break;
120 | }
121 |
122 | var tst_loss = evaluate(test_data, model, loss, bptt, ntokens, optimizer);
123 | totalTime.Stop();
124 |
125 | Console.WriteLine($"\nEnd of training | time: {totalTime.Elapsed.TotalSeconds:0.0}s | loss: {tst_loss:0.00}\n");
126 | }
127 |
128 | private static void train(int epoch, Tensor train_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer)
129 | {
130 | model.train();
131 |
132 | var total_loss = 0.0f;
133 |
134 | using (var d = torch.NewDisposeScope())
135 | {
136 | var batch = 0;
137 | var log_interval = 200;
138 |
139 | var src_mask = model.GenerateSquareSubsequentMask(bptt);
140 |
141 | var tdlen = train_data.shape[0];
142 |
143 |
144 | for (int i = 0; i < tdlen - 1; batch++, i += bptt)
145 | {
146 |
147 | var (data, targets) = GetBatch(train_data, i, bptt);
148 | optimizer.zero_grad();
149 |
150 | if (data.shape[0] != bptt)
151 | {
152 | src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
153 | }
154 |
155 | using (var output = model.forward(data, src_mask))
156 | {
157 | var loss = criterion.forward(output.view(-1, ntokens), targets);
158 | loss.backward();
159 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5);
160 | optimizer.step();
161 |
162 | total_loss += loss.to(torch.CPU).item();
163 | }
164 |
165 | if (batch % log_interval == 0 && batch > 0)
166 | {
167 | var cur_loss = total_loss / log_interval;
168 | Console.WriteLine($"epoch: {epoch} | batch: {batch} / {tdlen / bptt} | loss: {cur_loss:0.00}");
169 | total_loss = 0;
170 | }
171 |
172 | d.DisposeEverythingBut(src_mask);
173 | }
174 | }
175 | }
176 |
/// <summary>
/// Computes the length-weighted average loss of <paramref name="model"/> over
/// <paramref name="eval_data"/> without updating any weights.
/// </summary>
/// <param name="eval_data">Batched token tensor of shape [seq_len, batch_size].</param>
/// <param name="model">Model to evaluate (switched to eval mode).</param>
/// <param name="criterion">Loss applied to logits reshaped to [-1, ntokens] vs. targets.</param>
/// <param name="bptt">Back-propagation-through-time window length.</param>
/// <param name="ntokens">Vocabulary size.</param>
/// <param name="optimizer">Unused; kept for signature compatibility with callers.</param>
/// <returns>Sum of per-chunk mean losses weighted by chunk length, divided by the sequence length.</returns>
private static double evaluate(Tensor eval_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer)
{
    model.eval();

    using (var d = torch.NewDisposeScope())
    {
        // Reused across chunks; rebuilt only for the final, shorter chunk.
        var src_mask = model.GenerateSquareSubsequentMask(bptt);

        var total_loss = 0.0f;
        var batch = 0;

        for (int i = 0; i < eval_data.shape[0] - 1; batch++, i += bptt)
        {
            var (data, targets) = GetBatch(eval_data, i, bptt);
            if (data.shape[0] != bptt)
            {
                src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
            }
            using (var output = model.forward(data, src_mask))
            {
                var loss = criterion.forward(output.view(-1, ntokens), targets);
                // Weight each chunk's mean loss by its length. item<T>() is
                // generic in TorchSharp; the dropped type argument is restored.
                total_loss += data.shape[0] * loss.to(torch.CPU).item<float>();
            }

            data.Dispose();
            targets.Dispose();

            d.DisposeEverythingBut(src_mask);
        }

        return total_loss / eval_data.shape[0];
    }
}
213 |
/// <summary>
/// Tokenizes and numericalizes a corpus: each input line is tokenized, mapped
/// through the vocabulary, and the per-line index tensors are concatenated
/// into one flat int64 tensor.
/// </summary>
/// <remarks>
/// Generic type arguments were stripped by the text dump; they are restored
/// here so the method is valid C# again.
/// </remarks>
static Tensor ProcessInput(IEnumerable<string> iter, Func<string, IEnumerable<string>> tokenizer, TorchText.Vocab.Vocab vocab)
{
    List<Tensor> data = new List<Tensor>();
    foreach (var item in iter)
    {
        List<long> itemData = new List<long>();
        foreach (var token in tokenizer(item))
        {
            itemData.Add(vocab[token]);
        }
        data.Add(torch.tensor(itemData.ToArray(), torch.int64));
    }

    // Empty lines produce zero-element tensors; drop them before concatenation.
    var result = torch.cat(data.Where(t => t.NumberOfElements > 0).ToList(), 0);
    return result;
}
230 |
// Trims the flat token tensor to a whole number of batches and reshapes it to
// [seq_len, batch_size]; each column is an independent token stream.
static Tensor Batchify(Tensor data, int batch_size)
{
    // Drop the trailing remainder that does not fill a full batch.
    var nbatch = data.shape[0] / batch_size;
    using var d2 = data.narrow(0, 0, nbatch * batch_size).view(batch_size, -1).t();
    // contiguous() materializes the transposed view; d2 itself is disposed on exit.
    return d2.contiguous();
}
237 |
// Returns (input, target) for the BPTT chunk starting at `index`: the target is
// the input shifted one token ahead and flattened, as in next-token LM training.
static (Tensor, Tensor) GetBatch(Tensor source, int index, int bptt)
{
    // The final chunk may be shorter than bptt.
    var len = Math.Min(bptt, source.shape[0] - 1 - index);
    var data = source[TensorIndex.Slice(index, index + len)];
    var target = source[TensorIndex.Slice(index + 1, index + 1 + len)].reshape(-1);
    return (data, target);
}
245 |
246 | }
247 | }
248 |
--------------------------------------------------------------------------------
/src/FSharp/FSharpExamples/SequenceToSequence.fs:
--------------------------------------------------------------------------------
1 | // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
2 | module FSharpExamples.SequenceToSequence
3 |
4 | open System
5 | open System.IO
6 | open System.Linq
7 | open System.Diagnostics
8 | open System.Collections.Generic
9 |
10 | open TorchSharp
11 | open type TorchSharp.torch.nn
12 | open type TorchSharp.torch.optim
13 |
14 | open TorchSharp.Examples
15 |
16 | // This example is based on the PyTorch tutorial at:
17 | //
18 | // https://pytorch.org/tutorials/beginner/transformer_tutorial.html
19 | //
20 | // It relies on the WikiText2 dataset, which can be downloaded at:
21 | //
22 | // https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
23 | //
24 | // After downloading, extract the files using the defaults (Windows only).
25 | //
26 |
// Model hyperparameters (mirrors the C# SequenceToSequence example).
let emsize = 200L          // embedding dimension
let nhidden = 200L         // feed-forward dimension inside each encoder layer
let nlayers = 2L           // number of TransformerEncoder layers
let nheads = 2L            // attention heads
let dropout = 0.2
let bptt = 32L             // back-propagation-through-time window

let batch_size = 64L
let eval_batch_size = 256L

let epochs = 50

let logInterval = 200      // batches between progress reports

let cmdArgs = Environment.GetCommandLineArgs()

let datasetPath = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1")

torch.random.manual_seed(1L) |> ignore

// BUG FIX: CUDA availability is reported by TorchSharp itself
// (torch.cuda.is_available()), not by the TorchText dataset helpers.
let hasCUDA = torch.cuda.is_available()

let device = if hasCUDA then torch.CUDA else torch.CPU

let criterion x y = torch.nn.functional.cross_entropy(x,y,reduction=Reduction.Mean)
52 |
/// Adds sinusoidal position information to token embeddings, following
/// "Attention Is All You Need". The table `pe` is precomputed once for
/// `maxLen` positions and added to the input in `forward`.
type PositionalEncoding(dmodel, maxLen) as this =
    inherit Module("PositionalEncoding")

    let dropout = Dropout(dropout)
    // Encoding table, initially [maxLen; dmodel]; reshaped below.
    let mutable pe = torch.zeros([| maxLen; dmodel|])

    do
        let position = torch.arange(0L.ToScalar(), maxLen.ToScalar(), 1L.ToScalar()).unsqueeze(1L)
        let divTerm = (torch.arange(0L.ToScalar(), dmodel.ToScalar(), 2L.ToScalar()) * (-Math.Log(10000.0) / (float dmodel)).ToScalar()).exp()

        let NULL = System.Nullable()

        // See: https://github.com/dotnet/fsharp/issues/9369 -- for now we have to use an explicit array within the index
        //
        // Even-indexed columns get sin, odd-indexed columns get cos.
        pe.[ [| torch.TensorIndex.Ellipsis; torch.TensorIndex.Slice(0L, NULL, 2L) |] ] <- (position * divTerm).sin()
        pe.[ [| torch.TensorIndex.Ellipsis; torch.TensorIndex.Slice(1L, NULL, 2L) |] ] <- (position * divTerm).cos()

        pe <- pe.unsqueeze(0L).transpose(0L,1L)

        this.RegisterComponents()

    override _.forward(t) =
        let NULL = System.Nullable()
        // Add the first t.shape[0] rows of the table to the input, then dropout.
        use x = t + pe.[torch.TensorIndex.Slice(NULL, t.shape.[0]), torch.TensorIndex.Slice()]
        dropout.forward(x)
78 |
/// Transformer language model: token embedding (scaled by sqrt(emsize)),
/// positional encoding, a stack of encoder layers, and a linear decoder
/// projecting back to vocabulary logits.
type TransformerModel(ntokens, device:torch.Device) as this =
    inherit Module("Transformer")

    let pos_encoder = new PositionalEncoding(emsize, 5000L)
    let encoder_layers = TransformerEncoderLayer(emsize, nheads, nhidden, dropout)
    let transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    let encoder = Embedding(ntokens, emsize)
    let decoder = Linear(emsize, ntokens)

    // Embeddings are scaled by sqrt(d_model), per the original paper.
    let sqrEmSz = MathF.Sqrt(float32 emsize).ToScalar()

    do
        let initrange = 0.1

        init.uniform_(encoder.weight, -initrange, initrange) |> ignore
        init.zeros_(decoder.bias) |> ignore
        init.uniform_(decoder.weight, -initrange, initrange) |> ignore

        this.RegisterComponents()

        // Move to CUDA only after components are registered.
        if device.``type`` = DeviceType.CUDA then
            this.``to``(device) |> ignore

    override _.forward(t, mask) =
        let src = pos_encoder.forward(encoder.forward(t) * sqrEmSz)
        let enc = transformer_encoder.call(src, mask)
        decoder.forward(enc)

    /// Causal attention mask: 0.0 on and below the diagonal, -inf above it,
    /// so position i may only attend to positions <= i.
    member _.GenerateSquareSubsequentMask(size:int64) =
        use mask = torch.ones([|size;size|]).eq(torch.tensor(1.0f)).triu().transpose(0L,1L)
        use maskIsZero = mask.eq(torch.tensor(0.0f))
        use maskIsOne = mask.eq(torch.tensor(1.0f))
        mask.to_type(torch.float32)
            .masked_fill(maskIsZero, Single.NegativeInfinity.ToScalar())
            .masked_fill(maskIsOne, 0.0f.ToScalar()).``to``(device)
114 |
/// Tokenizes and numericalizes each line of the corpus, then concatenates the
/// non-empty per-line index tensors into a single flat int64 tensor.
let process_input (iter:string seq) (tokenizer:string->string seq) (vocab:TorchText.Vocab.Vocab) =
    // Map one line of text to a tensor of vocabulary indices.
    let lineTensor line =
        let indices =
            tokenizer line
            |> Seq.map (fun token -> int64 vocab.[token])
            |> Seq.toArray
        torch.tensor(indices)
    let pieces =
        iter
        |> Seq.map lineTensor
        |> Seq.filter (fun t -> t.NumberOfElements > 0L)  // skip empty lines
        |> Seq.toArray
    torch.cat(pieces, 0L)
124 |
/// Trims `data` to a whole number of batches and reshapes it to
/// [seq_len, batchSize] on the target device.
let batchify (data:torch.Tensor) batchSize (device:torch.Device) =
    let nbatch = data.shape.[0] / batchSize
    // `use` disposes the intermediate transposed view once the contiguous
    // copy has been made — the original `let` leaked it, unlike the C# twin
    // which wraps it in `using var`.
    use d2 = data.narrow(0L, 0L, nbatch * batchSize).view(batchSize, -1L).t()
    d2.contiguous().``to``(device)
129 |
/// Returns (input, target) for the bptt-sized chunk starting at `index`;
/// the target is the input shifted one token ahead and flattened.
let get_batch (source:torch.Tensor) (index:int64) =

    // The final chunk may be shorter than bptt.
    let len = min bptt (source.shape.[0]-1L-index)
    let data = source.[torch.TensorIndex.Slice(index, index + len)]
    let target = source.[torch.TensorIndex.Slice(index + 1L, index + 1L + len)].reshape(-1L)
    data,target
136 |
/// Runs one training epoch over `trainData` in bptt-sized chunks, logging the
/// average loss every `logInterval` batches.
let train epoch (model:TransformerModel) (optimizer:Optimizer) (trainData:torch.Tensor) ntokens =

    model.train()

    let mutable total_loss = 0.0f
    // Causal mask is reused across batches; rebuilt (and the old one disposed)
    // only when the final chunk is shorter than bptt.
    let mutable src_mask = model.GenerateSquareSubsequentMask(bptt)

    let mutable batch = 0

    let tdlen = trainData.shape.[0]

    let mutable i = 0L

    while i < tdlen - 2L do

        // Per-batch dispose scope for tensors created in this iteration.
        use d = torch.NewDisposeScope()

        begin
            let data,targets = get_batch trainData i
            use data = data
            use targets = targets

            if data.shape.[0] <> bptt then
                src_mask.Dispose()
                src_mask <- model.GenerateSquareSubsequentMask(data.shape.[0])

            optimizer.zero_grad()

            use output = model.forward(data, src_mask)
            use loss = criterion (output.view(-1L, ntokens)) targets
            loss.backward()
            // Gradient clipping stabilizes transformer training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) |> ignore
            optimizer.step() |> ignore

            total_loss <- total_loss + loss.cpu().item()
        end

        if (batch % logInterval = 0) && (batch > 0) then
            let cur_loss = (total_loss / (float32 logInterval)).ToString("0.00")
            printfn $"epoch: {epoch} | batch: {batch} / {tdlen/bptt} | loss: {cur_loss}"
            total_loss <- 0.0f

        batch <- batch + 1
        i <- i + bptt
181 |
182 |
/// Computes the length-weighted average loss of `model` over `evalData`
/// without updating any weights.
let evaluate (model:TransformerModel) (evalData:torch.Tensor) ntokens =

    model.eval()

    let mutable total_loss = 0.0f
    // Reused across chunks; rebuilt only for the final, shorter chunk.
    let mutable src_mask = model.GenerateSquareSubsequentMask(bptt)

    let mutable batch = 0L

    let tdlen = evalData.shape.[0]

    let mutable i = 0L

    while i < tdlen - 2L do

        // Per-chunk dispose scope for tensors created in this iteration.
        use d = torch.NewDisposeScope()

        begin
            let data,targets = get_batch evalData i
            use data = data
            use targets = targets

            if data.shape.[0] <> bptt then
                src_mask.Dispose()
                src_mask <- model.GenerateSquareSubsequentMask(data.shape.[0])

            use output = model.forward(data, src_mask)
            use loss = criterion (output.view(-1L, ntokens)) targets
            // Weight each chunk's mean loss by its length.
            total_loss <- total_loss + (float32 data.shape.[0]) * loss.cpu().item()
        end

        batch <- batch + 1L
        i <- i + bptt

    total_loss / (float32 evalData.shape.[0])
218 |
/// Entry point: builds the WikiText2 vocabulary, batches the train/valid/test
/// splits, then trains the transformer for `epochs` epochs with SGD + StepLR,
/// reporting validation loss per epoch and test loss at the end.
let run epochs =

    printfn $"Running SequenceToSequence on {device.``type``.ToString()} for {epochs} epochs."

    let vocabIter = TorchText.Datasets.WikiText2("train", datasetPath)
    let tokenizer = TorchText.Data.Utils.get_tokenizer("basic_english")
    // Shadow the delegate with an F# function for convenient application.
    let tokenizer str = tokenizer.Invoke(str)

    let counter = new TorchText.Vocab.Counter()

    for item in vocabIter do
        counter.update(tokenizer(item))

    let vocab = TorchText.Vocab.Vocab(counter)

    let trainIter,validIter,testIter = TorchText.Datasets.WikiText2(datasetPath).ToTuple()

    let train_data = batchify (process_input trainIter tokenizer vocab) batch_size device
    let valid_data = batchify (process_input validIter tokenizer vocab) eval_batch_size device
    let test_data = batchify (process_input testIter tokenizer vocab) eval_batch_size device

    let ntokens = int64 vocab.Count

    use model = new TransformerModel(ntokens, device)
    let lr = 2.50
    let optimizer = SGD(model.parameters(), lr)
    // NOTE(review): last_epoch=15 is unusual for a fresh (non-resumed) run;
    // the C# example does not resume — confirm this is intentional.
    let scheduler = lr_scheduler.StepLR(optimizer, 1, 0.95, last_epoch=15)

    let totalTime = Stopwatch()
    totalTime.Start()

    for epoch = 1 to epochs do
        let sw = Stopwatch()
        sw.Start()

        train epoch model optimizer train_data ntokens

        let val_loss = evaluate model valid_data ntokens
        sw.Stop()

        let lrStr = optimizer.ParamGroups.First().LearningRate.ToString("0.00")
        let elapsed = sw.Elapsed.TotalSeconds.ToString("0.0")
        let lossStr = val_loss.ToString("0.00")

        printfn $"\nEnd of epoch: {epoch} | lr: {lrStr} | time: {elapsed}s | loss: {lossStr}\n"

        scheduler.step() |> ignore

    let tst_loss = evaluate model test_data ntokens

    totalTime.Stop()

    let elapsed = totalTime.Elapsed.TotalSeconds.ToString("0.0")
    let lossStr = tst_loss.ToString("0.00")
    printfn $"\nEnd of training | time: {elapsed} s | loss: {lossStr}\n"
--------------------------------------------------------------------------------