├── Samples
├── .nuget
│ ├── NuGet.exe
│ ├── NuGet.Config
│ └── NuGet.targets
├── SampleActivities
│ ├── Basic
│ │ ├── DataExtraction
│ │ │ ├── SimpleExtractorDesigner.xaml
│ │ │ ├── SimpleExtractorDesigner.xaml.cs
│ │ │ └── SimpleExtractor.cs
│ │ ├── DocumentClassification
│ │ │ ├── SimpleClassifierDesigner.xaml
│ │ │ ├── SimpleClassifierDesigner.xaml.cs
│ │ │ └── SimpleClassifier.cs
│ │ └── OCR
│ │ │ ├── SimpleOCRScrapeFactory.cs
│ │ │ ├── SimpleScrapeControl.xaml.cs
│ │ │ ├── OCRResultHelper.cs
│ │ │ ├── UsageToVisibilityConverter.cs
│ │ │ ├── SimpleOCRScrape.cs
│ │ │ ├── SimpleOCREngine.cs
│ │ │ └── SimpleScrapeControl.xaml
│ ├── Properties
│ │ └── AssemblyInfo.cs
│ ├── DesignerMetadata.cs
│ └── SampleActivities.csproj
└── Samples.sln
├── README.md
├── .gitattributes
└── .gitignore
/Samples/.nuget/NuGet.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UiPath/Document-Processing-Code-Samples/HEAD/Samples/.nuget/NuGet.exe
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Document-Processing-Code-Samples
2 | Code samples for document processing activities.
3 |
4 | Activities are located in Samples/SampleActivities/Basic/
5 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/DataExtraction/SimpleExtractorDesigner.xaml:
--------------------------------------------------------------------------------
1 |
5 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/DocumentClassification/SimpleClassifierDesigner.xaml:
--------------------------------------------------------------------------------
1 |
5 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/SimpleOCRScrapeFactory.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | using UiPath.OCR.Contracts.Scrape;
3 |
4 | namespace SampleActivities.Basic.OCR
5 | {
6 |     /// <summary>
7 |     /// Factory for the sample scrape engine: every call pairs a new
8 |     /// <see cref="SimpleOCREngine"/> with a new <see cref="SampleOCRScrape"/>.
9 |     /// </summary>
10 |     public class SampleOCRScrapeFactory : OCRScrapeFactory
11 |     {
12 |         /// <summary>Builds the scrape engine for the requested usage.</summary>
13 |         /// <param name="usage">Usage value handed unchanged to the <see cref="SampleOCRScrape"/> constructor.</param>
14 |         /// <returns>A new <see cref="SampleOCRScrape"/> wrapping a new <see cref="SimpleOCREngine"/>.</returns>
15 |         public override OCRScrapeBase CreateEngine(ScrapeEngineUsages usage)
16 |         {
17 |             var ocrEngine = new SimpleOCREngine();
18 |             return new SampleOCRScrape(ocrEngine, usage);
19 |         }
20 |     }
21 | }
22 | #endif
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/DataExtraction/SimpleExtractorDesigner.xaml.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | namespace SampleActivities.Basic.DataExtraction
3 | {
4 |     /// <summary>
5 |     /// Code-behind for SimpleExtractorDesigner.xaml.
6 |     /// </summary>
7 |     public partial class SimpleExtractorDesigner
8 |     {
9 |         /// <summary>Creates the designer by calling InitializeComponent.</summary>
10 |         public SimpleExtractorDesigner() => InitializeComponent();
11 |     }
12 | }
13 | #endif
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/DocumentClassification/SimpleClassifierDesigner.xaml.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | namespace SampleActivities.Basic.DocumentClassification
3 | {
4 |     /// <summary>
5 |     /// Code-behind for SimpleClassifierDesigner.xaml.
6 |     /// </summary>
7 |     public partial class SimpleClassifierDesigner
8 |     {
9 |         /// <summary>Creates the designer by calling InitializeComponent.</summary>
10 |         public SimpleClassifierDesigner() => InitializeComponent();
11 |     }
12 | }
13 | #endif
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/SimpleScrapeControl.xaml.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | using UiPath.OCR.Contracts.Scrape;
3 |
4 | namespace SampleActivities.Basic.OCR
5 | {
6 |     /// <summary>
7 |     /// Code-behind for SimpleScrapeControl.xaml.
8 |     /// </summary>
9 |     internal partial class SimpleScrapeControl : ScrapeControlBase
10 |     {
11 |         /// <summary>
12 |         /// Value that <see cref="SampleOCRScrape"/> reads when it builds the scrape arguments.
13 |         /// </summary>
14 |         public string SampleInput { get; set; }
15 |
16 |         /// <summary>Creates the control with <see cref="ScrapeEngineUsages.Screen"/> as its usage.</summary>
17 |         public SimpleScrapeControl()
18 |             : this(ScrapeEngineUsages.Screen)
19 |         {
20 |         }
21 |
22 |         /// <summary>Creates the control for the given usage and makes the control its own data context.</summary>
23 |         /// <param name="usage">Value stored in <c>Usage</c> before the component is initialized.</param>
24 |         public SimpleScrapeControl(ScrapeEngineUsages usage)
25 |         {
26 |             // Order is kept as-is: usage first, then the component, then the data context.
27 |             this.Usage = usage;
28 |             this.InitializeComponent();
29 |             this.DataContext = this;
30 |         }
31 |     }
32 | }
33 | #endif
--------------------------------------------------------------------------------
/Samples/.nuget/NuGet.Config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/OCRResultHelper.cs:
--------------------------------------------------------------------------------
1 | using System.Drawing;
2 | using System.Linq;
3 | using UiPath.OCR.Contracts.DataContracts;
4 |
5 | namespace SampleActivities.Basic.OCR
6 | {
7 |     /// <summary>
8 |     /// Builds placeholder <see cref="OCRResult"/> objects for the sample OCR engine.
9 |     /// </summary>
10 |     internal static class OCRResultHelper
11 |     {
12 |         /// <summary>
13 |         /// Wraps <paramref name="text"/> in an <see cref="OCRResult"/> with one word per
14 |         /// space-separated token and one character entry per char of each word.
15 |         /// </summary>
16 |         /// <param name="text">Text to wrap; must not be null.</param>
17 |         /// <returns>A result whose confidence and skew angle are both 0.</returns>
18 |         /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
19 |         internal static OCRResult FromText(string text)
20 |         {
21 |             if (text == null)
22 |             {
23 |                 throw new System.ArgumentNullException(nameof(text));
24 |             }
25 |
26 |             // Drop empty tokens so leading, trailing or repeated spaces do not yield
27 |             // words that have no characters (and do not inflate the word count).
28 |             var words = text
29 |                 .Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries)
30 |                 .Select((word, i) => CreateWord(word, i))
31 |                 .ToArray();
32 |
33 |             return new OCRResult
34 |             {
35 |                 Text = text,
36 |                 Words = words,
37 |                 Confidence = 0,
38 |                 SkewAngle = 0
39 |             };
40 |         }
41 |
42 |         /// <summary>
43 |         /// Creates one word whose characters all get the same four synthetic corner points;
44 |         /// the coordinates are (index + 1) * 100 and (index + 1) * 200.
45 |         /// </summary>
46 |         private static Word CreateWord(string word, int index)
47 |         {
48 |             float near = (index + 1) * 100;
49 |             float far = (index + 1) * 200;
50 |
51 |             return new Word
52 |             {
53 |                 Text = word,
54 |                 Characters = word.Select(ch => new Character
55 |                 {
56 |                     Char = ch,
57 |                     PolygonPoints = new[] { new PointF(near, near), new PointF(far, near), new PointF(near, far), new PointF(far, far), }
58 |                 }).ToArray()
59 |             };
60 |         }
61 |     }
62 | }
63 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/UsageToVisibilityConverter.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | using System;
3 | using System.Globalization;
4 | using System.Windows;
5 | using System.Windows.Data;
6 | using UiPath.OCR.Contracts.Scrape;
7 |
8 | namespace SampleActivities.Basic.OCR
9 | {
10 |     /// <summary>
11 |     /// Value converter that maps a <see cref="ScrapeEngineUsages"/> value to one of two
12 |     /// configurable <see cref="Visibility"/> values.
13 |     /// </summary>
14 |     internal class UsageToVisibilityConverter : IValueConverter
15 |     {
16 |         /// <summary>Visibility returned for every usage other than <see cref="ScrapeEngineUsages.Screen"/>.</summary>
17 |         public Visibility Document { get; set; }
18 |
19 |         /// <summary>Visibility returned when the usage equals <see cref="ScrapeEngineUsages.Screen"/>.</summary>
20 |         public Visibility Screen { get; set; }
21 |
22 |         /// <summary>Converts a usage value to the configured visibility.</summary>
23 |         /// <param name="value">Expected to be a boxed <see cref="ScrapeEngineUsages"/>.</param>
24 |         /// <param name="targetType">Not used.</param>
25 |         /// <param name="parameter">Not used.</param>
26 |         /// <param name="culture">Not used.</param>
27 |         /// <returns>
28 |         /// <see cref="Screen"/> or <see cref="Document"/>; <see cref="DependencyProperty.UnsetValue"/>
29 |         /// when <paramref name="value"/> is not a <see cref="ScrapeEngineUsages"/>.
30 |         /// </returns>
31 |         public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
32 |         {
33 |             if (!(value is ScrapeEngineUsages usage))
34 |             {
35 |                 // null is not a valid Visibility; UnsetValue tells the binding that no value
36 |                 // was produced so it uses its fallback or default value instead.
37 |                 return DependencyProperty.UnsetValue;
38 |             }
39 |
40 |             return usage == ScrapeEngineUsages.Screen ? Screen : Document;
41 |         }
42 |
43 |         /// <summary>Not implemented; only the forward conversion is provided.</summary>
44 |         /// <exception cref="NotImplementedException">Always thrown.</exception>
45 |         public object ConvertBack(object value, Type targetType, object parameter, CultureInfo culture)
46 |         {
47 |             throw new NotImplementedException();
48 |         }
49 |     }
50 | }
51 | #endif
--------------------------------------------------------------------------------
/Samples/Samples.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.1.32414.318
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SampleActivities", "SampleActivities\SampleActivities.csproj", "{CF089ADF-8E06-4168-A812-A1AD23D9B983}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {C6F0CD1A-DBA9-4755-AB6E-C02DAAEC4C1B}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/SimpleOCRScrape.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | using System.Collections.Generic;
3 | using UiPath.OCR.Contracts.Activities;
4 | using UiPath.OCR.Contracts.Scrape;
5 |
6 | namespace SampleActivities.Basic.OCR
7 | {
8 |     /// <summary>
9 |     /// Extends OCRScrapeBase so an OCR engine can display custom user controls when integrating
10 |     /// with wizards such as Screen Scraping or Template Manager.
11 |     /// </summary>
12 |     internal class SampleOCRScrape : OCRScrapeBase
13 |     {
14 |         // The single control instance that is both handed out and read back for arguments.
15 |         private readonly SimpleScrapeControl _sampleScrapeControl;
16 |
17 |         /// <summary>Usages declared by this engine: both Document and Screen.</summary>
18 |         public override ScrapeEngineUsages Usage { get; } = ScrapeEngineUsages.Document | ScrapeEngineUsages.Screen;
19 |
20 |         /// <summary>Creates the scrape engine together with its user control.</summary>
21 |         /// <param name="ocrEngineActivity">OCR activity passed on to the base constructor.</param>
22 |         /// <param name="usage">Usage the <see cref="SimpleScrapeControl"/> is created for.</param>
23 |         public SampleOCRScrape(IOCRActivity ocrEngineActivity, ScrapeEngineUsages usage)
24 |             : base(ocrEngineActivity)
25 |         {
26 |             _sampleScrapeControl = new SimpleScrapeControl(usage);
27 |         }
28 |
29 |         /// <summary>Returns the control created in the constructor.</summary>
30 |         public override ScrapeControlBase GetScrapeControl() => _sampleScrapeControl;
31 |
32 |         /// <summary>
33 |         /// Returns the scrape arguments: a single entry keyed by the name of
34 |         /// <see cref="SimpleOCREngine.CustomInput"/> that holds the control's SampleInput value.
35 |         /// </summary>
36 |         public override Dictionary GetScrapeArguments()
37 |         {
38 |             var arguments = new Dictionary();
39 |             arguments.Add(nameof(SimpleOCREngine.CustomInput), _sampleScrapeControl.SampleInput);
40 |             return arguments;
41 |         }
42 |     }
43 | }
44 | #endif
--------------------------------------------------------------------------------
/Samples/SampleActivities/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.InteropServices;
3 |
4 | // General Information about an assembly is controlled through the following
5 | // set of attributes. Change these attribute values to modify the information
6 | // associated with an assembly.
7 | [assembly: AssemblyTitle("SampleActivities")]
8 | [assembly: AssemblyDescription("")]
9 | [assembly: AssemblyConfiguration("")]
10 | [assembly: AssemblyTrademark("")]
11 | [assembly: AssemblyCulture("")]
12 | [assembly: AssemblyCompany("UiPath")]
13 | [assembly: AssemblyProduct("UiPath")]
14 | [assembly: AssemblyCopyright("Copyright © UiPath")]
15 |
16 | // Setting ComVisible to false makes the types in this assembly not visible
17 | // to COM components. If you need to access a type in this assembly from
18 | // COM, set the ComVisible attribute to true on that type.
19 | [assembly: ComVisible(false)]
20 |
21 | // The following GUID is for the ID of the typelib if this project is exposed to COM
22 | [assembly: Guid("8a3c8ed7-9769-438c-ad60-f3001db9d911")]
23 |
24 | // Version information for an assembly consists of the following four values:
25 | //
26 | // Major Version
27 | // Minor Version
28 | // Build Number
29 | // Revision
30 | //
31 | // You can specify all the values or you can default the Build and Revision Numbers
32 | // by using the '*' as shown below:
33 | // [assembly: AssemblyVersion("1.0.*")]
34 | [assembly: AssemblyVersion("1.0.*")]
35 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/DesignerMetadata.cs:
--------------------------------------------------------------------------------
1 | #if !NETPORTABLE_UIPATH
2 | using System.Activities.Presentation.Metadata;
3 | using System.ComponentModel;
4 | using SampleActivities.Basic.DataExtraction;
5 | using SampleActivities.Basic.DocumentClassification;
6 | using SampleActivities.Basic.OCR;
7 |
8 | namespace SampleActivities
9 | {
10 |     /// <summary>
11 |     /// Registers designer and category attributes for the sample activities.
12 |     /// </summary>
13 |     public class DesignerMetadata : IRegisterMetadata
14 |     {
15 |         /// <summary>
16 |         /// Builds one attribute table for the sample classifier, extractor and OCR engine
17 |         /// and adds it to the <see cref="MetadataStore"/>.
18 |         /// </summary>
19 |         public void Register()
20 |         {
21 |             var table = new AttributeTableBuilder();
22 |
23 |             // Classifier: category first, then its designer.
24 |             table.AddCustomAttributes(typeof(SimpleClassifier), new CategoryAttribute("Sample Classifiers"));
25 |             table.AddCustomAttributes(typeof(SimpleClassifier), new DesignerAttribute(typeof(SimpleClassifierDesigner)));
26 |
27 |             // Extractor: category first, then its designer.
28 |             table.AddCustomAttributes(typeof(SimpleExtractor), new CategoryAttribute("Sample Extractors"));
29 |             table.AddCustomAttributes(typeof(SimpleExtractor), new DesignerAttribute(typeof(SimpleExtractorDesigner)));
30 |
31 |             // OCR engine: a category for the activity and an "Output" category for its Result member.
32 |             table.AddCustomAttributes(typeof(SimpleOCREngine), new CategoryAttribute("Sample OCR Engines"));
33 |             table.AddCustomAttributes(typeof(SimpleOCREngine), nameof(SimpleOCREngine.Result), new CategoryAttribute("Output"));
34 |
35 |             MetadataStore.AddAttributeTable(table.CreateTable());
36 |         }
37 |     }
38 | }
39 | #endif
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/SimpleOCREngine.cs:
--------------------------------------------------------------------------------
1 | using System.Activities;
2 | using System.Collections.Generic;
3 | using System.ComponentModel;
4 | using System.Drawing;
5 | using System.Threading;
6 | using System.Threading.Tasks;
7 | using UiPath.OCR.Contracts.Activities;
8 | using UiPath.OCR.Contracts.DataContracts;
9 |
10 | namespace SampleActivities.Basic.OCR
11 | {
12 |     /// <summary>
13 |     /// Sample OCR engine that never inspects the image: it returns a fixed text that embeds
14 |     /// the value of <see cref="CustomInput"/>.
15 |     /// </summary>
16 |     public class SimpleOCREngine : OCRCodeActivity
17 |     {
18 |         /// <summary>Image argument of the base activity, placed in the "Input" category.</summary>
19 |         [Category("Input")]
20 |         [Browsable(true)]
21 |         public override InArgument Image { get => base.Image; set => base.Image = value; }
22 |
23 |         /// <summary>Text argument of the base activity, placed in the "Output" category.</summary>
24 |         [Category("Output")]
25 |         [Browsable(true)]
26 |         public override OutArgument Text { get => base.Text; set => base.Text = value; }
27 |
28 |         /// <summary>Extra input that is embedded in the generated text.</summary>
29 |         [Category("Input")]
30 |         public InArgument CustomInput { get; set; }
31 |
32 |         /// <summary>Set by <see cref="OnSuccess"/> to a summary of the OCR result.</summary>
33 |         [Category("Output")]
34 |         public OutArgument CustomOutput { get; set; }
35 |
36 |         /// <summary>Produces the sample OCR result.</summary>
37 |         /// <param name="image">Not used by this sample.</param>
38 |         /// <param name="options">
39 |         /// Options keyed by argument name. The entry named after <see cref="CustomInput"/> is
40 |         /// read when present; a null dictionary or a missing entry is treated as a null input.
41 |         /// </param>
42 |         /// <param name="ct">Not used by this sample.</param>
43 |         /// <returns>A completed task holding the result built by <see cref="OCRResultHelper.FromText"/>.</returns>
44 |         public override Task PerformOCRAsync(Image image, Dictionary options, CancellationToken ct)
45 |         {
46 |             // Look the entry up defensively so a caller that supplies no custom input
47 |             // does not fail with a KeyNotFoundException or NullReferenceException.
48 |             string customInput = null;
49 |             if (options != null && options.TryGetValue(nameof(CustomInput), out var rawInput))
50 |             {
51 |                 customInput = rawInput as string;
52 |             }
53 |
54 |             string text = $"Text from {nameof(SimpleOCREngine)} with custom input: {customInput}";
55 |             return Task.FromResult(OCRResultHelper.FromText(text));
56 |         }
57 |
58 |         /// <summary>Writes a summary of <paramref name="result"/> (its text and word count) to <see cref="CustomOutput"/>.</summary>
59 |         protected override void OnSuccess(CodeActivityContext context, OCRResult result)
60 |         {
61 |             CustomOutput.Set(context, $"Custom output: '{result.Text}' has {result.Words.Length} words.");
62 |         }
63 |
64 |         /// <summary>
65 |         /// Returns a dictionary holding the current <see cref="CustomInput"/> value under the
66 |         /// argument's name (presumably the options later given to <see cref="PerformOCRAsync"/>; confirm against the base class).
67 |         /// </summary>
68 |         protected override Dictionary BeforeExecute(CodeActivityContext context)
69 |         {
70 |             return new Dictionary
71 |             {
72 |                 { nameof(CustomInput), CustomInput.Get(context) }
73 |             };
74 |         }
75 |     }
76 | }
77 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/OCR/SimpleScrapeControl.xaml:
--------------------------------------------------------------------------------
1 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
20 |
23 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/SampleActivities.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net461;net6.0-windows7.0;net6.0
5 | false
6 | false
7 | $(ProjectDir)..\Output\SampleActivities
8 | $(DefineConstants);NETSTANDARD;NETPORTABLE_UIPATH
9 |
10 | SampleActivities
11 | $(Version)
12 | UiPath Sample Activities
13 | UiPath
14 | UiPath Document Understanding Sample Activities.
15 | $(ProjectDir)..\Output\Packages
16 | true
17 | false
18 | OnBuildSuccess
19 |
20 |
21 |
22 | SampleActivities
23 | 1
24 | 0
25 | 0
26 |
27 |
28 |
29 | $(VersionBuild)$([System.DateTime]::UtcNow.DayOfYear.ToString("F0"))
30 | $([System.Math]::Floor($([MSBuild]::Divide($([System.DateTime]::UtcNow.TimeOfDay.TotalMinutes), 5))))
31 | $(VersionMajor).$(VersionMinor).$(VersionBuild)-dev.$(VersionRevision)
32 | $(VersionMajor).$(VersionMinor).$(VersionBuild).$(VersionRevision)
33 |
34 |
35 |
36 | true
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/DocumentClassification/SimpleClassifier.cs:
--------------------------------------------------------------------------------
1 | using System.Activities;
2 | using System.Linq;
3 | using UiPath.DocumentProcessing.Contracts.Classification;
4 | using UiPath.DocumentProcessing.Contracts.Dom;
5 | using UiPath.DocumentProcessing.Contracts.Results;
6 |
7 | namespace SampleActivities.Basic.DocumentClassification
8 | {
9 |     /// <summary>
10 |     /// This sample classifier takes the first word from the given document page as evidence, returning the first document type as the classification result.
11 |     /// </summary>
12 |     public class SimpleClassifier : ClassifierCodeActivity
13 |     {
14 |         /// <summary>Example input argument: zero-based index of the page the evidence word is taken from.</summary>
15 |         public InArgument EvidencePage { get; set; }
16 |
17 |         /// <summary>Reads the activity inputs and stores the computed classification in ClassifierResult.</summary>
18 |         protected override void Execute(CodeActivityContext context)
19 |         {
20 |             string text = DocumentText.Get(context);
21 |             Document document = DocumentObjectModel.Get(context);
22 |             string documentPath = DocumentPath.Get(context);
23 |             ClassifierDocumentType[] documentTypes = DocumentTypes.Get(context);
24 |             int evidencePage = EvidencePage.Get(context);
25 |
26 |             ClassifierResult.Set(context, ComputeResult(text, document, documentPath, documentTypes, evidencePage));
27 |         }
28 |
29 |         /// <summary>
30 |         /// Classifies the document as the first requested document type, using the first word
31 |         /// on the evidence page as evidence.
32 |         /// </summary>
33 |         /// <returns>
34 |         /// The classification, or null (unsuccessful classification) when no document types
35 |         /// were requested, the document or its pages are missing, the page index is out of
36 |         /// range, or the page contains no words.
37 |         /// </returns>
38 |         private ClassifierResult ComputeResult(string text, Document document, string documentPath, ClassifierDocumentType[] documentTypes, int evidencePage)
39 |         {
40 |             // example of unsuccessful classification: nothing to classify against, or no such page
41 |             if (documentTypes == null || !documentTypes.Any() || document?.Pages == null
42 |                 || evidencePage < 0 || document.Pages.Length <= evidencePage)
43 |             {
44 |                 return null;
45 |             }
46 |
47 |             var page = document.Pages[evidencePage];
48 |
49 |             // take first word from the evidence page in the document and consider it as evidence for the classification
50 |             var pageWords = page.Sections.SelectMany(s => s.WordGroups).SelectMany(g => g.Words).Take(1).ToArray();
51 |             if (pageWords.Length == 0)
52 |             {
53 |                 // a page without words offers no evidence; report it as unsuccessful as well
54 |                 return null;
55 |             }
56 |
57 |             var firstWord = pageWords[0];
58 |
59 |             // the token must reference the page the word and page size were taken from, not always page 0
60 |             var firstWordValueToken = new ResultsValueTokens(evidencePage, (float)page.Size.Width, (float)page.Size.Height, new[] { firstWord.Box });
61 |
62 |             // return first document type, with evidencing based on the first word on the evidence page
63 |             var classificationResult = new ClassificationResult(
64 |                 // consider the first document type requested
65 |                 documentTypes.First().DocumentTypeId,
66 |                 // fill in document id from the Document Object Model information
67 |                 document.DocumentId,
68 |                 // simulate an 85% confidence
69 |                 0.85f,
70 |                 // take OCR confidence of the words used for evidencing
71 |                 firstWord.OcrConfidence,
72 |                 // build the evidencing information
73 |                 new ResultsContentReference(firstWord.IndexInText, firstWord.Text.Length, new[] { firstWordValueToken }),
74 |                 // consider the classification applying to the entire document (all pages)
75 |                 new ResultsDocumentBounds(document.Pages.Length, document.Length));
76 |
77 |             return new ClassifierResult
78 |             {
79 |                 Classifications = new[] { classificationResult }
80 |             };
81 |         }
82 |     }
83 | }
84 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # User-specific files
5 | *.suo
6 | *.user
7 | *.userosscache
8 | *.sln.docstates
9 | *.deps.json
10 |
11 | # User-specific files (MonoDevelop/Xamarin Studio)
12 | *.userprefs
13 |
14 | # Build results
15 | [Dd]ebug/
16 | [Dd]ebugPublic/
17 | [Rr]elease/
18 | [Rr]eleases/
19 | x64/
20 | x86/
21 | bld/
22 | [Bb]in/
23 | [Oo]bj/
24 | [Ll]og/
25 |
26 | # Visual Studio 2015 cache/options directory
27 | .vs/
28 | # Uncomment if you have tasks that create the project's static files in wwwroot
29 | #wwwroot/
30 |
31 | # MSTest test Results
32 | [Tt]est[Rr]esult*/
33 | [Bb]uild[Ll]og.*
34 |
35 | # NUNIT
36 | *.VisualState.xml
37 | TestResult.xml
38 |
39 | # Build Results of an ATL Project
40 | [Dd]ebugPS/
41 | [Rr]eleasePS/
42 | dlldata.c
43 |
44 | # DNX
45 | project.lock.json
46 | project.fragment.lock.json
47 | artifacts/
48 |
49 | *_i.c
50 | *_p.c
51 | *_i.h
52 | *.ilk
53 | *.meta
54 | *.obj
55 | *.pch
56 | *.pdb
57 | *.pgc
58 | *.pgd
59 | *.rsp
60 | *.sbr
61 | *.tlb
62 | *.tli
63 | *.tlh
64 | *.tmp
65 | *.tmp_proj
66 | *.log
67 | *.vspscc
68 | *.vssscc
69 | .builds
70 | *.pidb
71 | *.svclog
72 | *.scc
73 |
74 | # Chutzpah Test files
75 | _Chutzpah*
76 |
77 | # Visual C++ cache files
78 | ipch/
79 | *.aps
80 | *.ncb
81 | *.opendb
82 | *.opensdf
83 | *.sdf
84 | *.cachefile
85 | *.VC.db
86 | *.VC.VC.opendb
87 |
88 | # Visual Studio profiler
89 | *.psess
90 | *.vsp
91 | *.vspx
92 | *.sap
93 |
94 | # TFS 2012 Local Workspace
95 | $tf/
96 |
97 | # Guidance Automation Toolkit
98 | *.gpState
99 |
100 | # ReSharper is a .NET coding add-in
101 | _ReSharper*/
102 | *.[Rr]e[Ss]harper
103 | *.DotSettings.user
104 |
105 | # JustCode is a .NET coding add-in
106 | .JustCode
107 |
108 | # TeamCity is a build add-in
109 | _TeamCity*
110 |
111 | # DotCover is a Code Coverage Tool
112 | *.dotCover
113 |
114 | # NCrunch
115 | _NCrunch_*
116 | .*crunch*.local.xml
117 | nCrunchTemp_*
118 |
119 | # MightyMoose
120 | *.mm.*
121 | AutoTest.Net/
122 |
123 | # Web workbench (sass)
124 | .sass-cache/
125 |
126 | # Installshield output folder
127 | [Ee]xpress/
128 |
129 | # DocProject is a documentation generator add-in
130 | DocProject/buildhelp/
131 | DocProject/Help/*.HxT
132 | DocProject/Help/*.HxC
133 | DocProject/Help/*.hhc
134 | DocProject/Help/*.hhk
135 | DocProject/Help/*.hhp
136 | DocProject/Help/Html2
137 | DocProject/Help/html
138 |
139 | # Click-Once directory
140 | publish/
141 |
142 | # Publish Web Output
143 | *.[Pp]ublish.xml
144 | *.azurePubxml
145 | # TODO: Comment the next line if you want to checkin your web deploy settings
146 | # but database connection strings (with potential passwords) will be unencrypted
147 | #*.pubxml
148 | *.publishproj
149 |
150 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
151 | # checkin your Azure Web App publish settings, but sensitive information contained
152 | # in these scripts will be unencrypted
153 | PublishScripts/
154 |
155 | # NuGet Packages
156 | *.nupkg
157 | # The packages folder can be ignored because of Package Restore
158 | **/packages/*
159 | # except build/, which is used as an MSBuild target.
160 | !**/packages/build/
161 | # Uncomment if necessary however generally it will be regenerated when needed
162 | #!**/packages/repositories.config
163 | # NuGet v3's project.json files produce more ignorable files
164 | *.nuget.props
165 | *.nuget.targets
166 |
167 | # Microsoft Azure Build Output
168 | csx/
169 | *.build.csdef
170 |
171 | # Microsoft Azure Emulator
172 | ecf/
173 | rcf/
174 |
175 | # Windows Store app package directories and files
176 | AppPackages/
177 | BundleArtifacts/
178 | Package.StoreAssociation.xml
179 | _pkginfo.txt
180 |
181 | # Visual Studio cache files
182 | # files ending in .cache can be ignored
183 | *.[Cc]ache
184 | # but keep track of directories ending in .cache
185 | !*.[Cc]ache/
186 |
187 | # Others
188 | ClientBin/
189 | ~$*
190 | *~
191 | *.dbmdl
192 | *.dbproj.schemaview
193 | *.jfm
194 | *.pfx
195 | *.publishsettings
196 | node_modules/
197 | orleans.codegen.cs
198 |
199 | # Since there are multiple workflows, uncomment next line to ignore bower_components
200 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
201 | #bower_components/
202 |
203 | # RIA/Silverlight projects
204 | Generated_Code/
205 |
206 | # Backup & report files from converting an old project file
207 | # to a newer Visual Studio version. Backup files are not needed,
208 | # because we have git ;-)
209 | _UpgradeReport_Files/
210 | Backup*/
211 | UpgradeLog*.XML
212 | UpgradeLog*.htm
213 |
214 | # SQL Server files
215 | *.mdf
216 | *.ldf
217 |
218 | # Business Intelligence projects
219 | *.rdl.data
220 | *.bim.layout
221 | *.bim_*.settings
222 |
223 | # Microsoft Fakes
224 | FakesAssemblies/
225 |
226 | # GhostDoc plugin setting file
227 | *.GhostDoc.xml
228 |
229 | # Node.js Tools for Visual Studio
230 | .ntvs_analysis.dat
231 |
232 | # Visual Studio 6 build log
233 | *.plg
234 |
235 | # Visual Studio 6 workspace options file
236 | *.opt
237 |
238 | # Visual Studio LightSwitch build output
239 | **/*.HTMLClient/GeneratedArtifacts
240 | **/*.DesktopClient/GeneratedArtifacts
241 | **/*.DesktopClient/ModelManifest.xml
242 | **/*.Server/GeneratedArtifacts
243 | **/*.Server/ModelManifest.xml
244 | _Pvt_Extensions
245 |
246 | # Paket dependency manager
247 | .paket/paket.exe
248 | paket-files/
249 |
250 | # FAKE - F# Make
251 | .fake/
252 |
253 | # JetBrains Rider
254 | .idea/
255 | *.sln.iml
256 |
257 | # CodeRush
258 | .cr/
259 |
260 | # Python Tools for Visual Studio (PTVS)
261 | __pycache__/
262 | *.pyc
--------------------------------------------------------------------------------
/Samples/SampleActivities/Basic/DataExtraction/SimpleExtractor.cs:
--------------------------------------------------------------------------------
1 | using System.Activities;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Threading.Tasks;
5 | using UiPath.DocumentProcessing.Contracts;
6 | using UiPath.DocumentProcessing.Contracts.DataExtraction;
7 | using UiPath.DocumentProcessing.Contracts.Dom;
8 | using UiPath.DocumentProcessing.Contracts.Results;
9 | using UiPath.DocumentProcessing.Contracts.Taxonomy;
10 |
11 | namespace SampleActivities.Basic.DataExtraction
12 | {
13 |     /// <summary>
14 |     /// Sample extractor that reports placeholder values for the first Date, Boolean and Table
15 |     /// fields of the requested document type.
16 |     /// </summary>
17 |     public class SimpleExtractor : ExtractorCodeActivity
18 |     {
19 |         /// <summary>Reports no capabilities: the returned task holds an empty array.</summary>
20 |         public override Task GetCapabilities()
21 |         {
22 |             return Task.FromResult(new ExtractorDocumentTypeCapabilities[0]);
23 |         }
24 |
25 |         /// <summary>Reads the activity inputs and stores the computed result in ExtractorResult.</summary>
26 |         protected override void Execute(CodeActivityContext context)
27 |         {
28 |             ExtractorDocumentType documentType = ExtractorDocumentType.Get(context);
29 |             ResultsDocumentBounds documentBounds = DocumentBounds.Get(context);
30 |             string text = DocumentText.Get(context);
31 |             Document document = DocumentObjectModel.Get(context);
32 |             string documentPath = DocumentPath.Get(context);
33 |
34 |             ExtractorResult.Set(context, ComputeResult(documentType, documentBounds, text, document, documentPath));
35 |         }
36 |
37 |         /// <summary>
38 |         /// Builds the result: one data point for each of the first Date, Boolean and Table
39 |         /// fields found in <paramref name="documentType"/>; field types that are absent are skipped.
40 |         /// </summary>
41 |         private ExtractorResult ComputeResult(ExtractorDocumentType documentType, ResultsDocumentBounds documentBounds, string text, Document document, string documentPath)
42 |         {
43 |             var extractorResult = new ExtractorResult();
44 |             var resultsDataPoints = new List();
45 |
46 |             // example of reporting a value with derived parts
47 |             Field firstDateField = documentType.Fields.FirstOrDefault(f => f.Type == FieldType.Date);
48 |             if (firstDateField != null)
49 |             {
50 |                 // if any field is of type Date return first word on first page as reference and report Jan 1st 2002 as value
51 |                 resultsDataPoints.Add(CreateDateFieldDataPoint(firstDateField, document));
52 |             }
53 |
54 |             // example of reporting a value with no textual reference (only visual reference)
55 |             Field firstBooleanField = documentType.Fields.FirstOrDefault(f => f.Type == FieldType.Boolean);
56 |             if (firstBooleanField != null)
57 |             {
58 |                 // if any field is of type Boolean return "Yes" with a visual reference from pixel position (50, 100) and width 200 and height 300.
59 |                 resultsDataPoints.Add(CreateBooleanFieldDataPoint(firstBooleanField, document));
60 |             }
61 |
62 |             // example of table value
63 |             Field firstTableField = documentType.Fields.FirstOrDefault(f => f.Type == FieldType.Table);
64 |             if (firstTableField != null)
65 |             {
66 |                 // if any field is of type Table return a table with headers referencing the first N words and 2 rows referencing the next N * 2 words.
67 |                 // N will be the number of columns in the table field.
68 |                 resultsDataPoints.Add(CreateTableFieldDataPoint(firstTableField, document));
69 |             }
70 |
71 |             extractorResult.DataPoints = resultsDataPoints.ToArray();
72 |             return extractorResult;
73 |         }
74 |
75 |         /// <summary>Date example: reports "Jan 1st 2002" with derived date parts, referencing the first word of the first page.</summary>
76 |         private static ResultsDataPoint CreateDateFieldDataPoint(Field firstDateField, Document document)
77 |         {
78 |             // TODO
79 |             var derivedFields = ResultsDerivedField.CreateDerivedFieldsForDate(1, 1, 2002);
80 |             var firstDateValue = CreateResultsValue(0, document, "Jan 1st 2002");
81 |             firstDateValue.DerivedFields = derivedFields;
82 |
83 |             return new ResultsDataPoint(
84 |                 firstDateField.FieldId,
85 |                 firstDateField.FieldName,
86 |                 firstDateField.Type,
87 |                 new[] { firstDateValue });
88 |         }
89 |
90 |         /// <summary>Boolean example: reports "Yes" with a box-only reference on the first page and no text span.</summary>
91 |         private static ResultsDataPoint CreateBooleanFieldDataPoint(Field firstBooleanField, Document document)
92 |         {
93 |             var booleanToken = new ResultsValueTokens(0, (float)document.Pages[0].Size.Width, (float)document.Pages[0].Size.Height, new[] { Box.CreateChecked(50, 100, 200, 300) });
94 |             var reference = new ResultsContentReference(0, 0, new[] { booleanToken });
95 |             var firstBooleanValue = new ResultsValue("Yes", reference, 0.9f, 1f);
96 |
97 |             return new ResultsDataPoint(
98 |                 firstBooleanField.FieldId,
99 |                 firstBooleanField.FieldName,
100 |                 firstBooleanField.Type,
101 |                 new[] { firstBooleanValue });
102 |         }
103 |
104 |         /// <summary>
105 |         /// Table example: a header row and two data rows, each cell referencing the next
106 |         /// unused word of the first page (headers first, then row one, then row two).
107 |         /// </summary>
108 |         private static ResultsDataPoint CreateTableFieldDataPoint(Field firstTableField, Document document)
109 |         {
110 |             int nextWordIndex = 0;
111 |
112 |             // Each call is materialized immediately: the selector advances nextWordIndex as a
113 |             // side effect, so a lazy sequence would hand out different (or out-of-range) word
114 |             // indices depending on how often and in what order the consumer enumerates it.
115 |             ResultsDataPoint[] NextCells() => firstTableField.Components
116 |                 .Select(c => new ResultsDataPoint(c.FieldId, c.FieldName, c.Type, new[] { CreateResultsValue(nextWordIndex++, document) }))
117 |                 .ToArray();
118 |
119 |             var headerCells = NextCells();
120 |             var firstRowCells = NextCells();
121 |             var secondRowCells = NextCells();
122 |
123 |             var tableValue = ResultsValue.CreateTableValue(firstTableField, headerCells, new[] { firstRowCells, secondRowCells }, 0.9f, 1f);
124 |
125 |             return new ResultsDataPoint(
126 |                 firstTableField.FieldId,
127 |                 firstTableField.FieldName,
128 |                 firstTableField.Type,
129 |                 new[] { tableValue });
130 |         }
131 |
132 |         /// <summary>
133 |         /// Builds a value that references the word at <paramref name="wordIndex"/> among all words of the first page.
134 |         /// </summary>
135 |         /// <param name="wordIndex">Zero-based position in the flattened word list of the first page; an index past the last word throws.</param>
136 |         /// <param name="document">Document whose first page supplies the word and page size.</param>
137 |         /// <param name="value">Reported value; the word's own text is used when this is null.</param>
138 |         private static ResultsValue CreateResultsValue(int wordIndex, Document document, string value = null)
139 |         {
140 |             var word = document.Pages[0].Sections.SelectMany(s => s.WordGroups).SelectMany(w => w.Words).ToArray()[wordIndex];
141 |             var wordValueToken = new ResultsValueTokens(word.IndexInText, word.Text.Length, 0, (float)document.Pages[0].Size.Width, (float)document.Pages[0].Size.Height, new[] { word.Box });
142 |             var reference = new ResultsContentReference(word.IndexInText, word.Text.Length, new[] { wordValueToken });
143 |
144 |             return new ResultsValue(value ?? word.Text, reference, 0.9f, word.OcrConfidence);
145 |         }
146 |     }
147 | }
148 |
--------------------------------------------------------------------------------
/Samples/.nuget/NuGet.targets:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | $(MSBuildProjectDirectory)\..\
5 |
6 |
7 | false
8 |
9 |
10 | false
11 |
12 |
13 | true
14 |
15 |
16 | false
17 |
18 |
19 |
20 |
21 |
22 |
26 |
27 |
28 |
29 |
30 | $([System.IO.Path]::Combine($(SolutionDir), ".nuget"))
31 |
32 |
33 |
34 |
35 | $(SolutionDir).nuget
36 |
37 |
38 |
39 | $(MSBuildProjectDirectory)\packages.$(MSBuildProjectName.Replace(' ', '_')).config
40 | $(MSBuildProjectDirectory)\packages.$(MSBuildProjectName).config
41 |
42 |
43 |
44 | $(MSBuildProjectDirectory)\packages.config
45 | $(PackagesProjectConfig)
46 |
47 |
48 |
49 |
50 | $(NuGetToolsPath)\NuGet.exe
51 | @(PackageSource)
52 |
53 | "$(NuGetExePath)"
54 | mono --runtime=v4.0.30319 $(NuGetExePath)
55 |
56 | $(TargetDir.Trim('\\'))
57 |
58 | -RequireConsent
59 | -NonInteractive
60 |
61 | "$(SolutionDir) "
62 | "$(SolutionDir)"
63 |
64 |
65 | $(NuGetCommand) install "$(PackagesConfig)" -source "$(PackageSources)" $(NonInteractiveSwitch) $(RequireConsentSwitch) -solutionDir $(PaddedSolutionDir)
66 | $(NuGetCommand) pack "$(ProjectPath)" -Properties "Configuration=$(Configuration);Platform=$(Platform)" $(NonInteractiveSwitch) -OutputDirectory "$(PackageOutputDir)" -symbols
67 |
68 |
69 |
70 | RestorePackages;
71 | $(BuildDependsOn);
72 |
73 |
74 |
75 |
76 | $(BuildDependsOn);
77 | BuildPackage;
78 |
79 |
80 |
81 |
82 |
83 |
84 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
99 |
100 |
103 |
104 |
105 |
106 |
108 |
109 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
141 |
142 |
143 |
144 |
145 |
--------------------------------------------------------------------------------