├── Samples ├── .nuget │ ├── NuGet.exe │ ├── NuGet.Config │ └── NuGet.targets ├── SampleActivities │ ├── Basic │ │ ├── DataExtraction │ │ │ ├── SimpleExtractorDesigner.xaml │ │ │ ├── SimpleExtractorDesigner.xaml.cs │ │ │ └── SimpleExtractor.cs │ │ ├── DocumentClassification │ │ │ ├── SimpleClassifierDesigner.xaml │ │ │ ├── SimpleClassifierDesigner.xaml.cs │ │ │ └── SimpleClassifier.cs │ │ └── OCR │ │ │ ├── SimpleOCRScrapeFactory.cs │ │ │ ├── SimpleScrapeControl.xaml.cs │ │ │ ├── OCRResultHelper.cs │ │ │ ├── UsageToVisibilityConverter.cs │ │ │ ├── SimpleOCRScrape.cs │ │ │ ├── SimpleOCREngine.cs │ │ │ └── SimpleScrapeControl.xaml │ ├── Properties │ │ └── AssemblyInfo.cs │ ├── DesignerMetadata.cs │ └── SampleActivities.csproj └── Samples.sln ├── README.md ├── .gitattributes └── .gitignore /Samples/.nuget/NuGet.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UiPath/Document-Processing-Code-Samples/HEAD/Samples/.nuget/NuGet.exe -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Document-Processing-Code-Samples 2 | Code samples for document processing activities. 
3 | 4 | Activities are located in Samples/SampleActivities/Basic/ 5 | -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/DataExtraction/SimpleExtractorDesigner.xaml: -------------------------------------------------------------------------------- 1 | 5 | -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/DocumentClassification/SimpleClassifierDesigner.xaml: -------------------------------------------------------------------------------- 1 | 5 | -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/SimpleOCRScrapeFactory.cs: -------------------------------------------------------------------------------- 1 | #if !NETPORTABLE_UIPATH 2 | using UiPath.OCR.Contracts.Scrape; 3 | 4 | namespace SampleActivities.Basic.OCR 5 | { 6 | public class SampleOCRScrapeFactory : OCRScrapeFactory 7 | { 8 | public override OCRScrapeBase CreateEngine(ScrapeEngineUsages usage) 9 | { 10 | return new SampleOCRScrape(new SimpleOCREngine(), usage); 11 | } 12 | } 13 | } 14 | #endif -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/DataExtraction/SimpleExtractorDesigner.xaml.cs: -------------------------------------------------------------------------------- 1 | #if !NETPORTABLE_UIPATH 2 | namespace SampleActivities.Basic.DataExtraction 3 | { 4 | /// 5 | /// Interaction logic for SimpleExtractorDesigner.xaml 6 | /// 7 | public partial class SimpleExtractorDesigner 8 | { 9 | public SimpleExtractorDesigner() 10 | { 11 | InitializeComponent(); 12 | } 13 | } 14 | } 15 | #endif -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/DocumentClassification/SimpleClassifierDesigner.xaml.cs: -------------------------------------------------------------------------------- 1 | #if !NETPORTABLE_UIPATH 2 | 
namespace SampleActivities.Basic.DocumentClassification 3 | { 4 | /// 5 | /// Interaction logic for SimpleClassifierDesigner.xaml 6 | /// 7 | public partial class SimpleClassifierDesigner 8 | { 9 | public SimpleClassifierDesigner() 10 | { 11 | InitializeComponent(); 12 | } 13 | } 14 | } 15 | #endif -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/SimpleScrapeControl.xaml.cs: -------------------------------------------------------------------------------- 1 | #if !NETPORTABLE_UIPATH 2 | using UiPath.OCR.Contracts.Scrape; 3 | 4 | namespace SampleActivities.Basic.OCR 5 | { 6 | /// 7 | /// Interaction logic for SimpleScrapeControl.xaml 8 | /// 9 | internal partial class SimpleScrapeControl : ScrapeControlBase 10 | { 11 | public string SampleInput { get; set; } 12 | 13 | public SimpleScrapeControl() 14 | : this (ScrapeEngineUsages.Screen) 15 | { 16 | } 17 | 18 | public SimpleScrapeControl(ScrapeEngineUsages usage) 19 | { 20 | Usage = usage; 21 | InitializeComponent(); 22 | DataContext = this; 23 | } 24 | } 25 | } 26 | #endif -------------------------------------------------------------------------------- /Samples/.nuget/NuGet.Config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/OCRResultHelper.cs: --------------------------------------------------------------------------------
using System.Drawing;
using System.Linq;
using UiPath.OCR.Contracts.DataContracts;

namespace SampleActivities.Basic.OCR
{
    /// <summary>
    /// Builds placeholder OCR results for the sample OCR engine.
    /// </summary>
    internal static class OCRResultHelper
    {
        /// <summary>
        /// Wraps <paramref name="text"/> in an <see cref="OCRResult"/>: the text is split on single
        /// spaces into words, and every character of a word receives the same four synthetic
        /// polygon points derived from the word's position. Confidence and skew angle are set to 0.
        /// </summary>
        /// <param name="text">The text to expose as the OCR output.</param>
        /// <returns>An <see cref="OCRResult"/> describing <paramref name="text"/>.</returns>
        internal static OCRResult FromText(string text)
        {
            var words = text
                .Split(' ')
                .Select((token, position) => BuildWord(token, position))
                .ToArray();

            return new OCRResult
            {
                Text = text,
                Words = words,
                Confidence = 0,
                SkewAngle = 0
            };
        }

        /// <summary>
        /// Creates the word at zero-based <paramref name="position"/>; its synthetic coordinates
        /// are multiples of (position + 1).
        /// </summary>
        private static Word BuildWord(string token, int position)
        {
            int ordinal = position + 1;
            int near = ordinal * 100;
            int far = ordinal * 200;

            var characters = token
                .Select(symbol => new Character
                {
                    Char = symbol,
                    // A fresh array per character, in the same point order the sample has always used.
                    PolygonPoints = new[]
                    {
                        new PointF(near, near),
                        new PointF(far, near),
                        new PointF(near, far),
                        new PointF(far, far),
                    }
                })
                .ToArray();

            return new Word
            {
                Text = token,
                Characters = characters
            };
        }
    }
}
-------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/UsageToVisibilityConverter.cs: -------------------------------------------------------------------------------- 1 | #if !NETPORTABLE_UIPATH 2 | using System; 3 | using System.Globalization; 4 | using System.Windows; 5 | using System.Windows.Data; 6 | using UiPath.OCR.Contracts.Scrape; 7 | 8 | namespace SampleActivities.Basic.OCR 9 | { 10 | internal class UsageToVisibilityConverter : IValueConverter 11 | { 12 | public Visibility Document { get; set; } 13 | 14 | public Visibility Screen { get; set; } 15 | 16 | public object Convert(object value, Type targetType, object parameter, CultureInfo culture) 17 | { 18 | ScrapeEngineUsages? usage = value as ScrapeEngineUsages?; 19 | if (usage == null) 20 | { 21 | return null; 22 | } 23 | 24 | return usage == ScrapeEngineUsages.Screen ? 
Screen : Document; 25 | } 26 | 27 | public object ConvertBack(object value, Type targetType, object parameter, CultureInfo culture) 28 | { 29 | throw new NotImplementedException(); 30 | } 31 | } 32 | } 33 | #endif -------------------------------------------------------------------------------- /Samples/Samples.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.1.32414.318 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SampleActivities", "SampleActivities\SampleActivities.csproj", "{CF089ADF-8E06-4168-A812-A1AD23D9B983}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {CF089ADF-8E06-4168-A812-A1AD23D9B983}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {C6F0CD1A-DBA9-4755-AB6E-C02DAAEC4C1B} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/SimpleOCRScrape.cs: --------------------------------------------------------------------------------
#if !NETPORTABLE_UIPATH
using System.Collections.Generic;
using UiPath.OCR.Contracts.Activities;
using UiPath.OCR.Contracts.Scrape;

namespace SampleActivities.Basic.OCR
{
    // Extend OCRScrapeBase to allow your OCR engine to display custom user controls when integrating
    // with wizards such as Screen Scraping or Template Manager.
    internal class SampleOCRScrape : OCRScrapeBase
    {
        // The custom control shown by the wizard; it also holds the value the user typed in.
        private readonly SimpleScrapeControl _sampleScrapeControl;

        /// <summary>
        /// Usages reported by this scrape engine: both Document and Screen.
        /// </summary>
        public override ScrapeEngineUsages Usage { get; } = ScrapeEngineUsages.Document | ScrapeEngineUsages.Screen;

        /// <summary>
        /// Creates the scrape engine and its control.
        /// </summary>
        /// <param name="ocrEngineActivity">The OCR activity handed to the base class.</param>
        /// <param name="usage">The usage the control is created for.</param>
        public SampleOCRScrape(IOCRActivity ocrEngineActivity, ScrapeEngineUsages usage) : base(ocrEngineActivity)
        {
            _sampleScrapeControl = new SimpleScrapeControl(usage);
        }

        /// <summary>
        /// Returns the control created in the constructor.
        /// </summary>
        public override ScrapeControlBase GetScrapeControl()
        {
            return _sampleScrapeControl;
        }

        /// <summary>
        /// Returns the control's current input keyed by the name of the engine's CustomInput argument,
        /// which is the key <see cref="SimpleOCREngine.PerformOCRAsync"/> reads from its options.
        /// </summary>
        public override Dictionary<string, object> GetScrapeArguments()
        {
            return new Dictionary<string, object>
            {
                { nameof(SimpleOCREngine.CustomInput), _sampleScrapeControl.SampleInput }
            };
        }
    }
}
#endif
-------------------------------------------------------------------------------- /Samples/SampleActivities/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.InteropServices; 3 | 4 | // General Information about an assembly is controlled through the following 5 | // set of attributes. Change these attribute values to modify the information 6 | // associated with an assembly. 7 | [assembly: AssemblyTitle("SampleActivities")] 8 | [assembly: AssemblyDescription("")] 9 | [assembly: AssemblyConfiguration("")] 10 | [assembly: AssemblyTrademark("")] 11 | [assembly: AssemblyCulture("")] 12 | [assembly: AssemblyCompany("UiPath")] 13 | [assembly: AssemblyProduct("UiPath")] 14 | [assembly: AssemblyCopyright("Copyright © UiPath")] 15 | 16 | // Setting ComVisible to false makes the types in this assembly not visible 17 | // to COM components. 
If you need to access a type in this assembly from 18 | // COM, set the ComVisible attribute to true on that type. 19 | [assembly: ComVisible(false)] 20 | 21 | // The following GUID is for the ID of the typelib if this project is exposed to COM 22 | [assembly: Guid("8a3c8ed7-9769-438c-ad60-f3001db9d911")] 23 | 24 | // Version information for an assembly consists of the following four values: 25 | // 26 | // Major Version 27 | // Minor Version 28 | // Build Number 29 | // Revision 30 | // 31 | // You can specify all the values or you can default the Build and Revision Numbers 32 | // by using the '*' as shown below: 33 | // [assembly: AssemblyVersion("1.0.*")] 34 | [assembly: AssemblyVersion("1.0.*")] 35 | -------------------------------------------------------------------------------- /Samples/SampleActivities/DesignerMetadata.cs: -------------------------------------------------------------------------------- 1 | #if !NETPORTABLE_UIPATH 2 | using System.Activities.Presentation.Metadata; 3 | using System.ComponentModel; 4 | using SampleActivities.Basic.DataExtraction; 5 | using SampleActivities.Basic.DocumentClassification; 6 | using SampleActivities.Basic.OCR; 7 | 8 | namespace SampleActivities 9 | { 10 | public class DesignerMetadata : IRegisterMetadata 11 | { 12 | public void Register() 13 | { 14 | var builder = new AttributeTableBuilder(); 15 | 16 | // Designers 17 | var simpleClassifierDesigner = new DesignerAttribute(typeof(SimpleClassifierDesigner)); 18 | var simpleExtractorDesigner = new DesignerAttribute(typeof(SimpleExtractorDesigner)); 19 | 20 | //Categories 21 | var classifierCategoryAttribute = new CategoryAttribute("Sample Classifiers"); 22 | var extractorCategoryAttribute = new CategoryAttribute("Sample Extractors"); 23 | var ocrCategoryAttribute = new CategoryAttribute("Sample OCR Engines"); 24 | 25 | builder.AddCustomAttributes(typeof(SimpleClassifier), classifierCategoryAttribute); 26 | builder.AddCustomAttributes(typeof(SimpleClassifier), 
simpleClassifierDesigner); 27 | 28 | builder.AddCustomAttributes(typeof(SimpleExtractor), extractorCategoryAttribute); 29 | builder.AddCustomAttributes(typeof(SimpleExtractor), simpleExtractorDesigner); 30 | 31 | builder.AddCustomAttributes(typeof(SimpleOCREngine), ocrCategoryAttribute); 32 | builder.AddCustomAttributes(typeof(SimpleOCREngine), nameof(SimpleOCREngine.Result), new CategoryAttribute("Output")); 33 | 34 | MetadataStore.AddAttributeTable(builder.CreateTable()); 35 | } 36 | } 37 | } 38 | #endif -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/SimpleOCREngine.cs: --------------------------------------------------------------------------------
using System.Activities;
using System.Collections.Generic;
using System.ComponentModel;
using System.Drawing;
using System.Threading;
using System.Threading.Tasks;
using UiPath.OCR.Contracts.Activities;
using UiPath.OCR.Contracts.DataContracts;

namespace SampleActivities.Basic.OCR
{
    /// <summary>
    /// Sample OCR engine. It does not read the image: the "recognized" text is a fixed sentence
    /// that embeds the value of the <see cref="CustomInput"/> argument.
    /// </summary>
    public class SimpleOCREngine : OCRCodeActivity
    {
        [Category("Input")]
        [Browsable(true)]
        public override InArgument<Image> Image { get => base.Image; set => base.Image = value; }

        [Category("Output")]
        [Browsable(true)]
        public override OutArgument<string> Text { get => base.Text; set => base.Text = value; }

        // Example of an engine-specific input argument.
        [Category("Input")]
        public InArgument<string> CustomInput { get; set; }

        // Example of an engine-specific output argument.
        [Category("Output")]
        public OutArgument<string> CustomOutput { get; set; }

        /// <summary>
        /// Produces the sample OCR result.
        /// </summary>
        /// <param name="image">The image to process; unused by this sample.</param>
        /// <param name="options">
        /// Engine options. The entry keyed by the name of <see cref="CustomInput"/> is read when present;
        /// a missing dictionary, a missing key, or a non-string value is treated as no custom input.
        /// </param>
        /// <param name="ct">Cancellation token; unused because the result is computed synchronously.</param>
        /// <returns>A completed task holding the generated <see cref="OCRResult"/>.</returns>
        public override Task<OCRResult> PerformOCRAsync(Image image, Dictionary<string, object> options, CancellationToken ct)
        {
            // Callers are not guaranteed to supply the key (or the dictionary), so look it up
            // defensively instead of indexing, which would throw.
            string customInput = null;
            if (options != null && options.TryGetValue(nameof(CustomInput), out object rawInput))
            {
                customInput = rawInput as string;
            }

            string text = $"Text from {nameof(SimpleOCREngine)} with custom input: {customInput}";
            return Task.FromResult(OCRResultHelper.FromText(text));
        }

        /// <summary>
        /// Sets <see cref="CustomOutput"/> to a summary of the OCR result (its text and word count).
        /// </summary>
        protected override void OnSuccess(CodeActivityContext context, OCRResult result)
        {
            CustomOutput.Set(context, $"Custom output: '{result.Text}' has {result.Words.Length} words.");
        }

        /// <summary>
        /// Collects the value of <see cref="CustomInput"/> into a dictionary under the same key
        /// that <see cref="PerformOCRAsync"/> looks up in its options.
        /// </summary>
        protected override Dictionary<string, object> BeforeExecute(CodeActivityContext context)
        {
            return new Dictionary<string, object>
            {
                { nameof(CustomInput), CustomInput.Get(context) }
            };
        }
    }
}
-------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/OCR/SimpleScrapeControl.xaml: -------------------------------------------------------------------------------- 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. 
An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /Samples/SampleActivities/SampleActivities.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net461;net6.0-windows7.0;net6.0 5 | false 6 | false 7 | $(ProjectDir)..\Output\SampleActivities 8 | $(DefineConstants);NETSTANDARD;NETPORTABLE_UIPATH 9 | 10 | SampleActivities 11 | $(Version) 12 | UiPath Sample Activities 13 | UiPath 14 | UiPath Document Understanding Sample Activities. 15 | $(ProjectDir)..\Output\Packages 16 | true 17 | false 18 | OnBuildSuccess 19 | 20 | 21 | 22 | SampleActivities 23 | 1 24 | 0 25 | 0 26 | 27 | 28 | 29 | $(VersionBuild)$([System.DateTime]::UtcNow.DayOfYear.ToString("F0")) 30 | $([System.Math]::Floor($([MSBuild]::Divide($([System.DateTime]::UtcNow.TimeOfDay.TotalMinutes), 5)))) 31 | $(VersionMajor).$(VersionMinor).$(VersionBuild)-dev.$(VersionRevision) 32 | $(VersionMajor).$(VersionMinor).$(VersionBuild).$(VersionRevision) 33 | 34 | 35 | 36 | true 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/DocumentClassification/SimpleClassifier.cs: --------------------------------------------------------------------------------
using System.Activities;
using System.Linq;
using UiPath.DocumentProcessing.Contracts.Classification;
using UiPath.DocumentProcessing.Contracts.Dom;
using UiPath.DocumentProcessing.Contracts.Results;

namespace SampleActivities.Basic.DocumentClassification
{
    /// <summary>
    /// This sample classifier takes the first word from the given document page as evidence, returning the first document type as the classification result.
    /// </summary>
    public class SimpleClassifier : ClassifierCodeActivity
    {
        // Example input argument: zero-based index of the page the evidence word is taken from.
        public InArgument<int> EvidencePage { get; set; }

        /// <summary>
        /// Reads the activity inputs from the context and stores the computed result in ClassifierResult.
        /// </summary>
        protected override void Execute(CodeActivityContext context)
        {
            string text = DocumentText.Get(context);
            Document document = DocumentObjectModel.Get(context);
            string documentPath = DocumentPath.Get(context);
            ClassifierDocumentType[] documentTypes = DocumentTypes.Get(context);
            int evidencePage = EvidencePage.Get(context);

            ClassifierResult.Set(context, ComputeResult(text, document, documentPath, documentTypes, evidencePage));
        }

        /// <summary>
        /// Classifies the document as the first requested document type, using the first word of the
        /// evidence page as evidence.
        /// </summary>
        /// <param name="text">The document text; unused by this sample.</param>
        /// <param name="document">The Document Object Model of the document.</param>
        /// <param name="documentPath">The document path; unused by this sample.</param>
        /// <param name="documentTypes">The document types to choose from.</param>
        /// <param name="evidencePage">Zero-based index of the page the evidence word is taken from.</param>
        /// <returns>
        /// The classification result, or null (unsuccessful classification) when no document types were
        /// requested, the page index is outside the document, or the page contains no words.
        /// </returns>
        private ClassifierResult ComputeResult(string text, Document document, string documentPath, ClassifierDocumentType[] documentTypes, int evidencePage)
        {
            // example of unsuccessful classification
            if (documentTypes == null || !documentTypes.Any())
            {
                return null;
            }

            // a missing document or a page index outside the document (negative included) cannot be evidenced
            if (document?.Pages == null || evidencePage < 0 || evidencePage >= document.Pages.Length)
            {
                return null;
            }

            // take first word from the evidence page in the document and consider it as evidence for the classification
            var page = document.Pages[evidencePage];
            var firstWord = page.Sections?
                .SelectMany(section => section.WordGroups)
                .SelectMany(wordGroup => wordGroup.Words)
                .FirstOrDefault();
            if (firstWord == null)
            {
                // a page without words offers no evidence
                return null;
            }

            // the token must name the page the word lives on, which is the evidence page rather than always page 0
            var firstWordValueToken = new ResultsValueTokens(evidencePage, (float)page.Size.Width, (float)page.Size.Height, new[] { firstWord.Box });

            // return first document type, with evidencing based on the first word in the document
            var classificationResult = new ClassificationResult(
                // consider the first document type requested
                documentTypes.First().DocumentTypeId,
                // fill in document id from the Document Object Model information
                document.DocumentId,
                // simulate a 85% confidence
                0.85f,
                // take OCR confidence of the words used for evidencing
                firstWord.OcrConfidence,
                // build the evidencing information
                new ResultsContentReference(firstWord.IndexInText, firstWord.Text.Length, new[] { firstWordValueToken }),
                // consider the classification applying to the entire document (all pages)
                new ResultsDocumentBounds(document.Pages.Length, document.Length));

            return new ClassifierResult
            {
                Classifications = new[] { classificationResult }
            };
        }
    }
}
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | *.deps.json 10 | 11 | # User-specific files (MonoDevelop/Xamarin Studio) 12 | *.userprefs 13 | 14 | # Build results 15 | [Dd]ebug/ 16 | [Dd]ebugPublic/ 17 | [Rr]elease/ 18 | [Rr]eleases/ 19 | x64/ 20 | x86/ 21 | bld/ 22 | [Bb]in/ 23 | [Oo]bj/ 24 | [Ll]og/ 25 | 26 | # Visual Studio 2015 cache/options directory 27 | .vs/ 28 | # Uncomment if you have tasks that create the project's static files in wwwroot 29 | #wwwroot/ 30 | 31 | # MSTest test Results 32 | [Tt]est[Rr]esult*/ 33 | [Bb]uild[Ll]og.* 34 | 35 | # NUNIT 36 | *.VisualState.xml 37 | TestResult.xml 38 | 39 | # Build Results of an ATL Project 40 | [Dd]ebugPS/ 41 | [Rr]eleasePS/ 42 | dlldata.c 43 | 44 | # DNX 45 | project.lock.json 46 | project.fragment.lock.json 47 | artifacts/ 48 | 49 | *_i.c 50 | *_p.c 51 | *_i.h 52 | *.ilk 53 | *.meta 54 | *.obj 55 | *.pch 56 | *.pdb 57 | *.pgc 58 | *.pgd 59 | *.rsp 60 | *.sbr 61 | *.tlb 62 | *.tli 63 | *.tlh 64 | *.tmp 65 | *.tmp_proj 66 | *.log 67 | *.vspscc 68 | *.vssscc 69 | .builds 70 | *.pidb 71 | *.svclog 72 | *.scc 73 | 74 
| # Chutzpah Test files 75 | _Chutzpah* 76 | 77 | # Visual C++ cache files 78 | ipch/ 79 | *.aps 80 | *.ncb 81 | *.opendb 82 | *.opensdf 83 | *.sdf 84 | *.cachefile 85 | *.VC.db 86 | *.VC.VC.opendb 87 | 88 | # Visual Studio profiler 89 | *.psess 90 | *.vsp 91 | *.vspx 92 | *.sap 93 | 94 | # TFS 2012 Local Workspace 95 | $tf/ 96 | 97 | # Guidance Automation Toolkit 98 | *.gpState 99 | 100 | # ReSharper is a .NET coding add-in 101 | _ReSharper*/ 102 | *.[Rr]e[Ss]harper 103 | *.DotSettings.user 104 | 105 | # JustCode is a .NET coding add-in 106 | .JustCode 107 | 108 | # TeamCity is a build add-in 109 | _TeamCity* 110 | 111 | # DotCover is a Code Coverage Tool 112 | *.dotCover 113 | 114 | # NCrunch 115 | _NCrunch_* 116 | .*crunch*.local.xml 117 | nCrunchTemp_* 118 | 119 | # MightyMoose 120 | *.mm.* 121 | AutoTest.Net/ 122 | 123 | # Web workbench (sass) 124 | .sass-cache/ 125 | 126 | # Installshield output folder 127 | [Ee]xpress/ 128 | 129 | # DocProject is a documentation generator add-in 130 | DocProject/buildhelp/ 131 | DocProject/Help/*.HxT 132 | DocProject/Help/*.HxC 133 | DocProject/Help/*.hhc 134 | DocProject/Help/*.hhk 135 | DocProject/Help/*.hhp 136 | DocProject/Help/Html2 137 | DocProject/Help/html 138 | 139 | # Click-Once directory 140 | publish/ 141 | 142 | # Publish Web Output 143 | *.[Pp]ublish.xml 144 | *.azurePubxml 145 | # TODO: Comment the next line if you want to checkin your web deploy settings 146 | # but database connection strings (with potential passwords) will be unencrypted 147 | #*.pubxml 148 | *.publishproj 149 | 150 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 151 | # checkin your Azure Web App publish settings, but sensitive information contained 152 | # in these scripts will be unencrypted 153 | PublishScripts/ 154 | 155 | # NuGet Packages 156 | *.nupkg 157 | # The packages folder can be ignored because of Package Restore 158 | **/packages/* 159 | # except build/, which is used as an MSBuild target. 
160 | !**/packages/build/ 161 | # Uncomment if necessary however generally it will be regenerated when needed 162 | #!**/packages/repositories.config 163 | # NuGet v3's project.json files produces more ignoreable files 164 | *.nuget.props 165 | *.nuget.targets 166 | 167 | # Microsoft Azure Build Output 168 | csx/ 169 | *.build.csdef 170 | 171 | # Microsoft Azure Emulator 172 | ecf/ 173 | rcf/ 174 | 175 | # Windows Store app package directories and files 176 | AppPackages/ 177 | BundleArtifacts/ 178 | Package.StoreAssociation.xml 179 | _pkginfo.txt 180 | 181 | # Visual Studio cache files 182 | # files ending in .cache can be ignored 183 | *.[Cc]ache 184 | # but keep track of directories ending in .cache 185 | !*.[Cc]ache/ 186 | 187 | # Others 188 | ClientBin/ 189 | ~$* 190 | *~ 191 | *.dbmdl 192 | *.dbproj.schemaview 193 | *.jfm 194 | *.pfx 195 | *.publishsettings 196 | node_modules/ 197 | orleans.codegen.cs 198 | 199 | # Since there are multiple workflows, uncomment next line to ignore bower_components 200 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 201 | #bower_components/ 202 | 203 | # RIA/Silverlight projects 204 | Generated_Code/ 205 | 206 | # Backup & report files from converting an old project file 207 | # to a newer Visual Studio version. 
Backup files are not needed, 208 | # because we have git ;-) 209 | _UpgradeReport_Files/ 210 | Backup*/ 211 | UpgradeLog*.XML 212 | UpgradeLog*.htm 213 | 214 | # SQL Server files 215 | *.mdf 216 | *.ldf 217 | 218 | # Business Intelligence projects 219 | *.rdl.data 220 | *.bim.layout 221 | *.bim_*.settings 222 | 223 | # Microsoft Fakes 224 | FakesAssemblies/ 225 | 226 | # GhostDoc plugin setting file 227 | *.GhostDoc.xml 228 | 229 | # Node.js Tools for Visual Studio 230 | .ntvs_analysis.dat 231 | 232 | # Visual Studio 6 build log 233 | *.plg 234 | 235 | # Visual Studio 6 workspace options file 236 | *.opt 237 | 238 | # Visual Studio LightSwitch build output 239 | **/*.HTMLClient/GeneratedArtifacts 240 | **/*.DesktopClient/GeneratedArtifacts 241 | **/*.DesktopClient/ModelManifest.xml 242 | **/*.Server/GeneratedArtifacts 243 | **/*.Server/ModelManifest.xml 244 | _Pvt_Extensions 245 | 246 | # Paket dependency manager 247 | .paket/paket.exe 248 | paket-files/ 249 | 250 | # FAKE - F# Make 251 | .fake/ 252 | 253 | # JetBrains Rider 254 | .idea/ 255 | *.sln.iml 256 | 257 | # CodeRush 258 | .cr/ 259 | 260 | # Python Tools for Visual Studio (PTVS) 261 | __pycache__/ 262 | *.pyc -------------------------------------------------------------------------------- /Samples/SampleActivities/Basic/DataExtraction/SimpleExtractor.cs: --------------------------------------------------------------------------------
using System.Activities;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using UiPath.DocumentProcessing.Contracts;
using UiPath.DocumentProcessing.Contracts.DataExtraction;
using UiPath.DocumentProcessing.Contracts.Dom;
using UiPath.DocumentProcessing.Contracts.Results;
using UiPath.DocumentProcessing.Contracts.Taxonomy;

namespace SampleActivities.Basic.DataExtraction
{
    /// <summary>
    /// Sample extractor that reports example values for the first Date, Boolean and Table fields of
    /// the requested document type, referencing words from the first page of the document.
    /// </summary>
    public class SimpleExtractor : ExtractorCodeActivity
    {
        /// <summary>
        /// Reports an empty capabilities array.
        /// </summary>
        public override Task<ExtractorDocumentTypeCapabilities[]> GetCapabilities()
        {
            return Task.FromResult(new ExtractorDocumentTypeCapabilities[0]);
        }

        /// <summary>
        /// Reads the activity inputs from the context and stores the computed result in ExtractorResult.
        /// </summary>
        protected override void Execute(CodeActivityContext context)
        {
            ExtractorDocumentType documentType = ExtractorDocumentType.Get(context);
            ResultsDocumentBounds documentBounds = DocumentBounds.Get(context);
            string text = DocumentText.Get(context);
            Document document = DocumentObjectModel.Get(context);
            string documentPath = DocumentPath.Get(context);

            ExtractorResult.Set(context, ComputeResult(documentType, documentBounds, text, document, documentPath));
        }

        /// <summary>
        /// Builds one data point for each of the first Date, Boolean and Table fields found in
        /// <paramref name="documentType"/>; field types that are not present are skipped.
        /// </summary>
        private ExtractorResult ComputeResult(ExtractorDocumentType documentType, ResultsDocumentBounds documentBounds, string text, Document document, string documentPath)
        {
            var extractorResult = new ExtractorResult();
            var resultsDataPoints = new List<ResultsDataPoint>();

            // example of reporting a value with derived parts
            Field firstDateField = documentType.Fields.FirstOrDefault(f => f.Type == FieldType.Date);
            if (firstDateField != null)
            {
                // if any field is of type Date return first word on first page as reference and report Jan 1st 2002 as value
                resultsDataPoints.Add(CreateDateFieldDataPoint(firstDateField, document));
            }

            // example of report a value with no textual reference (only visual reference)
            Field firstBooleanField = documentType.Fields.FirstOrDefault(f => f.Type == FieldType.Boolean);
            if (firstBooleanField != null)
            {
                // if any field is of type Boolean return "Yes" with a visual reference from pixel position (50, 100) and width 200 and height 300.
                resultsDataPoints.Add(CreateBooleanFieldDataPoint(firstBooleanField, document));
            }

            // example of table value
            Field firstTableField = documentType.Fields.FirstOrDefault(f => f.Type == FieldType.Table);
            if (firstTableField != null)
            {
                // if any field is of type Table return a table with headers referencing the first N words and 2 rows referencing the next N * 2 words.
                // N will be the number of columns in the table field.
                resultsDataPoints.Add(CreateTableFieldDataPoint(firstTableField, document));
            }

            extractorResult.DataPoints = resultsDataPoints.ToArray();
            return extractorResult;
        }

        /// <summary>
        /// Reports "Jan 1st 2002" for the date field, referencing the first word of the first page and
        /// attaching the derived parts created by CreateDerivedFieldsForDate(1, 1, 2002).
        /// </summary>
        private static ResultsDataPoint CreateDateFieldDataPoint(Field firstDateField, Document document)
        {
            var derivedFields = ResultsDerivedField.CreateDerivedFieldsForDate(1, 1, 2002);
            var firstDateValue = CreateResultsValue(0, document, "Jan 1st 2002");
            firstDateValue.DerivedFields = derivedFields;

            return new ResultsDataPoint(
                firstDateField.FieldId,
                firstDateField.FieldName,
                firstDateField.Type,
                new[] { firstDateValue });
        }

        /// <summary>
        /// Reports "Yes" for the boolean field with a visual-only reference (a checked box on the first
        /// page) and an empty textual reference.
        /// </summary>
        private static ResultsDataPoint CreateBooleanFieldDataPoint(Field firstBooleanField, Document document)
        {
            var booleanToken = new ResultsValueTokens(0, (float)document.Pages[0].Size.Width, (float)document.Pages[0].Size.Height, new[] { Box.CreateChecked(50, 100, 200, 300) });
            var reference = new ResultsContentReference(0, 0, new[] { booleanToken });
            var firstBooleanValue = new ResultsValue("Yes", reference, 0.9f, 1f);

            return new ResultsDataPoint(
                firstBooleanField.FieldId,
                firstBooleanField.FieldName,
                firstBooleanField.Type,
                new[] { firstBooleanValue });
        }

        /// <summary>
        /// Reports a table with a header row and two data rows. With N columns, the header references
        /// words 0..N-1 of the first page, the first row words N..2N-1 and the second row words 2N..3N-1.
        /// </summary>
        private static ResultsDataPoint CreateTableFieldDataPoint(Field firstTableField, Document document)
        {
            var columns = firstTableField.Components.ToArray();
            int columnCount = columns.Length;

            // Each row is materialised with an explicit starting word index. Lazy queries sharing a
            // mutable counter would hand out indices in whatever order (and as often as) the consumer
            // enumerates them.
            IEnumerable<ResultsDataPoint> headerCells = CreateRowCells(columns, 0, document);
            IEnumerable<ResultsDataPoint> firstRowCells = CreateRowCells(columns, columnCount, document);
            IEnumerable<ResultsDataPoint> secondRowCells = CreateRowCells(columns, 2 * columnCount, document);

            var tableValue = ResultsValue.CreateTableValue(firstTableField, headerCells, new[] { firstRowCells, secondRowCells }, 0.9f, 1f);

            return new ResultsDataPoint(
                firstTableField.FieldId,
                firstTableField.FieldName,
                firstTableField.Type,
                new[] { tableValue });
        }

        /// <summary>
        /// Creates one cell per column; the cell at column offset k references word (firstWordIndex + k).
        /// </summary>
        private static ResultsDataPoint[] CreateRowCells(Field[] columns, int firstWordIndex, Document document)
        {
            return columns
                .Select((column, offset) => new ResultsDataPoint(
                    column.FieldId,
                    column.FieldName,
                    column.Type,
                    new[] { CreateResultsValue(firstWordIndex + offset, document) }))
                .ToArray();
        }

        /// <summary>
        /// Creates a value that references the word at <paramref name="wordIndex"/> among all words of
        /// the first page (sections and word groups flattened in order).
        /// </summary>
        /// <param name="wordIndex">Zero-based index into the first page's words; must be within range.</param>
        /// <param name="document">The Document Object Model of the document.</param>
        /// <param name="value">The value to report; when null, the referenced word's own text is used.</param>
        private static ResultsValue CreateResultsValue(int wordIndex, Document document, string value = null)
        {
            var word = document.Pages[0].Sections.SelectMany(s => s.WordGroups).SelectMany(w => w.Words).ToArray()[wordIndex];
            var wordValueToken = new ResultsValueTokens(word.IndexInText, word.Text.Length, 0, (float)document.Pages[0].Size.Width, (float)document.Pages[0].Size.Height, new[] { word.Box });
            var reference = new ResultsContentReference(word.IndexInText, word.Text.Length, new[] { wordValueToken });

            return new ResultsValue(value ?? word.Text, reference, 0.9f, word.OcrConfidence);
        }
    }
}
-------------------------------------------------------------------------------- /Samples/.nuget/NuGet.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | $(MSBuildProjectDirectory)\..\ 5 | 6 | 7 | false 8 | 9 | 10 | false 11 | 12 | 13 | true 14 | 15 | 16 | false 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | $([System.IO.Path]::Combine($(SolutionDir), ".nuget")) 31 | 32 | 33 | 34 | 35 | $(SolutionDir).nuget 36 | 37 | 38 | 39 | $(MSBuildProjectDirectory)\packages.$(MSBuildProjectName.Replace(' ', '_')).config 40 | $(MSBuildProjectDirectory)\packages.$(MSBuildProjectName).config 41 | 42 | 43 | 44 | $(MSBuildProjectDirectory)\packages.config 45 | $(PackagesProjectConfig) 46 | 47 | 48 | 49 | 50 | $(NuGetToolsPath)\NuGet.exe 51 | @(PackageSource) 52 | 53 | "$(NuGetExePath)" 54 | mono --runtime=v4.0.30319 $(NuGetExePath) 55 | 56 | $(TargetDir.Trim('\\')) 57 | 58 | -RequireConsent 59 | -NonInteractive 60 | 61 | "$(SolutionDir) " 62 | "$(SolutionDir)" 63 | 64 | 65 | $(NuGetCommand) install "$(PackagesConfig)" -source "$(PackageSources)" $(NonInteractiveSwitch) $(RequireConsentSwitch) -solutionDir $(PaddedSolutionDir) 66 | $(NuGetCommand) pack "$(ProjectPath)" -Properties "Configuration=$(Configuration);Platform=$(Platform)" $(NonInteractiveSwitch) -OutputDirectory "$(PackageOutputDir)" -symbols 67 | 68 | 69 | 70 | RestorePackages; 71 | $(BuildDependsOn); 72 | 73 | 74 | 75 | 76 | $(BuildDependsOn); 77 | BuildPackage; 78 | 79 | 80 | 81 | 82 | 83 | 84 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 99 | 100 | 103 | 104 | 105 | 106 | 108 | 109 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 141 | 142 | 143 | 144 | 145 | --------------------------------------------------------------------------------