├── .gitattributes ├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── Scraperion ├── ConnectWebScraper.cs ├── ConvertFromImageToText.cs ├── DisconnectWebScraper.cs ├── GetImage.cs ├── GetWebScraperSnapshot.cs ├── InvokeWebScraperExpression.cs ├── MoveMouse.cs ├── MoveWebScraperMouse.cs ├── Properties │ └── AssemblyInfo.cs ├── Scraperion.csproj ├── Scraperion.psd1 ├── SelectImage.cs ├── SendKeys.cs ├── SendMouse.cs ├── SendWebScraperKeys.cs ├── SendWebScraperMouse.cs ├── SetWebScraperFocus.cs ├── ShowImage.cs ├── Startup.cs ├── TestImage.cs ├── WaitImage.cs ├── WaitWebScraperExpression.cs ├── app.config ├── frmImage.Designer.cs ├── frmImage.cs ├── frmImage.resx └── packages.config ├── ScraperionFramework.sln ├── ScraperionFramework ├── MouseButton.cs ├── Properties │ └── AssemblyInfo.cs ├── ScraperionFramework.csproj ├── ScreenScraper.cs ├── WebScraper.cs ├── app.config ├── packages.config └── tessdata │ ├── eng.cube.bigrams │ ├── eng.cube.fold │ ├── eng.cube.lm │ ├── eng.cube.nn │ ├── eng.cube.params │ ├── eng.cube.size │ ├── eng.cube.word-freq │ ├── eng.tesseract_cube.nn │ └── eng.traineddata └── TestHarmess ├── App.config ├── Program.cs ├── Properties └── AssemblyInfo.cs ├── TestHarmess.csproj └── packages.config /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | project.fragment.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | *.VC.db 85 | *.VC.VC.opendb 86 | 87 | 
# Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | *.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | # TODO: Comment the next line if you want to checkin your web deploy settings 145 | # but database connection strings (with potential passwords) will be unencrypted 146 | #*.pubxml 147 | *.publishproj 148 | 149 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 150 | # checkin your Azure Web App publish settings, but sensitive information contained 151 | # in these scripts will be unencrypted 152 | PublishScripts/ 153 | 154 | # NuGet Packages 155 | *.nupkg 156 | # The packages folder can be ignored because of Package Restore 157 | **/packages/* 158 | # except build/, which is used as an MSBuild target. 
159 | !**/packages/build/ 160 | # Uncomment if necessary however generally it will be regenerated when needed 161 | #!**/packages/repositories.config 162 | # NuGet v3's project.json files produces more ignoreable files 163 | *.nuget.props 164 | *.nuget.targets 165 | 166 | # Microsoft Azure Build Output 167 | csx/ 168 | *.build.csdef 169 | 170 | # Microsoft Azure Emulator 171 | ecf/ 172 | rcf/ 173 | 174 | # Windows Store app package directories and files 175 | AppPackages/ 176 | BundleArtifacts/ 177 | Package.StoreAssociation.xml 178 | _pkginfo.txt 179 | 180 | # Visual Studio cache files 181 | # files ending in .cache can be ignored 182 | *.[Cc]ache 183 | # but keep track of directories ending in .cache 184 | !*.[Cc]ache/ 185 | 186 | # Others 187 | ClientBin/ 188 | ~$* 189 | *~ 190 | *.dbmdl 191 | *.dbproj.schemaview 192 | *.jfm 193 | *.pfx 194 | *.publishsettings 195 | node_modules/ 196 | orleans.codegen.cs 197 | 198 | # Since there are multiple workflows, uncomment next line to ignore bower_components 199 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 200 | #bower_components/ 201 | 202 | # RIA/Silverlight projects 203 | Generated_Code/ 204 | 205 | # Backup & report files from converting an old project file 206 | # to a newer Visual Studio version. 
Backup files are not needed, 207 | # because we have git ;-) 208 | _UpgradeReport_Files/ 209 | Backup*/ 210 | UpgradeLog*.XML 211 | UpgradeLog*.htm 212 | 213 | # SQL Server files 214 | *.mdf 215 | *.ldf 216 | 217 | # Business Intelligence projects 218 | *.rdl.data 219 | *.bim.layout 220 | *.bim_*.settings 221 | 222 | # Microsoft Fakes 223 | FakesAssemblies/ 224 | 225 | # GhostDoc plugin setting file 226 | *.GhostDoc.xml 227 | 228 | # Node.js Tools for Visual Studio 229 | .ntvs_analysis.dat 230 | 231 | # Visual Studio 6 build log 232 | *.plg 233 | 234 | # Visual Studio 6 workspace options file 235 | *.opt 236 | 237 | # Visual Studio LightSwitch build output 238 | **/*.HTMLClient/GeneratedArtifacts 239 | **/*.DesktopClient/GeneratedArtifacts 240 | **/*.DesktopClient/ModelManifest.xml 241 | **/*.Server/GeneratedArtifacts 242 | **/*.Server/ModelManifest.xml 243 | _Pvt_Extensions 244 | 245 | # Paket dependency manager 246 | .paket/paket.exe 247 | paket-files/ 248 | 249 | # FAKE - F# Make 250 | .fake/ 251 | 252 | # JetBrains Rider 253 | .idea/ 254 | *.sln.iml 255 | 256 | # CodeRush 257 | .cr/ 258 | 259 | # Python Tools for Visual Studio (PTVS) 260 | __pycache__/ 261 | *.pyc 262 | 263 | 264 | Tools/ 265 | 266 | vmlabFiles.wxs 267 | 268 | 269 | Lab/HyperV/.vmlab/ 270 | Lab/VirtualBox/.vmlab/ 271 | Lab/VMwareWorkstation/.vmlab/ 272 | Lab/Template/Windows10x64/.vmlab/ 273 | 274 | *.vfd 275 | *.ncrunch* -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "puppeteer-sharp"] 2 | path = puppeteer-sharp 3 | url = https://github.com/kblok/puppeteer-sharp.git 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2018 Wil Taylor 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 
copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scraperion Framework 2 | Scraperion is a framework that allows easy UI and Web automation primarily designed for dealing with legacy environments where automation is hard. 3 | 4 | The main features of scraperion are as follows: 5 | 6 | * UI based automation - Scraperion will find screen elements via reference images. This allows a lot of different scenarios like automation via a VNC/RDP session or Automation of multimedia applications which don't have user controls. 7 | 8 | * Web page automation - Using a chromium backend. This allows for the automation of pages which are heavy on javascript or require authentication. 9 | 10 | ## How to install 11 | To install from PowerShell run the following: 12 | ``` 13 | Install-Module Scraperion 14 | ``` 15 | 16 | ## Examples of screen automation 17 | 18 | Loading an image for use. 
19 | ``` 20 | $img = Get-Image -Path ".\Path\To\Image.png" 21 | ``` 22 | 23 | Getting a screenshot 24 | ``` 25 | $img = Get-Image -Screen 26 | ``` 27 | 28 | Testing if image exists on the screen 29 | ``` 30 | $img = Get-Image -Path ".\Path\To\Image.png" 31 | if(($img | Test-Image)) { <# Do Something #> } 32 | ``` 33 | Waiting for an image to appear on screen 34 | ``` 35 | $img = Get-Image -Path ".\Path\To\Image.png" 36 | $img | Wait-Image 37 | ``` 38 | 39 | Example of clicking on an image on the screen. 40 | ``` 41 | $img = Get-Image -path ".\Path\To\Image.png" 42 | $img | Select-Image -Click 43 | ``` 44 | 45 | Showing an image preview so you can see what it is 46 | ``` 47 | $img = Get-Image -path ".\Path\To\Image.png" 48 | $img | Show-Image 49 | ``` 50 | 51 | Extract a string from an image using OCR 52 | ``` 53 | $img = Get-Image -path ".\Path\To\Image.png" 54 | $text = $img | ConvertFrom-ImageToText 55 | ``` 56 | 57 | Moving the mouse to a location and clicking on it. 58 | ``` 59 | Move-Mouse -X 100 -Y 100 60 | Send-Mouse -Click 61 | ``` 62 | 63 | Sending keys 64 | ``` 65 | Send-Keys -Text "hello world" 66 | ``` 67 | 68 | Sending a secure string 69 | ``` 70 | # $apikey contains a password as a secure string 71 | Send-Keys -SecureText $apikey 72 | ``` 73 | 74 | Sending a ps credential object. This will press tab between username and password. 
75 | ``` 76 | $creds = Get-Credential 77 | Send-Keys -Credential $creds 78 | ``` 79 | 80 | ## Examples of web page automation 81 | 82 | Connecting to web scraping session 83 | ``` 84 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 85 | ``` 86 | 87 | Disconnecting from a web scraping session 88 | ``` 89 | $scraper | Disconnect-WebScraper 90 | ``` 91 | 92 | Use credentials for web scraping session 93 | ``` 94 | $creds = Get-Credential 95 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' -Credential $creds 96 | ``` 97 | 98 | Setting dimensions of web browser used for scraping 99 | ``` 100 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' -Width 800 -Height 600 101 | ``` 102 | 103 | Show chromium UI 104 | ``` 105 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' -ShowUI 106 | ``` 107 | 108 | Change browser agent string 109 | By default it uses Chrome on windows 10. 110 | If you need IE or other strings check out http://www.useragentstring.com/pages/useragentstring.php?name=Internet+Explorer 111 | ``` 112 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' -Agent 'super awesome string' 113 | ``` 114 | 115 | Take a snapshot of a web page 116 | ``` 117 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 118 | $img = $scraper | Get-WebScraperSnapshot 119 | ``` 120 | 121 | Save snapshot to file 122 | ``` 123 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 124 | $scraper | Get-WebScraperSnapshot -Path "c:\images\myimg.png" 125 | ``` 126 | 127 | Create PDF of web page 128 | ``` 129 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 130 | $scraper | Get-WebScraperSnapshot -Path "c:\pdfs\my.pdf" -PDF 131 | ``` 132 | 133 | Simulating a mouse movement in browser 134 | ``` 135 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 136 | $scraper | Move-WebScraperMouse -X 100 -Y 100 137 | ``` 138 | 139 | Simulate clicking on an element in browser 140 | ``` 141 | $scraper = Connect-WebScraper -Url 
'http://mywebsite.com' 142 | $scraper | Send-WebScraperMouse -Click -Target '#foo' 143 | ``` 144 | 145 | Simulating a tap on an element in the browser 146 | ``` 147 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 148 | $scraper | Send-WebScraperMouse -Click -Target '.foo' -Tap 149 | ``` 150 | 151 | Simulate key presses in browser window. 152 | ``` 153 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 154 | $scraper | Send-WebScraperKeys "Hello world" 155 | ``` 156 | 157 | Invoke javascript expression in browser (like typing it into the console). 158 | ``` 159 | $scraper = Connect-WebScraper -Url 'http://mywebsite.com' 160 | $tag = $scraper | invoke-WebScraperExpression -Expression "document.querySelector('.foo');" 161 | ``` 162 | 163 | ## Scraperion Framework vs Scraperion PowerShell module 164 | The Scraperion Framework is a .net DLL which contains a simple interface to do all the actions listed above. The Scraperion PowerShell 165 | module consumes the library and exposes the functionality to PowerShell. 166 | 167 | Eventually the plan is to release the framework as a nuget package but at the moment you need to either download the PowerShell module or 168 | build it from source. 169 | 170 | ## Credits 171 | This project makes use of the following Open Source projects: 172 | 173 | * [Puppeteer Sharp](https://www.puppeteersharp.com) - Awesome chromium automation framework for .net. (License: MIT) 174 | * [Tesseract and .NET Wrapper](https://github.com/charlesw/tesseract) - OCR library used to ocr images. (License: Apache 2.0 for Tesseract and MIT for the .net interop library. See project page for details.). 175 | * [Newtonsoft JSON](https://www.newtonsoft.com/json) - JSON parsing library 176 | * [XmlDoc2CmdletDoc](https://github.com/red-gate/XmlDoc2CmdletDoc) - Awesome tool that converts xml docs in C# into cmdlet doc in your PowerShell module. 
--------------------------------------------------------------------------------
/Scraperion/ConnectWebScraper.cs:
--------------------------------------------------------------------------------
using System;
using System.Management.Automation;
using System.Runtime.InteropServices;
using System.Security;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">This cmdlet creates a WebScraper object.</para>
    /// <para type="description">This cmdlet creates a WebScraper object and connects a chromium instance to it.</para>
    /// </summary>
    [Cmdlet(VerbsCommunications.Connect, "WebScraper")]
    public class ConnectWebScraper : Cmdlet
    {
        /// <summary>
        /// <para type="description">Credentials to use to connect to page.</para>
        /// </summary>
        [Parameter]
        public PSCredential Credential { get; set; }

        /// <summary>
        /// <para type="description">Width of web page in pixels.</para>
        /// </summary>
        [Parameter]
        public int Width { get; set; } = 1024;

        /// <summary>
        /// <para type="description">Height of web page in pixels.</para>
        /// </summary>
        [Parameter]
        public int Height { get; set; } = 768;

        /// <summary>
        /// <para type="description">Initial url to connect to.</para>
        /// </summary>
        [Parameter(Mandatory = true, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true, Position = 0)]
        public string Url { get; set; }

        /// <summary>
        /// <para type="description">Pass switch to show chromium browser window. Useful for debugging.</para>
        /// </summary>
        [Parameter]
        public SwitchParameter ShowUI { get; set; }

        /// <summary>
        /// <para type="description">Browser agent to use when browsing pages.</para>
        /// </summary>
        [Parameter]
        public string Agent { get; set; } = WebScraper.DefaultAgent;

        /// <summary>
        /// Creates a scraper, applies the optional credentials, the view port size and the
        /// initial url, and writes the configured scraper to the pipeline.
        /// </summary>
        protected override void ProcessRecord()
        {
            // The first constructor argument is the negation of ShowUI
            // (presumably a "headless" flag - confirm against WebScraper).
            var scraper = new WebScraper(!ShowUI, Agent);

            try
            {
                if (Credential != null)
                {
                    scraper.SetAuth(Credential.UserName, SecureStringToString(Credential.Password));
                }

                scraper.SetViewPort(Width, Height);
                scraper.Url = Url;
            }
            catch
            {
                // Nobody else holds a reference yet, so a failure during configuration
                // would otherwise leave the scraper undisposed.
                scraper.Dispose();
                throw;
            }

            WriteObject(scraper);
        }

        /// <summary>
        /// Copies the content of a <see cref="SecureString"/> into a managed string.
        /// The unmanaged copy is zeroed and freed before returning.
        /// </summary>
        /// <param name="value">Secure string to convert.</param>
        /// <returns>The plain text held by <paramref name="value"/>.</returns>
        private static string SecureStringToString(SecureString value)
        {
            var valuePtr = IntPtr.Zero;
            try
            {
                valuePtr = Marshal.SecureStringToGlobalAllocUnicode(value);
                return Marshal.PtrToStringUni(valuePtr);
            }
            finally
            {
                Marshal.ZeroFreeGlobalAllocUnicode(valuePtr);
            }
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/ConvertFromImageToText.cs:
--------------------------------------------------------------------------------
using System.Drawing;
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Converts image to Text</para>
    /// <para type="description">Runs OCR over the image and returns the recognised text.</para>
    /// </summary>
    [Cmdlet(VerbsData.ConvertFrom, "ImageToText")]
    public class ConvertFromImageToText : Cmdlet
    {
        /// <summary>
        /// <para type="description">Image to run ocr over.</para>
        /// </summary>
        [Parameter(Mandatory = true, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true, Position = 0)]
        public Bitmap Image { get; set; }

        /// <summary>
        /// Passes <see cref="Image"/> to <c>ScreenScraper.OCR</c> and writes the result to the pipeline.
        /// </summary>
        protected override void ProcessRecord()
        {
            var recognised = new ScreenScraper().OCR(Image);
            WriteObject(recognised);
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/DisconnectWebScraper.cs:
--------------------------------------------------------------------------------
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Disconnects Web scraper</para>
    /// <para type="description">Disconnects from chromium and closes it.</para>
    /// </summary>
    [Cmdlet(VerbsCommunications.Disconnect, "WebScraper")]
    public class DisconnectWebScraper : Cmdlet
    {
        /// <summary>
        /// <para type="description">Instance of web scraper to close.</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 0, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true)]
        public WebScraper Scraper { get; set; }

        /// <summary>
        /// Disposes the supplied scraper.
        /// </summary>
        protected override void ProcessRecord()
        {
            Scraper.Dispose();
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/GetImage.cs:
--------------------------------------------------------------------------------
using System.Drawing;
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Retrieves an image</para>
    /// <para type="description">Retrieves an image ready for use with screen scraping cmdlets.</para>
    /// </summary>
    [Cmdlet(VerbsCommon.Get, "Image")]
    public class GetImage : Cmdlet
    {
        /// <summary>
        /// <para type="description">Path to image to load. Can be most common image formats png, bmp, jpg, etc</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 0, ParameterSetName = "PathSet")]
        public string Path { get; set; }

        /// <summary>
        /// <para type="description">Use this switch to grab a screenshot of the screen.</para>
        /// </summary>
        [Parameter(Mandatory = true, ParameterSetName = "ScreenSet")]
        public SwitchParameter Screen { get; set; }

        /// <summary>
        /// <para type="description">X coordinate to begin capture from</para>
        /// </summary>
        [Parameter(ParameterSetName = "ScreenSet")]
        [Parameter(ParameterSetName = "ImageSet", Mandatory = true, Position = 1)]
        public int X { get; set; }

        /// <summary>
        /// <para type="description">Y coordinate to begin capture from</para>
        /// </summary>
        [Parameter(ParameterSetName = "ScreenSet")]
        [Parameter(ParameterSetName = "ImageSet", Mandatory = true, Position = 2)]
        public int Y { get; set; }

        /// <summary>
        /// <para type="description">Width of capture.</para>
        /// </summary>
        [Parameter(ParameterSetName = "ScreenSet")]
        [Parameter(ParameterSetName = "ImageSet", Mandatory = true, Position = 3)]
        public int Width { get; set; }

        /// <summary>
        /// <para type="description">Height of capture.</para>
        /// </summary>
        [Parameter(ParameterSetName = "ScreenSet")]
        [Parameter(ParameterSetName = "ImageSet", Mandatory = true, Position = 4)]
        public int Height { get; set; }

        /// <summary>
        /// <para type="description">Another image to do capture from, use X, Y, Width and Height to select a subsection of image.</para>
        /// </summary>
        [Parameter(Mandatory = true, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true, Position = 0, ParameterSetName = "ImageSet")]
        public Bitmap Image { get; set; }

        /// <summary>
        /// Writes one bitmap to the pipeline: the file at <see cref="Path"/> when a path was given,
        /// a screen capture when <see cref="Screen"/> is set, otherwise the
        /// X/Y/Width/Height region copied out of <see cref="Image"/>.
        /// </summary>
        protected override void ProcessRecord()
        {
            // Path set: load the image from disk.
            if (Path != null)
            {
                WriteObject(new Bitmap(Path));
                return;
            }

            // Screen set: all four values left at zero means "capture the whole screen".
            if (Screen)
            {
                var screenScraper = new ScreenScraper();
                var noRegionGiven = X == 0 && Y == 0 && Width == 0 && Height == 0;

                if (noRegionGiven)
                {
                    WriteObject(screenScraper.CaptureScreen());
                }
                else
                {
                    WriteObject(screenScraper.CaptureArea(new Rectangle(X, Y, Width, Height)));
                }

                return;
            }

            // Image set: draw the requested source rectangle into a new bitmap of the same size.
            var cropped = new Bitmap(Width, Height);
            var destination = new Rectangle(0, 0, Width, Height);

            using (var graphics = Graphics.FromImage(cropped))
            {
                graphics.DrawImage(Image, destination, X, Y, Width, Height, GraphicsUnit.Pixel);
            }

            WriteObject(cropped);
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/GetWebScraperSnapshot.cs:
--------------------------------------------------------------------------------
using System.IO;
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Captures a screenshot of current page.</para>
    /// <para type="description">Captures a screenshot of the current page.</para>
    /// </summary>
    [Cmdlet(VerbsCommon.Get, "WebScraperSnapshot")]
    public class GetWebScraperSnapshot : Cmdlet
    {
        /// <summary>
        /// <para type="description">Scraper object to take screenshot from.</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 0, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true)]
        public WebScraper Scraper { get; set; }

        /// <summary>
        /// <para type="description">Creates a pdf of the target page instead.</para>
        /// </summary>
        [Parameter]
        public SwitchParameter Pdf { get; set; }

        /// <summary>
        /// <para type="description">Optional path to store the image or pdf.</para>
        /// </summary>
        [Parameter]
        public string Path { get; set; }

        /// <summary>
        /// Produces a pdf (when <see cref="Pdf"/> is set) or a bitmap snapshot of the page,
        /// stores it at <see cref="Path"/> when a path was given, and writes it to the pipeline.
        /// </summary>
        protected override void ProcessRecord()
        {
            if (Pdf)
            {
                var pdf = Scraper.CreatePdf();

                // Persist before emitting: a downstream pipeline stage receives the object
                // as soon as WriteObject is called and may consume or dispose it.
                if (Path != null)
                {
                    SaveStream(pdf, Path);
                }

                WriteObject(pdf);
                return;
            }

            var img = Scraper.SnapshotBitmap();

            if (Path != null)
            {
                img.Save(Path);
            }

            WriteObject(img);
        }

        /// <summary>
        /// Copies the whole of <paramref name="source"/> into the file at <paramref name="path"/>,
        /// overwriting any existing file. A seekable stream is rewound before the copy and again
        /// afterwards so the caller can still read it from the start.
        /// </summary>
        /// <param name="source">Stream to persist.</param>
        /// <param name="path">Destination file path.</param>
        private static void SaveStream(Stream source, string path)
        {
            if (source.CanSeek)
            {
                source.Position = 0;
            }

            using (var file = File.Create(path))
            {
                // CopyTo loops until the source is exhausted; a single Read call is not
                // guaranteed to fill the buffer.
                source.CopyTo(file);
            }

            if (source.CanSeek)
            {
                source.Position = 0;
            }
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/InvokeWebScraperExpression.cs:
--------------------------------------------------------------------------------
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Invokes a javascript command</para>
    /// <para type="description">Invokes a javascript command in the browser and returns json of what was executed.</para>
    /// </summary>
    [Cmdlet(VerbsLifecycle.Invoke, "WebScraperExpression")]
    public class InvokeWebScraperExpression : Cmdlet
    {
        /// <summary>
        /// <para type="description">Scraper to invoke javascript on.</para>
        /// </summary>
        [Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true, ValueFromPipeline = true, Position = 0)]
        public WebScraper Scraper { get; set; }

        /// <summary>
        /// <para type="description">Expression to execute.</para>
        /// </summary>
        [Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true, Position = 1)]
        public string Expression { get; set; }

        /// <summary>
        /// Passes <see cref="Expression"/> to <c>WebScraper.Exec</c> and writes the result to the pipeline.
        /// </summary>
        protected override void ProcessRecord()
        {
            var result = Scraper.Exec(Expression);
            WriteObject(result);
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/MoveMouse.cs:
--------------------------------------------------------------------------------
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Moves the mouse on the screen.</para>
    /// <para type="description">Moves the mouse to target location on the screen.</para>
    /// </summary>
    [Cmdlet(VerbsCommon.Move, "Mouse")]
    public class MoveMouse : Cmdlet
    {
        /// <summary>
        /// <para type="description">X coordinate to move mouse to on screen</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 0)]
        public int X { get; set; }

        /// <summary>
        /// <para type="description">Y coordinate to move mouse to on screen</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 1)]
        public int Y { get; set; }

        /// <summary>
        /// Calls <c>ScreenScraper.MoveMouse</c> with the supplied coordinates.
        /// </summary>
        protected override void ProcessRecord()
        {
            new ScreenScraper().MoveMouse(X, Y);
        }
    }
}

--------------------------------------------------------------------------------
/Scraperion/MoveWebScraperMouse.cs:
--------------------------------------------------------------------------------
using System.Management.Automation;
using ScraperionFramework;

namespace Scraperion
{
    /// <summary>
    /// <para type="synopsis">Simulates moving the mouse to a location on the web page.</para>
    /// <para type="description">Simulates moving the mouse to a location on the web page.</para>
    /// </summary>
    [Cmdlet(VerbsCommon.Move, "WebScraperMouse")]
    public class MoveWebScraperMouse : Cmdlet
    {
        /// <summary>
        /// <para type="description">Web scraper object to move mouse on.</para>
        /// </summary>
        [Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true, ValueFromPipeline = true, Position = 0)]
        public WebScraper Scraper { get; set; }

        /// <summary>
        /// <para type="description">X coordinate to move mouse to on page</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 1)]
        public decimal X { get; set; }

        /// <summary>
        /// <para type="description">Y coordinate to move mouse to on page</para>
        /// </summary>
        [Parameter(Mandatory = true, Position = 2)]
        public decimal Y { get; set; }

        /// <summary>
        /// Calls <c>WebScraper.MoveMouse</c> on the supplied scraper with the given coordinates.
        /// </summary>
        protected override void ProcessRecord()
        {
            Scraper.MoveMouse(X, Y);
        }
    }
}

-------------------------------------------------------------------------------- /Scraperion/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("Scraperion")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("Scraperion")] 13 | [assembly: AssemblyCopyright("Copyright © 2018")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("f1521c62-ef71-4e99-83b4-3d879a623cad")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /Scraperion/Scraperion.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {F1521C62-EF71-4E99-83B4-3D879A623CAD} 8 | Library 9 | Properties 10 | Scraperion 11 | Scraperion 12 | v4.6.1 13 | 512 14 | true 15 | 16 | 17 | 18 | 19 | 20 | true 21 | full 22 | false 23 | bin\Debug\ 24 | DEBUG;TRACE 25 | prompt 26 | 4 27 | false 28 | bin\Debug\Scraperion.xml 29 | 30 | 31 | pdbonly 32 | true 33 | bin\Release\ 34 | TRACE 35 | prompt 36 | 4 37 | false 38 | bin\Release\Scraperion.xml 39 | 40 | 41 | 42 | ..\packages\Microsoft.AspNetCore.WebUtilities.2.0.2\lib\netstandard2.0\Microsoft.AspNetCore.WebUtilities.dll 43 | 44 | 45 | ..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.2.0.0\lib\netstandard2.0\Microsoft.Extensions.DependencyInjection.Abstractions.dll 46 | 47 | 48 | ..\packages\Microsoft.Extensions.Logging.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Logging.dll 49 | 50 | 51 | ..\packages\Microsoft.Extensions.Logging.Abstractions.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Logging.Abstractions.dll 52 | 53 | 54 | ..\packages\Microsoft.Extensions.Options.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Options.dll 55 | 56 | 57 | 
..\packages\Microsoft.Extensions.Primitives.2.0.0\lib\netstandard2.0\Microsoft.Extensions.Primitives.dll 58 | 59 | 60 | ..\packages\Microsoft.Net.Http.Headers.2.0.2\lib\netstandard2.0\Microsoft.Net.Http.Headers.dll 61 | 62 | 63 | ..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll 64 | 65 | 66 | 67 | ..\packages\System.Buffers.4.4.0\lib\netstandard2.0\System.Buffers.dll 68 | 69 | 70 | 71 | 72 | ..\packages\Microsoft.PowerShell.3.ReferenceAssemblies.1.0.0\lib\net4\System.Management.Automation.dll 73 | 74 | 75 | ..\packages\System.Net.Http.4.3.3\lib\net46\System.Net.Http.dll 76 | True 77 | True 78 | 79 | 80 | ..\packages\System.Runtime.CompilerServices.Unsafe.4.5.0\lib\netstandard2.0\System.Runtime.CompilerServices.Unsafe.dll 81 | 82 | 83 | ..\packages\System.Security.Cryptography.Algorithms.4.3.0\lib\net461\System.Security.Cryptography.Algorithms.dll 84 | True 85 | True 86 | 87 | 88 | ..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll 89 | True 90 | True 91 | 92 | 93 | ..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll 94 | True 95 | True 96 | 97 | 98 | ..\packages\System.Security.Cryptography.X509Certificates.4.3.0\lib\net461\System.Security.Cryptography.X509Certificates.dll 99 | True 100 | True 101 | 102 | 103 | ..\packages\System.Text.Encodings.Web.4.4.0\lib\netstandard2.0\System.Text.Encodings.Web.dll 104 | 105 | 106 | ..\packages\System.Threading.Tasks.Extensions.4.5.1\lib\netstandard2.0\System.Threading.Tasks.Extensions.dll 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | ..\packages\Tesseract.3.0.2.0\lib\net451\Tesseract.dll 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | Form 123 | 124 | 125 | frmImage.cs 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | Always 150 | 151 | 152 | 153 | 154 | Always 155 | 156 | 157 | Always 158 | 159 | 160 
/// <summary>
/// Finds an image on the screen, moves the mouse onto it and performs a mouse action there.
/// </summary>
[Cmdlet(VerbsCommon.Select, "Image")]
public class SelectImage : Cmdlet
{
    /// <summary>
    /// Image to search for on the screen.
    /// </summary>
    [Parameter(Mandatory = true, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true, Position = 0)]
    public Bitmap Image { get; set; }

    /// <summary>
    /// Mouse button to use. Matched case-insensitively; defaults to "Left".
    /// </summary>
    [Parameter(Position = 1)]
    [ValidateSet("Left", "Right")]
    public string Button { get; set; } = "Left";

    /// <summary>
    /// Value added to the X coordinate of the match before the mouse is moved.
    /// </summary>
    [Parameter]
    public int XOffset { get; set; } = 0;

    /// <summary>
    /// Value added to the Y coordinate of the match before the mouse is moved.
    /// </summary>
    [Parameter]
    public int YOffset { get; set; } = 0;

    /// <summary>
    /// Click on target image (mouse down then mouse up).
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "ClickSet")]
    public SwitchParameter Click { get; set; }

    /// <summary>
    /// Mouse up on target.
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "UpSet")]
    public SwitchParameter Up { get; set; }

    /// <summary>
    /// Mouse down on target.
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "DownSet")]
    public SwitchParameter Down { get; set; }

    /// <summary>
    /// Captures the screen, locates <see cref="Image"/> in it, moves the mouse to the match
    /// (plus the offsets) and performs the requested button action.
    /// </summary>
    /// <exception cref="ApplicationException">The image was not found on the screen.</exception>
    protected override void ProcessRecord()
    {
        var ss = new ScreenScraper();

        // The screenshot is only needed for the search; dispose it instead of
        // leaving a full-screen bitmap for the finalizer.
        using (var screen = ss.CaptureScreen())
        {
            var pos = ss.Find(screen, Image);

            // Same "not found" test as Test-Image and Wait-Image: -1 marks a miss.
            if (pos.Left == -1 || pos.Right == -1)
            {
                throw new ApplicationException("Can't find image on screen!");
            }

            ss.MoveMouse(pos.X + XOffset, pos.Y + YOffset);
        }

        // ValidateSet accepts "left"/"LEFT" as well, and the value arrives here unchanged,
        // so the comparison must ignore case or those spellings would select the right button.
        var button = string.Equals(Button, "Left", StringComparison.OrdinalIgnoreCase)
            ? MouseButton.Left
            : MouseButton.Right;

        if (Click)
        {
            ss.MouseClick(button);
        }
        else if (Up)
        {
            ss.MouseUp(button);
        }
        else if (Down)
        {
            ss.MouseDown(button);
        }
    }
}
12 | /// For more information see https://docs.microsoft.com/en-us/dotnet/framework/winforms/how-to-simulate-mouse-and-keyboard-events-in-code 13 | /// 14 | [Cmdlet(VerbsCommunications.Send, "Keys")] 15 | public class SendKeys : Cmdlet 16 | { 17 | /// 18 | /// Text top type in .net Standard send keys syntax. 19 | /// 20 | [Parameter(Mandatory = true, Position = 0, ParameterSetName = "TextSet", ValueFromPipeline = true, ValueFromPipelineByPropertyName = true)] 21 | public string Text { get; set; } 22 | 23 | /// 24 | /// Decode a secure string and send that instead. Useful for sending passwords. 25 | /// 26 | [Parameter(Mandatory = true, Position = 0, ParameterSetName = "SecureTextSet", ValueFromPipeline = true, ValueFromPipelineByPropertyName = true)] 27 | public SecureString SecureText { get; set; } 28 | 29 | /// 30 | /// Sends the contents of a PSCredential object. Will press tab between username and password. 31 | /// 32 | [Parameter(Mandatory = true, Position = 0, ParameterSetName = "CredentialSet", ValueFromPipeline = true, ValueFromPipelineByPropertyName = true)] 33 | public PSCredential Credential { get; set; } 34 | 35 | /// 36 | /// Powershell logic. 
/// <summary>
/// Simulates a mouse button event.
/// Similar to send keys but for the mouse.
/// </summary>
[Cmdlet(VerbsCommunications.Send, "Mouse")]
public class SendMouse : Cmdlet
{
    /// <summary>
    /// Button to simulate. Matched case-insensitively; defaults to "Left".
    /// </summary>
    [Parameter]
    [ValidateSet("Left", "Right")]
    public string Button { get; set; } = "Left";

    /// <summary>
    /// Simulate a mouse click (mouse down then mouse up).
    /// </summary>
    [Parameter(Mandatory = false, ParameterSetName = "ClickSet")]
    public SwitchParameter Click { get; set; }

    /// <summary>
    /// Simulates a mouse up.
    /// </summary>
    [Parameter(Mandatory = false, ParameterSetName = "UpSet")]
    public SwitchParameter Up { get; set; }

    /// <summary>
    /// Simulates a mouse down.
    /// </summary>
    [Parameter(Mandatory = false, ParameterSetName = "DownSet")]
    public SwitchParameter Down { get; set; }

    /// <summary>
    /// Performs the button action selected by the Click, Up or Down switch.
    /// Does nothing when none of the switches is set.
    /// </summary>
    protected override void ProcessRecord()
    {
        // ValidateSet accepts "left"/"LEFT" as well, and the value arrives here unchanged,
        // so the comparison must ignore case or those spellings would select the right button.
        var button = string.Equals(Button, "Left", StringComparison.OrdinalIgnoreCase)
            ? MouseButton.Left
            : MouseButton.Right;

        var ss = new ScreenScraper();

        if (Click)
        {
            ss.MouseClick(button);
        }
        else if (Up)
        {
            ss.MouseUp(button);
        }
        else if (Down)
        {
            ss.MouseDown(button);
        }
    }
}
/// <summary>
/// Sends a simulated mouse or tap event to the browser page.
/// Either acts on an element chosen by selector (Target) or sends a raw button event.
/// </summary>
[Cmdlet(VerbsCommunications.Send, "WebScraperMouse")]
public class SendWebScraperMouse : Cmdlet
{
    /// <summary>
    /// Scraper object to simulate the event on.
    /// </summary>
    [Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true, ValueFromPipeline = true, Position = 0)]
    public WebScraper Scraper { get; set; }

    /// <summary>
    /// Mouse button to simulate. Matched case-insensitively; defaults to "Left".
    /// Not used when Target is supplied.
    /// </summary>
    [Parameter]
    [ValidateSet("Left", "Right")]
    public string Button { get; set; } = "Left";

    /// <summary>
    /// Simulate a mouse click (mouse down then mouse up).
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "ClickSet")]
    public SwitchParameter Click { get; set; }

    /// <summary>
    /// Simulates a mouse up event.
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "UpSet")]
    public SwitchParameter Up { get; set; }

    /// <summary>
    /// Simulates a mouse down event.
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "DownSet")]
    public SwitchParameter Down { get; set; }

    /// <summary>
    /// Javascript selector of the element to click or tap.
    /// For more details see https://www.w3schools.com/jsref/met_document_queryselector.asp
    /// </summary>
    [Parameter(Mandatory = true, ParameterSetName = "TargetSet")]
    public string Target { get; set; }

    /// <summary>
    /// Tap the target instead of clicking it.
    /// </summary>
    [Parameter(ParameterSetName = "TargetSet")]
    public SwitchParameter Tap { get; set; }

    /// <summary>
    /// Dispatches to the scraper: element tap/click when Target is set,
    /// otherwise the button action selected by Click, Up or Down.
    /// </summary>
    protected override void ProcessRecord()
    {
        if (Target != null)
        {
            if (Tap)
            {
                Scraper.TapScreen(Target);
            }
            else
            {
                Scraper.Click(Target);
            }

            return;
        }

        // ValidateSet accepts "left"/"LEFT" as well, and the value arrives here unchanged,
        // so the comparison must ignore case or those spellings would select the right button.
        var button = string.Equals(Button, "Left", System.StringComparison.OrdinalIgnoreCase)
            ? MouseButton.Left
            : MouseButton.Right;

        if (Click)
        {
            Scraper.MouseClick(button);
        }
        else if (Up)
        {
            Scraper.MouseUp(button);
        }
        else if (Down)
        {
            Scraper.MouseDown(button);
        }
    }
}
/// <summary>
/// Module initializer: its <see cref="OnImport"/> method runs when the module is loaded.
/// </summary>
/// <remarks>
/// DPI awareness has to be configured before any image preview code runs,
/// otherwise screen coordinates do not work properly.
/// </remarks>
public class Startup : IModuleAssemblyInitializer
{
    /// <summary>
    /// Configures DPI awareness for the process on module import.
    /// </summary>
    public void OnImport() => ScreenScraper.SetupDPI();
}
/// <summary>
/// Waits for an image to appear on screen.
/// </summary>
[Cmdlet(VerbsLifecycle.Wait, "Image")]
public class WaitImage : Cmdlet
{
    // Pause between two screen captures so the wait does not spin a CPU core.
    private const int PollIntervalMilliseconds = 100;

    /// <summary>
    /// Image to wait for.
    /// </summary>
    [Parameter(Mandatory = true, ValueFromPipelineByPropertyName = true, ValueFromPipeline = true, Position = 0)]
    public Bitmap Image { get; set; }

    /// <summary>
    /// Repeatedly captures the screen until <see cref="Image"/> is found in it
    /// or the pipeline is being stopped.
    /// </summary>
    protected override void ProcessRecord()
    {
        var ss = new ScreenScraper();

        // Checking Stopping lets the wait end when the pipeline is being stopped;
        // the loop makes no other call that could observe the stop request.
        while (!Stopping)
        {
            bool found;

            // Each poll allocates a full-screen bitmap; dispose it right away
            // so a long wait does not pile up undisposed images.
            using (var screen = ss.CaptureScreen())
            {
                var result = ss.Find(screen, Image);

                // -1 indicates the image was not found.
                found = result.Left != -1 && result.Right != -1;
            }

            if (found)
            {
                return;
            }

            System.Threading.Thread.Sleep(PollIntervalMilliseconds);
        }
    }
}
/// <summary>
/// Waits for a web scraper expression to evaluate to true.
/// </summary>
[Cmdlet(VerbsLifecycle.Wait, "WebScraperExpression")]
public class WaitWebScraperExpression : Cmdlet
{
    /// <summary>
    /// Scraper on which the expression is evaluated.
    /// </summary>
    [Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true, ValueFromPipeline = true, Position = 0)]
    public WebScraper Scraper { get; set; }

    /// <summary>
    /// Javascript expression to test.
    /// </summary>
    [Parameter(Mandatory = true, Position = 1)]
    public string Expression { get; set; }

    /// <summary>
    /// Evaluates <see cref="Expression"/> once per second until the result is the text
    /// "true" (any casing) or the pipeline is being stopped.
    /// </summary>
    protected override void ProcessRecord()
    {
        // Checking Stopping lets the wait end when the pipeline is being stopped;
        // the loop makes no other call that could observe the stop request.
        while (!Stopping)
        {
            var outcome = Scraper.Exec(Expression);

            // Ordinal, case-insensitive match: this is machine output, not user-facing text.
            // A null result counts as "not yet true".
            if (string.Equals(outcome, "true", System.StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            Thread.Sleep(1000);
        }
    }
}
/// <summary>
/// Preview window for an image; used by the Show-Image cmdlet.
/// </summary>
public partial class FrmImage : Form
{
    /// <summary>
    /// Builds the window and loads the image into its picture box.
    /// </summary>
    /// <param name="img">Image to preview.</param>
    public FrmImage(Image img)
    {
        InitializeComponent();

        // Shrink the window to the image only when the image is smaller than the
        // window in at least one dimension and larger than 100 pixels in both.
        var aboveMinimum = img.Width > 100 && img.Height > 100;
        var smallerThanWindow = img.Width < Size.Width || img.Height < Size.Height;

        if (aboveMinimum && smallerThanWindow)
        {
            Size = img.Size;
        }

        ImageBox.Image = img;
    }
}
= "Scraperion", "Scraperion\Scraperion.csproj", "{F1521C62-EF71-4E99-83B4-3D879A623CAD}" 9 | EndProject 10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestHarmess", "TestHarmess\TestHarmess.csproj", "{3226AD4F-B0AE-413D-B13D-CFF36B023F97}" 11 | EndProject 12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "PuppeteerSharp", "puppeteer-sharp\lib\PuppeteerSharp\PuppeteerSharp.csproj", "{B4CC8F0D-7D5B-4D5B-AEA9-B4610F34A75D}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Release|Any CPU = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {06837ACE-3EE3-4286-B70B-41213078F878}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {06837ACE-3EE3-4286-B70B-41213078F878}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {06837ACE-3EE3-4286-B70B-41213078F878}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {06837ACE-3EE3-4286-B70B-41213078F878}.Release|Any CPU.Build.0 = Release|Any CPU 24 | {F1521C62-EF71-4E99-83B4-3D879A623CAD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 25 | {F1521C62-EF71-4E99-83B4-3D879A623CAD}.Debug|Any CPU.Build.0 = Debug|Any CPU 26 | {F1521C62-EF71-4E99-83B4-3D879A623CAD}.Release|Any CPU.ActiveCfg = Release|Any CPU 27 | {F1521C62-EF71-4E99-83B4-3D879A623CAD}.Release|Any CPU.Build.0 = Release|Any CPU 28 | {3226AD4F-B0AE-413D-B13D-CFF36B023F97}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 29 | {3226AD4F-B0AE-413D-B13D-CFF36B023F97}.Debug|Any CPU.Build.0 = Debug|Any CPU 30 | {3226AD4F-B0AE-413D-B13D-CFF36B023F97}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {3226AD4F-B0AE-413D-B13D-CFF36B023F97}.Release|Any CPU.Build.0 = Release|Any CPU 32 | {B4CC8F0D-7D5B-4D5B-AEA9-B4610F34A75D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {B4CC8F0D-7D5B-4D5B-AEA9-B4610F34A75D}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {B4CC8F0D-7D5B-4D5B-AEA9-B4610F34A75D}.Release|Any CPU.ActiveCfg = Release|Any CPU 35 | 
/// <summary>
/// Mouse buttons that the scraping classes can simulate.
/// </summary>
public enum MouseButton
{
    /// <summary>The left mouse button.</summary>
    Left = 0,

    /// <summary>The right mouse button.</summary>
    Right = 1
}
8 | [assembly: AssemblyTitle("ScraperionFramework")] 9 | [assembly: AssemblyDescription("Framework for automating and scrapping the screen and the web")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("ScraperionFramework")] 13 | [assembly: AssemblyCopyright("Copyright Wil Taylor © 2018")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("06837ace-3ee3-4286-b70b-41213078f878")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.1.1")] 36 | [assembly: AssemblyFileVersion("1.0.1.1")] 37 | -------------------------------------------------------------------------------- /ScraperionFramework/ScraperionFramework.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {06837ACE-3EE3-4286-B70B-41213078F878} 8 | Library 9 | Properties 10 | ScraperionFramework 11 | ScraperionFramework 12 | v4.6.1 13 | 512 14 | true 15 | 16 | 17 | 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | false 26 | bin\Debug\ScraperionFramework.xml 27 | 28 | 29 | pdbonly 30 | true 31 | bin\Release\ 32 | TRACE 33 | prompt 34 | 4 35 | false 36 | 37 | 38 | 39 | 
..\packages\Microsoft.AspNetCore.WebUtilities.2.0.2\lib\netstandard2.0\Microsoft.AspNetCore.WebUtilities.dll 40 | 41 | 42 | 43 | ..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.2.0.0\lib\netstandard2.0\Microsoft.Extensions.DependencyInjection.Abstractions.dll 44 | 45 | 46 | ..\packages\Microsoft.Extensions.Logging.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Logging.dll 47 | 48 | 49 | ..\packages\Microsoft.Extensions.Logging.Abstractions.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Logging.Abstractions.dll 50 | 51 | 52 | ..\packages\Microsoft.Extensions.Options.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Options.dll 53 | 54 | 55 | ..\packages\Microsoft.Extensions.Primitives.2.0.0\lib\netstandard2.0\Microsoft.Extensions.Primitives.dll 56 | 57 | 58 | ..\packages\Microsoft.Net.Http.Headers.2.0.2\lib\netstandard2.0\Microsoft.Net.Http.Headers.dll 59 | 60 | 61 | ..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll 62 | 63 | 64 | 65 | ..\packages\System.Buffers.4.4.0\lib\netstandard2.0\System.Buffers.dll 66 | 67 | 68 | 69 | 70 | ..\packages\System.Net.Http.4.3.3\lib\net46\System.Net.Http.dll 71 | True 72 | True 73 | 74 | 75 | ..\packages\System.Runtime.CompilerServices.Unsafe.4.5.0\lib\netstandard2.0\System.Runtime.CompilerServices.Unsafe.dll 76 | 77 | 78 | ..\packages\System.Security.Cryptography.Algorithms.4.3.0\lib\net461\System.Security.Cryptography.Algorithms.dll 79 | True 80 | True 81 | 82 | 83 | ..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll 84 | True 85 | True 86 | 87 | 88 | ..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll 89 | True 90 | True 91 | 92 | 93 | ..\packages\System.Security.Cryptography.X509Certificates.4.3.0\lib\net461\System.Security.Cryptography.X509Certificates.dll 94 | True 95 | True 96 | 97 | 98 | ..\packages\System.Text.Encodings.Web.4.4.0\lib\netstandard2.0\System.Text.Encodings.Web.dll 99 
| 100 | 101 | ..\packages\System.Threading.Tasks.Extensions.4.5.1\lib\netstandard2.0\System.Threading.Tasks.Extensions.dll 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | ..\packages\Tesseract.3.0.2.0\lib\net451\Tesseract.dll 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | Always 123 | 124 | 125 | Always 126 | 127 | 128 | Always 129 | 130 | 131 | Always 132 | 133 | 134 | Always 135 | 136 | 137 | Always 138 | 139 | 140 | Always 141 | 142 | 143 | Always 144 | 145 | 146 | Always 147 | 148 | 149 | 150 | 151 | Always 152 | 153 | 154 | Always 155 | 156 | 157 | Always 158 | 159 | 160 | Always 161 | 162 | 163 | 164 | 165 | {b4cc8f0d-7d5b-4d5b-aea9-b4610f34a75d} 166 | PuppeteerSharp 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /ScraperionFramework/ScreenScraper.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Drawing; 4 | using System.IO; 5 | using System.Reflection; 6 | using System.Runtime.InteropServices; 7 | using System.Windows.Forms; 8 | using Tesseract; 9 | 10 | namespace ScraperionFramework 11 | { 12 | /// 13 | /// Screen scraper class 14 | /// This is useful for scrapping information from the screen and also controlling applications. 15 | /// 16 | /// Unlike similar frameworks this is based off images not finding controls. This allows it to do 17 | /// advanced things like drive applications through Citrix, VNC or RDP sessions. 
///
public class ScreenScraper
{
    /// <summary>
    /// Awareness levels passed to <c>SetProcessDpiAwareness</c>.
    /// </summary>
    private enum ProcessDPIAwareness
    {
        ProcessDPIUnaware = 0,
        ProcessSystemDPIAware = 1,
        ProcessPerMonitorDPIAware = 2
    }

    [DllImport("shcore.dll")]
    private static extern int SetProcessDpiAwareness(ProcessDPIAwareness value);

    [DllImport("user32.dll", EntryPoint = "mouse_event", CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
    private static extern void MouseEvent(uint dwFlags, uint dx, uint dy, uint cButtons, uint dwExtraInfo);

    // Mouse actions: values passed as dwFlags to mouse_event.
    private const int MOUSEEVENTF_LEFTDOWN = 0x02;
    private const int MOUSEEVENTF_LEFTUP = 0x04;
    private const int MOUSEEVENTF_RIGHTDOWN = 0x08;
    private const int MOUSEEVENTF_RIGHTUP = 0x10;


    /// <summary>
    /// This method fixes windows DPI issues which prevent the library from working properly.
    /// This must be called first thing at the start of your application before any image related methods are called.
    /// </summary>
    /// <remarks>
    /// The call is best effort: nothing happens when the reported OS major version is below 6,
    /// and a missing shcore.dll or missing export is ignored instead of crashing the caller.
    /// </remarks>
    public static void SetupDPI()
    {
        if (Environment.OSVersion.Version.Major < 6)
        {
            return;
        }

        try
        {
            // The returned HRESULT is intentionally ignored, as in the original code.
            SetProcessDpiAwareness(ProcessDPIAwareness.ProcessPerMonitorDPIAware);
        }
        catch (DllNotFoundException)
        {
            // Major version 6 also covers Windows releases that do not ship shcore.dll
            // (the API is documented for Windows 8.1 and later); nothing to set up there.
        }
        catch (EntryPointNotFoundException)
        {
            // shcore.dll is present but does not export SetProcessDpiAwareness.
        }
    }

    /// <summary>
    /// Takes a screenshot of the screen.
    ///
    /// On multi monitor systems it will take a screenshot of all screens.
    /// </summary>
    /// <returns>Bitmap containing screenshot.</returns>
public Bitmap CaptureScreen()
{
    // Bounding box of every attached display in virtual-desktop coordinates.
    // Displays placed left of / above the primary one have negative coordinates.
    var left = 0;
    var top = 0;
    var right = 0;
    var bottom = 0;

    foreach (var screen in Screen.AllScreens)
    {
        var bounds = screen.Bounds;

        top = Math.Min(top, bounds.Top);
        left = Math.Min(left, bounds.Left);
        right = Math.Max(right, bounds.Right);
        bottom = Math.Max(bottom, bounds.Bottom);
    }

    var size = new Size(right - left, bottom - top);
    var result = new Bitmap(size.Width, size.Height);

    try
    {
        using (Graphics g = Graphics.FromImage(result))
        {
            // Start copying at the top-left corner of the bounding box rather than at (0, 0),
            // otherwise displays with negative coordinates are never captured.
            // Pixel (0, 0) of the result therefore corresponds to screen point (left, top).
            g.CopyFromScreen(left, top, 0, 0, size);
        }
    }
    catch
    {
        // Do not leak the GDI+ bitmap when the screen copy fails.
        result.Dispose();
        throw;
    }

    return result;
}

/// <summary>
/// Captures an area of the screen.
/// </summary>
/// <param name="area">Area of screen to capture, in screen coordinates.</param>
/// <returns>Bitmap containing captured image.</returns>
public Bitmap CaptureArea(Rectangle area)
{
    var result = new Bitmap(area.Width, area.Height);

    try
    {
        using (Graphics g = Graphics.FromImage(result))
        {
            // Copy from the requested location; copying from Point.Empty would always
            // capture the top-left corner of the screen regardless of area.X / area.Y.
            g.CopyFromScreen(area.Location, Point.Empty, area.Size);
        }
    }
    catch
    {
        // Do not leak the GDI+ bitmap when the screen copy fails.
        result.Dispose();
        throw;
    }

    return result;
}

/// <summary>
/// Finds an image in another image.
/// </summary>
/// <param name="sourceImage">Image to search</param>
/// <param name="targetImage">Image to find.</param>
/// <param name="stride">When searching how many pixels should be compared. Lower the number the more accurate the search.</param>
/// <returns>Rectangle with coordinates of found image.</returns>
public Rectangle Find(Bitmap sourceImage, Bitmap targetImage, int stride = 4)
{
    if (sourceImage == null)
    {
        throw new ArgumentNullException(nameof(sourceImage));
    }

    if (targetImage == null)
    {
        throw new ArgumentNullException(nameof(targetImage));
    }

    if (stride < 1)
    {
        // A stride of zero (or less) would never advance the sampling loops below.
        throw new ArgumentOutOfRangeException(nameof(stride), stride, "Stride must be at least 1.");
    }

    int targetWidth = targetImage.Width;
    int targetHeight = targetImage.Height;

    // Last top-left position at which the target still fits inside the source.
    // Negative when the target is larger than the source, in which case nothing is searched.
    int maxX = sourceImage.Width - targetWidth;
    int maxY = sourceImage.Height - targetHeight;

    // The four corner pixels of the target never change, so read them once.
    Color topLeft = targetImage.GetPixel(0, 0);
    Color topRight = targetImage.GetPixel(targetWidth - 1, 0);
    Color bottomLeft = targetImage.GetPixel(0, targetHeight - 1);
    Color bottomRight = targetImage.GetPixel(targetWidth - 1, targetHeight - 1);

    // Bounds are inclusive so the right-most / bottom-most placement is also tested
    // (for example a target that has exactly the size of the source).
    for (int x = 0; x <= maxX; x++)
    {
        for (int y = 0; y <= maxY; y++)
        {
            // Cheap rejection: all four corners must match before sampling the interior.
            if (sourceImage.GetPixel(x, y) != topLeft ||
                sourceImage.GetPixel(x + targetWidth - 1, y) != topRight ||
                sourceImage.GetPixel(x, y + targetHeight - 1) != bottomLeft ||
                sourceImage.GetPixel(x + targetWidth - 1, y + targetHeight - 1) != bottomRight)
            {
                continue;
            }

            bool matches = true;

            // Compare every stride-th pixel in both directions.
            for (int tx = 0; matches && tx < targetWidth; tx += stride)
            {
                for (int ty = 0; ty < targetHeight; ty += stride)
                {
                    if (sourceImage.GetPixel(x + tx, y + ty) != targetImage.GetPixel(tx, ty))
                    {
                        matches = false;
                        break;
                    }
                }
            }

            if (matches)
            {
                return new Rectangle(x, y, targetWidth, targetHeight);
            }
        }
    }

    // Sentinel returned when the target was not found.
    return new Rectangle(-1, -1, -1, -1);
}

/// <summary>
/// Find all instances of image in target image.
/// </summary>
/// <param name="sourceImage">Image to search in.</param>
/// <param name="targetImage">Image to search for.</param>
/// <param name="stride">When searching how many pixels should be compared. Lower the number the more accurate the search.</param>
/// <returns>IEnumerable of rectangles containing all the locations the image was found.</returns>
public IEnumerable<Rectangle> FindAll(Bitmap sourceImage, Bitmap targetImage, int stride = 4)
{
    if (sourceImage == null)
    {
        throw new ArgumentNullException(nameof(sourceImage));
    }

    if (targetImage == null)
    {
        throw new ArgumentNullException(nameof(targetImage));
    }

    if (stride < 1)
    {
        // A stride of zero (or less) would never advance the sampling loops below.
        throw new ArgumentOutOfRangeException(nameof(stride), stride, "Stride must be at least 1.");
    }

    var result = new List<Rectangle>();

    int targetWidth = targetImage.Width;
    int targetHeight = targetImage.Height;

    // Last top-left position at which the target still fits inside the source.
    int maxX = sourceImage.Width - targetWidth;
    int maxY = sourceImage.Height - targetHeight;

    // Probe pixels of the target never change, so read them once.
    Color topLeft = targetImage.GetPixel(0, 0);
    Color topRight = targetImage.GetPixel(targetWidth - 1, 0);
    Color bottomLeft = targetImage.GetPixel(0, targetHeight - 1);
    Color bottomRight = targetImage.GetPixel(targetWidth - 1, targetHeight - 1);
    Color centre = targetImage.GetPixel(targetWidth / 2, targetHeight / 2);

    // Bounds are inclusive so the right-most / bottom-most placement is also tested.
    for (int x = 0; x <= maxX; x++)
    {
        for (int y = 0; y <= maxY; y++)
        {
            // Cheap rejection. The probes use the same offsets in source and target
            // (width - 1 / height - 1 for the far corners), so they stay inside the
            // candidate region and compare like with like.
            if (sourceImage.GetPixel(x, y) != topLeft ||
                sourceImage.GetPixel(x + targetWidth - 1, y) != topRight ||
                sourceImage.GetPixel(x, y + targetHeight - 1) != bottomLeft ||
                sourceImage.GetPixel(x + targetWidth - 1, y + targetHeight - 1) != bottomRight ||
                sourceImage.GetPixel(x + targetWidth / 2, y + targetHeight / 2) != centre)
            {
                continue;
            }

            bool matches = true;

            // Compare every stride-th pixel in both directions.
            for (int tx = 0; matches && tx < targetWidth; tx += stride)
            {
                for (int ty = 0; ty < targetHeight; ty += stride)
                {
                    if (sourceImage.GetPixel(x + tx, y + ty) != targetImage.GetPixel(tx, ty))
                    {
                        matches = false;
                        break;
                    }
                }
            }

            if (matches)
            {
                result.Add(new Rectangle(x, y, targetWidth, targetHeight));
            }
        }
    }

    return result;
}

/// <summary>
/// Simulates key presses on the active application.
/// </summary>
/// <param name="keys">Keys to simulate.</param>
public void TypeKeys(string keys)
{
    SendKeys.SendWait(keys);
}

/// <summary>
/// Move the mouse to target location.
/// </summary>
/// <param name="x">x coordinate to move mouse to.</param>
/// <param name="y">y coordinate to move mouse to.</param>
public void MoveMouse(int x, int y)
{
    Cursor.Position = new Point(x, y);
}

/// <summary>
/// Simulates a mouse down and then mouse up.
/// </summary>
/// <param name="button">Mouse button to use.</param>
public void MouseClick(MouseButton button)
{
    uint cursorX = (uint)Cursor.Position.X;
    uint cursorY = (uint)Cursor.Position.Y;

    // Press and release are sent together in a single mouse_event call.
    // Any button other than Left is treated as the right button.
    if (button != MouseButton.Left)
    {
        MouseEvent(MOUSEEVENTF_RIGHTDOWN | MOUSEEVENTF_RIGHTUP, cursorX, cursorY, 0, 0);
        return;
    }

    MouseEvent(MOUSEEVENTF_LEFTDOWN | MOUSEEVENTF_LEFTUP, cursorX, cursorY, 0, 0);
}

/// <summary>
/// Simulates a mouse down event.
/// </summary>
/// <param name="button">Mouse button to press; anything other than Left is treated as the right button.</param>
public void MouseDown(MouseButton button)
{
    uint cursorX = (uint)Cursor.Position.X;
    uint cursorY = (uint)Cursor.Position.Y;

    uint pressFlag;
    if (button == MouseButton.Left)
    {
        pressFlag = MOUSEEVENTF_LEFTDOWN;
    }
    else
    {
        pressFlag = MOUSEEVENTF_RIGHTDOWN;
    }

    MouseEvent(pressFlag, cursorX, cursorY, 0, 0);
}


/// <summary>
/// Simulate a mouse up event.
/// </summary>
/// <param name="button">Mouse button to release; anything other than Left is treated as the right button.</param>
public void MouseUp(MouseButton button)
{
    uint cursorX = (uint)Cursor.Position.X;
    uint cursorY = (uint)Cursor.Position.Y;

    uint releaseFlag;
    if (button == MouseButton.Left)
    {
        releaseFlag = MOUSEEVENTF_LEFTUP;
    }
    else
    {
        releaseFlag = MOUSEEVENTF_RIGHTUP;
    }

    MouseEvent(releaseFlag, cursorX, cursorY, 0, 0);
}

/// <summary>
/// Run OCR over an image.
/// </summary>
/// <param name="image">Image to run OCR over.</param>
/// <returns>Text result from OCR.</returns>
public string OCR(Bitmap image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    // The tessdata folder is looked up next to this assembly.
    var assemblyFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    var tessdataFolder = Path.Combine(assemblyFolder, "tessdata");

    using (var engine = new TesseractEngine(tessdataFolder, "eng", EngineMode.Default))
    using (var page = engine.Process(image, PageSegMode.Auto))
    {
        return page.GetText();
    }
}
}

}
-------------------------------------------------------------------------------- /ScraperionFramework/WebScraper.cs: --------------------------------------------------------------------------------
using System;
using System.Drawing;
using System.IO;
using System.Threading.Tasks;
using PuppeteerSharp;
using PuppeteerSharp.Input;

namespace ScraperionFramework
{
    /// <summary>
    /// Web scraper class.
    /// This class handles all interaction with chromium.
    /// </summary>
    public class WebScraper : IDisposable
    {
        private readonly Browser m_browser;
        private readonly Page m_page;

        // Last position passed to MoveMouse; used as the click location by MouseClick.
        private decimal m_MouseX = 0;
        private decimal m_MouseY = 0;

        /// <summary>
        /// Default agent string this library uses. Simulates Chrome installed on windows 10.
        /// </summary>
        public static readonly string DefaultAgent =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36";

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="headless">Set to false to show chromium window.</param>
        /// <param name="agent">Agent to use when accessing pages. Uses DefaultAgent if none is set.</param>
public WebScraper(bool headless = true, string agent = "")
{
    // Treat null like "" so a null agent never reaches SetUserAgentAsync.
    if (string.IsNullOrEmpty(agent))
    {
        agent = DefaultAgent;
    }

    // Chromium is downloaded into the per-user local application data folder.
    var browserFolder = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
        "scraperion",
        "browser");

    var ops = new BrowserFetcherOptions
    {
        Path = browserFolder
    };

    new BrowserFetcher(ops).DownloadAsync(BrowserFetcher.DefaultRevision).Wait();

    var browser = Puppeteer.LaunchAsync(new LaunchOptions
    {
        Headless = headless,
        IgnoreHTTPSErrors = true,
    });

    browser.Wait();
    m_browser = browser.Result;

    try
    {
        var page = m_browser.NewPageAsync();
        page.Wait();
        m_page = page.Result;

        // Page level problems and console output are echoed to the process console.
        m_page.Error += (s, e) =>
        {
            Console.WriteLine("Error:" + e.ToString());
        };

        m_page.PageError += (s, e) =>
        {
            Console.WriteLine("Error:" + e.ToString());
        };

        m_page.Console += (s, e) => { Console.WriteLine(e.Message.Text); };

        m_page.SetUserAgentAsync(agent).Wait();
    }
    catch
    {
        // The constructor is failing, so the caller never gets an instance to dispose:
        // release the already launched browser here.
        m_browser.Dispose();
        throw;
    }
}

/// <summary>
/// Set username and password to authenticate against web pages with.
/// </summary>
/// <param name="username">Username to authenticate with</param>
/// <param name="password">Password to authenticate with.</param>
public void SetAuth(string username, string password)
{
    SetAuthAsync(username, password).Wait();
}

// Asynchronous implementation behind SetAuth.
private async Task SetAuthAsync(string username, string password)
{
    await m_page.AuthenticateAsync(new Credentials { Username = username, Password = password });
}

/// <summary>
/// Sets the view port size of the page.
/// </summary>
/// <param name="width">Width of the page in pixels.</param>
/// <param name="height">Height of page in pixels.</param>
public void SetViewPort(int width, int height)
{
    SetViewPortAsync(width, height).Wait();
}

// Asynchronous implementation behind SetViewPort.
private async Task SetViewPortAsync(int width, int height)
{
    await m_page.SetViewportAsync(new ViewPortOptions
    {
        Width = width,
        Height = height
    });
}

/// <summary>
/// Gets or sets the url the page is currently at.
/// </summary>
/// <remarks>
/// Setting the value navigates the page and blocks until navigation completes.
/// Navigation failures are written to the console and are not rethrown.
/// </remarks>
public string Url
{
    get => m_page.Url;
    set
    {
        try
        {
            m_page.GoToAsync(value).Wait();
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
    }
}

/// <summary>
/// Executes a javascript expression on page.
/// This is similar to typing a command in the java console.
/// </summary>
/// <param name="script">Expression to run.</param>
/// <returns>Json of executed result.</returns>
public string Exec(string script)
{
    var result = ExecAsync(script);
    result.Wait();

    return result.Result;
}

// Asynchronous implementation behind Exec; the evaluation result is converted with ToString().
private async Task<string> ExecAsync(string script)
{
    var data = await m_page.EvaluateExpressionAsync(script);

    return (string)data.ToString();
}

/// <summary>
/// Takes a screenshot of the target page.
/// </summary>
/// <returns>Bitmap image containing screenshot.</returns>
public Bitmap SnapshotBitmap()
{
    var result = SnapshotBitmapAsync();
    result.Wait();
    return result.Result;
}

// Asynchronous implementation behind SnapshotBitmap.
private async Task<Bitmap> SnapshotBitmapAsync()
{
    using (var data = await m_page.ScreenshotStreamAsync())
    using (var decoded = new Bitmap(data))
    {
        // A Bitmap created from a stream needs that stream to stay open for its whole
        // lifetime. Copy the pixels into a stream-independent bitmap so the screenshot
        // stream can be closed here without invalidating the returned image.
        return new Bitmap(decoded);
    }
}

/// <summary>
/// Simulates key presses on page.
/// </summary>
/// <param name="text">Text to send to page.</param>
public void SendKeys(string text) => SendKeysAsync(text).Wait();

// Asynchronous implementation behind SendKeys.
private async Task SendKeysAsync(string text)
{
    var keyboard = m_page.Keyboard;
    await keyboard.TypeAsync(text);
}

/// <summary>
/// Simulates moving the mouse on the page.
///
/// Note: this does not move the system mouse.
/// </summary>
/// <param name="x">X coordinates to move mouse to.</param>
/// <param name="y">Y coordinates to move mouse to.</param>
public void MoveMouse(decimal x, decimal y) => MoveMouseAsync(x, y).Wait();

// Moves the page mouse, then records the position so MouseClick knows where to click.
private async Task MoveMouseAsync(decimal x, decimal y)
{
    var mouse = m_page.Mouse;
    await mouse.MoveAsync(x, y);

    m_MouseX = x;
    m_MouseY = y;
}

/// <summary>
/// Simulates a mouse click on page.
/// </summary>
/// <param name="button">Mouse button to simulate.</param>
public void MouseClick(MouseButton button) => MouseClickAsync(button).Wait();

// Clicks at the position last recorded by MoveMouse; any button other than Left maps to Right.
private async Task MouseClickAsync(MouseButton button)
{
    var options = new ClickOptions();
    if (button == MouseButton.Left)
    {
        options.Button = PuppeteerSharp.Input.MouseButton.Left;
    }
    else
    {
        options.Button = PuppeteerSharp.Input.MouseButton.Right;
    }

    await m_page.Mouse.ClickAsync(m_MouseX, m_MouseY, options);
}

/// <summary>
/// Simulates a mouse up event on page.
/// </summary>
/// <param name="button">Mouse button to simulate.</param>
public void MouseUp(MouseButton button) => MouseUpAsync(button).Wait();

// Asynchronous implementation behind MouseUp; any button other than Left maps to Right.
private async Task MouseUpAsync(MouseButton button)
{
    var options = new ClickOptions();
    if (button == MouseButton.Left)
    {
        options.Button = PuppeteerSharp.Input.MouseButton.Left;
    }
    else
    {
        options.Button = PuppeteerSharp.Input.MouseButton.Right;
    }

    await m_page.Mouse.UpAsync(options);
}

/// <summary>
/// Simulates a mouse down event on page.
/// </summary>
/// <param name="button">Mouse button to simulate.</param>
public void MouseDown(MouseButton button)
{
    MouseDownAsync(button).Wait();
}

// Asynchronous implementation behind MouseDown; any button other than Left maps to Right.
private async Task MouseDownAsync(MouseButton button)
{
    await m_page.Mouse.DownAsync(new ClickOptions { Button = button == MouseButton.Left ? PuppeteerSharp.Input.MouseButton.Left : PuppeteerSharp.Input.MouseButton.Right });
}

/// <summary>
/// Simulates a touch tap on a page.
/// </summary>
/// <param name="target">Javascript selector for element to tap on.</param>
public void TapScreen(string target)
{
    TapScreenAsync(target).Wait();
}

// Asynchronous implementation behind TapScreen.
private async Task TapScreenAsync(string target)
{
    await m_page.TapAsync(target);
}

/// <summary>
/// Generates a pdf of the page.
/// </summary>
/// <returns>Stream containing the pdf data.</returns>
public Stream CreatePdf()
{
    var data = CreatePdfAsync();
    data.Wait();
    return data.Result;
}

// Asynchronous implementation behind CreatePdf. The task carries the pdf stream,
// so it has to be Task<Stream> for CreatePdf to read it from Result.
private async Task<Stream> CreatePdfAsync()
{
    return await m_page.PdfStreamAsync();
}

/// <summary>
/// Waits for expression to be true.
/// </summary>
/// <param name="expression">Expression to wait on.</param>
public void WaitOnScript(string expression)
{
    WaitOnScriptAsync(expression).Wait();
}

// Asynchronous implementation behind WaitOnScript.
private async Task WaitOnScriptAsync(string expression)
{
    await m_page.WaitForExpressionAsync(expression);
}

/// <summary>
/// Selects element on page to have focus.
/// </summary>
/// <param name="target">Javascript selector to make have focus.</param>
public void Focus(string target)
{
    FocusAsync(target).Wait();
}

// Asynchronous implementation behind Focus.
private async Task FocusAsync(string target)
{
    await m_page.FocusAsync(target);
}

/// <summary>
/// Clicks on target element on page.
/// </summary>
/// <param name="target">Javascript selector of element to click on.</param>
public void Click(string target)
{
    ClickAsync(target).Wait();
}

// Asynchronous implementation behind Click.
private async Task ClickAsync(string target)
{
    await m_page.ClickAsync(target);
}

/// <summary>
/// Html content of page. Useful for scraping the html directly.
/// </summary>
/// <remarks>Both accessors block until the underlying page call has completed.</remarks>
public string Content
{
    get
    {
        var data = m_page.GetContentAsync();
        data.Wait();
        return data.Result;
    }
    set
    {
        var data = m_page.SetContentAsync(value);
        data.Wait();
    }
}

/// <inheritdoc />
/// <summary>
/// Dispose method.
/// This will close out chromium session.
/// </summary>
public void Dispose()
{
    try
    {
        // Release the page before the browser that owns it.
        m_page?.Dispose();
    }
    finally
    {
        // Always release the browser, even when disposing the page fails.
        m_browser?.Dispose();
    }
}
}
}
-------------------------------------------------------------------------------- /ScraperionFramework/app.config: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /ScraperionFramework/packages.config: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /ScraperionFramework/tessdata/eng.cube.fold: -------------------------------------------------------------------------------- 1 | 0oO 2 | lI1 3 | cC 4 | kK 5 | pP 6 | sS 7 | uU 8 | vV 9 | wW 10 | xX 11 | yY 12 | zZ 13 | -------------------------------------------------------------------------------- /ScraperionFramework/tessdata/eng.cube.lm: -------------------------------------------------------------------------------- 1 | LeadPunc="({[`' 2 | TrailPunc=}:;-]!?`,.)"' 3 | NumLeadPunc=#({[@$ 4 | NumTrailPunc=}):;].,% 5 | Operators=*+-/.:,()[] 6 | Digits=0123456789 7 | 
Alphas=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ 8 | -------------------------------------------------------------------------------- /ScraperionFramework/tessdata/eng.cube.nn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiltaylor/ScraperionFramework/106dc62a0595eab22482089d4dc00b0554f1725a/ScraperionFramework/tessdata/eng.cube.nn -------------------------------------------------------------------------------- /ScraperionFramework/tessdata/eng.cube.params: -------------------------------------------------------------------------------- 1 | RecoWgt=1.0 2 | SizeWgt=0.2435 3 | OODWgt=0.0214 4 | NumWgt=0.036 5 | CharBigramsWgt=0.1567 6 | MaxSegPerChar=8 7 | BeamWidth=10 8 | ConvGridSize=48 9 | WordUnigramsWgt=0.01 10 | MaxWordAspectRatio=20.0000 11 | MinSpaceHeightRatio=0.5000 12 | MaxSpaceHeightRatio=0.6000 13 | HistWindWid=2 14 | MinConCompSize=0 15 | -------------------------------------------------------------------------------- /ScraperionFramework/tessdata/eng.tesseract_cube.nn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiltaylor/ScraperionFramework/106dc62a0595eab22482089d4dc00b0554f1725a/ScraperionFramework/tessdata/eng.tesseract_cube.nn -------------------------------------------------------------------------------- /ScraperionFramework/tessdata/eng.traineddata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiltaylor/ScraperionFramework/106dc62a0595eab22482089d4dc00b0554f1725a/ScraperionFramework/tessdata/eng.traineddata -------------------------------------------------------------------------------- /TestHarmess/App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 
-------------------------------------------------------------------------------- /TestHarmess/Program.cs: --------------------------------------------------------------------------------
using ScraperionFramework;

namespace TestHarmess
{
    /// <summary>
    /// Manual test harness: opens a visible chromium window on a fixed page and closes it again.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // headless = false so the chromium window is shown while the page loads.
            using (WebScraper scrapper = new WebScraper(false))
            {
                scrapper.Url = "http://www.weatherzone.com.au/";
            }
        }
    }
}
-------------------------------------------------------------------------------- /TestHarmess/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("TestHarmess")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("TestHarmess")] 13 | [assembly: AssemblyCopyright("Copyright © 2018")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("3226ad4f-b0ae-413d-b13d-cff36b023f97")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /TestHarmess/TestHarmess.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {3226AD4F-B0AE-413D-B13D-CFF36B023F97} 8 | Exe 9 | TestHarmess 10 | TestHarmess 11 | v4.6.1 12 | 512 13 | true 14 | true 15 | 16 | 17 | AnyCPU 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | 26 | 27 | AnyCPU 28 | pdbonly 29 | true 30 | bin\Release\ 31 | TRACE 32 | prompt 33 | 4 34 | 35 | 36 | 37 | ..\packages\Microsoft.AspNetCore.WebUtilities.2.0.2\lib\netstandard2.0\Microsoft.AspNetCore.WebUtilities.dll 38 | 39 | 40 | ..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.2.0.0\lib\netstandard2.0\Microsoft.Extensions.DependencyInjection.Abstractions.dll 41 | 42 | 43 | ..\packages\Microsoft.Extensions.Logging.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Logging.dll 44 | 45 | 46 | ..\packages\Microsoft.Extensions.Logging.Abstractions.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Logging.Abstractions.dll 47 | 48 | 49 | ..\packages\Microsoft.Extensions.Options.2.0.2\lib\netstandard2.0\Microsoft.Extensions.Options.dll 50 | 51 | 52 | ..\packages\Microsoft.Extensions.Primitives.2.0.0\lib\netstandard2.0\Microsoft.Extensions.Primitives.dll 53 | 54 | 55 
| ..\packages\Microsoft.Net.Http.Headers.2.0.2\lib\netstandard2.0\Microsoft.Net.Http.Headers.dll 56 | 57 | 58 | ..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll 59 | 60 | 61 | 62 | ..\packages\System.Buffers.4.4.0\lib\netstandard2.0\System.Buffers.dll 63 | 64 | 65 | 66 | 67 | ..\packages\System.Net.Http.4.3.3\lib\net46\System.Net.Http.dll 68 | True 69 | True 70 | 71 | 72 | ..\packages\System.Runtime.CompilerServices.Unsafe.4.5.0\lib\netstandard2.0\System.Runtime.CompilerServices.Unsafe.dll 73 | 74 | 75 | ..\packages\System.Security.Cryptography.Algorithms.4.3.0\lib\net461\System.Security.Cryptography.Algorithms.dll 76 | True 77 | True 78 | 79 | 80 | ..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll 81 | True 82 | True 83 | 84 | 85 | ..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll 86 | True 87 | True 88 | 89 | 90 | ..\packages\System.Security.Cryptography.X509Certificates.4.3.0\lib\net461\System.Security.Cryptography.X509Certificates.dll 91 | True 92 | True 93 | 94 | 95 | ..\packages\System.Text.Encodings.Web.4.4.0\lib\netstandard2.0\System.Text.Encodings.Web.dll 96 | 97 | 98 | ..\packages\System.Threading.Tasks.Extensions.4.5.1\lib\netstandard2.0\System.Threading.Tasks.Extensions.dll 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | ..\packages\Tesseract.3.0.2.0\lib\net451\Tesseract.dll 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | {b4cc8f0d-7d5b-4d5b-aea9-b4610f34a75d} 121 | PuppeteerSharp 122 | 123 | 124 | {06837ace-3ee3-4286-b70b-41213078f878} 125 | ScraperionFramework 126 | 127 | 128 | 129 | 130 | Always 131 | 132 | 133 | Always 134 | 135 | 136 | Always 137 | 138 | 139 | Always 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /TestHarmess/packages.config: -------------------------------------------------------------------------------- 1 
|  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | --------------------------------------------------------------------------------