├── .gitattributes ├── .gitignore ├── CONTRIBUTING.md ├── Demos └── ADLTools4VS │ └── Local_Execution │ ├── Datafiles │ ├── AdsLog.tsv │ ├── Employees.tsv │ ├── OlympicAthletes.tsv │ ├── QueryLog.tsv │ └── SearchLog.tsv │ └── Scripts │ ├── create_db_and_table.usql │ ├── query_a_tsv_file.usql │ ├── querytable.usql │ └── searchlog.usql ├── LICENSE.txt ├── README.md ├── SECURITY.md ├── Samples ├── AzureDiagnosticsSample │ ├── AzureDiagnostics │ │ ├── AzureDiagnostics.csproj │ │ ├── AzureDiagnosticsUtil.cs │ │ ├── DataLakeAnalyticsProperties.cs │ │ ├── DataLakeStoreProperties.cs │ │ ├── LogRecord.cs │ │ ├── Properties │ │ │ └── AssemblyInfo.cs │ │ └── packages.config │ ├── AzureDiagnosticsExtractors │ │ ├── AzureDiagnosticsExtractors.csproj │ │ ├── DataLakeAnalyticsExtractor.cs │ │ ├── DataLakeStoreExtractor.cs │ │ ├── Extensions.cs │ │ ├── Properties │ │ │ └── AssemblyInfo.cs │ │ └── packages.config │ ├── ParseAzureDiagnosticLog.sln │ ├── ParseAzureDiagnosticLog │ │ ├── App.config │ │ ├── Input │ │ │ ├── ADLA_PT1H.json │ │ │ └── ADLS_PT1H.json │ │ ├── ParseAzureDiagnosticLog.csproj │ │ ├── Program.cs │ │ ├── Properties │ │ │ └── AssemblyInfo.cs │ │ └── packages.config │ └── ProcessADLDiagnosticLogs │ │ ├── CreateDiagnosticsDB.usql │ │ ├── CreateDiagnosticsDB.usql.cs │ │ ├── FileSet.usql │ │ ├── FileSet.usql.cs │ │ ├── LocalScript.usql │ │ ├── LocalScript.usql.cs │ │ ├── ProcessADLDiagnosticLogs.usqlproj │ │ ├── README.md │ │ ├── RegisterAssemblies.ps1 │ │ ├── SingleFile.usql │ │ ├── SingleFile.usql.cs │ │ └── input │ │ ├── ADLA_PT1H.json │ │ └── ADLS_PT1H.json ├── DistCp │ └── ConfigureADLS-3.0.0-alpha2WithNewDistCp.sh ├── ExcelExtractor │ ├── ADLA ExcelExtractor.sln │ ├── USQLExcelExtractor │ │ ├── Assemblies │ │ │ └── DocumentFormat.OpenXml.xml │ │ ├── ReadExcel.usql │ │ ├── ReadExcel.usql.cs │ │ ├── Samples │ │ │ ├── sample.xlsx │ │ │ └── taxonomy_97.xlsx │ │ ├── TaxonomyData.usql │ │ ├── TaxonomyData.usql.cs │ │ └── USQLExcelExtractor.usqlproj │ └── 
oh22is.Analytics.Formats │ │ ├── ExcelExtractor.cs │ │ ├── Properties │ │ └── AssemblyInfo.cs │ │ ├── oh22is.Analytics.Formats.csproj │ │ ├── oh22is.Analytics.Formats.snk │ │ └── packages.config └── PowerShell │ ├── ADLAUsers │ ├── Add-AdlaJobUser.ps1 │ └── Remove-AdlaJobUser.ps1 │ ├── ADLSUser │ └── Set-AdlsAccess.ps1 │ ├── Create_One_Million_Files.ps1 │ ├── Get-AclEntriesRecursive.ps1 │ └── Get-ItemsRecursive.ps1 ├── Visuals └── Icons │ ├── ADLA-Icon.svg │ ├── ADLS-Icon.svg │ └── HDInsight-elephant.svg └── docs ├── General ├── ADF Blob to DataLake Connector.docx └── Security Best Practices.md ├── Hands_on_Labs ├── ADLA_ADF_HOL.md ├── ADL_PS_HOL.md ├── ADL_SDK_HOL.md ├── ADL_Store_CLI_HOL.md ├── Images │ ├── AccountSettings.jpg │ ├── CreateProject-old.jpg │ ├── CreateProject.jpg │ ├── LoginAccount_2015.jpg │ ├── NewProject.jpg │ ├── Portal_ADLA.jpg │ ├── Portal_ADLS_Alice.jpg │ ├── Portal_ADLS_DataExplorer.jpg │ ├── Portal_ADLS_OpenDataExplorer.jpg │ ├── Portal_AllResources.jpg │ ├── Portal_Ex1_OpenResult.jpg │ ├── Portal_Ex1_Result.jpg │ ├── Portal_Ex2_Job.jpg │ ├── Portal_Ex2_image_ocr.jpg │ ├── Portal_Ex2_ocr_keyphrases.jpg │ ├── Portal_Ex3_Job.jpg │ ├── Portal_Ex3_common_summary.jpg │ ├── Portal_Exercise1.jpg │ ├── Portal_Exercise2.jpg │ ├── Portal_Exercise3.jpg │ ├── Portal_InstallAnalytics.jpg │ ├── Portal_JobRunning.jpg │ ├── Portal_JobStart.jpg │ ├── Portal_NewJob.jpg │ ├── Portal_OpenDataExplorer.jpg │ ├── Portal_SampleProjects.jpg │ ├── Portal_SearchADLA.jpg │ ├── Portal_SearchADLHOLADLS.jpg │ ├── SetAccount.jpg │ ├── SignInAccount.jpg │ ├── VS_Ex2_Job.JPG │ ├── VS_Ex3_Job.JPG │ ├── q1_jobview.jpg │ ├── q1_jobview_completed.jpg │ ├── q1_jobview_error.jpg │ ├── q1_jobview_output.jpg │ ├── q1_query.jpg │ ├── q1_query_submit.jpg │ ├── q1_result.jpg │ ├── q3_result.jpg │ ├── q3_result2.jpg │ ├── q4_agg_result.jpg │ ├── q4_having_result.jpg │ ├── q4_top5_result.jpg │ ├── q5_view_explorer.jpg │ ├── q5_view_jobview.jpg │ ├── q6_result.jpg │ ├── 
q6_tvf_explorer.jpg │ ├── q7_explorer.jpg │ ├── q8_result.jpg │ └── q9_result.jpg ├── Software_Preqrequisites.md ├── Start.md ├── USQLCode_HOL.md ├── USQL_Cognitive_ILL.md ├── USQL_HOL.md └── USQL_Selfguided_HOL.md ├── Release_Notes ├── 2016 │ ├── 2016_02_15 │ │ └── USQL_Release_Notes_2016_02_15.md │ ├── 2016_04_11 │ │ ├── ADLTools4VS_Release_Notes_2016_04_11.md │ │ ├── Portal_Release_Notes_2016_04_21.md │ │ └── USQL_Release_Notes_2016_04_11.md │ ├── 2016_05_xx │ │ └── Portal_Release_Notes_2016_05_10.md │ ├── 2016_07_14 │ │ └── USQL_Release_Notes_2016_07_14.md │ ├── 2016_08_01 │ │ ├── Portal_Release_Notes_2016_08_01.md │ │ └── USQL_Release_Notes_2016_08_01.md │ ├── 2016_09_12 │ │ └── USQL_Release_Notes_2016_09_12.md │ └── 2016_10_16 │ │ └── USQL_Release_Notes_2016_10_16.md ├── 2017 │ ├── 2017_03_09 │ │ └── USQL_Release_Notes_2017_03_09.md │ ├── 2017_04_24 │ │ └── USQL_Release_Notes_2017_04_24.md │ └── 2017_Summer │ │ └── USQL_Release_Notes_2017_Summer.md ├── 2018 │ └── 2018_Spring │ │ └── USQL_Release_Notes_2018_Spring.md ├── 2020 │ └── 2020_07_28 │ │ └── ADL_Release_Notes_2020_07_28.md ├── 2021 │ └── 2021_03_15 │ │ └── ADL_Release_Notes_2021_03_15.md └── README.md ├── SDK └── nuget_packages.md ├── img ├── Blogs │ └── RegisterAssembly │ │ ├── Fig1-Codebehind.JPG │ │ ├── Fig2-CodebehindScript.jpg │ │ ├── Fig3-U-SQL-ClassLib.jpg │ │ ├── Fig4-ClassLibraryContent.JPG │ │ ├── Fig5-AssemblyRegistrationStep.jpg │ │ └── Fig6-RegisterFormatAssembly.JPG ├── Migration │ ├── AAD.png │ ├── AADAddGroup.png │ ├── AADAddGroup2.png │ ├── AADDirectorySelected.png │ ├── AADGroupDetails.png │ ├── AADGroupProperties.png │ ├── AADGroupSelected.png │ ├── AzureRbacAdd.png │ ├── AzureRbacResource.png │ ├── AzureRbacResource2.png │ ├── AzureRbacSelectRole.png │ ├── AzureRbacSelectUser.png │ ├── AzureRbacUserDone.png │ ├── AzureRbacUserSelectedRoleUser.png │ ├── DataExplorer.png │ ├── DataExplorerAccess.png │ └── DataExplorerAccessAdd.png ├── Portal │ ├── ADLAObjects.png │ ├── 
AutomationOptions.png │ ├── AzureMPCreate.png │ ├── AzureMarketPlace.png │ ├── AzurePortal.png │ ├── DiagnosticSettings.png │ ├── JobFilter.png │ ├── JobGraphSize.png │ ├── JobManagementActive.png │ └── Tools.png └── ReleaseNotes │ ├── 2017-Apr-CheckForUpdates.gif │ ├── 2017-Apr-DebugDataAccessLevels.png │ ├── 2017-Apr-ExportDB.gif │ ├── 2017-Apr-ImportDB.png │ ├── 2017-Apr-NewMenuLocation.gif │ ├── 2017-Apr-RotateKey.png │ ├── Calling-Python-R-codebehind.png │ ├── DataViewTab.png │ ├── DebugInfoRegistration.png │ ├── F1Help.png │ ├── Portal-PythonUDOMarking.png │ ├── Portal-UDOMarking.png │ ├── Python-R-codebehind.png │ ├── TempScript.png │ ├── VarHighlighting.png │ ├── right-click-create-extract-script.png │ └── winter2018 │ ├── 2018-winter-filesets-nogrouping-jobgraph.jpg │ ├── 2018-winter-filesets-nogrouping-vertexexecution.jpg │ ├── 2018-winter-filesets-withgrouping-jobgraph.jpg │ ├── 2018-winter-filesets-withgrouping-vertexexecution.jpg │ ├── VS-AUModeler.JPG │ ├── VS-DataTab.JPG │ ├── VS-DiagnosticsLink.jpg │ ├── VS-RefinedSubmitBar.jpg │ ├── VS-RuntimeErrorMessage.jpg │ ├── VS-StageToolTip.jpg │ ├── VS-UDOStageAnnotation.jpg │ └── VS-VertexOpsView.JPG ├── index.css └── index.html /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.sln.docstates 8 | 9 | # Build results 10 | 11 | [Dd]ebug/ 12 | [Rr]elease/ 13 | x64/ 14 | build/ 15 | [Bb]in/ 16 | [Oo]bj/ 17 | 18 | # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets 19 | !packages/*/build/ 20 | 21 | # MSTest test Results 22 | [Tt]est[Rr]esult*/ 23 | [Bb]uild[Ll]og.* 24 | 25 | *_i.c 26 | *_p.c 27 | *.ilk 28 | *.meta 29 | *.obj 30 | *.pch 31 | *.pdb 32 | *.pgc 33 | *.pgd 34 | *.rsp 35 | *.sbr 36 | *.tlb 37 | *.tli 38 | *.tlh 39 | *.tmp 40 | *.tmp_proj 41 | *.log 42 | *.vspscc 43 | *.vssscc 44 | .builds 45 | *.pidb 46 | *.log 47 | *.scc 48 | 49 | # Visual C++ cache files 50 | ipch/ 51 | *.aps 52 | *.ncb 53 | *.opensdf 54 | *.sdf 55 | *.cachefile 56 | 57 | # Visual Studio profiler 58 | *.psess 59 | *.vsp 60 | *.vspx 61 | 62 | # Guidance Automation Toolkit 63 | *.gpState 64 | 65 | # ReSharper is a .NET coding add-in 66 | _ReSharper*/ 67 | *.[Rr]e[Ss]harper 68 | 69 | # TeamCity is a build add-in 70 | _TeamCity* 71 | 72 | # DotCover is a Code Coverage Tool 73 | *.dotCover 74 | 75 | # NCrunch 76 | *.ncrunch* 77 | .*crunch*.local.xml 78 | 79 | # Installshield output folder 80 | [Ee]xpress/ 81 | 82 | # DocProject is a documentation generator add-in 83 | DocProject/buildhelp/ 84 | 
DocProject/Help/*.HxT 85 | DocProject/Help/*.HxC 86 | DocProject/Help/*.hhc 87 | DocProject/Help/*.hhk 88 | DocProject/Help/*.hhp 89 | DocProject/Help/Html2 90 | DocProject/Help/html 91 | 92 | # Click-Once directory 93 | publish/ 94 | 95 | # Publish Web Output 96 | *.Publish.xml 97 | 98 | # NuGet Packages Directory 99 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 100 | #packages/ 101 | 102 | # Windows Azure Build Output 103 | csx 104 | *.build.csdef 105 | 106 | # Windows Store app package directory 107 | AppPackages/ 108 | 109 | # Others 110 | sql/ 111 | *.Cache 112 | ClientBin/ 113 | [Ss]tyle[Cc]op.* 114 | ~$* 115 | *~ 116 | *.dbmdl 117 | *.[Pp]ublish.xml 118 | *.pfx 119 | *.publishsettings 120 | 121 | # RIA/Silverlight projects 122 | Generated_Code/ 123 | 124 | # Backup & report files from converting an old project file to a newer 125 | # Visual Studio version. Backup files are not needed, because we have git ;-) 126 | _UpgradeReport_Files/ 127 | Backup*/ 128 | UpgradeLog*.XML 129 | UpgradeLog*.htm 130 | 131 | # SQL Server files 132 | App_Data/*.mdf 133 | App_Data/*.ldf 134 | 135 | 136 | #LightSwitch generated files 137 | GeneratedArtifacts/ 138 | _Pvt_Extensions/ 139 | ModelManifest.xml 140 | 141 | # ========================= 142 | # Windows detritus 143 | # ========================= 144 | 145 | # Windows image file caches 146 | Thumbs.db 147 | ehthumbs.db 148 | 149 | # Folder config file 150 | Desktop.ini 151 | 152 | # Recycle Bin used on file shares 153 | $RECYCLE.BIN/ 154 | 155 | # Mac desktop service store files 156 | .DS_Store 157 | 158 | # NuGet 159 | packages/ 160 | 161 | .vs/ 162 | *.pyc 163 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to MicrosoftBigData/AzureDataLake 2 | -------------------------------------------------------------------------------- 
/Demos/ADLTools4VS/Local_Execution/Datafiles/AdsLog.tsv: -------------------------------------------------------------------------------- 1 | 399266 Doritos 1 2 | 399266 Tacobell 0 3 | 382045 Voelkl 1 4 | 382045 Whistler Resort 1 5 | 106479 Amazon Prime 0 6 | 906441 Hayden Planetarium 1 7 | 351530 Microsoft Azure Services 1 8 | -------------------------------------------------------------------------------- /Demos/ADLTools4VS/Local_Execution/Datafiles/Employees.tsv: -------------------------------------------------------------------------------- 1 | 1 Noah Engineering 100 10000 2 | 2 Sophia Engineering 100 20000 3 | 3 Liam Engineering 100 30000 4 | 4 Emma HR 200 10000 5 | 5 Jacob HR 200 10000 6 | 6 Olivia HR 200 10000 7 | 7 Mason Executive 300 50000 8 | 8 Ava Marketing 400 15000 9 | 9 Ethan Marketing 400 10000 -------------------------------------------------------------------------------- /Demos/ADLTools4VS/Local_Execution/Datafiles/QueryLog.tsv: -------------------------------------------------------------------------------- 1 | Banana 300 Image 2 | Cherry 300 Image 3 | Durian 500 Image 4 | Apple 100 Web 5 | Fig 200 Web 6 | Papaya 200 Web 7 | Fig 300 Web 8 | Cherry 400 Web 9 | Durian 500 Web 10 | -------------------------------------------------------------------------------- /Demos/ADLTools4VS/Local_Execution/Datafiles/SearchLog.tsv: -------------------------------------------------------------------------------- 1 | 399266 2/15/2012 11:53:16 AM en-us how to make nachos 73 www.nachos.com;www.wikipedia.com NULL 2 | 382045 2/15/2012 11:53:18 AM en-gb best ski resorts 614 skiresorts.com;ski-europe.com;www.travelersdigest.com/ski_resorts.htm ski-europe.com;www.travelersdigest.com/ski_resorts.htm 3 | 382045 2/16/2012 11:53:20 AM en-gb broken leg 74 mayoclinic.com/health;webmd.com/a-to-z-guides;mybrokenleg.com;wikipedia.com/Bone_fracture mayoclinic.com/health;webmd.com/a-to-z-guides;mybrokenleg.com;wikipedia.com/Bone_fracture 4 | 106479 2/16/2012 11:53:50 AM en-ca 
south park episodes 24 southparkstudios.com;wikipedia.org/wiki/Sout_Park;imdb.com/title/tt0121955;simon.com/mall southparkstudios.com 5 | 906441 2/16/2012 11:54:01 AM en-us cosmos 1213 cosmos.com;wikipedia.org/wiki/Cosmos:_A_Personal_Voyage;hulu.com/cosmos NULL 6 | 351530 2/16/2012 11:54:01 AM en-fr microsoft 241 microsoft.com;wikipedia.org/wiki/Microsoft;xbox.com NULL 7 | 640806 2/16/2012 11:54:02 AM en-us wireless headphones 502 www.amazon.com;reviews.cnet.com/wireless-headphones;store.apple.com www.amazon.com;store.apple.com 8 | 304305 2/16/2012 11:54:03 AM en-us dominos pizza 60 dominos.com;wikipedia.org/wiki/Domino's_Pizza;facebook.com/dominos dominos.com 9 | 460748 2/16/2012 11:54:04 AM en-us yelp 1270 yelp.com;apple.com/us/app/yelp;wikipedia.org/wiki/Yelp,_Inc.;facebook.com/yelp yelp.com 10 | 354841 2/16/2012 11:59:01 AM en-us how to run 610 running.about.com;ehow.com;go.com running.about.com;ehow.com 11 | 354068 2/16/2012 12:00:33 PM en-mx what is sql 422 wikipedia.org/wiki/SQL;sqlcourse.com/intro.html;wikipedia.org/wiki/Microsoft_SQL wikipedia.org/wiki/SQL 12 | 674364 2/16/2012 12:00:55 PM en-us mexican food redmond 283 eltoreador.com;yelp.com/c/redmond-wa/mexican;agaverest.com NULL 13 | 347413 2/16/2012 12:11:55 PM en-gr microsoft 305 microsoft.com;wikipedia.org/wiki/Microsoft;xbox.com NULL 14 | 848434 2/16/2012 12:12:35 PM en-ch facebook 10 facebook.com;facebook.com/login;wikipedia.org/wiki/Facebook facebook.com 15 | 604846 2/16/2012 12:13:55 PM en-us wikipedia 612 wikipedia.org;en.wikipedia.org;en.wikipedia.org/wiki/Wikipedia wikipedia.org 16 | 840614 2/16/2012 12:13:56 PM en-us xbox 1220 xbox.com;en.wikipedia.org/wiki/Xbox;xbox.com/xbox360 xbox.com/xbox360 17 | 656666 2/16/2012 12:15:55 PM en-us hotmail 691 hotmail.com;login.live.com;msn.com;en.wikipedia.org/wiki/Hotmail NULL 18 | 951513 2/16/2012 12:17:00 PM en-us pokemon 63 pokemon.com;pokemon.com/us;serebii.net pokemon.com 19 | 350350 2/16/2012 12:18:17 PM en-us wolfram 30 
// Creates the tutorial database and the SearchLog table used by the other
// Local_Execution demo scripts (searchlog.usql inserts into it, querytable.usql reads it).

//Create Database SampleDBTutorials
CREATE DATABASE IF NOT EXISTS SampleDBTutorials;

//Create Table SearchLog
CREATE TABLE IF NOT EXISTS SampleDBTutorials.dbo.SearchLog
(
    //Define schema of table (same columns, in the same order, as SearchLog.tsv)
    UserId          int,
    Start           DateTime,
    Region          string,
    Query           string,
    Duration        int,
    Urls            string,
    ClickedUrls     string,
    INDEX idx1                      //Name of index
    CLUSTERED (Region ASC)          //Column to cluster by
    DISTRIBUTED BY HASH (Region)    //Column to distribute (hash-partition) by
);
@"/Samples/Output/SearchLog_output.tsv" 14 | USING Outputters.Tsv(); 15 | 16 | -------------------------------------------------------------------------------- /Demos/ADLTools4VS/Local_Execution/Scripts/querytable.usql: -------------------------------------------------------------------------------- 1 | //Read from SearchLog table 2 | @athletes = 3 | SELECT * 4 | FROM SampleDBTutorials.dbo.SearchLog; 5 | 6 | //Write it to a file so we can look at it 7 | OUTPUT @athletes 8 | TO @"/Samples/Output/SearchLog_output.tsv" 9 | USING Outputters.Tsv(); 10 | 11 | //Alternatively, we can output the whole table to a file without using SELECT 12 | OUTPUT SampleDBTutorials.dbo.SearchLog 13 | TO @"/Samples/Output/SearchLog_output_direct.tsv" 14 | USING Outputters.Tsv(); -------------------------------------------------------------------------------- /Demos/ADLTools4VS/Local_Execution/Scripts/searchlog.usql: -------------------------------------------------------------------------------- 1 | //Read some data 2 | @searchlog = 3 | EXTRACT UserId int, 4 | Start DateTime, 5 | Region string, 6 | Query string, 7 | Duration int, 8 | Urls string, 9 | ClickedUrls string 10 | FROM @"/Samples/Data/SearchLog.tsv" 11 | USING Extractors.Tsv(); 12 | 13 | //Insert it into a previously created table 14 | INSERT INTO SampleDBTutorials.dbo.SearchLog 15 | SELECT * 16 | FROM @searchlog; -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Microsoft 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the 
Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | For the Azure Data Lake homepage go here: 2 | 3 | http://aka.ms/AzureDataLake 4 | 5 | 6 | ## Code of Conduct 7 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 8 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 
6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. 
namespace AzureDiagnostics
{
    /// <summary>
    /// Helpers that turn an Azure diagnostics log (a JSON document) into typed log records.
    /// </summary>
    public static class AzureDiagnosticsUtil
    {
        /// <summary>
        /// Reads Data Lake Analytics diagnostic records from <paramref name="stream_reader"/>.
        /// </summary>
        /// <param name="stream_reader">Reader positioned at the start of the JSON log document.</param>
        /// <returns>A lazily evaluated sequence of records with Data Lake Analytics properties.</returns>
        public static System.Collections.Generic.IEnumerable<LogRecord<DataLakeAnalyticsProperties>> GetLogADLARecords(System.IO.StreamReader stream_reader)
        {
            return GetLogRecords<DataLakeAnalyticsProperties>(stream_reader, o => new DataLakeAnalyticsProperties(o));
        }

        /// <summary>
        /// Reads Data Lake Store diagnostic records from <paramref name="stream_reader"/>.
        /// </summary>
        /// <param name="stream_reader">Reader positioned at the start of the JSON log document.</param>
        /// <returns>A lazily evaluated sequence of records with Data Lake Store properties.</returns>
        public static System.Collections.Generic.IEnumerable<LogRecord<DataLakeStoreProperties>> GetLogADLSRecords(System.IO.StreamReader stream_reader)
        {
            return GetLogRecords<DataLakeStoreProperties>(stream_reader, o => new DataLakeStoreProperties(o));
        }

        /// <summary>
        /// Walks the JSON log document and yields one <see cref="LogRecord{T}"/> per record found.
        /// </summary>
        /// <typeparam name="T">Type that models the record's "properties" object.</typeparam>
        /// <param name="sr">Reader positioned at the start of the JSON log document.</param>
        /// <param name="func">Factory that converts a record's "properties" JSON object into <typeparamref name="T"/>.</param>
        /// <returns>
        /// A lazily evaluated sequence; because this is an iterator, nothing is read from
        /// <paramref name="sr"/> until enumeration starts.
        /// </returns>
        public static System.Collections.Generic.IEnumerable<LogRecord<T>> GetLogRecords<T>(System.IO.StreamReader sr, System.Func<Newtonsoft.Json.Linq.JObject, T> func)
        {
            // The JSON reader is intentionally not disposed here: disposing it would also
            // close the caller-owned StreamReader.
            var jr = new Newtonsoft.Json.JsonTextReader(sr);
            var djom = Newtonsoft.Json.Linq.JObject.ReadFrom(jr);

            foreach (var jt_doc in djom.Children()) // dom contains doc(s)
            {
                foreach (var jt_records in jt_doc.Children()) // doc contains records (an array)
                {
                    foreach (var jt_record in jt_records.Children()) // loop through specific records
                    {
                        var jo_record = (Newtonsoft.Json.Linq.JObject)jt_record;
                        var o = NewAzureDiagnosticLogRecord<T>(jo_record);

                        var PropertiesJSON = (Newtonsoft.Json.Linq.JObject)jo_record["properties"];

                        o.Properties = func(PropertiesJSON);
                        yield return o;
                    }
                }
            }
        }

        /// <summary>
        /// Creates a <see cref="LogRecord{T}"/> and fills the fields common to every diagnostic record.
        /// <c>Properties</c> is left at its default value for the caller to populate.
        /// </summary>
        /// <typeparam name="T">Type that models the record's "properties" object.</typeparam>
        /// <param name="jo_record">JSON object of a single log record.</param>
        /// <returns>The new record with its common fields set.</returns>
        /// <remarks>
        /// "resourceId", "category", "operationName" and "identity" are read through the indexer
        /// and dereferenced directly, so a record lacking any of them fails with a
        /// <see cref="System.NullReferenceException"/>. "resultType", "resultSignature" and
        /// "correlationId" are optional and become <c>null</c> when absent.
        /// </remarks>
        public static LogRecord<T> NewAzureDiagnosticLogRecord<T>(Newtonsoft.Json.Linq.JObject jo_record)
        {
            var o = new LogRecord<T>();
            o.Time = jo_record.GetDateTimeOffset("time");
            o.ResourceId = jo_record["resourceId"].ToString();
            o.Category = jo_record["category"].ToString();
            o.OperationName = jo_record["operationName"].ToString();
            o.ResultType = jo_record.GetString("resultType", null);
            o.ResultSignature = jo_record.GetString("resultSignature", null);
            o.CorrelationId = jo_record.GetString("correlationId", null);
            o.Identity = jo_record["identity"].ToString();

            return o;
        }
    }


    /// <summary>
    /// Convenience accessors for reading optional values from a JSON object.
    /// </summary>
    public static class Extensions
    {
        /// <summary>
        /// Returns the string form of property <paramref name="name"/>, or
        /// <paramref name="default_value"/> when the property is not present.
        /// </summary>
        public static string GetString(this Newtonsoft.Json.Linq.JObject jo, string name, string default_value)
        {
            // Look the property up once instead of once for the test and once for the read.
            var token = jo[name];
            if (token != null)
            {
                return token.ToString();
            }
            return default_value;
        }

        /// <summary>
        /// Parses property <paramref name="name"/> as a timestamp; returns <c>null</c> when the
        /// property is missing, empty or whitespace.
        /// </summary>
        /// <exception cref="System.FormatException">The value is present but is not a valid date/time.</exception>
        public static System.DateTimeOffset? GetDateTimeOffsetNullable(this Newtonsoft.Json.Linq.JObject jo, string name)
        {
            string s = jo.GetString(name, null);

            // IsNullOrWhiteSpace also covers the null case.
            if (string.IsNullOrWhiteSpace(s))
            {
                return null;
            }

            // Log timestamps are machine-written, so parse them independently of the
            // culture of the machine running this code.
            return System.DateTimeOffset.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Parses the required property <paramref name="name"/> as a timestamp.
        /// </summary>
        /// <exception cref="System.ArgumentNullException">The property is not present.</exception>
        /// <exception cref="System.FormatException">The value is not a valid date/time.</exception>
        public static System.DateTimeOffset GetDateTimeOffset(this Newtonsoft.Json.Linq.JObject jo, string name)
        {
            string s = jo.GetString(name, null);
            return System.DateTimeOffset.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        }

    }
}
namespace AzureDiagnostics
{
    /// <summary>
    /// Fields shared by every Azure diagnostic log record, independent of the emitting service.
    /// </summary>
    public class LogRecord
    {
        public System.DateTimeOffset Time;
        public string ResourceId;
        public string Category;
        public string OperationName;
        public string ResultType;
        public string ResultSignature;
        public string CorrelationId;
        public string Identity;
    }

    /// <summary>
    /// A <see cref="LogRecord"/> together with its service-specific "properties" payload.
    /// </summary>
    /// <typeparam name="T">Type that models the payload.</typeparam>
    public class LogRecord<T> : LogRecord
    {
        public T Properties;
    }
}
Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("AzureDiagnostics")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("AzureDiagnostics")] 13 | [assembly: AssemblyCopyright("Copyright © 2016")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("76bd4563-f478-4477-9b17-90fe82f98ab5")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnostics/packages.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnosticsExtractors/AzureDiagnosticsExtractors.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | 2.0 8 | 
{416D63FD-0477-49AA-A954-A7C5B95A9B51};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 9 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F} 10 | Library 11 | Properties 12 | AzureDiagnosticsExtractors 13 | AzureDiagnosticsExtractors 14 | v4.5.2 15 | 512 16 | 17 | 18 | 19 | true 20 | full 21 | false 22 | bin\Debug\ 23 | DEBUG;TRACE 24 | prompt 25 | 4 26 | false 27 | 28 | 29 | pdbonly 30 | true 31 | bin\Release\ 32 | TRACE 33 | prompt 34 | 4 35 | false 36 | 37 | 38 | 39 | ..\packages\Newtonsoft.Json.8.0.3\lib\net45\Newtonsoft.Json.dll 40 | True 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | {76bd4563-f478-4477-9b17-90fe82f98ab5} 63 | AzureDiagnostics 64 | 65 | 66 | 67 | 74 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnosticsExtractors/DataLakeAnalyticsExtractor.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Microsoft.Analytics.Interfaces; 3 | 4 | namespace AzureDiagnosticsExtractors 5 | { 6 | [SqlUserDefinedExtractor(AtomicFileProcessing = true)] 7 | public class DataLakeAnalyticsExtractor : IExtractor 8 | { 9 | 10 | public DataLakeAnalyticsExtractor() 11 | { 12 | } 13 | 14 | public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output_row) // emits one row per ADLA log record read from the input stream 15 | { 16 | var s = new System.IO.StreamReader(input.BaseStream); 17 | { 18 | var rows = AzureDiagnostics.AzureDiagnosticsUtil.GetLogADLARecords(s); 19 | 20 | foreach (var row in rows) 21 | { 22 | output_row.Set("Time", row.Time.DateTime); 23 | output_row.Set("ResourceId", row.ResourceId); 24 | output_row.Set("Category", row.Category); 25 | output_row.Set("OperationName", row.OperationName); 26 | output_row.Set("ResultType", row.ResultType); 27 | output_row.Set("ResultSignature", row.ResultSignature); // ResultSignature has its own field on LogRecord 28 | output_row.Set("CorrelationId", row.CorrelationId); 29 | output_row.Set("Identity", row.Identity); 30 |
31 | var props = row.Properties; 32 | output_row.Set("ADLA_JobId", props.JobId); 33 | output_row.Set("ADLA_JobName", props.JobName); 34 | output_row.Set("ADLA_JobRuntimeName", props.JobRuntimeName); 35 | 36 | 37 | output_row.Set("ADLA_StartTime", props.StartTime.ToDateTimeNullable()); 38 | output_row.Set("ADLA_SubmitTime", props.SubmitTime.ToDateTimeNullable()); 39 | output_row.Set("ADLA_EndTime", props.EndTime.ToDateTimeNullable()); 40 | 41 | yield return output_row.AsReadOnly(); 42 | } 43 | } 44 | 45 | } 46 | } 47 | } -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnosticsExtractors/DataLakeStoreExtractor.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Microsoft.Analytics.Interfaces; 3 | 4 | namespace AzureDiagnosticsExtractors 5 | { 6 | [SqlUserDefinedExtractor(AtomicFileProcessing = true)] 7 | public class DataLakeStoreExtractor : IExtractor 8 | { 9 | 10 | public DataLakeStoreExtractor() 11 | { 12 | } 13 | 14 | public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output_row) // emits one row per ADLS log record read from the input stream 15 | { 16 | var s = new System.IO.StreamReader(input.BaseStream); 17 | { 18 | var rows = AzureDiagnostics.AzureDiagnosticsUtil.GetLogADLSRecords(s); 19 | 20 | foreach (var row in rows) 21 | { 22 | output_row.Set("Time", row.Time.DateTime); 23 | output_row.Set("ResourceId", row.ResourceId); 24 | output_row.Set("Category", row.Category); 25 | output_row.Set("OperationName", row.OperationName); 26 | output_row.Set("ResultType", row.ResultType); 27 | output_row.Set("ResultSignature", row.ResultSignature); // ResultSignature has its own field on LogRecord 28 | output_row.Set("CorrelationId", row.CorrelationId); 29 | output_row.Set("Identity", row.Identity); 30 | 31 | var props = row.Properties; 32 | output_row.Set("ADLS_StreamName", props.StreamName); 33 | 34 | yield return output_row.AsReadOnly(); 35 | } 36 | } 37 | 38 | } 39 | } 40 | }
-------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnosticsExtractors/Extensions.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Types.Sql; 2 | using System; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace AzureDiagnosticsExtractors 7 | { 8 | static class Extensions 9 | { 10 | public static System.DateTime? ToDateTimeNullable(this System.DateTimeOffset? dto) 11 | { 12 | System.DateTime? dt; 13 | if (dto.HasValue) 14 | { 15 | dt = (System.DateTime?)dto.Value.DateTime; 16 | } 17 | else 18 | { 19 | dt = null; 20 | } 21 | return dt; 22 | } 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnosticsExtractors/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("AzureDiagnosticsExtractors")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("AzureDiagnosticsExtractors")] 13 | [assembly: AssemblyCopyright("Copyright © 2016")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("03a55105-cd32-495e-9c58-ad488defb6f9")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/AzureDiagnosticsExtractors/packages.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ParseAzureDiagnosticLog.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25123.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParseAzureDiagnosticLog", "ParseAzureDiagnosticLog\ParseAzureDiagnosticLog.csproj", "{D683E0B3-E146-4DF3-9590-8EBED5113170}" 7 | EndProject 8 | Project("{182E2583-ECAD-465B-BB50-91101D7C24CE}") = "ProcessADLDiagnosticLogs", "ProcessADLDiagnosticLogs\ProcessADLDiagnosticLogs.usqlproj", "{F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}" 9 | EndProject 10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureDiagnostics", "AzureDiagnostics\AzureDiagnostics.csproj", "{76BD4563-F478-4477-9B17-90FE82F98AB5}" 11 | EndProject 12 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureDiagnosticsExtractors", 
"AzureDiagnosticsExtractors\AzureDiagnosticsExtractors.csproj", "{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Debug|x64 = Debug|x64 18 | Debug|x86 = Debug|x86 19 | Release|Any CPU = Release|Any CPU 20 | Release|x64 = Release|x64 21 | Release|x86 = Release|x86 22 | EndGlobalSection 23 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 24 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 25 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Debug|Any CPU.Build.0 = Debug|Any CPU 26 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Debug|x64.ActiveCfg = Debug|Any CPU 27 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Debug|x64.Build.0 = Debug|Any CPU 28 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Debug|x86.ActiveCfg = Debug|Any CPU 29 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Debug|x86.Build.0 = Debug|Any CPU 30 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Release|Any CPU.Build.0 = Release|Any CPU 32 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Release|x64.ActiveCfg = Release|Any CPU 33 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Release|x64.Build.0 = Release|Any CPU 34 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Release|x86.ActiveCfg = Release|Any CPU 35 | {D683E0B3-E146-4DF3-9590-8EBED5113170}.Release|x86.Build.0 = Release|Any CPU 36 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 37 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Debug|Any CPU.Build.0 = Debug|Any CPU 38 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Debug|x64.ActiveCfg = Debug|x64 39 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Debug|x64.Build.0 = Debug|x64 40 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Debug|x86.ActiveCfg = Debug|x86 41 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Debug|x86.Build.0 = Debug|x86 42 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Release|Any CPU.ActiveCfg 
= Release|Any CPU 43 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Release|Any CPU.Build.0 = Release|Any CPU 44 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Release|x64.ActiveCfg = Release|x64 45 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Release|x64.Build.0 = Release|x64 46 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Release|x86.ActiveCfg = Release|x86 47 | {F518A4CF-F2CF-43D8-942F-7DD0E5ED3C7B}.Release|x86.Build.0 = Release|x86 48 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 49 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Debug|Any CPU.Build.0 = Debug|Any CPU 50 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Debug|x64.ActiveCfg = Debug|Any CPU 51 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Debug|x64.Build.0 = Debug|Any CPU 52 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Debug|x86.ActiveCfg = Debug|Any CPU 53 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Debug|x86.Build.0 = Debug|Any CPU 54 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Release|Any CPU.ActiveCfg = Release|Any CPU 55 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Release|Any CPU.Build.0 = Release|Any CPU 56 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Release|x64.ActiveCfg = Release|Any CPU 57 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Release|x64.Build.0 = Release|Any CPU 58 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Release|x86.ActiveCfg = Release|Any CPU 59 | {76BD4563-F478-4477-9B17-90FE82F98AB5}.Release|x86.Build.0 = Release|Any CPU 60 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 61 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|Any CPU.Build.0 = Debug|Any CPU 62 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x64.ActiveCfg = Debug|Any CPU 63 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x64.Build.0 = Debug|Any CPU 64 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x86.ActiveCfg = Debug|Any CPU 65 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x86.Build.0 = Debug|Any CPU 66 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|Any CPU.ActiveCfg = Release|Any CPU 67 | 
{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|Any CPU.Build.0 = Release|Any CPU 68 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x64.ActiveCfg = Release|Any CPU 69 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x64.Build.0 = Release|Any CPU 70 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x86.ActiveCfg = Release|Any CPU 71 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x86.Build.0 = Release|Any CPU 72 | EndGlobalSection 73 | GlobalSection(SolutionProperties) = preSolution 74 | HideSolutionNode = FALSE 75 | EndGlobalSection 76 | EndGlobal 77 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ParseAzureDiagnosticLog/App.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ParseAzureDiagnosticLog/ParseAzureDiagnosticLog.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {D683E0B3-E146-4DF3-9590-8EBED5113170} 8 | Exe 9 | Properties 10 | ParseAzureDiagnosticLog 11 | ParseAzureDiagnosticLog 12 | v4.5.2 13 | 512 14 | true 15 | 16 | 17 | 18 | AnyCPU 19 | true 20 | full 21 | false 22 | bin\Debug\ 23 | DEBUG;TRACE 24 | prompt 25 | 4 26 | false 27 | 28 | 29 | AnyCPU 30 | pdbonly 31 | true 32 | bin\Release\ 33 | TRACE 34 | prompt 35 | 4 36 | false 37 | 38 | 39 | 40 | ..\packages\Newtonsoft.Json.8.0.3\lib\net45\Newtonsoft.Json.dll 41 | True 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | PreserveNewest 60 | 61 | 62 | PreserveNewest 63 | 64 | 65 | 66 | 67 | 68 | {1b3e7106-6d16-4b96-87c5-f15e18ffc08f} 69 | AzureDiagnosticsExtractors 70 | 71 | 72 | {76bd4563-f478-4477-9b17-90fe82f98ab5} 73 | AzureDiagnostics 74 | 75 | 76 | 77 | 84 | -------------------------------------------------------------------------------- 
/Samples/AzureDiagnosticsSample/ParseAzureDiagnosticLog/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using Newtonsoft.Json.Linq; 4 | 5 | namespace ParseAzureDiagnosticLog 6 | { 7 | 8 | class Program 9 | { 10 | private static bool quiet=true; 11 | 12 | static void Main(string[] args) 13 | { 14 | string this_asm = System.Reflection.Assembly.GetAssembly(typeof (Program)).Location; 15 | string this_folder = System.IO.Path.GetDirectoryName(this_asm); 16 | string input_folder = System.IO.Path.Combine(this_folder, "Input"); 17 | 18 | parse_adls_logs_in_folder(input_folder); 19 | parse_adla_logs_in_folder(input_folder); 20 | } 21 | 22 | private static void parse_adls_logs_in_folder(string input_folder) 23 | { 24 | var files = System.IO.Directory.GetFiles(input_folder, "ADLS*.json"); 25 | 26 | foreach (var file in files) 27 | { 28 | using (var stream_reader = new System.IO.StreamReader(file)) 29 | { 30 | foreach (var row in AzureDiagnostics.AzureDiagnosticsUtil.GetLogRecords(stream_reader, o=> new AzureDiagnostics.DataLakeStoreProperties(o))) 31 | { 32 | if (!quiet) 33 | { 34 | Console.WriteLine("-----------------------------"); 35 | Console.WriteLine("Time = {0}", row.Time); 36 | Console.WriteLine("ResourceId = {0}", row.ResourceId); 37 | Console.WriteLine("Category = {0}", row.Category); 38 | Console.WriteLine("OperationName = {0}", row.OperationName); 39 | Console.WriteLine("ResultType = {0}", row.ResultType); 40 | Console.WriteLine("CorrelationId = {0}", row.CorrelationId); 41 | Console.WriteLine("Identity = {0}", row.Identity); 42 | } 43 | } 44 | } 45 | } 46 | } 47 | 48 | private static void parse_adla_logs_in_folder(string input_folder) 49 | { 50 | var files = System.IO.Directory.GetFiles(input_folder, "ADLA*.json"); 51 | 52 | foreach (var file in files) 53 | { 54 | using (var stream_reader = new System.IO.StreamReader(file)) 55 | { 56 | var rows = 
AzureDiagnostics.AzureDiagnosticsUtil.GetLogRecords(stream_reader, o => new AzureDiagnostics.DataLakeAnalyticsProperties(o)); 57 | foreach (var row in rows) 58 | { 59 | if (!quiet) 60 | { 61 | Console.WriteLine("-----------------------------"); 62 | Console.WriteLine("Time = {0}", row.Time); 63 | Console.WriteLine("ResourceId = {0}", row.ResourceId); 64 | Console.WriteLine("Category = {0}", row.Category); 65 | Console.WriteLine("OperationName = {0}", row.OperationName); 66 | Console.WriteLine("ResultType = {0}", row.ResultType); 67 | Console.WriteLine("CorrelationId = {0}", row.CorrelationId); 68 | Console.WriteLine("Identity = {0}", row.Identity); 69 | } 70 | } 71 | } 72 | } 73 | } 74 | 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ParseAzureDiagnosticLog/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("ParseAzureDiagnosticLog")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("ParseAzureDiagnosticLog")] 13 | [assembly: AssemblyCopyright("Copyright © 2016")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("d683e0b3-e146-4df3-9590-8ebed5113170")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ParseAzureDiagnosticLog/packages.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/CreateDiagnosticsDB.usql: -------------------------------------------------------------------------------- 1 |  2 | // DROP DATABASE IF EXISTS Diagnostics; 3 | 4 | CREATE DATABASE IF NOT EXISTS Diagnostics; 5 | 6 | DROP FUNCTION IF EXISTS Diagnostics.dbo.GetDataLakeStoreRecords; 7 | 8 | CREATE FUNCTION Diagnostics.dbo.GetDataLakeStoreRecords(@start_date string, @end_date string, @subscription string, @resource_group string, @adls_account_name string, @wasb_account string) 9 | RETURNS @rows 10 | AS 11 | BEGIN 12 | 13 | REFERENCE ASSEMBLY Diagnostics.[AzureDiagnostics]; 14 | REFERENCE ASSEMBLY Diagnostics.[AzureDiagnosticsExtractors]; 15 | REFERENCE ASSEMBLY Diagnostics.[Newtonsoft.Json]; 16 | 17 | DECLARE @adls_path string = @wasb_account + 18 | "/resourceId=/SUBSCRIPTIONS/" + @subscription.ToUpper() + 19 | "/RESOURCEGROUPS/" + @resource_group.ToUpper() + 20 | "/PROVIDERS/MICROSOFT.DATALAKESTORE/ACCOUNTS/" + 21 | @adls_account_name.ToUpper() + 22 
| "/y={LogDate:yyyy}/m={LogDate:MM}/d={LogDate:dd}/h={LogDate:HH}/m={LogDate:mm}/PT1H.json"; // HH: 24-hour hour-of-day 23 | 24 | @rows = 25 | EXTRACT LogDate DateTime, 26 | Time DateTime, 27 | ResourceId string, 28 | Category string, 29 | OperationName string, 30 | ResultType string, 31 | ResultSignature string, 32 | CorrelationId string, 33 | Identity string, 34 | ADLS_StreamName string 35 | FROM @adls_path 36 | USING new AzureDiagnosticsExtractors.DataLakeStoreExtractor(); 37 | 38 | @rows = 39 | SELECT * 40 | FROM @rows 41 | WHERE LogDate >= System.DateTime.Parse(@start_date) AND LogDate <= System.DateTime.Parse(@end_date); 42 | END; 43 | 44 | 45 | DROP FUNCTION IF EXISTS Diagnostics.dbo.GetDataLakeAnalyticsRecords; 46 | 47 | CREATE FUNCTION Diagnostics.dbo.GetDataLakeAnalyticsRecords(@start_date string, @end_date string, @subscription string, @resource_group string, @adla_account_name string, @wasb_account string) 48 | RETURNS @rows 49 | AS 50 | BEGIN 51 | 52 | REFERENCE ASSEMBLY Diagnostics.[AzureDiagnostics]; 53 | REFERENCE ASSEMBLY Diagnostics.[AzureDiagnosticsExtractors]; 54 | REFERENCE ASSEMBLY Diagnostics.[Newtonsoft.Json]; 55 | 56 | DECLARE @adla_path string = @wasb_account + 57 | "/resourceId=/SUBSCRIPTIONS/" + @subscription.ToUpper() + 58 | "/RESOURCEGROUPS/" + @resource_group.ToUpper() + 59 | "/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/" + @adla_account_name.ToUpper() + 60 | "/y={LogDate:yyyy}/m={LogDate:MM}/d={LogDate:dd}/h={LogDate:HH}/m={LogDate:mm}/PT1H.json"; // HH: 24-hour hour-of-day 61 | 62 | @rows = 63 | EXTRACT LogDate DateTime, 64 | Time DateTime, 65 | ResourceId string, 66 | Category string, 67 | OperationName string, 68 | ResultType string, 69 | ResultSignature string, 70 | CorrelationId string, 71 | Identity string, 72 | ADLA_JobId string, 73 | ADLA_JobName string, 74 | ADLA_JobRuntimeName string, 75 | ADLA_StartTime DateTime?, 76 | ADLA_SubmitTime DateTime?, 77 | ADLA_EndTime DateTime?
78 | 79 | FROM @adla_path 80 | USING new AzureDiagnosticsExtractors.DataLakeAnalyticsExtractor(); 81 | 82 | @rows = 83 | SELECT * 84 | FROM @rows 85 | WHERE LogDate >= System.DateTime.Parse(@start_date) AND LogDate <= System.DateTime.Parse(@end_date); 86 | END; 87 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/CreateDiagnosticsDB.usql.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Interfaces; 2 | using Microsoft.Analytics.Types.Sql; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | 6 | namespace ProcessADLDiagnosticLogs 7 | { 8 | } 9 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/FileSet.usql: -------------------------------------------------------------------------------- 1 |  2 | USE Diagnostics; 3 | 4 | REFERENCE ASSEMBLY [AzureDiagnostics]; 5 | REFERENCE ASSEMBLY [AzureDiagnosticsExtractors]; 6 | REFERENCE ASSEMBLY [Newtonsoft.Json]; 7 | 8 | DECLARE @start_date string = "2016-04-20"; 9 | DECLARE @end_date string = "2016-04-21"; 10 | 11 | DECLARE @subscription string = "d66b1168-d835-4066-8c45-7d2ed713c082".ToUpper(); 12 | DECLARE @resource_group string = "bigdatasandbox".ToUpper(); 13 | DECLARE @adla_account_name string = "sandbox".ToUpper(); 14 | DECLARE @adls_account_name string = "sandboxadl".ToUpper(); 15 | DECLARE @wasb_account = "wasb://insights-logs-audit@sandboxazurestorage"; 16 | 17 | @adla_rows = SELECT * FROM GetDataLakeAnalyticsRecords 18 | ( 19 | @start_date , 20 | @end_date , 21 | @subscription, 22 | @resource_group, 23 | @adla_account_name, 24 | @wasb_account 25 | ) AS T; 26 | 27 | @adls_rows = SELECT * FROM GetDataLakeStoreRecords 28 | ( 29 | @start_date , 30 | @end_date , 31 | @subscription, 32 | @resource_group, 33 | @adls_account_name, // store logs are looked up under the ADLS account, not the ADLA one 34 | @wasb_account 35 | ) AS T; 36 | 37 |
OUTPUT @adls_rows 38 | TO 39 | @"/Output/adls_out2.csv" 40 | USING Outputters.Csv(); 41 | 42 | OUTPUT @adla_rows 43 | TO 44 | @"/Output/adla_out2.csv" 45 | USING Outputters.Csv(); 46 | 47 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/FileSet.usql.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Interfaces; 2 | using Microsoft.Analytics.Types.Sql; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | 6 | namespace ProcessADLDiagnosticLogs 7 | { 8 | } 9 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/LocalScript.usql: -------------------------------------------------------------------------------- 1 | REFERENCE ASSEMBLY AzureDiagnostics; 2 | REFERENCE ASSEMBLY AzureDiagnosticsExtractors; 3 | REFERENCE ASSEMBLY [Newtonsoft.Json]; 4 | 5 | @adls_rows = 6 | EXTRACT Time DateTime, 7 | ResourceId string, 8 | Category string, 9 | OperationName string, 10 | ResultType string, 11 | ResultSignature string, 12 | CorrelationId string, 13 | Identity string, 14 | ADLS_StreamName string 15 | FROM @"/Input/ADLS_PT1H.json" 16 | USING new AzureDiagnosticsExtractors.DataLakeStoreExtractor(); 17 | 18 | @adla_rows = 19 | EXTRACT Time DateTime, 20 | ResourceId string, 21 | Category string, 22 | OperationName string, 23 | ResultType string, 24 | ResultSignature string, 25 | CorrelationId string, 26 | Identity string, 27 | ADLA_JobId string, 28 | ADLA_JobName string, 29 | ADLA_JobRuntimeName string, 30 | ADLA_StartTime DateTime?, 31 | ADLA_SubmitTime DateTime?, 32 | ADLA_EndTime DateTime? 
33 | 34 | FROM @"/Input/ADLA_PT1H.json" 35 | USING new AzureDiagnosticsExtractors.DataLakeAnalyticsExtractor(); 36 | 37 | OUTPUT @adls_rows 38 | TO 39 | @"/Output/adls_out.csv" 40 | USING Outputters.Csv(); 41 | 42 | OUTPUT @adla_rows 43 | TO 44 | @"/Output/adla_out.csv" 45 | USING Outputters.Csv(); 46 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/LocalScript.usql.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Interfaces; 2 | using Microsoft.Analytics.Types.Sql; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | 6 | namespace ProcessADLDiagnosticLogs 7 | { 8 | } 9 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/ProcessADLDiagnosticLogs.usqlproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Debug 5 | AnyCPU 6 | 2.0 7 | f518a4cf-f2cf-43d8-942f-7dd0e5ed3c7b 8 | File 9 | Algebra.xml 10 | v4.5.2 11 | ProcessADLDiagnosticLogs 12 | ProcessADLDiagnosticLogs 13 | default 14 | C:\LocalRunDataRoot 15 | 16 | 17 | true 18 | bin\Debug\ 19 | 20 | 21 | false 22 | bin\Release\ 23 | 24 | 25 | 31 | 41 | 50 | 51 | SingleFile.usql 52 | 53 | 54 | LocalScript.usql 55 | 56 | 57 | CreateDiagnosticsDB.usql 58 | 59 | 60 | FileSet.usql 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | Content 69 | 70 | 71 | Content 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/README.md: -------------------------------------------------------------------------------- 1 |   2 | 3 |   4 | 5 | First Register the Assemblies 6 | ----------------------------- 7 | 8 |   9 | 10 | - Right-click on the **AzureDiagnostics** project and select **Register 11 | Assembly**, 12 | 13 | - Under 
**Managed Assemblies** select **Newtonsoft.Json** 14 | 15 | - Click **Submit** 16 | 17 | - Right-click on the **AzureDiagnosticsExtractors** project and select **Register 18 | Assembly**, 19 | 20 | - Click **Submit** 21 | 22 |   23 | 24 | Once you do this you should see three items listed in your Database’s Assemblies 25 | folder 26 | 27 | - AzureDiagnostics 28 | 29 | - AzureDiagnosticsExtractors 30 | 31 | - Newtonsoft.Json 32 | 33 |   34 | 35 | Reading a log from a U-SQL Script 36 | --------------------------------- 37 | 38 |   39 | 40 | To use the Extractors, your U-SQL script will need to have these statements at 41 | the beginning: 42 | 43 |   44 | 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | REFERENCE ASSEMBLY AzureDiagnostics; 47 | REFERENCE ASSEMBLY AzureDiagnosticsExtractors; 48 | REFERENCE ASSEMBLY [Newtonsoft.Json]; 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 |   52 | 53 | And in order for those REFERENCE ASSEMBLY commands to work the assemblies must 54 | be registered in a U-SQL database.
55 | 56 |   57 | 58 | Then call the extractors to get the data: 59 | 60 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 61 | @adls_rows = 62 | EXTRACT Time DateTime, 63 | ResourceId string, 64 | Category string, 65 | OperationName string, 66 | ResultType string, 67 | ResultSignature string, 68 | CorrelationId string, 69 | Identity string, 70 | ADLS_StreamName string 71 | FROM @"/Input/ADLS_PT1H.json" 72 | USING new AzureDiagnosticsExtractors.DataLakeStoreExtractor(); 73 | 74 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 75 | 76 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 77 | @adla_rows = 78 | EXTRACT Time DateTime, 79 | ResourceId string, 80 | Category string, 81 | OperationName string, 82 | ResultType string, 83 | ResultSignature string, 84 | CorrelationId string, 85 | Identity string, 86 | ADLA_JobId string, 87 | ADLA_JobName string, 88 | ADLA_JobRuntimeName string, 89 | ADLA_StartTime DateTime?, 90 | ADLA_SubmitTime DateTime?, 91 | ADLA_EndTime DateTime? 92 | 93 | FROM @"/Input/ADLA_PT1H.json" 94 | USING new AzureDiagnosticsExtractors.DataLakeAnalyticsExtractor(); 95 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 96 | 97 |   98 | 99 |   100 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/RegisterAssemblies.ps1: -------------------------------------------------------------------------------- 1 | param([string]$account) #Must be the first statement in your script 2 | 3 | # First, make sure you are logged into the correct Azure Subscription and Tenant with Login-AzureRmAccount first 4 | # 5 | # > Login-AzureRmAccount -SubscriptionId "..." 
6 | # 7 | # Then, call it like this from the command line 8 | # 9 | # > .\RegisterAssemblies.ps1 -account "sandbox" 10 | # 11 | 12 | # Resolves the locally built DLLs, uploads them to the account's default Data Lake Store and submits a U-SQL job that (re)registers them in the Diagnostics database. 13 | 14 | $folder = join-path $PSScriptRoot "..\AzureDiagnosticsExtractors\bin\Debug" 15 | $folder = [System.IO.Path]::GetFullPath($folder) 16 | $dlls = @( 17 | "Newtonsoft.Json.dll", 18 | "AzureDiagnosticsExtractors.dll", 19 | "AzureDiagnostics.dll" 20 | ) 21 | 22 | $dlls = $dlls | % { join-path $folder $_ } 23 | 24 | 25 | $dlls 26 | 27 | foreach ($dll in $dlls) 28 | { 29 | if (!(test-path $dll)) 30 | { 31 | throw "Does not exist: $dll" # abort before any upload or job submission happens 32 | } 33 | else 34 | { 35 | Write-Host "File Exists: $dll" 36 | } 37 | } 38 | # Remote staging folder in the Data Lake Store for the uploaded DLLs. 39 | $remote_temp = "/dll_temp"; 40 | 41 | $adla_account = Get-AzureRmDataLakeAnalyticsAccount -Name $account 42 | 43 | Write-Host $adla_account 44 | 45 | $adls_name = $adla_account.Properties.DefaultDataLakeStoreAccount 46 | 47 | Write-Host $adls_name 48 | 49 | $script = "" 50 | $script = $script + "CREATE DATABASE IF NOT EXISTS Diagnostics;`n" 51 | 52 | # Upload each DLL and append a DROP/CREATE ASSEMBLY pair for it to the U-SQL script. 53 | foreach ($dll in $dlls) 54 | { 55 | $basename = [System.IO.Path]::GetFileName($dll) 56 | $basename_noext = [System.IO.Path]::GetFileNameWithoutExtension($dll) 57 | 58 | $asm_name = $basename_noext 59 | $dest_path = $remote_temp + "/" + $basename 60 | Import-AzureRmDataLakeStoreItem -AccountName $adls_name -Path $dll -Destination $dest_path -Force 61 | 62 | $full_dest_path = "adl://" + $adls_name + ".azuredatalakestore.net" + $dest_path 63 | $script = $script + "DROP ASSEMBLY IF EXISTS Diagnostics.[" + $asm_name + "];`n" 64 | $script = $script + "CREATE ASSEMBLY Diagnostics.[" + $asm_name + "] FROM `"$full_dest_path`" ;`n" 65 | 66 | } 67 | 68 | 69 | 70 | Submit-AzureRmDataLakeAnalyticsJob -Name "RegisterDiagnosticsAssemblies" -AccountName $account -Script $script 71 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/SingleFile.usql: 
-------------------------------------------------------------------------------- 1 | USE Diagnostics; 2 | 3 | REFERENCE ASSEMBLY [AzureDiagnostics]; 4 | REFERENCE ASSEMBLY [AzureDiagnosticsExtractors]; 5 | REFERENCE ASSEMBLY [Newtonsoft.Json]; 6 | 7 | DECLARE @subscription string = "d66b1168-d835-4066-8c45-7d2ed713c082".ToUpper(); 8 | DECLARE @resource_group string = "bigdatasandbox".ToUpper(); 9 | DECLARE @adla_account_name string = "sandbox".ToUpper(); 10 | DECLARE @adls_account_name string = "sandboxadl".ToUpper(); 11 | DECLARE @wasb_account = "wasb://insights-logs-audit@sandboxazurestorage"; 12 | 13 | DECLARE @adla_path string = @wasb_account + "/resourceId=/SUBSCRIPTIONS/" + @subscription + "/RESOURCEGROUPS/" + @resource_group + "/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/" + @adla_account_name + "/y=2016/m=04/d=22/h=08/m=00/PT1H.json"; 14 | DECLARE @adls_path string = @wasb_account + "/resourceId=/SUBSCRIPTIONS/" + @subscription + "/RESOURCEGROUPS/" + @resource_group + "/PROVIDERS/MICROSOFT.DATALAKESTORE/ACCOUNTS/" + @adls_account_name + "/y=2016/m=04/d=22/h=08/m=00/PT1H.json"; 15 | 16 | @adls_rows = 17 | EXTRACT Time DateTime, 18 | ResourceId string, 19 | Category string, 20 | OperationName string, 21 | ResultType string, 22 | ResultSignature string, 23 | CorrelationId string, 24 | Identity string, 25 | ADLS_StreamName string 26 | FROM @adls_path 27 | USING new AzureDiagnosticsExtractors.DataLakeStoreExtractor(); 28 | 29 | @adla_rows = 30 | EXTRACT Time DateTime, 31 | ResourceId string, 32 | Category string, 33 | OperationName string, 34 | ResultType string, 35 | ResultSignature string, 36 | CorrelationId string, 37 | Identity string, 38 | ADLA_JobId string, 39 | ADLA_JobName string, 40 | ADLA_JobRuntimeName string, 41 | ADLA_StartTime DateTime?, 42 | ADLA_SubmitTime DateTime?, 43 | ADLA_EndTime DateTime? 
44 | 45 | FROM @adla_path 46 | USING new AzureDiagnosticsExtractors.DataLakeAnalyticsExtractor(); 47 | 48 | OUTPUT @adls_rows 49 | TO 50 | @"/Output/adls_out.csv" 51 | USING Outputters.Csv(); 52 | 53 | OUTPUT @adla_rows 54 | TO 55 | @"/Output/adla_out.csv" 56 | USING Outputters.Csv(); 57 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/SingleFile.usql.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Interfaces; 2 | using Microsoft.Analytics.Types.Sql; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | 6 | namespace ProcessADLDiagnosticLogs 7 | { 8 | } 9 | -------------------------------------------------------------------------------- /Samples/AzureDiagnosticsSample/ProcessADLDiagnosticLogs/input/ADLA_PT1H.json: -------------------------------------------------------------------------------- 1 | { 2 | "records": 3 | [ 4 | 5 | { 6 | "time": "2016-04-19T22:22:10.518Z", 7 | "resourceId": "/SUBSCRIPTIONS/D66B1168-D835-4066-8C45-7D2ED713C082/RESOURCEGROUPS/BIGDATASANDBOX/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/SANDBOX", 8 | "category": "Audit", 9 | "operationName": "JobStarted", 10 | "identity": "zhictan@microsoft.com", 11 | "properties": {"JobId":"FE5B7925-1722-4297-A4D8-565C355F834A","JobName":"Query a TSV file ","JobRuntimeName":"","StartTime":"4/19/2016 10:22:10 PM"} 12 | } 13 | , 14 | { 15 | "time": "2016-04-19T22:22:55.128Z", 16 | "resourceId": "/SUBSCRIPTIONS/D66B1168-D835-4066-8C45-7D2ED713C082/RESOURCEGROUPS/BIGDATASANDBOX/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/SANDBOX", 17 | "category": "Audit", 18 | "operationName": "JobStarted", 19 | "identity": "zhictan@microsoft.com", 20 | "properties": {"JobId":"5F1008C3-9DCF-4CA2-ACC2-21C9C9839A9F","JobName":"Query a TSV file","JobRuntimeName":"","StartTime":"4/19/2016 10:22:55 PM"} 21 | } 22 | , 23 | { 24 | "time": 
"2016-04-19T22:21:11.705Z", 25 | "resourceId": "/SUBSCRIPTIONS/D66B1168-D835-4066-8C45-7D2ED713C082/RESOURCEGROUPS/BIGDATASANDBOX/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/SANDBOX", 26 | "category": "Audit", 27 | "operationName": "JobSubmitted", 28 | "identity": "zhictan@microsoft.com", 29 | "properties": {"JobId":"FE5B7925-1722-4297-A4D8-565C355F834A","JobName":"Query a TSV file ","JobRuntimeName":"default","SubmitTime":"4/19/2016 10:21:10 PM"} 30 | } 31 | , 32 | { 33 | "time": "2016-04-19T22:22:15.924Z", 34 | "resourceId": "/SUBSCRIPTIONS/D66B1168-D835-4066-8C45-7D2ED713C082/RESOURCEGROUPS/BIGDATASANDBOX/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/SANDBOX", 35 | "category": "Audit", 36 | "operationName": "JobSubmitted", 37 | "identity": "zhictan@microsoft.com", 38 | "properties": {"JobId":"5F1008C3-9DCF-4CA2-ACC2-21C9C9839A9F","JobName":"Query a TSV file","JobRuntimeName":"default","SubmitTime":"4/19/2016 10:22:15 PM"} 39 | } 40 | , 41 | { 42 | "time": "2016-04-19T22:23:14.332Z", 43 | "resourceId": "/SUBSCRIPTIONS/D66B1168-D835-4066-8C45-7D2ED713C082/RESOURCEGROUPS/BIGDATASANDBOX/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/SANDBOX", 44 | "category": "Audit", 45 | "operationName": "JobEnded", 46 | "resultType": "CompletedSuccess", 47 | "resultSignature": "Succeeded", 48 | "identity": "zhictan@microsoft.com", 49 | "properties": {"JobId":"FE5B7925-1722-4297-A4D8-565C355F834A","JobName":"Query a TSV file ","JobRuntimeName":"kobo_vnext_4836194","SubmitTime":"4/19/2016 10:21:10 PM","StartTime":"4/19/2016 10:22:10 PM","EndTime":"4/19/2016 10:23:14 PM","Parallelism":"1"} 50 | } 51 | , 52 | { 53 | "time": "2016-04-19T22:24:18.176Z", 54 | "resourceId": "/SUBSCRIPTIONS/D66B1168-D835-4066-8C45-7D2ED713C082/RESOURCEGROUPS/BIGDATASANDBOX/PROVIDERS/MICROSOFT.DATALAKEANALYTICS/ACCOUNTS/SANDBOX", 55 | "category": "Audit", 56 | "operationName": "JobEnded", 57 | "resultType": "CompletedSuccess", 58 | "resultSignature": "Succeeded", 59 | "identity": 
"zhictan@microsoft.com", 60 | "properties": {"JobId":"5F1008C3-9DCF-4CA2-ACC2-21C9C9839A9F","JobName":"Query a TSV file","JobRuntimeName":"kobo_vnext_4836194","SubmitTime":"4/19/2016 10:22:15 PM","StartTime":"4/19/2016 10:22:55 PM","EndTime":"4/19/2016 10:24:18 PM","Parallelism":"1"} 61 | } 62 | 63 | ] 64 | } 65 | -------------------------------------------------------------------------------- /Samples/DistCp/ConfigureADLS-3.0.0-alpha2WithNewDistCp.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | ######################################## 4 | #### USAGE: 5 | #### 6 | #### Step 1: Download and place the script on the HeadNode of your cluster. 7 | #### 8 | #### Step 2: Run script to configure ADLS. 9 | #### source ./ConfigureADLS-3.0.0-alpha2WithNewDistCp.sh 10 | #### 11 | #### Step 3: Run the new DistCp alias. 12 | #### adlsHadoopDistCp -blocksperchunk -m -copybuffersize -bandwidth 10000 // adl://.azuredatalakestore.net// 13 | ######################################## 14 | 15 | machine_login_usn=$1 16 | adls_account_name=$2 17 | tenant_id=$3 18 | client_id=$4 19 | client_credential=$5 20 | 21 | hn=`hostname` 22 | 23 | distcp_jar_name='hadoop-distcp-2.9.0-SNAPSHOT-cbs-java1.7compat.jar' 24 | distcp_jar_url='https://github.com/omkarksa10/AzureDataLake/releases/download/v2.2.6000.0' 25 | 26 | adl_sdk_jar_name='azure-data-lake-store-sdk-2.1.5.jar' 27 | adl_sdk_jar_url='http://central.maven.org/maven2/com/microsoft/azure/azure-data-lake-store-sdk/2.1.5' 28 | 29 | adl_driver_jar_name='hadoop-azure-datalake-3.0.0-alpha2.jar' 30 | adl_driver_jar_url='http://central.maven.org/maven2/org/apache/hadoop/hadoop-azure-datalake/3.0.0-alpha2' 31 | 32 | 33 | function ConfigureADLSWithNewDistCp() 34 | { 35 | rm $adl_sdk_jar_name 36 | wget $adl_sdk_jar_url/$adl_sdk_jar_name 37 | rm $adl_driver_jar_name 38 | wget $adl_driver_jar_url/$adl_driver_jar_name 39 | 40 | rm $distcp_jar_name 41 | wget $distcp_jar_url/$distcp_jar_name 42 
| 43 | jackson_files_colon=`find "$HADOOP_HOME/share/hadoop/tools/lib/" -name 'jackson-core*' | tr '\n' ':'` 44 | jackson_files_csv=`echo "$jackson_files_colon" | tr ':' ','` 45 | # The -name pattern above is quoted so the shell cannot expand it against files in the current directory before find sees it. 46 | unset HADOOP_CLASSPATH 47 | export HADOOP_CLASSPATH=$jackson_files_colon/home/$machine_login_usn/$adl_sdk_jar_name:/home/$machine_login_usn/$adl_driver_jar_name:/home/$machine_login_usn/$distcp_jar_name:`hadoop classpath` 48 | export LIBJARS=$jackson_files_csv/home/$machine_login_usn/$adl_sdk_jar_name,/home/$machine_login_usn/$adl_driver_jar_name,/home/$machine_login_usn/$distcp_jar_name 49 | 50 | echo $HADOOP_CLASSPATH 51 | echo $LIBJARS 52 | # Shell functions instead of aliases: bash expands aliases when a function body is read, so Test/TestDistCp (read before this function runs) would never see an alias created here. 53 | function adlsHdfs() { hdfs dfs -libjars "$LIBJARS" -D fs.AbstractFileSystem.adl.impl=org.apache.hadoop.fs.adl.Adl -D fs.adl.impl=org.apache.hadoop.fs.adl.AdlFileSystem -D dfs.adls.oauth2.access.token.provider.type=ClientCredential -D "dfs.adls.oauth2.refresh.url=https://login.windows.net/$tenant_id/oauth2/token" -D "dfs.adls.oauth2.client.id=$client_id" -D "dfs.adls.oauth2.credential=$client_credential" "$@"; } 54 | 55 | function adlsHadoopDistCp() { hadoop distcp -libjars "$LIBJARS" -D fs.AbstractFileSystem.adl.impl=org.apache.hadoop.fs.adl.Adl -D fs.adl.impl=org.apache.hadoop.fs.adl.AdlFileSystem -D dfs.adls.oauth2.access.token.provider.type=ClientCredential -D "dfs.adls.oauth2.refresh.url=https://login.windows.net/$tenant_id/oauth2/token" -D "dfs.adls.oauth2.client.id=$client_id" -D "dfs.adls.oauth2.credential=$client_credential" "$@"; } 56 | } 57 | 58 | function Test() { 59 | adlsHdfs -ls adl://$adls_account_name.azuredatalakestore.net/ 60 | } 61 | 62 | function TestDistCp() { 63 | dirName='tmp' 64 | cpyBuffSize=4194304 65 | blksPerChnk=10 66 | nMappers=4 67 | adlsHdfs -rmr -skipTrash adl://$adls_account_name.azuredatalakestore.net/* 68 | adlsHadoopDistCp -blocksperchunk $blksPerChnk -m $nMappers -copybuffersize $cpyBuffSize -bandwidth 10000 /$dirName/ adl://$adls_account_name.azuredatalakestore.net/ 69 | adlsHdfs -ls adl://$adls_account_name.azuredatalakestore.net/$dirName/ 70 | } 
71 | 72 | function PrintLine() { 73 | DATE=`date +%Y-%m-%d" "%H:%M:%S,%3N` 74 | echo "$DATE - $1" 75 | } 76 | 77 | function Main() { 78 | PrintLine "Configure ADLS with new DistCp on host: $hn" 79 | 80 | # all params are mandatory 81 | if [ ! -z "$machine_login_usn" -a "$machine_login_usn" != " " ] 82 | then 83 | PrintLine "User Name provided: $machine_login_usn" 84 | else 85 | PrintLine "User Name not provided! => EXIT" 86 | exit 87 | fi 88 | 89 | if [ ! -z "$adls_account_name" -a "$adls_account_name" != " " ] 90 | then 91 | PrintLine "ADLS account provided: $adls_account_name" 92 | else 93 | PrintLine "ADLS account NOT provided! => EXIT" 94 | exit 95 | fi 96 | 97 | if [ ! -z "$tenant_id" -a "$tenant_id" != " " ] 98 | then 99 | PrintLine "Tenant ID provided: $tenant_id" 100 | else 101 | PrintLine "Tenant ID NOT provided! => EXIT" 102 | exit 103 | fi 104 | 105 | if [ ! -z "$client_id" -a "$client_id" != " " ] 106 | then 107 | PrintLine "Client ID provided: $client_id" 108 | else 109 | PrintLine "Client ID NOT provided! => EXIT" 110 | exit 111 | fi 112 | 113 | if [ ! -z "$client_credential" -a "$client_credential" != " " ] 114 | then 115 | PrintLine "Client credential provided: $client_credential" 116 | else 117 | PrintLine "Client credential NOT provided! 
=> EXIT" 118 | exit 119 | fi 120 | 121 | ConfigureADLSWithNewDistCp 122 | Test 123 | } 124 | 125 | Main 126 | 127 | -------------------------------------------------------------------------------- /Samples/ExcelExtractor/ADLA ExcelExtractor.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.26430.15 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{182E2583-ECAD-465B-BB50-91101D7C24CE}") = "USQLExcelExtractor", "USQLExcelExtractor\USQLExcelExtractor.usqlproj", "{CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "oh22is.Analytics.Formats", "oh22is.Analytics.Formats\oh22is.Analytics.Formats.csproj", "{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Debug|x64 = Debug|x64 14 | Debug|x86 = Debug|x86 15 | Release|Any CPU = Release|Any CPU 16 | Release|x64 = Release|x64 17 | Release|x86 = Release|x86 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Debug|x64.ActiveCfg = Debug|x64 23 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Debug|x64.Build.0 = Debug|x64 24 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Debug|x86.ActiveCfg = Debug|x86 25 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Debug|x86.Build.0 = Debug|x86 26 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Release|Any CPU.ActiveCfg = Release|Any CPU 27 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Release|Any CPU.Build.0 = Release|Any CPU 28 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Release|x64.ActiveCfg = Release|x64 29 | 
{CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Release|x64.Build.0 = Release|x64 30 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Release|x86.ActiveCfg = Release|x86 31 | {CA7D4C8D-3A2D-4E49-B4BB-FA5BBCB413FB}.Release|x86.Build.0 = Release|x86 32 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x64.ActiveCfg = Debug|Any CPU 35 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x64.Build.0 = Debug|Any CPU 36 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x86.ActiveCfg = Debug|Any CPU 37 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|x86.Build.0 = Debug|Any CPU 38 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|Any CPU.ActiveCfg = Release|Any CPU 39 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|Any CPU.Build.0 = Release|Any CPU 40 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x64.ActiveCfg = Release|Any CPU 41 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x64.Build.0 = Release|Any CPU 42 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x86.ActiveCfg = Release|Any CPU 43 | {1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|x86.Build.0 = Release|Any CPU 44 | EndGlobalSection 45 | GlobalSection(SolutionProperties) = preSolution 46 | HideSolutionNode = FALSE 47 | EndGlobalSection 48 | EndGlobal 49 | -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/ReadExcel.usql: -------------------------------------------------------------------------------- 1 | // The "sample.xlsx" file is - as the name suggests - a pure example file with fictitious data. 2 | // The file contains different data types to demonstrate the handling of the ExcelExtractor. 3 | // In the current version, DateTime values can not be read out yet, they can only be read as string. 
4 | 5 | // Assembly paths use forward slashes, matching the other file paths in this script. 6 | // Registering the assemblies to use the Excel Extractor 7 | DROP ASSEMBLY IF EXISTS openxml; 8 | CREATE ASSEMBLY openxml FROM @"/Assemblies/DocumentFormat.OpenXml.dll"; 9 | REFERENCE ASSEMBLY openxml; 10 | 11 | DROP ASSEMBLY IF EXISTS analytics; 12 | CREATE ASSEMBLY analytics FROM @"/Assemblies/oh22is.Analytics.Formats.dll"; 13 | REFERENCE ASSEMBLY analytics; 14 | 15 | // Define a local variable for the Excel file 16 | DECLARE @ExcelFile = @"/Samples/sample.xlsx"; 17 | 18 | // Read the Sheet1 from the Excel file by defining the sheet in the constructor 19 | @sheet1 = EXTRACT A string, 20 | B string, 21 | C int, 22 | D int, 23 | E string, // formulas must be declared as a string 24 | F string, 25 | G int, 26 | H int, 27 | I int, 28 | J string, // Date formats must be declared as a string 29 | K string // Date formats must be declared as a string 30 | FROM @ExcelFile 31 | USING new oh22is.Analytics.Formats.ExcelExtractor("Sheet1"); 32 | 33 | // Read the Sheet2 from the Excel file by defining the sheet in the constructor 34 | @sheet2 = EXTRACT A string, 35 | B string, 36 | C int, 37 | D int, 38 | E string, // formulas must be declared as a string 39 | F string, 40 | G int, 41 | H int, 42 | I int, 43 | J string, // Date formats must be declared as a string 44 | K string // Date formats must be declared as a string 45 | FROM @ExcelFile 46 | USING new oh22is.Analytics.Formats.ExcelExtractor("Sheet2"); 47 | 48 | // Export the two sheets to a CSV file through a union 49 | OUTPUT 50 | ( 51 | SELECT * 52 | FROM @sheet1 53 | UNION 54 | SELECT * 55 | FROM @sheet2 56 | ) 57 | TO "/Outputs/sample01.csv" 58 | USING Outputters.Csv(); -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/ReadExcel.usql.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Interfaces; 2 | using Microsoft.Analytics.Types.Sql; 3 | using System; 4 | using 
System.Collections.Generic; 5 | using System.IO; 6 | using System.Linq; 7 | using System.Text; 8 | 9 | namespace USQLApplication1 10 | { 11 | 12 | } 13 | -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/Samples/sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/AzureDataLake/9970c7fa351f3e94e8ad5952235e75f801a1608a/Samples/ExcelExtractor/USQLExcelExtractor/Samples/sample.xlsx -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/Samples/taxonomy_97.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/AzureDataLake/9970c7fa351f3e94e8ad5952235e75f801a1608a/Samples/ExcelExtractor/USQLExcelExtractor/Samples/taxonomy_97.xlsx -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/TaxonomyData.usql: -------------------------------------------------------------------------------- 1 | // This is a sample data file from the silva project 2 | // The original data source can be found here: https://www.arb-silva.de/ 3 | // Both assembly paths below use forward slashes so the two registrations are written the same way. 4 | // Registering the assemblies to use the Excel Extractor 5 | DROP ASSEMBLY IF EXISTS openxml; 6 | CREATE ASSEMBLY openxml FROM @"/Assemblies/DocumentFormat.OpenXml.dll"; 7 | REFERENCE ASSEMBLY openxml; 8 | 9 | DROP ASSEMBLY IF EXISTS analytics; 10 | CREATE ASSEMBLY analytics FROM @"/Assemblies/oh22is.Analytics.Formats.dll"; 11 | REFERENCE ASSEMBLY analytics; 12 | 13 | // Define a local variable for the Excel file 14 | DECLARE @ExcelFile = "/Samples/taxonomy_97.xlsx"; 15 | 16 | // If no sheet is passed to the constructor, the first Excel sheet is read from the file 17 | @sheet1 = EXTRACT AccessionNumber string, 18 | D_0 string, 19 | D_1 string, 20 | D_2 string, 21 | D_3 string, 22 | D_4 string, 
23 | D_5 string, 24 | D_6 string, 25 | D_7 string, 26 | D_8 string, 27 | D_9 string, 28 | D_10 string, 29 | D_11 string, 30 | D_12 string, 31 | D_13 string, 32 | D_14 string, 33 | D_15 string, 34 | D_16 string 35 | FROM @ExcelFile 36 | USING new oh22is.Analytics.Formats.ExcelExtractor(); 37 | 38 | // Export the sheet into a CSV file 39 | OUTPUT 40 | ( 41 | SELECT * 42 | FROM @sheet1 43 | ) 44 | TO "/Outputs/taxonomy_97.csv" 45 | USING Outputters.Csv(); -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/TaxonomyData.usql.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Analytics.Interfaces; 2 | using Microsoft.Analytics.Types.Sql; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.IO; 6 | using System.Linq; 7 | using System.Text; 8 | 9 | namespace USQLApplication1 10 | { 11 | 12 | } 13 | -------------------------------------------------------------------------------- /Samples/ExcelExtractor/USQLExcelExtractor/USQLExcelExtractor.usqlproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Debug 5 | AnyCPU 6 | 2.0 7 | ca7d4c8d-3a2d-4e49-b4bb-fa5bbcb413fb 8 | File 9 | Algebra.xml 10 | v4.5.2 11 | USQLExcelExtractor 12 | USQLApplication1 13 | default 14 | C:\Users\Tillmann\AppData\Local\USQLDataRoot 15 | 16 | 17 | true 18 | bin\Debug\ 19 | 20 | 21 | false 22 | bin\Release\ 23 | 24 | 25 |