├── .gitattributes ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── build.sh ├── docs ├── debugging.md ├── filtering.md └── keyvault-backed-secrets.md ├── perftools ├── dashboards │ └── grafana │ │ ├── DashGen.sh │ │ └── SparkMetricsDashboardTemplate.json └── deployment │ ├── grafana │ ├── AzureDataSource.sh │ └── grafanaDeploy.json │ ├── images │ ├── AddSource.png │ ├── DataSource.png │ ├── Import.png │ └── UserChange.png │ ├── loganalytics │ └── logAnalyticsDeploy.json │ └── readme.md ├── sample └── spark-sample-job │ ├── pom.xml │ └── src │ └── main │ ├── resources │ └── com │ │ └── microsoft │ │ └── pnp │ │ └── samplejob │ │ └── log4j.properties │ └── scala │ └── com │ └── microsoft │ └── pnp │ └── samplejob │ └── StreamingQueryListenerSampleJob.scala └── src ├── pom.xml ├── spark-listeners-loganalytics ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── microsoft │ │ │ └── pnp │ │ │ ├── LogAnalyticsEnvironment.java │ │ │ ├── client │ │ │ └── loganalytics │ │ │ │ ├── LogAnalyticsClient.java │ │ │ │ ├── LogAnalyticsSendBuffer.java │ │ │ │ ├── LogAnalyticsSendBufferClient.java │ │ │ │ └── LogAnalyticsSendBufferTask.java │ │ │ └── logging │ │ │ └── loganalytics │ │ │ └── LogAnalyticsAppender.java │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ ├── com │ │ └── microsoft │ │ │ └── pnp │ │ │ └── LogAnalyticsConfiguration.scala │ │ ├── listeners │ │ └── sink │ │ │ └── loganalytics │ │ │ ├── LogAnalyticsListenerSink.scala │ │ │ └── LogAnalyticsListenerSinkConfiguration.scala │ │ └── metrics │ │ └── sink │ │ └── loganalytics │ │ ├── LogAnalyticsMetricsSink.scala │ │ ├── LogAnalyticsReporter.scala │ │ └── LogAnalyticsSinkConfiguration.scala │ └── test │ └── java │ └── com │ └── microsoft │ └── pnp │ ├── client │ └── loganalytics │ │ ├── LogAnalyticsClientTester.java │ │ └── ResourceIdTest.java │ └── logging │ └── loganalytics │ └── LogAnalyticsAppenderTest.java └── spark-listeners ├── pom.xml ├── scripts └── spark-monitoring.sh └── src ├── main ├── java │ └── com │ │ └── microsoft │ │ └── pnp │ │ ├── client │ │ ├── GenericSendBuffer.java │ │ └── GenericSendBufferTask.java │ │ └── logging │ │ ├── JSONConfiguration.java │ │ ├── JSONLayout.java │ │ ├── Log4jConfiguration.java │ │ ├── MDCCloseableFactory.java │ │ └── SparkPropertyEnricher.java └── scala │ ├── com │ └── microsoft │ │ └── pnp │ │ ├── SparkInformation.scala │ │ └── util │ │ └── TryWith.scala │ └── org │ └── apache │ └── spark │ ├── listeners │ ├── SparkListenerHandlers.scala │ ├── StreamingListenerHandlers.scala │ ├── StreamingQueryListenerHandlers.scala │ ├── UnifiedSparkListener.scala │ ├── UnifiedSparkListenerHandler.scala │ └── sink │ │ └── SparkListenerSink.scala │ └── metrics │ ├── Implicits.scala │ ├── MetricProxies.scala │ ├── MetricsSource.scala │ ├── MetricsSourceBuilders.scala │ ├── MetricsSystems.scala │ ├── RpcMessages.scala │ ├── RpcMetricsReceiver.scala │ └── UserMetricsSystem.scala └── test ├── resources └── log4j.properties └── scala └── org └── apache └── spark ├── listeners ├── ListenerSuite.scala ├── LogAnalyticsListenerSuite.scala └── LogAnalyticsStreamingListenerSuite.scala ├── metrics ├── CustomMetricsSystemSuite.scala ├── MetricProxiesSuite.scala ├── MetricsSourceBuildersSuite.scala ├── MetricsSystemsSuite.scala ├── ReceiverMetricSystemBuilderSuite.scala ├── RpcMetricsReceiverSuite.scala └── TestUtils.scala └── sql └── streaming └── LogAnalyticsStreamingQueryListenerSuite.scala /.gitattributes: -------------------------------------------------------------------------------- 1 
| ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | # Keep shell scripts with LF 7 | *.sh text eol=lf 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | build/ 21 | bld/ 22 | [Bb]in/ 23 | [Oo]bj/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | 28 | # MSTest test Results 29 | [Tt]est[Rr]esult*/ 30 | [Bb]uild[Ll]og.* 31 | 32 | # NUNIT 33 | *.VisualState.xml 34 | TestResult.xml 35 | 36 | # Build Results of an ATL Project 37 | [Dd]ebugPS/ 38 | [Rr]eleasePS/ 39 | dlldata.c 40 | 41 | # DNX 42 | project.lock.json 43 | artifacts/ 44 | 45 | *_i.c 46 | *_p.c 47 | *_i.h 48 | *.ilk 49 | *.meta 50 | *.obj 51 | *.pch 52 | *.pdb 53 | *.pgc 54 | *.pgd 55 | *.rsp 56 | *.sbr 57 | *.tlb 58 | *.tli 59 | *.tlh 60 | *.tmp 61 | *.tmp_proj 62 | *.log 63 | *.vspscc 64 | *.vssscc 65 | .builds 66 | *.pidb 67 | *.svclog 68 | *.scc 69 | 70 | # Chutzpah Test files 71 | _Chutzpah* 72 | 73 | # Visual C++ cache files 74 | ipch/ 75 | *.aps 76 | *.ncb 77 | *.opensdf 78 | *.sdf 79 | *.cachefile 80 | 81 | # Visual Studio profiler 82 | *.psess 83 | *.vsp 84 | *.vspx 85 | 86 | # TFS 2012 Local Workspace 87 | $tf/ 88 | 89 | # Guidance Automation Toolkit 90 | *.gpState 91 | 92 | # ReSharper is a .NET coding add-in 93 | _ReSharper*/ 94 | *.[Rr]e[Ss]harper 95 | *.DotSettings.user 96 | 97 | # JustCode is a .NET coding add-in 98 | .JustCode 99 | 100 | # TeamCity is a build add-in 101 | _TeamCity* 102 | 103 | # DotCover is a Code Coverage Tool 104 | *.dotCover 105 | 106 | # NCrunch 107 | _NCrunch_* 108 | .*crunch*.local.xml 109 | 110 | # MightyMoose 111 | *.mm.* 112 | AutoTest.Net/ 113 | 114 | # Web workbench (sass) 115 | .sass-cache/ 116 | 117 | # Installshield output folder 118 | [Ee]xpress/ 119 | 120 | # DocProject is a documentation generator add-in 121 | DocProject/buildhelp/ 122 | DocProject/Help/*.HxT 123 | DocProject/Help/*.HxC 124 | DocProject/Help/*.hhc 125 | DocProject/Help/*.hhk 126 | DocProject/Help/*.hhp 127 | DocProject/Help/Html2 128 | DocProject/Help/html 129 | 130 | # Click-Once directory 131 | publish/ 132 | 133 | # Publish Web Output 134 | *.[Pp]ublish.xml 135 | *.azurePubxml 136 | ## TODO: Comment the next line if you want to checkin your 137 | ## web deploy settings but do note that will include unencrypted 138 | ## passwords 139 | #*.pubxml 140 | 141 | *.publishproj 142 | 143 | # NuGet Packages 144 | *.nupkg 145 | # The packages folder can be ignored because of Package Restore 146 | **/packages/* 147 | # except build/, which is used as an MSBuild target. 
148 | !**/packages/build/ 149 | # Uncomment if necessary however generally it will be regenerated when needed 150 | #!**/packages/repositories.config 151 | 152 | # Windows Azure Build Output 153 | csx/ 154 | *.build.csdef 155 | 156 | # Windows Store app package directory 157 | AppPackages/ 158 | 159 | # Visual Studio cache files 160 | # files ending in .cache can be ignored 161 | *.[Cc]ache 162 | # but keep track of directories ending in .cache 163 | !*.[Cc]ache/ 164 | 165 | # Others 166 | ClientBin/ 167 | [Ss]tyle[Cc]op.* 168 | ~$* 169 | *~ 170 | *.dbmdl 171 | *.dbproj.schemaview 172 | *.pfx 173 | *.publishsettings 174 | node_modules/ 175 | orleans.codegen.cs 176 | 177 | # RIA/Silverlight projects 178 | Generated_Code/ 179 | 180 | # Backup & report files from converting an old project file 181 | # to a newer Visual Studio version. Backup files are not needed, 182 | # because we have git ;-) 183 | _UpgradeReport_Files/ 184 | Backup*/ 185 | UpgradeLog*.XML 186 | UpgradeLog*.htm 187 | 188 | # SQL Server files 189 | *.mdf 190 | *.ldf 191 | 192 | # Business Intelligence projects 193 | *.rdl.data 194 | *.bim.layout 195 | *.bim_*.settings 196 | 197 | # Microsoft Fakes 198 | FakesAssemblies/ 199 | 200 | # Node.js Tools for Visual Studio 201 | .ntvs_analysis.dat 202 | 203 | # Visual Studio 6 build log 204 | *.plg 205 | 206 | # Visual Studio 6 workspace options file 207 | *.opt 208 | 209 | # LightSwitch generated files 210 | GeneratedArtifacts/ 211 | _Pvt_Extensions/ 212 | ModelManifest.xml 213 | 214 | #Not to include .ds_store file 215 | .DS_Store 216 | 217 | #Not to include target files 218 | */target/** 219 | .idea 220 | project 221 | 222 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 223 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 224 | 225 | # User-specific stuff 226 | .idea/**/workspace.xml 227 | .idea/**/tasks.xml 228 | .idea/**/usage.statistics.xml 229 | .idea/**/dictionaries 230 | .idea/**/shelf 231 | 232 | # Generated files 233 | .idea/**/contentModel.xml 234 | 235 | # Sensitive or high-churn files 236 | .idea/**/dataSources/ 237 | .idea/**/dataSources.ids 238 | .idea/**/dataSources.local.xml 239 | .idea/**/sqlDataSources.xml 240 | .idea/**/dynamic.xml 241 | .idea/**/uiDesigner.xml 242 | .idea/**/dbnavigator.xml 243 | 244 | # Gradle 245 | .idea/**/gradle.xml 246 | .idea/**/libraries 247 | 248 | # Gradle and Maven with auto-import 249 | # When using Gradle or Maven with auto-import, you should exclude module files, 250 | # since they will be recreated, and may cause churn. Uncomment if using 251 | # auto-import. 
252 | # .idea/modules.xml 253 | # .idea/*.iml 254 | # .idea/modules 255 | 256 | # CMake 257 | cmake-build-*/ 258 | 259 | # Mongo Explorer plugin 260 | .idea/**/mongoSettings.xml 261 | 262 | # File-based project format 263 | *.iws 264 | 265 | # IntelliJ 266 | out/ 267 | 268 | # mpeltonen/sbt-idea plugin 269 | .idea_modules/ 270 | 271 | # JIRA plugin 272 | atlassian-ide-plugin.xml 273 | 274 | # Cursive Clojure plugin 275 | .idea/replstate.xml 276 | 277 | # Crashlytics plugin (for Android Studio and IntelliJ) 278 | com_crashlytics_export_strings.xml 279 | crashlytics.properties 280 | crashlytics-build.properties 281 | fabric.properties 282 | 283 | # Editor-based Rest Client 284 | .idea/httpRequests 285 | 286 | # Java 287 | # Compiled class file 288 | *.class 289 | 290 | # Log file 291 | *.log 292 | 293 | # BlueJ files 294 | *.ctxt 295 | 296 | # Mobile Tools for Java (J2ME) 297 | .mtj.tmp/ 298 | 299 | # Package Files # 300 | *.jar 301 | *.war 302 | *.nar 303 | *.ear 304 | *.zip 305 | *.tar.gz 306 | *.rar 307 | 308 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 309 | hs_err_pid* 310 | 311 | # Maven 312 | target/ 313 | pom.xml.tag 314 | pom.xml.releaseBackup 315 | pom.xml.versionsBackup 316 | pom.xml.next 317 | release.properties 318 | dependency-reduced-pom.xml 319 | buildNumber.properties 320 | .mvn/timing.properties 321 | .mvn/wrapper/maven-wrapper.jar 322 | 323 | # This is to ignore IntelliJ project files, since we use Maven 324 | *.iml 325 | 326 | # For .flattened-pom.xml files that are generated by maven-flatten plugin 327 | **/**/.flattened-pom.xml 328 | 329 | # eclipse 330 | .classpath 331 | .project 332 | .settings 333 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com. 4 | 5 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repositories using our CLA. 6 | 7 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 8 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Microsoft patterns & practices 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -eu 2 | 3 | # Fix broken DNS bits 4 | echo 'hosts: files dns' > /etc/nsswitch.conf 5 | echo "127.0.0.1 $(hostname)" >> /etc/hosts 6 | 7 | MAVEN_PROFILES=( "scala-2.12_spark-3.0.1" "scala-2.12_spark-3.1.2" "scala-2.12_spark-3.2.1" ) 8 | for MAVEN_PROFILE in "${MAVEN_PROFILES[@]}" 9 | do 10 | mvn -f /spark-monitoring/src/pom.xml install -P ${MAVEN_PROFILE} "$@" 11 | done 12 | -------------------------------------------------------------------------------- /docs/debugging.md: -------------------------------------------------------------------------------- 1 | # Debugging 2 | 3 | If you have any issues with the init script, you can debug them by opening 4 | the `Logging` tab in the `Advanced Options` section of your cluster 5 | configuration and adding a destination path for the cluster logs, such as: 6 | 7 | ```sh 8 | dbfs:/cluster-logs 9 | ``` 10 | 11 | Then, you can download the logs to your local system with: 12 | 13 | ```sh 14 | databricks fs cp -r dbfs:/cluster-logs ./cluster-logs 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/filtering.md: -------------------------------------------------------------------------------- 1 | # Filtering 2 | 3 | ## Introduction 4 | 5 | The Spark Monitoring Library can generate large volumes of logging and metrics data. This page describes the ways that you can limit the events that are forwarded to Azure Monitor. 6 | 7 | > Note: All regex filters below are implemented with java.lang.String.matches(...). This implementation effectively anchors the regular expression with ^...$, so the entire string must match the regex. If you need to allow for other values, you should include .* before and/or after your expression. 8 | 9 | > Note: The REGEX value(s) should be surrounded by double quotes as noted in the examples so that the characters in the regular expression(s) are not interpreted by the shell.
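
The snippet below is a minimal, standalone sketch (not part of the library) that illustrates the whole-string matching behavior described in the notes above; the object name and the sample event value are only illustrative.

```scala
object RegexFilterDemo {
  def main(args: Array[String]): Unit = {
    // A typical value of Event_s as it appears in SparkListenerEvent_CL.
    val event = "org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart"

    // String.matches anchors the pattern to the whole string, so a bare suffix does not match...
    println(event.matches("SparkListenerSQLExecutionStart"))   // false

    // ...but adding .* before the expression lets the rest of the string be covered.
    println(event.matches(".*SparkListenerSQLExecutionStart")) // true

    // An alternation like the LA_SPARKLISTENEREVENT_REGEX examples below also matches,
    // because the second alternative covers the entire event name.
    println(event.matches("SparkListenerJobEnd|org\\.apache\\.spark\\.sql\\.execution\\.ui\\..*")) // true
  }
}
```

The same anchoring applies to the event, metric, logger-name, and message filters described in the sections that follow.
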
10 | 11 | ## Limiting events in SparkListenerEvent_CL 12 | 13 | You can uncomment and edit the `LA_SPARKLISTENEREVENT_REGEX` environment variable that is included in [spark-monitoring.sh](../src/spark-listeners/scripts/spark-monitoring.sh) to limit the logging to only include events where Event_s matches the regex. 14 | 15 | The example below will only log events for `SparkListenerJobEnd`, `SparkListenerTaskEnd`, or events whose name starts with `org.apache.spark.sql.execution.ui.`. 16 | 17 | `export LA_SPARKLISTENEREVENT_REGEX="SparkListenerJobEnd|SparkListenerTaskEnd|org\.apache\.spark\.sql\.execution\.ui\..*"` 18 | 19 | ### Finding Event Names in Azure Monitor 20 | 21 | The following query will show counts by day for all events that have been logged to Azure Monitor: 22 | ```kusto 23 | SparkListenerEvent_CL 24 | | project TimeGenerated, Event_s 25 | | summarize Count=count() by tostring(Event_s), bin(TimeGenerated, 1d) 26 | ``` 27 | 28 | ### Events Noted in SparkListenerEvent_CL 29 | 30 | * SparkListenerExecutorAdded 31 | * SparkListenerBlockManagerAdded 32 | * org.apache.spark.sql.streaming.StreamingQueryListener$QueryStartedEvent 33 | * org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart 34 | * org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd 35 | * org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates 36 | * org.apache.spark.sql.streaming.StreamingQueryListener$QueryTerminatedEvent 37 | * SparkListenerJobStart 38 | * SparkListenerStageSubmitted 39 | * SparkListenerTaskStart 40 | * SparkListenerTaskEnd 41 | * org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent 42 | * SparkListenerStageCompleted 43 | * SparkListenerJobEnd 44 | 45 | ## Limiting Metrics in SparkMetric_CL 46 | 47 | You can uncomment and edit the `LA_SPARKMETRIC_REGEX` environment variable that is included in [spark-monitoring.sh](../src/spark-listeners/scripts/spark-monitoring.sh) to limit the logging to only include metrics where name_s matches the regex. 48 | 49 | The example below will only log metrics where the name begins with `app` and ends in `.jvmCpuTime` or `.heap.max`. 50 | 51 | `export LA_SPARKMETRIC_REGEX="app.*\.jvmCpuTime|app.*\.heap.max"` 52 | 53 | ### Finding Metric Names in Azure Monitor 54 | 55 | Query to find all metric prefixes and counts by day: 56 | 57 | ```kusto 58 | SparkMetric_CL 59 | | project nameprefix=split(tostring(name_s),".")[0], TimeGenerated 60 | | summarize Count=count() by tostring(nameprefix), bin(TimeGenerated, 1d) 61 | ``` 62 | If you want to get more granular, the full names can be seen with the following query. Note: This will include a large number of metrics, including metrics for specific Spark application runs. 63 | 64 | ```kusto 65 | SparkMetric_CL 66 | | project name_s, TimeGenerated 67 | | summarize Count=count() by tostring(name_s), bin(TimeGenerated, 1d) 68 | ``` 69 | 70 | ### Metric Name Prefixes Noted in SparkMetric_CL 71 | 72 | * jvm 73 | * worker 74 | * Databricks 75 | * HiveExternalCatalog 76 | * CodeGenerator 77 | * application 78 | * master 79 | * app-20201014133042-0000 - Note: This prefix includes all metrics for a specific Spark application run. 80 | * shuffleService 81 | * SparkStatusTracker 82 | 83 | ## Limiting Logs in SparkLoggingEvent_CL (Basic) 84 | 85 | The logs that propagate to SparkLoggingEvent_CL do so through a log4j appender. This can be configured by altering the spark-monitoring.sh script that is responsible for writing the log4j.properties file.
The script at [spark-monitoring.sh](../src/spark-listeners/scripts/spark-monitoring.sh) can be modified to set the threshold for events to be forwarded. A commented example is included in the script. 86 | 87 | ```bash 88 | # Commented line below shows how to set the threshold for logging to only capture events that are 89 | # level ERROR or more severe. 90 | # log4j.appender.logAnalyticsAppender.Threshold=ERROR 91 | ``` 92 | 93 | ## Limiting Logs in SparkLoggingEvent_CL (Advanced) 94 | 95 | You can uncomment and edit the `LA_SPARKLOGGINGEVENT_NAME_REGEX` environment variable that is included in [spark-monitoring.sh](../src/spark-listeners/scripts/spark-monitoring.sh) to limit the logging to only include events where logger_name_s matches the regex. 96 | 97 | The example below will only log events from the logger `com.microsoft.pnp.samplejob.StreamingQueryListenerSampleJob` or where the logger name starts with `org.apache.spark.util.Utils`. 98 | 99 | `export LA_SPARKLOGGINGEVENT_NAME_REGEX="com\.microsoft\.pnp\.samplejob\.StreamingQueryListenerSampleJob|org\.apache\.spark\.util\.Utils.*"` 100 | 101 | You can uncomment and edit the `LA_SPARKLOGGINGEVENT_MESSAGE_REGEX` environment variable that is included in [spark-monitoring.sh](../src/spark-listeners/scripts/spark-monitoring.sh) to limit the logging to only include events where the message matches the regex. 102 | 103 | The example below will only log events where the message ends with the string `StreamingQueryListenerSampleJob` or begins with the string `FS_CONF_COMPAT`. 104 | 105 | `export LA_SPARKLOGGINGEVENT_MESSAGE_REGEX=".*StreamingQueryListenerSampleJob|FS_CONF_COMPAT.*"` 106 | 107 | ## Performance Considerations 108 | 109 | Be mindful when using complicated regular expressions, because they have to be evaluated for every logged event or metric. Simple whole-string matches should be relatively performant, as should `pattern.*` expressions that only need to match the beginning of a string. 110 | 111 | > Warning: Filtering on the logging event message with `LA_SPARKLOGGINGEVENT_MESSAGE_REGEX` should be considered experimental. Some components generate very large message strings, and processing the `.matches` operation on these strings could place a significant burden on the cluster nodes. 112 | -------------------------------------------------------------------------------- /docs/keyvault-backed-secrets.md: -------------------------------------------------------------------------------- 1 | # Setting Up an Azure Key Vault-Backed Secret Scope 2 | 3 | To set up your Databricks cluster to reference Key Vault-backed secrets, 4 | follow the steps below: 5 | 6 | 1. Set up an [Azure Key Vault-backed secret scope](https://learn.microsoft.com/azure/databricks/security/secrets/secret-scopes#--create-an-azure-key-vault-backed-secret-scope). 7 | 1. Reference your Key Vault-backed keys in your cluster's environment variables: 8 | 9 | ```sh 10 | LOG_ANALYTICS_WORKSPACE_ID={{secrets/<secret-scope-name>/<secret-name>}} 11 | LOG_ANALYTICS_WORKSPACE_KEY={{secrets/<secret-scope-name>/<secret-name>}} 12 | ``` 13 | 14 | 1. Edit the spark-monitoring.sh initialization script to reference these values:
15 | 16 | ```sh 17 | LOG_ANALYTICS_WORKSPACE_ID=$LOG_ANALYTICS_WORKSPACE_ID 18 | LOG_ANALYTICS_WORKSPACE_KEY=$LOG_ANALYTICS_WORKSPACE_KEY 19 | ``` 20 | -------------------------------------------------------------------------------- /perftools/dashboards/grafana/DashGen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sed "s/YOUR_WORKSPACEID/${WORKSPACE}/g" "SparkMetricsDashboardTemplate.json" | sed "s/SparkListenerEvent_CL/${LOGTYPE}/g" > SparkMetricsDash.json 4 | -------------------------------------------------------------------------------- /perftools/deployment/grafana/AzureDataSource.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | sudo /opt/bitnami/grafana/bin/grafana-cli --pluginsDir /opt/bitnami/grafana/data/plugins/ plugins install grafana-azure-monitor-datasource 3 | sudo /opt/bitnami/ctlscript.sh restart grafana -------------------------------------------------------------------------------- /perftools/deployment/images/AddSource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mspnp/spark-monitoring/fcee4f76dc2e307280a34191a501448f9ff9a482/perftools/deployment/images/AddSource.png -------------------------------------------------------------------------------- /perftools/deployment/images/DataSource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mspnp/spark-monitoring/fcee4f76dc2e307280a34191a501448f9ff9a482/perftools/deployment/images/DataSource.png -------------------------------------------------------------------------------- /perftools/deployment/images/Import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mspnp/spark-monitoring/fcee4f76dc2e307280a34191a501448f9ff9a482/perftools/deployment/images/Import.png -------------------------------------------------------------------------------- /perftools/deployment/images/UserChange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mspnp/spark-monitoring/fcee4f76dc2e307280a34191a501448f9ff9a482/perftools/deployment/images/UserChange.png -------------------------------------------------------------------------------- /perftools/deployment/readme.md: -------------------------------------------------------------------------------- 1 | # Spark Performance Monitoring With Grafana and Log Analytics 2 | 3 | ## Deployment of Log Analytics With Spark Metrics 4 | 5 | For more details on how to use Grafana to monitor Spark performance, visit: [Use dashboards to visualize Azure Databricks metrics](https://learn.microsoft.com/azure/architecture/databricks-monitoring/dashboards). 6 | 7 | ### Step 1: Deploy Log Analytics With Spark Metrics 8 | 9 | Open an Azure bash cloud shell or a bash command shell and execute the Azure CLI commands below, replacing yourResourceGroupName and yourLocation.
10 | 11 | ``` 12 | export RGNAME=yourResourceGroupName 13 | # location example "East Us" 14 | export RGLOCATION=yourLocation 15 | 16 | az group create --name "${RGNAME}" --location "${RGLOCATION}" 17 | 18 | az group deployment create --resource-group $RGNAME \ 19 | --template-uri https://raw.githubusercontent.com/mspnp/spark-monitoring/master/perftools/deployment/loganalytics/logAnalyticsDeploy.json \ 20 | --parameters location="${RGLOCATION}" \ 21 | dataRetention=30 serviceTier=PerGB2018 22 | ``` 23 | If you run the command *az group deployment create --resource-group $RGNAME --template-uri https://raw.githubusercontent.com/mspnp/spark-monitoring/master/perftools/deployment/loganalytics/logAnalyticsDeploy.json* without the remaining parameters, it will prompt for all required parameters. 24 | 25 | 26 | 27 | ## Deployment of Grafana 28 | 29 | ### Step 1: Deploy Certified Grafana From Azure 30 | The Grafana deployment uses a Bitnami-certified image. You can find more information about Bitnami applications on Azure at https://docs.bitnami.com/azure/get-started-marketplace/ 31 | 32 | 1. Open an Azure bash cloud shell or a bash command shell and execute the Azure CLI commands below. Note that the VM password must be a strong password with a minimum of 12 characters. Replace yourResourceGroupName and yourLocation. 33 | 34 | ``` 35 | read -s VMADMINPASSWORD 36 | ``` 37 | 38 | ``` 39 | export RGNAME=yourResourceGroupName 40 | # location example "South Central US" 41 | export RGLOCATION=yourLocation 42 | 43 | az group create --name "${RGNAME}" --location "${RGLOCATION}" 44 | 45 | az group deployment create --resource-group $RGNAME \ 46 | --template-uri https://raw.githubusercontent.com/mspnp/spark-monitoring/master/perftools/deployment/grafana/grafanaDeploy.json \ 47 | --parameters adminPass=$VMADMINPASSWORD \ 48 | dataSource=https://raw.githubusercontent.com/mspnp/spark-monitoring/master/perftools/deployment/grafana/AzureDataSource.sh 49 | ``` 50 | 51 | If you run the command *az group deployment create --resource-group $RGNAME --template-uri https://raw.githubusercontent.com/mspnp/spark-monitoring/master/perftools/deployment/grafana/grafanaDeploy.json* without the remaining parameters, it will prompt for all required parameters. 52 | 53 | After the ARM template deploys the Bitnami image of Grafana, a temporary Grafana password for the admin user is created. To find those credentials, follow the instructions below. 54 | 55 | 2. You can either click on the Grafana VM resource, click on **Boot diagnostics**, then click on **serial log** and search for the string **Setting Bitnami application password to**, or you can open an SSH connection to the Grafana VM and enter the command **cat ./bitnami_credentials**. You can find instructions at https://docs.bitnami.com/azure/faq/get-started/find-credentials/. 56 | 57 | ### Step 2: Change Grafana Administrator Password 58 | 1. Open a browser at http://grafanapublicipaddress:3000 and log in as admin with the password from the previous step 59 | 2. Hover over the settings icon on the left, then click on **Server Admin** 60 | ![change user Logo](./images/UserChange.png) 61 | 3. Click on admin, enter a new password in the **Change Password** text box, then click **Update** 62 | 63 | ### Step 3: Create Service Principal for Azure Monitor Data Source Using the Azure CLI 64 | 65 | 1. Enter the command below to log in to Azure 66 | 67 | ``` 68 | az login 69 | ``` 70 | 2. Make sure you are on the right subscription.
You can set the default subscription with the commands below: 71 | ``` 72 | az account show 73 | az account set --subscription yourSubscriptionId 74 | ``` 75 | 3. Create the service principal by running the command below. 76 | 77 | 78 | ``` 79 | az ad sp create-for-rbac --name http://NameOfSp --role "Log Analytics Reader" 80 | ``` 81 | 4. Take note of the appId, password, and tenant values 82 | 83 | ``` 84 | { 85 | "appId": "applicationClientId", 86 | "displayName": "applicationName", 87 | "name": "http://applicationName", 88 | "password": "applicationSecret", 89 | "tenant": "TenantId" 90 | } 91 | ``` 92 | 93 | ### Step 4: Create Azure Monitor Datasource in Grafana 94 | 95 | 96 | 1. In Grafana, hover over the settings icon on the left, click on **Data Sources**, then **Add data Source**, and select **Azure Monitor**. 97 | ![change user Logo](./images/AddSource.png) 98 | 99 | 2. Enter **ALA** for the name, then enter the Subscription Id, Tenant Id (tenant from the previous step), Client Id (appId from the previous step), and Client Secret (password from the previous step). Click on the check box **Same Details as Azure Monitor api**, then click on **Save & test** 100 | ![change user Logo](./images/DataSource.png) 101 | 102 | ### Step 5: Import Spark Metrics Dashboard 103 | 104 | 1. Open a bash command prompt, move to the directory containing the SparkMetricsDashboardTemplate.json file, and execute the commands below, replacing YOUR_WORKSPACEID with the workspace id of your Log Analytics workspace and SparkListenerEvent_CL with the log type if a non-default log type is used for Spark monitoring. 105 | The workspace id for Log Analytics can be found on the **advanced settings** blade of the Log Analytics resource. 106 | 107 | 108 | ``` 109 | export WORKSPACE=YOUR_WORKSPACEID 110 | export LOGTYPE=SparkListenerEvent_CL 111 | 112 | sed "s/YOUR_WORKSPACEID/${WORKSPACE}/g" "SparkMetricsDashboardTemplate.json" | sed "s/SparkListenerEvent_CL/${LOGTYPE}/g" > SparkMetricsDash.json 113 | ``` 114 | 115 | or execute the script below from the same directory: 116 | 117 | ``` 118 | export WORKSPACE=YOUR_WORKSPACEID 119 | export LOGTYPE=SparkListenerEvent_CL 120 | 121 | sh DashGen.sh 122 | ``` 123 | 124 | 2. In Grafana, hover over the settings icon on the left, click on **Manage** then **Import**, browse to the directory /spark-monitoring/perftools/dashboards/grafana, and open SparkMetricsDash.json.
Then select your azure monitor data source that was create before 125 | ![change user Logo](./images/Import.png) 126 | -------------------------------------------------------------------------------- /sample/spark-sample-job/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | com.microsoft.pnp 6 | 1.0.0 7 | spark-monitoring-sample 8 | 4.0.0 9 | jar 10 | 11 | 12 | 13 | scala-2.12_spark-3.0.1 14 | 15 | 9.4.44.v20210927 16 | 1.7.30 17 | 3.0.1 18 | 2.12.14 19 | 2.12 20 | 21 | 22 | 23 | scala-2.12_spark-3.1.2 24 | 25 | 9.4.44.v20210927 26 | 1.7.30 27 | 3.1.2 28 | 2.12.14 29 | 2.12 30 | 31 | 32 | 33 | scala-2.12_spark-3.2.1 34 | 35 | true 36 | 37 | 38 | 9.4.44.v20210927 39 | 1.7.30 40 | 3.2.1 41 | 2.12.14 42 | 2.12 43 | 44 | 45 | 46 | 47 | 48 | UTF-8 49 | UTF-8 50 | 1.8 51 | ${java.version} 52 | ${java.version} 53 | 3.6.1 54 | 1.0.0 55 | 1.0.0 56 | 57 | 58 | 59 | 60 | org.scala-lang 61 | scala-library 62 | ${scala.version} 63 | provided 64 | 65 | 66 | org.apache.spark 67 | spark-core_${scala.compat.version} 68 | ${spark.version} 69 | provided 70 | 71 | 72 | org.apache.spark 73 | spark-sql_${scala.compat.version} 74 | ${spark.version} 75 | provided 76 | 77 | 78 | org.apache.spark 79 | spark-streaming_${scala.compat.version} 80 | ${spark.version} 81 | provided 82 | 83 | 84 | org.slf4j 85 | slf4j-api 86 | ${slf4j.version} 87 | provided 88 | 89 | 90 | com.microsoft.pnp 91 | spark-listeners 92 | ${spark.listeners.version} 93 | provided 94 | 95 | 96 | 97 | 98 | 99 | org.apache.maven.plugins 100 | maven-jar-plugin 101 | 3.1.2 102 | 103 | 104 | 105 | net.alchim31.maven 106 | scala-maven-plugin 107 | 3.4.2 108 | 109 | all 110 | 111 | 112 | 113 | 114 | add-source 115 | compile 116 | testCompile 117 | 118 | 119 | ${scala.version} 120 | ${scala.compat.version} 121 | 122 | -target:jvm-${maven.compiler.target} 123 | -feature 124 | -dependencyfile 125 | ${project.build.directory}/.scala_dependencies 126 | 127 | 128 | -source 129 | ${maven.compiler.source} 130 | -target 131 | ${maven.compiler.target} 132 | 133 | 134 | 135 | 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-compiler-plugin 140 | 3.8.0 141 | 142 | 143 | default-compile 144 | none 145 | 146 | 147 | 148 | ${maven.compiler.source} 149 | ${maven.compiler.target} 150 | 151 | -Xlint 152 | 153 | 154 | 155 | 156 | org.apache.maven.plugins 157 | maven-clean-plugin 158 | 3.1.0 159 | 160 | 161 | auto-clean 162 | initialize 163 | 164 | clean 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /sample/spark-sample-job/src/main/resources/com/microsoft/pnp/samplejob/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.appender.A1=com.microsoft.pnp.logging.loganalytics.LogAnalyticsAppender 2 | log4j.appender.A1.layout=com.microsoft.pnp.logging.JSONLayout 3 | log4j.appender.A1.layout.LocationInfo=false 4 | log4j.additivity.com.microsoft.pnp.samplejob=false 5 | log4j.logger.com.microsoft.pnp.samplejob=INFO, A1 6 | -------------------------------------------------------------------------------- /sample/spark-sample-job/src/main/scala/com/microsoft/pnp/samplejob/StreamingQueryListenerSampleJob.scala: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.samplejob 2 | 3 | import com.microsoft.pnp.logging.Log4jConfiguration 4 | import com.microsoft.pnp.util.TryWith 5 | import org.apache.spark.SparkEnv 6 | import 
org.apache.spark.internal.Logging 7 | import org.apache.spark.metrics.UserMetricsSystems 8 | import org.apache.spark.sql.SparkSession 9 | import org.apache.spark.sql.functions.window 10 | import org.apache.spark.sql.types.{StringType, StructType, TimestampType} 11 | 12 | object StreamingQueryListenerSampleJob extends Logging { 13 | 14 | private final val METRICS_NAMESPACE = "streamingquerylistenersamplejob" 15 | private final val COUNTER_NAME = "rowcounter" 16 | 17 | def main(args: Array[String]): Unit = { 18 | 19 | // Configure our logging 20 | TryWith(getClass.getResourceAsStream("/com/microsoft/pnp/samplejob/log4j.properties")) { 21 | stream => { 22 | Log4jConfiguration.configure(stream) 23 | } 24 | } 25 | 26 | logTrace("Trace message from StreamingQueryListenerSampleJob") 27 | logDebug("Debug message from StreamingQueryListenerSampleJob") 28 | logInfo("Info message from StreamingQueryListenerSampleJob") 29 | logWarning("Warning message from StreamingQueryListenerSampleJob") 30 | logError("Error message from StreamingQueryListenerSampleJob") 31 | 32 | val spark = SparkSession 33 | .builder 34 | .getOrCreate 35 | 36 | import spark.implicits._ 37 | 38 | // this path has sample files provided by databricks for trying out purpose 39 | val inputPath = "/databricks-datasets/structured-streaming/events/" 40 | 41 | val jsonSchema = new StructType().add("time", TimestampType).add("action", StringType) 42 | 43 | val driverMetricsSystem = UserMetricsSystems 44 | .getMetricSystem(METRICS_NAMESPACE, builder => { 45 | builder.registerCounter(COUNTER_NAME) 46 | }) 47 | 48 | driverMetricsSystem.counter(COUNTER_NAME).inc 49 | 50 | // Similar to definition of staticInputDF above, just using `readStream` instead of `read` 51 | val streamingInputDF = 52 | spark 53 | .readStream // `readStream` instead of `read` for creating streaming DataFrame 54 | .schema(jsonSchema) // Set the schema of the JSON data 55 | .option("maxFilesPerTrigger", 1) // Treat a sequence of files as a stream by picking one file at a time 56 | .json(inputPath) 57 | 58 | driverMetricsSystem.counter(COUNTER_NAME).inc(5) 59 | 60 | val streamingCountsDF = 61 | streamingInputDF 62 | .groupBy($"action", window($"time", "1 hour")) 63 | .count() 64 | 65 | // Is this DF actually a streaming DF? 
66 | streamingCountsDF.isStreaming 67 | 68 | driverMetricsSystem.counter(COUNTER_NAME).inc(10) 69 | 70 | val query = 71 | streamingCountsDF 72 | .writeStream 73 | .format("memory") // memory = store in-memory table (for testing only in Spark 2.0) 74 | .queryName("counts") // counts = name of the in-memory table 75 | .outputMode("complete") // complete = all the counts should be in the table 76 | .start() 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | spark-monitoring 5 | com.microsoft.pnp 6 | 1.0.0 7 | ../pom.xml 8 | 9 | 4.0.0 10 | spark-listeners-loganalytics 11 | ${project.artifactId} 12 | 13 | 14 | com.microsoft.pnp 15 | spark-listeners 16 | ${spark.listeners.version} 17 | compile 18 | 19 | 20 | org.apache.httpcomponents 21 | httpclient 22 | 23 | 24 | org.scala-lang 25 | scala-reflect 26 | ${scala.version} 27 | 28 | 29 | 30 | 31 | org.mockito 32 | mockito-core 33 | 34 | 35 | org.scalatest 36 | scalatest_${scala.compat.version} 37 | 38 | 39 | junit 40 | junit 41 | 42 | 43 | com.github.stefanbirkner 44 | system-rules 45 | 1.19.0 46 | test 47 | 48 | 49 | org.scala-lang 50 | scala-compiler 51 | ${scala.version} 52 | test 53 | 54 | 55 | 56 | ${project.artifactId}_${spark.version}_${scala.compat.version}-${project.version} 57 | 58 | 59 | org.apache.maven.plugins 60 | maven-clean-plugin 61 | 62 | 63 | net.alchim31.maven 64 | scala-maven-plugin 65 | 66 | 67 | org.apache.maven.plugins 68 | maven-compiler-plugin 69 | 70 | 71 | org.apache.maven.plugins 72 | maven-surefire-plugin 73 | 74 | 75 | org.scalatest 76 | scalatest-maven-plugin 77 | 78 | 79 | org.codehaus.mojo 80 | flatten-maven-plugin 81 | 82 | 83 | org.apache.maven.plugins 84 | maven-source-plugin 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/java/com/microsoft/pnp/LogAnalyticsEnvironment.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp; 2 | 3 | public class LogAnalyticsEnvironment { 4 | public static final String LOG_ANALYTICS_WORKSPACE_ID = "LOG_ANALYTICS_WORKSPACE_ID"; 5 | public static final String LOG_ANALYTICS_WORKSPACE_KEY = "LOG_ANALYTICS_WORKSPACE_KEY"; 6 | 7 | public static String getWorkspaceId() { 8 | return System.getenv(LOG_ANALYTICS_WORKSPACE_ID); 9 | } 10 | 11 | public static String getWorkspaceKey() { 12 | return System.getenv(LOG_ANALYTICS_WORKSPACE_KEY); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/java/com/microsoft/pnp/client/loganalytics/LogAnalyticsSendBuffer.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client.loganalytics; 2 | 3 | import com.microsoft.pnp.client.GenericSendBuffer; 4 | import com.microsoft.pnp.client.GenericSendBufferTask; 5 | 6 | public class LogAnalyticsSendBuffer extends GenericSendBuffer { 7 | // We will leave this at 25MB, since the Log Analytics limit is 30MB. 
8 | public static final int DEFAULT_MAX_MESSAGE_SIZE_IN_BYTES = 1024 * 1024 * 25; 9 | public static final int DEFAULT_BATCH_TIME_IN_MILLISECONDS = 5000; 10 | 11 | private final LogAnalyticsClient client; 12 | private final String logType; 13 | private final String timeGeneratedField; 14 | 15 | public LogAnalyticsSendBuffer( 16 | LogAnalyticsClient client, 17 | String logType, 18 | String timeGenerateField) { 19 | super(); 20 | this.client = client; 21 | this.logType = logType; 22 | this.timeGeneratedField = timeGenerateField; 23 | } 24 | 25 | @Override 26 | protected GenericSendBufferTask createSendBufferTask() { 27 | return new LogAnalyticsSendBufferTask( 28 | this.client, 29 | this.logType, 30 | this.timeGeneratedField, 31 | DEFAULT_MAX_MESSAGE_SIZE_IN_BYTES, 32 | DEFAULT_BATCH_TIME_IN_MILLISECONDS 33 | ); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/java/com/microsoft/pnp/client/loganalytics/LogAnalyticsSendBufferClient.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client.loganalytics; 2 | 3 | import com.microsoft.pnp.client.GenericSendBuffer; 4 | 5 | import java.util.LinkedHashMap; 6 | 7 | public class LogAnalyticsSendBufferClient implements AutoCloseable { 8 | private final LinkedHashMap buffers = new LinkedHashMap<>(); 9 | 10 | private final LogAnalyticsClient client; 11 | private final String logType; 12 | private final int maxMessageSizeInBytes; 13 | private final int batchTimeInMilliseconds; 14 | 15 | // We will leave this at 25MB, since the Log Analytics limit is 30MB. 16 | public static final int DEFAULT_MAX_MESSAGE_SIZE_IN_BYTES = 1024 * 1024 * 25; 17 | public static final int DEFAULT_BATCH_TIME_IN_MILLISECONDS = 5000; 18 | 19 | public LogAnalyticsSendBufferClient(LogAnalyticsClient client, String messageType) { 20 | this( 21 | client, 22 | messageType, 23 | DEFAULT_MAX_MESSAGE_SIZE_IN_BYTES, 24 | DEFAULT_BATCH_TIME_IN_MILLISECONDS 25 | ); 26 | } 27 | 28 | public LogAnalyticsSendBufferClient(LogAnalyticsClient client, 29 | String logType, 30 | int maxMessageSizeInBytes, 31 | int batchTimeInMilliseconds) { 32 | this.client = client; 33 | this.logType = logType; 34 | this.maxMessageSizeInBytes = maxMessageSizeInBytes; 35 | this.batchTimeInMilliseconds = batchTimeInMilliseconds; 36 | } 37 | 38 | public void sendMessage(String message, String timeGeneratedField) { 39 | // Get buffer for bucketing, in this case, time-generated field 40 | // since we limit the client to a specific message type. 41 | // This is because different event types can have differing time fields (i.e. 
Spark) 42 | LogAnalyticsSendBuffer buffer = this.getBuffer(timeGeneratedField); 43 | buffer.send(message); 44 | } 45 | 46 | private synchronized LogAnalyticsSendBuffer getBuffer(String timeGeneratedField) { 47 | LogAnalyticsSendBuffer buffer = this.buffers.get(timeGeneratedField); 48 | if (null == buffer) { 49 | buffer = new LogAnalyticsSendBuffer( 50 | this.client, 51 | this.logType, 52 | timeGeneratedField); 53 | this.buffers.put(timeGeneratedField, buffer); 54 | } 55 | 56 | return buffer; 57 | } 58 | 59 | @Override 60 | public void close() { 61 | this.buffers.values().forEach(GenericSendBuffer::close); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/java/com/microsoft/pnp/client/loganalytics/LogAnalyticsSendBufferTask.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client.loganalytics; 2 | 3 | import com.microsoft.pnp.client.GenericSendBufferTask; 4 | 5 | import java.time.Instant; 6 | import java.util.concurrent.TimeUnit; 7 | import java.util.List; 8 | 9 | public class LogAnalyticsSendBufferTask extends GenericSendBufferTask { 10 | 11 | private final LogAnalyticsClient client; 12 | private final String logType; 13 | private final String timeGeneratedField; 14 | 15 | public LogAnalyticsSendBufferTask(LogAnalyticsClient client, 16 | String logType, 17 | String timeGenerateField, 18 | int maxBatchSizeBytes, 19 | int batchTimeInMilliseconds 20 | ) { 21 | super(maxBatchSizeBytes, batchTimeInMilliseconds); 22 | this.client = client; 23 | this.logType = logType; 24 | this.timeGeneratedField = timeGenerateField; 25 | } 26 | 27 | @Override 28 | protected int calculateDataSize(String data) { 29 | return data.getBytes().length; 30 | } 31 | 32 | @Override 33 | protected void process(List datas) { 34 | if (datas.isEmpty()) { 35 | return; 36 | } 37 | 38 | // Build up Log Analytics "batch" and send. 39 | // How should we handle failures? I think there is retry built into the HttpClient, 40 | // but what if that fails as well? I suspect we should just log it and move on. 41 | 42 | // We are going to assume that the events are properly formatted 43 | // JSON strings. So for now, we are going to just wrap brackets around 44 | // them. 
45 | StringBuffer sb = new StringBuffer("["); 46 | for (String data : datas) { 47 | sb.append(data).append(","); 48 | } 49 | sb.deleteCharAt(sb.lastIndexOf(",")).append("]"); 50 | try { 51 | int retry=8; 52 | int backoff=1; 53 | while(!client.ready() && retry-- > 0){ 54 | System.err.println("Log Analytics client not ready, waiting: " + backoff + " seconds at time = " + Instant.now()); 55 | TimeUnit.SECONDS.sleep(backoff); 56 | backoff*=2; 57 | } 58 | client.send(sb.toString(), logType, timeGeneratedField); 59 | } catch (Exception ioe) { 60 | // We can't do much here since we might be inside a logger 61 | ioe.printStackTrace(); 62 | Throwable inner = ioe.getCause(); 63 | while(inner != null) { 64 | System.err.println("Details of nested cause:"); 65 | inner.printStackTrace(); 66 | inner=inner.getCause(); 67 | } 68 | System.err.println("Buffer causing error on send(body, logType, timestampFieldName):"); 69 | System.err.println("clock time = " + Instant.now()); 70 | System.err.println("logType = " + logType); 71 | System.err.println("timestampFieldName = " + timeGeneratedField); 72 | if("TRUE".equals(System.getenv().getOrDefault("LA_LOGFAILEDBUFFERSEND", ""))) { 73 | System.err.println("body ="); 74 | System.err.println(sb.toString()); 75 | } 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/java/com/microsoft/pnp/logging/loganalytics/LogAnalyticsAppender.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging.loganalytics; 2 | 3 | import com.microsoft.pnp.LogAnalyticsEnvironment; 4 | import com.microsoft.pnp.client.loganalytics.LogAnalyticsClient; 5 | import com.microsoft.pnp.client.loganalytics.LogAnalyticsSendBufferClient; 6 | import com.microsoft.pnp.logging.JSONLayout; 7 | import org.apache.log4j.AppenderSkeleton; 8 | import org.apache.log4j.Layout; 9 | import org.apache.log4j.helpers.LogLog; 10 | import org.apache.log4j.spi.Filter; 11 | import org.apache.log4j.spi.LoggingEvent; 12 | 13 | import static com.microsoft.pnp.logging.JSONLayout.TIMESTAMP_FIELD_NAME; 14 | 15 | public class LogAnalyticsAppender extends AppenderSkeleton { 16 | private static final String LA_SPARKLOGGINGEVENT_NAME_REGEX=System.getenv().getOrDefault("LA_SPARKLOGGINGEVENT_NAME_REGEX", ""); 17 | private static final String LA_SPARKLOGGINGEVENT_MESSAGE_REGEX=System.getenv().getOrDefault("LA_SPARKLOGGINGEVENT_MESSAGE_REGEX", ""); 18 | 19 | private static final Filter DEFAULT_FILTER = new Filter() { 20 | @Override 21 | public int decide(LoggingEvent loggingEvent) { 22 | String loggerName=loggingEvent.getLoggerName(); 23 | // ignore logs from org.apache.http to avoid infinite loop on logger error 24 | if (loggerName.startsWith("org.apache.http")) { 25 | return Filter.DENY; 26 | } 27 | // if LA_SPARKLOGGINGEVENT_NAME_REGEX is not empty, deny logs where the name doesn't match the regex 28 | if (!LA_SPARKLOGGINGEVENT_NAME_REGEX.isEmpty() && !loggerName.matches(LA_SPARKLOGGINGEVENT_NAME_REGEX)) { 29 | return Filter.DENY; 30 | } 31 | // if LA_SPARKLOGGINGEVENT_MESSAGE_REGEX is not empty, deny logs where the message doesn't match the regex 32 | if (!LA_SPARKLOGGINGEVENT_MESSAGE_REGEX.isEmpty() && !loggingEvent.getRenderedMessage().matches(LA_SPARKLOGGINGEVENT_MESSAGE_REGEX)) { 33 | return Filter.DENY; 34 | } 35 | 36 | return Filter.NEUTRAL; 37 | } 38 | }; 39 | 40 | private static final String DEFAULT_LOG_TYPE = "SparkLoggingEvent"; 41 | // We will default to
environment so the properties file can override 42 | private String workspaceId = LogAnalyticsEnvironment.getWorkspaceId(); 43 | private String secret = LogAnalyticsEnvironment.getWorkspaceKey(); 44 | private String logType = DEFAULT_LOG_TYPE; 45 | private LogAnalyticsSendBufferClient client; 46 | 47 | public LogAnalyticsAppender() { 48 | this.addFilter(DEFAULT_FILTER); 49 | // Add a default layout so we can simplify config 50 | this.setLayout(new JSONLayout()); 51 | } 52 | 53 | @Override 54 | public void activateOptions() { 55 | this.client = new LogAnalyticsSendBufferClient( 56 | new LogAnalyticsClient(this.workspaceId, this.secret), 57 | this.logType 58 | ); 59 | } 60 | 61 | @Override 62 | protected void append(LoggingEvent loggingEvent) { 63 | try { 64 | String json = this.getLayout().format(loggingEvent); 65 | this.client.sendMessage(json, TIMESTAMP_FIELD_NAME); 66 | } catch (Exception ex) { 67 | LogLog.error("Error sending logging event to Log Analytics", ex); 68 | } 69 | } 70 | 71 | @Override 72 | public boolean requiresLayout() { 73 | // We will set this to false so we can simplify our config 74 | // If no layout is provided, we will get the default. 75 | return false; 76 | } 77 | 78 | @Override 79 | public void close() { 80 | this.client.close(); 81 | } 82 | 83 | @Override 84 | public void setLayout(Layout layout) { 85 | // This will allow us to configure the layout from properties to add custom JSON stuff. 86 | if (!(layout instanceof JSONLayout)) { 87 | throw new UnsupportedOperationException("layout must be an instance of JSONLayout"); 88 | } 89 | 90 | super.setLayout(layout); 91 | } 92 | 93 | @Override 94 | public void clearFilters() { 95 | super.clearFilters(); 96 | // We need to make sure to add the filter back so we don't get stuck in a loop 97 | this.addFilter(DEFAULT_FILTER); 98 | } 99 | 100 | public String getWorkspaceId() { 101 | return this.workspaceId; 102 | } 103 | 104 | public void setWorkspaceId(String workspaceId) { 105 | this.workspaceId = workspaceId; 106 | } 107 | 108 | public String getSecret() { 109 | return this.secret; 110 | } 111 | 112 | public void setSecret(String secret) { 113 | this.secret = secret; 114 | } 115 | 116 | public String getLogType() { 117 | return this.logType; 118 | } 119 | 120 | public void setLogType(String logType) { 121 | this.logType = logType; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/scala/org/apache/spark/com/microsoft/pnp/LogAnalyticsConfiguration.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.com.microsoft.pnp 2 | 3 | import org.apache.spark.internal.Logging 4 | 5 | private[spark] trait LogAnalyticsConfiguration extends Logging { 6 | protected def getWorkspaceId: Option[String] 7 | 8 | protected def getSecret: Option[String] 9 | 10 | protected def getLogType: String 11 | 12 | protected def getTimestampFieldName: Option[String] 13 | 14 | 15 | val workspaceId: String = { 16 | val value = getWorkspaceId 17 | require(value.isDefined, "A Log Analytics Workspace ID is required") 18 | logInfo(s"Setting workspaceId to ${value.get}") 19 | value.get 20 | 21 | } 22 | 23 | val secret: String = { 24 | val value = getSecret 25 | require(value.isDefined, "A Log Analytics Workspace Key is required") 26 | value.get 27 | } 28 | 29 | 30 | val logType: String = { 31 | val value = getLogType 32 | logInfo(s"Setting logType to $value") 33 | value 34 | } 35 | 36 | val 
timestampFieldName: String = { 37 | val value = getTimestampFieldName 38 | logInfo(s"Setting timestampNameField to $value") 39 | value.orNull 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/scala/org/apache/spark/listeners/sink/loganalytics/LogAnalyticsListenerSink.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners.sink.loganalytics 2 | 3 | import com.microsoft.pnp.client.loganalytics.{LogAnalyticsClient, LogAnalyticsSendBufferClient} 4 | import org.apache.spark.SparkConf 5 | import org.apache.spark.internal.Logging 6 | import org.apache.spark.listeners.sink.SparkListenerSink 7 | import org.json4s.{JsonAST, DefaultFormats} 8 | import org.json4s.jackson.JsonMethods.compact 9 | 10 | import scala.util.control.NonFatal 11 | 12 | class LogAnalyticsListenerSink(conf: SparkConf) extends SparkListenerSink with Logging { 13 | private val config = new LogAnalyticsListenerSinkConfiguration(conf) 14 | implicit val formats = DefaultFormats 15 | private var filterRegex = sys.env.getOrElse("LA_SPARKLISTENEREVENT_REGEX", "") 16 | 17 | protected lazy val logAnalyticsBufferedClient = new LogAnalyticsSendBufferClient( 18 | new LogAnalyticsClient( 19 | config.workspaceId, config.secret), 20 | config.logType 21 | ) 22 | 23 | override def logEvent(event: Option[JsonAST.JValue]): Unit = { 24 | try { 25 | event match { 26 | case Some(j) => { 27 | val event = (j \ "Event").extract[String] 28 | if(filterRegex=="" || event.matches(filterRegex)) 29 | { 30 | val jsonString = compact(j) 31 | logDebug(s"Sending event to Log Analytics: ${jsonString}") 32 | logAnalyticsBufferedClient.sendMessage(jsonString, "SparkEventTime") 33 | } 34 | } 35 | case None => 36 | } 37 | } catch { 38 | case NonFatal(e) => 39 | logError(s"Error sending to Log Analytics: $e") 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/scala/org/apache/spark/listeners/sink/loganalytics/LogAnalyticsListenerSinkConfiguration.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners.sink.loganalytics 2 | 3 | import com.microsoft.pnp.LogAnalyticsEnvironment 4 | import org.apache.spark.SparkConf 5 | import org.apache.spark.com.microsoft.pnp.LogAnalyticsConfiguration 6 | 7 | private[spark] object LogAnalyticsListenerSinkConfiguration { 8 | private val CONFIG_PREFIX = "spark.logAnalytics" 9 | 10 | private[spark] val WORKSPACE_ID = CONFIG_PREFIX + ".workspaceId" 11 | 12 | // We'll name this secret so Spark will redact it. 
13 | private[spark] val SECRET = CONFIG_PREFIX + ".secret" 14 | 15 | private[spark] val LOG_TYPE = CONFIG_PREFIX + ".logType" 16 | 17 | private[spark] val DEFAULT_LOG_TYPE = "SparkListenerEvent" 18 | 19 | private[spark] val TIMESTAMP_FIELD_NAME = CONFIG_PREFIX + ".timestampFieldName" 20 | 21 | //private[spark] val ENV_LOG_ANALYTICS_WORKSPACEID = "LOG_ANALYTICS_WORKSPACEID" 22 | 23 | ///private[spark] val ENV_LOG_ANALYTICS_SECRET = "LOG_ANALYTICS_SECRET" 24 | } 25 | 26 | private[spark] class LogAnalyticsListenerSinkConfiguration(sparkConf: SparkConf) 27 | extends LogAnalyticsConfiguration { 28 | 29 | import LogAnalyticsListenerSinkConfiguration._ 30 | 31 | override def getWorkspaceId: Option[String] = { 32 | // Match spark priority order 33 | //sparkConf.getOption(WORKSPACE_ID).orElse(sys.env.get(ENV_LOG_ANALYTICS_WORKSPACEID)) 34 | sparkConf.getOption(WORKSPACE_ID).orElse(Option(LogAnalyticsEnvironment.getWorkspaceId)) 35 | } 36 | 37 | override def getSecret: Option[String] = { 38 | // Match spark priority order 39 | //sparkConf.getOption(SECRET).orElse(sys.env.get(ENV_LOG_ANALYTICS_SECRET)) 40 | sparkConf.getOption(SECRET).orElse(Option(LogAnalyticsEnvironment.getWorkspaceKey)) 41 | } 42 | 43 | override def getLogType: String = sparkConf.get(LOG_TYPE, DEFAULT_LOG_TYPE) 44 | 45 | override def getTimestampFieldName: Option[String] = sparkConf.getOption(TIMESTAMP_FIELD_NAME) 46 | } 47 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/scala/org/apache/spark/metrics/sink/loganalytics/LogAnalyticsMetricsSink.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics.sink.loganalytics 2 | 3 | import java.util.Properties 4 | import java.util.concurrent.TimeUnit 5 | 6 | import com.codahale.metrics.MetricRegistry 7 | import org.apache.spark.internal.Logging 8 | import org.apache.spark.metrics.sink.Sink 9 | import org.apache.spark.{SecurityManager, SparkException} 10 | 11 | private class LogAnalyticsMetricsSink( 12 | val property: Properties, 13 | val registry: MetricRegistry, 14 | securityMgr: SecurityManager) 15 | extends Sink with Logging { 16 | 17 | // This additional constructor allows the library to be used on Spark 3.2.x clusters 18 | // without the fix for https://issues.apache.org/jira/browse/SPARK-37078 19 | def this( 20 | property: Properties, 21 | registry: MetricRegistry) 22 | { 23 | this(property, registry, null) 24 | } 25 | 26 | private val config = new LogAnalyticsSinkConfiguration(property) 27 | 28 | org.apache.spark.metrics.MetricsSystem.checkMinimalPollingPeriod(config.pollUnit, config.pollPeriod) 29 | 30 | var reporter = LogAnalyticsReporter.forRegistry(registry) 31 | .withWorkspaceId(config.workspaceId) 32 | .withWorkspaceKey(config.secret) 33 | .withLogType(config.logType) 34 | .build() 35 | 36 | override def start(): Unit = { 37 | reporter.start(config.pollPeriod, config.pollUnit) 38 | logInfo(s"LogAnalyticsMetricsSink started") 39 | } 40 | 41 | override def stop(): Unit = { 42 | reporter.stop() 43 | logInfo("LogAnalyticsMetricsSink stopped.") 44 | } 45 | 46 | override def report(): Unit = { 47 | reporter.report() 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/scala/org/apache/spark/metrics/sink/loganalytics/LogAnalyticsReporter.scala: -------------------------------------------------------------------------------- 1 | package 
org.apache.spark.metrics.sink.loganalytics 2 | 3 | import java.time.Instant 4 | import java.util.concurrent.TimeUnit 5 | 6 | import com.codahale.metrics.{Timer, _} 7 | import com.codahale.metrics.json.MetricsModule 8 | import com.fasterxml.jackson.databind.ObjectMapper 9 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 10 | import com.microsoft.pnp.SparkInformation 11 | import com.microsoft.pnp.client.loganalytics.{LogAnalyticsClient, LogAnalyticsSendBufferClient} 12 | import org.apache.spark.internal.Logging 13 | import org.json4s.JsonAST.JValue 14 | import org.json4s.JsonDSL._ 15 | import org.json4s.jackson.JsonMethods.{compact, parse, render} 16 | 17 | import scala.util.control.NonFatal 18 | 19 | object LogAnalyticsReporter { 20 | /** 21 | * Returns a new {@link Builder} for {@link LogAnalyticsReporter}. 22 | * 23 | * @param registry the registry to report 24 | * @return a { @link Builder} instance for a { @link LogAnalyticsReporter} 25 | */ 26 | def forRegistry(registry: MetricRegistry) = new LogAnalyticsReporter.Builder(registry) 27 | 28 | /** 29 | * A builder for {@link LogAnalyticsReporter} instances. Defaults to not using a prefix, using the default clock, converting rates to 30 | * events/second, converting durations to milliseconds, and not filtering metrics. The default 31 | * Log Analytics log type is DropWizard 32 | */ 33 | class Builder(val registry: MetricRegistry) extends Logging { 34 | private var clock = Clock.defaultClock 35 | private var prefix: String = null 36 | private var rateUnit = TimeUnit.SECONDS 37 | private var durationUnit = TimeUnit.MILLISECONDS 38 | private var filter = MetricFilter.ALL 39 | private var filterRegex = sys.env.getOrElse("LA_SPARKMETRIC_REGEX", "") 40 | if(filterRegex != "") { 41 | filter = new MetricFilter() { 42 | override def matches(name: String, metric: Metric): Boolean = { 43 | name.matches(filterRegex) 44 | } 45 | } 46 | } 47 | private var logType = "SparkMetrics" 48 | private var workspaceId: String = null 49 | private var workspaceKey: String = null 50 | 51 | /** 52 | * Use the given {@link Clock} instance for the time. Usually the default clock is sufficient. 53 | * 54 | * @param clock clock 55 | * @return { @code this} 56 | */ 57 | def withClock(clock: Clock): LogAnalyticsReporter.Builder = { 58 | this.clock = clock 59 | this 60 | } 61 | 62 | /** 63 | * Configure a prefix for each metric name. Optional, but useful to identify originator of metric. 64 | * 65 | * @param prefix prefix for metric name 66 | * @return { @code this} 67 | */ 68 | def prefixedWith(prefix: String): LogAnalyticsReporter.Builder = { 69 | this.prefix = prefix 70 | this 71 | } 72 | 73 | /** 74 | * Convert all the rates to a certain TimeUnit, defaults to TimeUnit.SECONDS. 
75 | * 76 | * @param rateUnit unit of rate 77 | * @return { @code this} 78 | */ 79 | def convertRatesTo(rateUnit: TimeUnit): LogAnalyticsReporter.Builder = { 80 | this.rateUnit = rateUnit 81 | this 82 | } 83 | 84 | /** 85 | * Convert all the durations to a certain TimeUnit, defaults to TimeUnit.MILLISECONDS 86 | * 87 | * @param durationUnit unit of duration 88 | * @return { @code this} 89 | */ 90 | def convertDurationsTo(durationUnit: TimeUnit): LogAnalyticsReporter.Builder = { 91 | this.durationUnit = durationUnit 92 | this 93 | } 94 | 95 | /** 96 | * Allows to configure a special MetricFilter, which defines what metrics are reported 97 | * 98 | * @param filter metrics filter 99 | * @return { @code this} 100 | */ 101 | def filter(filter: MetricFilter): LogAnalyticsReporter.Builder = { 102 | this.filter = filter 103 | this 104 | } 105 | 106 | /** 107 | * The log type to send to Log Analytics. Defaults to 'SparkMetrics'. 108 | * 109 | * @param logType Log Analytics log type 110 | * @return { @code this} 111 | */ 112 | def withLogType(logType: String): LogAnalyticsReporter.Builder = { 113 | logInfo(s"Setting logType to '${logType}'") 114 | this.logType = logType 115 | this 116 | } 117 | 118 | /** 119 | * The workspace id of the Log Analytics workspace 120 | * 121 | * @param workspaceId Log Analytics workspace id 122 | * @return { @code this} 123 | */ 124 | def withWorkspaceId(workspaceId: String): LogAnalyticsReporter.Builder = { 125 | logInfo(s"Setting workspaceId to '${workspaceId}'") 126 | this.workspaceId = workspaceId 127 | this 128 | } 129 | 130 | /** 131 | * The workspace key of the Log Analytics workspace 132 | * 133 | * @param workspaceKey Log Analytics workspace key 134 | * @return { @code this} 135 | */ 136 | def withWorkspaceKey(workspaceKey: String): LogAnalyticsReporter.Builder = { 137 | this.workspaceKey = workspaceKey 138 | this 139 | } 140 | 141 | /** 142 | * Builds a {@link LogAnalyticsReporter} with the given properties. 
143 | * 144 | * @return a { @link LogAnalyticsReporter} 145 | */ 146 | def build(): LogAnalyticsReporter = { 147 | logDebug("Creating LogAnalyticsReporter") 148 | new LogAnalyticsReporter( 149 | registry, 150 | workspaceId, 151 | workspaceKey, 152 | logType, 153 | clock, 154 | prefix, 155 | rateUnit, 156 | durationUnit, 157 | filter 158 | ) 159 | } 160 | } 161 | } 162 | 163 | class LogAnalyticsReporter(val registry: MetricRegistry, val workspaceId: String, val workspaceKey: String, val logType: String, val clock: Clock, val prefix: String, val rateUnit: TimeUnit, val durationUnit: TimeUnit, val filter: MetricFilter)//, var additionalFields: util.Map[String, AnyRef]) //this.logType); 164 | extends ScheduledReporter(registry, "loganalytics-reporter", filter, rateUnit, durationUnit) 165 | with Logging { 166 | private val mapper = new ObjectMapper() 167 | .registerModules( 168 | DefaultScalaModule, 169 | new MetricsModule( 170 | rateUnit, 171 | durationUnit, 172 | true, 173 | filter 174 | ) 175 | ) 176 | 177 | private val logAnalyticsBufferedClient = new LogAnalyticsSendBufferClient( 178 | new LogAnalyticsClient(this.workspaceId, this.workspaceKey), 179 | "SparkMetric" 180 | ) 181 | 182 | 183 | override def report( 184 | gauges: java.util.SortedMap[String, Gauge[_]], 185 | counters: java.util.SortedMap[String, Counter], 186 | histograms: java.util.SortedMap[String, Histogram], 187 | meters: java.util.SortedMap[String, Meter], 188 | timers: java.util.SortedMap[String, Timer]): Unit = { 189 | logDebug("Reporting metrics") 190 | // nothing to do if we don't have any metrics to report 191 | if (gauges.isEmpty && counters.isEmpty && histograms.isEmpty && meters.isEmpty && timers.isEmpty) { 192 | logInfo("All metrics empty, nothing to report") 193 | return 194 | } 195 | val now = Instant.now 196 | import scala.collection.JavaConversions._ 197 | 198 | val ambientProperties = SparkInformation.get() + ("SparkEventTime" -> now.toString) 199 | val metrics = gauges.retain((_, v) => v.getValue != null).toSeq ++ 200 | counters.toSeq ++ histograms.toSeq ++ meters.toSeq ++ timers.toSeq 201 | for ((name, metric) <- metrics) { 202 | try { 203 | this.logAnalyticsBufferedClient.sendMessage( 204 | compact(this.addProperties(name, metric, ambientProperties)), 205 | "SparkMetricTime" 206 | ) 207 | } catch { 208 | case NonFatal(e) => 209 | logError(s"Error serializing metric to JSON", e) 210 | None 211 | } 212 | } 213 | } 214 | 215 | //private def addProperties(name: String, metric: Metric, timestamp: Instant): JValue = { 216 | private def addProperties(name: String, metric: Metric, properties: Map[String, String]): JValue = { 217 | val metricType: String = metric match { 218 | case _: Counter => classOf[Counter].getSimpleName 219 | case _: Gauge[_] => classOf[Gauge[_]].getSimpleName 220 | case _: Histogram => classOf[Histogram].getSimpleName 221 | case _: Meter => classOf[Meter].getSimpleName 222 | case _: Timer => classOf[Timer].getSimpleName 223 | case m: Metric => m.getClass.getSimpleName 224 | } 225 | 226 | parse(this.mapper.writeValueAsString(metric)) 227 | .merge(render( 228 | ("metric_type" -> metricType) ~ 229 | ("name" -> name) ~ 230 | properties 231 | )) 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/main/scala/org/apache/spark/metrics/sink/loganalytics/LogAnalyticsSinkConfiguration.scala: -------------------------------------------------------------------------------- 1 | package 
org.apache.spark.metrics.sink.loganalytics 2 | 3 | import java.util.Properties 4 | import java.util.concurrent.TimeUnit 5 | 6 | import com.microsoft.pnp.LogAnalyticsEnvironment 7 | import org.apache.spark.com.microsoft.pnp.LogAnalyticsConfiguration 8 | 9 | private[spark] object LogAnalyticsSinkConfiguration { 10 | private[spark] val LOGANALYTICS_KEY_WORKSPACEID = "workspaceId" 11 | private[spark] val LOGANALYTICS_KEY_SECRET = "secret" 12 | private[spark] val LOGANALYTICS_KEY_LOGTYPE = "logType" 13 | private[spark] val LOGANALYTICS_KEY_TIMESTAMPFIELD = "timestampField" 14 | private[spark] val LOGANALYTICS_KEY_PERIOD = "period" 15 | private[spark] val LOGANALYTICS_KEY_UNIT = "unit" 16 | 17 | private[spark] val LOGANALYTICS_DEFAULT_LOGTYPE = "SparkMetrics" 18 | private[spark] val LOGANALYTICS_DEFAULT_PERIOD = "10" 19 | private[spark] val LOGANALYTICS_DEFAULT_UNIT = "SECONDS" 20 | } 21 | 22 | private[spark] class LogAnalyticsSinkConfiguration(properties: Properties) 23 | extends LogAnalyticsConfiguration { 24 | 25 | import LogAnalyticsSinkConfiguration._ 26 | 27 | override def getWorkspaceId: Option[String] = { 28 | Option(properties.getProperty(LOGANALYTICS_KEY_WORKSPACEID, LogAnalyticsEnvironment.getWorkspaceId)) 29 | } 30 | 31 | override def getSecret: Option[String] = { 32 | Option(properties.getProperty(LOGANALYTICS_KEY_SECRET, LogAnalyticsEnvironment.getWorkspaceKey)) 33 | } 34 | 35 | override protected def getLogType: String = 36 | properties.getProperty(LOGANALYTICS_KEY_LOGTYPE, LOGANALYTICS_DEFAULT_LOGTYPE) 37 | 38 | override protected def getTimestampFieldName: Option[String] = 39 | Option(properties.getProperty(LOGANALYTICS_KEY_TIMESTAMPFIELD, null)) 40 | 41 | val pollPeriod: Int = { 42 | val value = properties.getProperty(LOGANALYTICS_KEY_PERIOD, LOGANALYTICS_DEFAULT_PERIOD).toInt 43 | logInfo(s"Setting polling period to $value") 44 | value 45 | } 46 | 47 | val pollUnit: TimeUnit = { 48 | val value = TimeUnit.valueOf( 49 | properties.getProperty(LOGANALYTICS_KEY_UNIT, LOGANALYTICS_DEFAULT_UNIT).toUpperCase) 50 | logInfo(s"Setting polling unit to $value") 51 | value 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/test/java/com/microsoft/pnp/client/loganalytics/LogAnalyticsClientTester.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client.loganalytics; 2 | 3 | import org.apache.http.HttpResponse; 4 | import org.apache.http.StatusLine; 5 | import org.apache.http.client.HttpClient; 6 | import org.apache.http.client.methods.HttpPost; 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | import org.mockito.Matchers; 10 | import org.mockito.Mock; 11 | import org.mockito.MockitoAnnotations; 12 | 13 | import java.io.IOException; 14 | import java.lang.reflect.Field; 15 | 16 | import static org.mockito.Mockito.*; 17 | 18 | public class LogAnalyticsClientTester { 19 | 20 | @Mock 21 | private HttpClient httpClient; 22 | 23 | 24 | @Before 25 | public void setUp() { 26 | 27 | MockitoAnnotations.initMocks(this); 28 | } 29 | 30 | @Test(expected = IllegalArgumentException.class) 31 | public void it_should_not_initialize_with_null_or_empty_workspaceID() { 32 | 33 | String workSpaceKey = "someKey"; 34 | LogAnalyticsClient sut = new LogAnalyticsClient("", workSpaceKey); 35 | } 36 | 37 | @Test(expected = IllegalArgumentException.class) 38 | public void it_should_not_initialize_with_null_or_empty_workspaceKey() { 39 | 40 | String workSpaceID 
= "someWorkSpaceId"; 41 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, ""); 42 | } 43 | 44 | @Test(expected = IllegalArgumentException.class) 45 | public void it_should_not_initialize_with_null_httpClient() { 46 | 47 | String workSpaceID = "someWorkSpaceId"; 48 | String workSpaceKey = "someKey"; 49 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, null); 50 | } 51 | 52 | @Test(expected = IllegalArgumentException.class) 53 | public void it_should_not_initialize_with_null_or_empty_urlSuffix() { 54 | 55 | String workSpaceID = "someWorkSpaceId"; 56 | String workSpaceKey = "someKey"; 57 | String apiVersion = "someApiVersion"; 58 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, "", apiVersion); 59 | } 60 | 61 | @Test(expected = IllegalArgumentException.class) 62 | public void it_should_not_initialize_with_null_or_empty_apiVersion() { 63 | 64 | String workSpaceID = "someWorkSpaceId"; 65 | String workSpaceKey = "someKey"; 66 | String urlSuffix = "someUrlSuffix"; 67 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, ""); 68 | } 69 | 70 | @Test 71 | public void it_should_initialize_successfully() { 72 | 73 | String workSpaceID = "someWorkSpaceId"; 74 | String workSpaceKey = "someKey"; 75 | String urlSuffix = "someUrlSuffix"; 76 | String apiVersion = "someApiVersion"; 77 | 78 | // this is static construction in actual class 79 | String expectedURL = String.format("https://%s.%s/api/logs?api-version=%s" 80 | , workSpaceID 81 | , urlSuffix 82 | , apiVersion); 83 | 84 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, apiVersion); 85 | 86 | 87 | try { 88 | Field workspaceIdField = LogAnalyticsClient.class.getDeclaredField("workspaceId"); 89 | workspaceIdField.setAccessible(true); 90 | Field workspaceKeyField = LogAnalyticsClient.class.getDeclaredField("workspaceKey"); 91 | workspaceKeyField.setAccessible(true); 92 | Field urlField = LogAnalyticsClient.class.getDeclaredField("url"); 93 | urlField.setAccessible(true); 94 | Field httpClientField = LogAnalyticsClient.class.getDeclaredField("httpClient"); 95 | httpClientField.setAccessible(true); 96 | 97 | String actualWorkSpaceID = (String) workspaceIdField.get(sut); 98 | assert (actualWorkSpaceID.contentEquals(workSpaceID)); 99 | 100 | String actualWorkSpaceKey = (String) workspaceKeyField.get(sut); 101 | assert (actualWorkSpaceKey.contentEquals(workSpaceKey)); 102 | 103 | String actualURL = (String) urlField.get(sut); 104 | assert (expectedURL.contentEquals(actualURL)); 105 | 106 | 107 | } catch (NoSuchFieldException | IllegalAccessException e) { 108 | e.printStackTrace(); 109 | } 110 | 111 | } 112 | 113 | @Test(expected = IOException.class) 114 | public void it_should_not_send_when_empty_body() throws IOException { 115 | 116 | String workSpaceID = "someWorkSpaceId"; 117 | String workSpaceKey = "someKey"; 118 | String urlSuffix = "someUrlSuffix"; 119 | String apiVersion = "someApiVersion"; 120 | 121 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, apiVersion); 122 | 123 | sut.send(null, "someLogType"); 124 | 125 | } 126 | 127 | @Test(expected = IOException.class) 128 | public void it_should_not_send_when_empty_logtype() throws IOException { 129 | 130 | String workSpaceID = "someWorkSpaceId"; 131 | String workSpaceKey = "someKey"; 132 | String urlSuffix = "someUrlSuffix"; 133 | String apiVersion = "someApiVersion"; 134 | 135 | 
LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, apiVersion); 136 | sut.send("someBody", ""); 137 | 138 | } 139 | 140 | @Test 141 | public void it_should_send_to_logAnalytics_when_body_and_log_type_not_empty() throws IOException { 142 | 143 | //arrange 144 | String workSpaceID = "someWorkSpaceId"; 145 | String workSpaceKey = "someKey"; 146 | String urlSuffix = "someUrlSuffix"; 147 | String apiVersion = "someApiVersion"; 148 | 149 | HttpResponse httpResponse = mock(HttpResponse.class); 150 | StatusLine statusLine = mock(StatusLine.class); 151 | when(statusLine.getStatusCode()).thenReturn(200); 152 | when(httpResponse.getStatusLine()).thenReturn(statusLine); 153 | when(httpClient.execute(Matchers.any(HttpPost.class))).thenReturn(httpResponse); 154 | 155 | //act 156 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, apiVersion); 157 | sut.send("someBody", "someLogType"); 158 | 159 | //assert 160 | verify(httpClient, times(1)).execute(Matchers.any(HttpPost.class)); 161 | 162 | } 163 | 164 | 165 | } 166 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/test/java/com/microsoft/pnp/client/loganalytics/ResourceIdTest.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client.loganalytics; 2 | 3 | import org.apache.http.Header; 4 | import org.apache.http.HttpResponse; 5 | import org.apache.http.StatusLine; 6 | import org.apache.http.client.HttpClient; 7 | import org.apache.http.client.methods.HttpPost; 8 | import org.junit.Before; 9 | import org.junit.Rule; 10 | import org.junit.Test; 11 | import org.mockito.Matchers; 12 | import org.mockito.Mock; 13 | import org.mockito.MockitoAnnotations; 14 | import org.junit.contrib.java.lang.system.EnvironmentVariables; 15 | 16 | import java.io.IOException; 17 | 18 | import static org.junit.Assert.assertNotNull; 19 | import static org.junit.Assert.assertNull; 20 | import static org.junit.Assert.assertTrue; 21 | import static org.mockito.Mockito.mock; 22 | import static org.mockito.Mockito.spy; 23 | import static org.mockito.Mockito.when; 24 | 25 | 26 | public class ResourceIdTest { 27 | 28 | @Rule 29 | public final EnvironmentVariables env = new EnvironmentVariables(); 30 | 31 | @Mock 32 | private HttpClient httpClient; 33 | 34 | @Before 35 | public void setUp() { 36 | MockitoAnnotations.initMocks(this); 37 | } 38 | 39 | @Test 40 | public void it_should_have_x_ms_azure_resource_Id_set() throws IOException { 41 | 42 | env.set("AZ_SUBSCRIPTION_ID", "1234"); 43 | env.set("AZ_RSRC_GRP_NAME", "someGrpName"); 44 | env.set("AZ_RSRC_PROV_NAMESPACE", "someProvNamespace"); 45 | env.set("AZ_RSRC_TYPE", "someResourceType"); 46 | env.set("AZ_RSRC_NAME", "someResourceName"); 47 | 48 | String workSpaceID = "someWorkSpaceId"; 49 | String workSpaceKey = "someKey"; 50 | String urlSuffix = "someUrlSuffix"; 51 | String apiVersion = "someApiVersion"; 52 | 53 | HttpResponse httpResponse = mock(HttpResponse.class); 54 | StatusLine statusLine = mock(StatusLine.class); 55 | when(statusLine.getStatusCode()).thenReturn(200); 56 | when(httpResponse.getStatusLine()).thenReturn(statusLine); 57 | when(httpClient.execute(Matchers.any(HttpPost.class))).thenReturn(httpResponse); 58 | 59 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, apiVersion); 60 | LogAnalyticsClient mockSut = spy(sut); 61 | HttpPost httpPost 
= spy(HttpPost.class); 62 | when(mockSut.getHttpPost()).thenReturn(httpPost); 63 | mockSut.send("someBody", "someLogType"); 64 | 65 | Header header = httpPost.getFirstHeader("x-ms-AzureResourceId"); 66 | assertNotNull(header); 67 | assertTrue(header.getValue().contains("1234")); 68 | assertTrue(header.getValue().contains("someGrpName")); 69 | assertTrue(header.getValue().contains("someProvNamespace")); 70 | assertTrue(header.getValue().contains("someResourceType")); 71 | assertTrue(header.getValue().contains("someResourceName")); 72 | } 73 | 74 | @Test 75 | public void it_should_have_x_ms_azure_resource_Id_null() throws IOException { 76 | 77 | // Not all the env variables are set. 78 | env.set("AZ_SUBSCRIPTION_ID", "1234"); 79 | env.set("AZ_RSRC_TYPE", "someResourceType"); 80 | env.set("AZ_RSRC_NAME", "someResourceName"); 81 | 82 | String workSpaceID = "someWorkSpaceId"; 83 | String workSpaceKey = "someKey"; 84 | String urlSuffix = "someUrlSuffix"; 85 | String apiVersion = "someApiVersion"; 86 | 87 | HttpResponse httpResponse = mock(HttpResponse.class); 88 | StatusLine statusLine = mock(StatusLine.class); 89 | when(statusLine.getStatusCode()).thenReturn(200); 90 | when(httpResponse.getStatusLine()).thenReturn(statusLine); 91 | when(httpClient.execute(Matchers.any(HttpPost.class))).thenReturn(httpResponse); 92 | 93 | LogAnalyticsClient sut = new LogAnalyticsClient(workSpaceID, workSpaceKey, httpClient, urlSuffix, apiVersion); 94 | LogAnalyticsClient mockSut = spy(sut); 95 | HttpPost httpPost = spy(HttpPost.class); 96 | when(mockSut.getHttpPost()).thenReturn(httpPost); 97 | mockSut.send("someBody", "someLogType"); 98 | 99 | Header header = httpPost.getFirstHeader("x-ms-AzureResourceId"); 100 | assertNull(header); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/spark-listeners-loganalytics/src/test/java/com/microsoft/pnp/logging/loganalytics/LogAnalyticsAppenderTest.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging.loganalytics; 2 | 3 | import com.microsoft.pnp.client.loganalytics.LogAnalyticsClient; 4 | import junit.framework.TestCase; 5 | import org.apache.http.client.HttpClient; 6 | import org.apache.log4j.spi.Filter; 7 | import org.apache.log4j.spi.LoggingEvent; 8 | import org.junit.Before; 9 | import org.junit.Rule; 10 | import org.junit.Test; 11 | import org.junit.contrib.java.lang.system.EnvironmentVariables; 12 | import org.mockito.Matchers; 13 | import org.mockito.Mock; 14 | import org.mockito.MockitoAnnotations; 15 | import java.lang.reflect.Field; 16 | import java.lang.reflect.Modifier; 17 | import static org.mockito.Mockito.*; 18 | 19 | import static org.junit.Assert.*; 20 | 21 | public class LogAnalyticsAppenderTest { 22 | 23 | Field nameregex; 24 | Field messageregex; 25 | LogAnalyticsAppender sut; 26 | LoggingEvent test = mock(LoggingEvent.class); 27 | 28 | @Before 29 | public void setUp() throws NoSuchFieldException, IllegalAccessException { 30 | sut = new LogAnalyticsAppender(); 31 | 32 | // These fields are set private static from environment at startup, but we need different values for testing so 33 | // we are changing them to be accessible so that we can modify for testing. 
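// Hedged note: clearing the final modifier through Field's private "modifiers" field, as done
// below, relies on Java 8 reflection behavior; JDK 12 and later filter that field out of
// reflection, so these tests presumably assume the Java 8 toolchain typical for Spark builds
// of this generation.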
34 | nameregex = LogAnalyticsAppender.class.getDeclaredField("LA_SPARKLOGGINGEVENT_NAME_REGEX"); 35 | messageregex = LogAnalyticsAppender.class.getDeclaredField("LA_SPARKLOGGINGEVENT_MESSAGE_REGEX"); 36 | nameregex.setAccessible(true); 37 | messageregex.setAccessible(true); 38 | Field modifiersField = Field.class.getDeclaredField( "modifiers" ); 39 | modifiersField.setAccessible( true ); 40 | modifiersField.setInt( nameregex, nameregex.getModifiers() & ~Modifier.FINAL ); 41 | modifiersField.setInt( messageregex, messageregex.getModifiers() & ~Modifier.FINAL ); 42 | } 43 | 44 | @Test 45 | public void FilterShouldWorkWithEmptyEnv() throws IllegalAccessException { 46 | nameregex.set(null,""); 47 | messageregex.set(null,""); 48 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_34"); 49 | when(test.getRenderedMessage()).thenReturn("This is a generic log message"); 50 | assertEquals(sut.getFilter().decide(test),Filter.NEUTRAL); 51 | } 52 | @Test 53 | public void FilterShouldRejectOrgApacheHttp() throws IllegalAccessException { 54 | nameregex.set(null,""); 55 | messageregex.set(null,""); 56 | when(test.getLoggerName()).thenReturn("org.apache.http.this.is.a.test"); 57 | when(test.getRenderedMessage()).thenReturn("This is a generic log message"); 58 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 59 | } 60 | @Test 61 | public void FilterShouldAllowNameRegex() throws IllegalAccessException { 62 | nameregex.set(null,"12.*34"); 63 | messageregex.set(null,""); 64 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_34"); 65 | when(test.getRenderedMessage()).thenReturn("This is a generic log message"); 66 | assertEquals(sut.getFilter().decide(test),Filter.NEUTRAL); 67 | } 68 | @Test 69 | public void FilterShouldDenyNameRegex() throws IllegalAccessException { 70 | nameregex.set(null,"12.*34"); 71 | messageregex.set(null,""); 72 | when(test.getLoggerName()).thenReturn("x12_This_is_a_test_34x"); 73 | when(test.getRenderedMessage()).thenReturn("This is a generic log message"); 74 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 75 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_3456789"); 76 | when(test.getRenderedMessage()).thenReturn("This is a generic log message"); 77 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 78 | } 79 | @Test 80 | public void FilterShouldDenyMessageRegex() throws IllegalAccessException { 81 | nameregex.set(null,""); 82 | messageregex.set(null,"(?i)((?!password).)*");// Only match if the message does not contain the word password 83 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_34"); 84 | when(test.getRenderedMessage()).thenReturn("This message is pretending to return a logged password: randomstring"); 85 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 86 | when(test.getRenderedMessage()).thenReturn("This message is pretending to return a logged Password: randomstring"); 87 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 88 | } 89 | @Test 90 | public void FilterShouldAllowMessageRegex() throws IllegalAccessException { 91 | nameregex.set(null,""); 92 | messageregex.set(null,"(?i)((?!password).)*");// Only match if the message does not contain the word password 93 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_34"); 94 | when(test.getRenderedMessage()).thenReturn("This message is pretending to return a logged Username: not_a_user"); 95 | assertEquals(sut.getFilter().decide(test),Filter.NEUTRAL); 96 | } 97 | @Test 98 | public void FilterShouldDenyNameMessageRegex() throws 
IllegalAccessException { 99 | nameregex.set(null,"12.*34"); 100 | messageregex.set(null,"(?i)((?!password).)*");// Only match if the message does not contain the word password 101 | // message does not match 102 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_34"); 103 | when(test.getRenderedMessage()).thenReturn("This message is pretending to return a logged password: randomstring"); 104 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 105 | // name doesn't match 106 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_3456789"); 107 | when(test.getRenderedMessage()).thenReturn("This is a generic log message"); 108 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 109 | // name and message do not match 110 | when(test.getLoggerName()).thenReturn("12_This_is_a_test_3456789"); 111 | when(test.getRenderedMessage()).thenReturn("This message is pretending to return a logged password: randomstring"); 112 | assertEquals(sut.getFilter().decide(test),Filter.DENY); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/spark-listeners/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | spark-monitoring 7 | com.microsoft.pnp 8 | 1.0.0 9 | ../pom.xml 10 | 11 | 4.0.0 12 | spark-listeners 13 | ${project.artifactId} 14 | jar 15 | 16 | 17 | org.scala-lang 18 | scala-library 19 | 20 | 21 | org.apache.spark 22 | spark-sql_${scala.compat.version} 23 | 24 | 25 | org.apache.spark 26 | spark-streaming_${scala.compat.version} 27 | 28 | 29 | org.eclipse.jetty 30 | jetty-server 31 | 32 | 33 | org.scala-lang 34 | scala-reflect 35 | ${scala.version} 36 | 37 | 38 | 39 | 40 | org.scalatest 41 | scalatest_${scala.compat.version} 42 | ${scalatest.version} 43 | 44 | 45 | org.scalatest 46 | scalatest-funsuite_${scala.compat.version} 47 | test 48 | 3.2.9 49 | 50 | 51 | junit 52 | junit 53 | 54 | 55 | org.apache.spark 56 | spark-core_${scala.compat.version} 57 | ${spark.version} 58 | test-jar 59 | test 60 | 61 | 62 | org.mockito 63 | mockito-core 64 | 65 | 66 | org.scala-lang 67 | scala-compiler 68 | ${scala.version} 69 | test 70 | 71 | 72 | 73 | ${project.artifactId}_${spark.version}_${scala.compat.version}-${project.version} 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | org.apache.maven.plugins 101 | maven-clean-plugin 102 | 103 | 104 | net.alchim31.maven 105 | scala-maven-plugin 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-compiler-plugin 110 | 111 | 112 | org.apache.maven.plugins 113 | maven-surefire-plugin 114 | 115 | 116 | org.scalatest 117 | scalatest-maven-plugin 118 | 119 | 120 | org.codehaus.mojo 121 | flatten-maven-plugin 122 | 123 | 124 | org.apache.maven.plugins 125 | maven-source-plugin 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /src/spark-listeners/scripts/spark-monitoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -o pipefail 5 | 6 | # These environment variables would normally be set by Spark scripts 7 | # However, for a Databricks init script, they have not been set yet. 8 | # We will keep the names the same here, but not export them. 9 | # These must be changed if the associated Spark environment variables 10 | # are changed. 
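# Illustrative usage (not executed by this script): the script is typically staged in DBFS next
# to the built listener jars and attached to a cluster as an init script, for example:
#   databricks fs mkdirs dbfs:/databricks/spark-monitoring
#   databricks fs cp src/spark-listeners/scripts/spark-monitoring.sh dbfs:/databricks/spark-monitoring/
# The dbfs:/databricks/spark-monitoring path mirrors STAGE_DIR further down; keep the two in sync.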
11 | DB_HOME=/databricks 12 | SPARK_HOME=$DB_HOME/spark 13 | SPARK_CONF_DIR=$SPARK_HOME/conf 14 | 15 | # Add your Log Analytics Workspace information below so all clusters use the same 16 | # Log Analytics Workspace 17 | # Also if it is available use AZ_* variables to include x-ms-AzureResourceId 18 | # header as part of the request 19 | tee -a "$SPARK_CONF_DIR/spark-env.sh" << EOF 20 | export DB_CLUSTER_ID=$DB_CLUSTER_ID 21 | export DB_CLUSTER_NAME=$DB_CLUSTER_NAME 22 | export LOG_ANALYTICS_WORKSPACE_ID= 23 | export LOG_ANALYTICS_WORKSPACE_KEY= 24 | export AZ_SUBSCRIPTION_ID= 25 | export AZ_RSRC_GRP_NAME= 26 | export AZ_RSRC_PROV_NAMESPACE= 27 | export AZ_RSRC_TYPE= 28 | export AZ_RSRC_NAME= 29 | 30 | # Note: All REGEX filters below are implemented with java.lang.String.matches(...). This implementation essentially appends ^...$ around 31 | # the regular expression, so the entire string must match the regex. If you need to allow for other values you should include .* before and/or 32 | # after your expression. 33 | 34 | # Add a quoted regex value to filter the events for SparkListenerEvent_CL, the log will only include events where Event_s matches the regex. 35 | # Commented example below will only log events for SparkListenerJobStart, SparkListenerJobEnd, or where "org.apache.spark.sql.execution.ui." 36 | # is is the start of the event name. 37 | # export LA_SPARKLISTENEREVENT_REGEX="SparkListenerJobStart|SparkListenerJobEnd|org\.apache\.spark\.sql\.execution\.ui\..*" 38 | 39 | # Add a quoted regex value to filter the events for SparkMetric_CL, the log will only include events where name_s matches the regex. 40 | # Commented example below will only log metrics where the name begins with app and ends in .jvmCpuTime or .heap.max. 41 | # export LA_SPARKMETRIC_REGEX="app.*\.jvmCpuTime|app.*\.heap.max" 42 | 43 | # Add a quoted regex value to filter the events for SparkLoggingEvent_CL, the log will only include events where logger_name_s matches the name regex 44 | # or where the Message matches the message regex. If both are specified, then both must be matched for the log to be sent. 45 | # Commented examples below will only log messages where the logger name is com.microsoft.pnp.samplejob.StreamingQueryListenerSampleJob or begins with 46 | # org.apache.spark.util.Utils, or where the Message ends with the string 'StreamingQueryListenerSampleJob' or begins with the string 'FS_CONF_COMPAT'. 47 | # export LA_SPARKLOGGINGEVENT_NAME_REGEX="com\.microsoft\.pnp\.samplejob\.StreamingQueryListenerSampleJob|org\.apache\.spark\.util\.Utils.*" 48 | # export LA_SPARKLOGGINGEVENT_MESSAGE_REGEX=".*StreamingQueryListenerSampleJob|FS_CONF_COMPAT.*" 49 | 50 | # Uncomment the following line to enable local logging to stderr of buffer contents when an exception is hit when sending a buffer to Log Analytics 51 | # export LA_LOGFAILEDBUFFERSEND=TRUE 52 | EOF 53 | 54 | STAGE_DIR=/dbfs/databricks/spark-monitoring 55 | SPARK_LISTENERS_VERSION=${SPARK_LISTENERS_VERSION:-1.0.0} 56 | SPARK_LISTENERS_LOG_ANALYTICS_VERSION=${SPARK_LISTENERS_LOG_ANALYTICS_VERSION:-1.0.0} 57 | SPARK_VERSION=$(cat /databricks/spark/VERSION 2> /dev/null || echo "") 58 | SPARK_VERSION=${SPARK_VERSION:-3.2.1} 59 | SPARK_SCALA_VERSION=$(ls /databricks/spark/assembly/target | cut -d '-' -f2 2> /dev/null || echo "") 60 | SPARK_SCALA_VERSION=${SPARK_SCALA_VERSION:-2.12} 61 | 62 | # This variable configures the spark-monitoring library metrics sink. 63 | # Any valid Spark metric.properties entry can be added here as well. 
64 | # It will get merged with the metrics.properties on the cluster. 65 | METRICS_PROPERTIES=$(cat << EOF 66 | # This will enable the sink for all of the instances. 67 | *.sink.loganalytics.class=org.apache.spark.metrics.sink.loganalytics.LogAnalyticsMetricsSink 68 | *.sink.loganalytics.period=5 69 | *.sink.loganalytics.unit=seconds 70 | 71 | # Enable JvmSource for instance master, worker, driver and executor 72 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 73 | 74 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 75 | 76 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 77 | 78 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 79 | 80 | EOF 81 | ) 82 | 83 | echo "Copying Spark Monitoring jars" 84 | JAR_FILENAME="spark-listeners_${SPARK_VERSION}_${SPARK_SCALA_VERSION}-${SPARK_LISTENERS_VERSION}.jar" 85 | echo "Copying $JAR_FILENAME" 86 | cp -f "$STAGE_DIR/$JAR_FILENAME" /mnt/driver-daemon/jars 87 | JAR_FILENAME="spark-listeners-loganalytics_${SPARK_VERSION}_${SPARK_SCALA_VERSION}-${SPARK_LISTENERS_LOG_ANALYTICS_VERSION}.jar" 88 | echo "Copying $JAR_FILENAME" 89 | cp -f "$STAGE_DIR/$JAR_FILENAME" /mnt/driver-daemon/jars 90 | echo "Copied Spark Monitoring jars successfully" 91 | 92 | echo "Merging metrics.properties" 93 | echo "$(echo "$METRICS_PROPERTIES"; cat "$SPARK_CONF_DIR/metrics.properties")" > "$SPARK_CONF_DIR/metrics.properties" || { echo "Error writing metrics.properties"; exit 1; } 94 | echo "Merged metrics.properties successfully" 95 | 96 | # This will enable master/worker metrics 97 | cat << EOF >> "$SPARK_CONF_DIR/spark-defaults.conf" 98 | spark.metrics.conf ${SPARK_CONF_DIR}/metrics.properties 99 | EOF 100 | 101 | log4jDirectories=( "executor" "driver" "master-worker" ) 102 | for log4jDirectory in "${log4jDirectories[@]}" 103 | do 104 | 105 | LOG4J_CONFIG_FILE="$SPARK_HOME/dbconf/log4j/$log4jDirectory/log4j.properties" 106 | echo "BEGIN: Updating $LOG4J_CONFIG_FILE with Log Analytics appender" 107 | sed -i 's/log4j.rootCategory=.*/&, logAnalyticsAppender/g' ${LOG4J_CONFIG_FILE} 108 | tee -a ${LOG4J_CONFIG_FILE} << EOF 109 | # logAnalytics 110 | log4j.appender.logAnalyticsAppender=com.microsoft.pnp.logging.loganalytics.LogAnalyticsAppender 111 | log4j.appender.logAnalyticsAppender.filter.spark=com.microsoft.pnp.logging.SparkPropertyEnricher 112 | # Commented line below shows how to set the threshhold for logging to only capture events that are 113 | # level ERROR or more severe. 114 | # log4j.appender.logAnalyticsAppender.Threshold=ERROR 115 | EOF 116 | 117 | echo "END: Updating $LOG4J_CONFIG_FILE with Log Analytics appender" 118 | 119 | done 120 | 121 | # The spark.extraListeners property has an entry from Databricks by default. 122 | # We have to readd it here because we did not find a way to get this setting when the init script is running. 123 | # If Databricks changes the default value of this property, it needs to be changed here. 
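# One way to double-check the current Databricks default, if in doubt: on a cluster without this
# init script, run spark.conf.get("spark.extraListeners") in a notebook and mirror that value in
# the driver defaults written below.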
124 | cat << EOF > "$DB_HOME/driver/conf/00-custom-spark-driver-defaults.conf" 125 | [driver] { 126 | "spark.extraListeners" = "com.databricks.backend.daemon.driver.DBCEventLoggingListener,org.apache.spark.listeners.UnifiedSparkListener" 127 | "spark.unifiedListener.sink" = "org.apache.spark.listeners.sink.loganalytics.LogAnalyticsListenerSink" 128 | } 129 | EOF 130 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/client/GenericSendBuffer.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client; 2 | 3 | import java.util.concurrent.ExecutorService; 4 | import java.util.concurrent.Executors; 5 | import java.util.concurrent.Semaphore; 6 | 7 | public abstract class GenericSendBuffer implements AutoCloseable { 8 | 9 | /** 10 | * This executor that will be shared among all buffers. We may not need this, since we 11 | * shouldn't have hundreds of different time generated fields, but we can't have 12 | * executors spinning up hundreds of threads. 13 | *
<p>
14 | * We won't use daemon threads because in the event of a crash, we want to try to send 15 | * as much data as we can. 16 | */ 17 | static ExecutorService executor = Executors.newCachedThreadPool(); 18 | 19 | // Configure whether to throw exception on failed send for oversized event 20 | private static final boolean EXCEPTION_ON_FAILED_SEND = true; 21 | 22 | // Interface to support event notifications with a parameter. 23 | public interface Listener { 24 | void invoke(T o); 25 | } 26 | 27 | // making it available to every thread to see if changes happen. 28 | // also this value will be set only when shutdown is called. 29 | public volatile boolean isClosed = false; 30 | 31 | 32 | /** 33 | * Object used to serialize sendRequest calls. 34 | */ 35 | private final Object sendBufferLock = new Object(); 36 | 37 | /** 38 | * Current batching task. 39 | * Synchronized by {@code sendBufferLock}. 40 | */ 41 | private GenericSendBufferTask sendBufferTask = null; 42 | 43 | /** 44 | * Permits controlling the number of in flight SendMessage batches. 45 | */ 46 | private final Semaphore inflightBatches; 47 | 48 | // Make configurable 49 | private final int maxInflightBatches = 4; 50 | 51 | protected GenericSendBuffer() { 52 | this.inflightBatches = new Semaphore(this.maxInflightBatches); 53 | } 54 | 55 | protected abstract GenericSendBufferTask createSendBufferTask(); 56 | 57 | public void send(T data) { 58 | // if this buffer is closed , then no need to proceed. 59 | if (this.isClosed) { 60 | return; 61 | } 62 | 63 | try { 64 | synchronized (this.sendBufferLock) { 65 | if (this.sendBufferTask == null 66 | || (!this.sendBufferTask.addEvent(data))) { 67 | // We need a new task because one of the following is true: 68 | // 1. We don't have one yet (i.e. first message!) 69 | // 2. The task is full 70 | // 3. The task's timeout elapsed 71 | GenericSendBufferTask sendBufferTask = this.createSendBufferTask(); 72 | // Make sure we don't have too many in flight at once. 73 | // This WILL block the calling code, but it's simpler than 74 | // building a circular buffer, although we are sort of doing that. :) 75 | // Not sure we need this yet! 76 | this.inflightBatches.acquire(); 77 | this.sendBufferTask = sendBufferTask; 78 | 79 | // Register a listener for the event signaling that the 80 | // batch task has completed (successfully or not). 81 | this.sendBufferTask.setOnCompleted(task -> { 82 | inflightBatches.release(); 83 | }); 84 | 85 | // There is an edge case here. 86 | // If the max bytes are too small for the first message, things go 87 | // wonky, so let's bail 88 | if (!this.sendBufferTask.addEvent(data)) { 89 | String message = String.format("Failed to schedule batch because first message size %d exceeds batch size limit %d (bytes).", 90 | this.sendBufferTask.calculateDataSize(data), 91 | this.sendBufferTask.getMaxBatchSizeBytes()); 92 | System.err.println(message); 93 | if(EXCEPTION_ON_FAILED_SEND) { 94 | // If we are throwing before we call execute on the sendBufferTask, we should release the semaphore. 95 | inflightBatches.release(); 96 | throw new RuntimeException(message); 97 | } 98 | } 99 | executor.execute(this.sendBufferTask); 100 | } 101 | } 102 | 103 | } catch (InterruptedException e) { 104 | Thread.currentThread().interrupt(); 105 | RuntimeException toThrow = new RuntimeException("Interrupted while waiting for lock."); 106 | toThrow.initCause(e); 107 | throw toThrow; 108 | } 109 | } 110 | 111 | /** 112 | * Flushes all outstanding outbound events in this buffer. 113 | *
<p>
114 | * The call returns successfully when all outstanding events submitted before the 115 | * call are completed. 116 | */ 117 | public void flush() { 118 | 119 | try { 120 | synchronized (sendBufferLock) { 121 | inflightBatches.acquire(this.maxInflightBatches); 122 | inflightBatches.release(this.maxInflightBatches); 123 | } 124 | } catch (InterruptedException e) { 125 | Thread.currentThread().interrupt(); 126 | } 127 | } 128 | 129 | 130 | /** 131 | * makes this buffer closed, 132 | * flush what is in 133 | * executor will complete what is in and will not accept new entries. 134 | */ 135 | public void close() { 136 | this.isClosed = true; 137 | flush(); 138 | this.executor.shutdown(); 139 | } 140 | 141 | 142 | } 143 | 144 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/client/GenericSendBufferTask.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.client; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | /** 8 | * Task to send a batch of data. 9 | *
<p>
10 | * The batch task is constructed open and accepts data until full, or until 11 | * {@code maxBatchOpenMs} elapses. At that point, the batch closes and the collected events 12 | * are assembled into a single request. 13 | *
<p>
14 | * Instances of this class (and subclasses) are thread-safe. 15 | * 16 | */ 17 | public abstract class GenericSendBufferTask implements Runnable { 18 | 19 | int currentBatchSize = 0; 20 | private final int maxBatchSizeBytes; 21 | private final int maxBatchOpenMs; 22 | private static final int DEFAULT_MAX_BATCH_OPEN_MILLISECONDS = 10000; 23 | 24 | protected final List datas; 25 | private boolean closed; 26 | private volatile GenericSendBuffer.Listener> onCompleted; 27 | 28 | public GenericSendBufferTask(int maxBatchSizeBytes) { 29 | this(maxBatchSizeBytes, DEFAULT_MAX_BATCH_OPEN_MILLISECONDS); 30 | } 31 | 32 | public GenericSendBufferTask(int maxBatchSizeBytes, int maxBatchOpenMs) { 33 | this.datas = new ArrayList<>(); 34 | this.maxBatchSizeBytes = maxBatchSizeBytes; 35 | this.maxBatchOpenMs = maxBatchOpenMs; 36 | } 37 | 38 | public void setOnCompleted(GenericSendBuffer.Listener> value) { 39 | this.onCompleted = value; 40 | } 41 | 42 | /** 43 | * Adds an event to the batch if it is still open and has capacity. 44 | * 45 | * @param data 46 | * @return true if the event was added, otherwise, false 47 | */ 48 | public synchronized boolean addEvent(T data) { 49 | if (closed) { 50 | return false; 51 | } 52 | 53 | boolean wasAdded = addIfAllowed(data); 54 | // If we can't add the event (because we are full), close the batch 55 | if (!wasAdded) { 56 | closed = true; 57 | notify(); 58 | } 59 | 60 | return wasAdded; 61 | } 62 | 63 | /** 64 | * Adds the event to the batch if capacity allows it. Called by {@code addEvent} with a 65 | * lock on {@code this} held. 66 | * 67 | * @param data 68 | * @return true if it is okay to add the event, otherwise, false 69 | */ 70 | private boolean addIfAllowed(T data) { 71 | if (isOkToAdd(data)) { 72 | this.datas.add(data); 73 | onEventAdded(data); 74 | return true; 75 | } else { 76 | return false; 77 | } 78 | } 79 | 80 | protected abstract int calculateDataSize(T data); 81 | 82 | /** 83 | * Checks whether it's okay to add the event to this buffer. Called by 84 | * {@code addIfAllowed} with a lock on {@code this} held. 85 | * 86 | * @param data 87 | * the event to add 88 | * @return true if the event is okay to add, otherwise, false 89 | */ 90 | protected boolean isOkToAdd(T data) { 91 | return ((this.calculateDataSize(data) + this.currentBatchSize) <= this.maxBatchSizeBytes); 92 | } 93 | 94 | /** 95 | * Returns the max batch size for the implementation 96 | * 97 | * @return maximum batch size 98 | */ 99 | protected int getMaxBatchSizeBytes() { 100 | return (maxBatchSizeBytes); 101 | } 102 | 103 | /** 104 | * A hook to be run when an event is successfully added to this buffer. Called by 105 | * {@code addIfAllowed} with a lock on {@code this} held. 106 | * 107 | * @param data 108 | * the event that was added 109 | */ 110 | protected void onEventAdded(T data) { 111 | this.currentBatchSize += this.calculateDataSize(data); 112 | } 113 | 114 | /** 115 | * Processes the batch once closed. Is NOT called with a lock on {@code this}. 116 | * However, it's passed a local copy of the {@code event} list 117 | * made while holding the lock. 
118 | */ 119 | protected abstract void process(List datas); 120 | 121 | @Override 122 | public final void run() { 123 | try { 124 | 125 | long deadlineMs = TimeUnit.MILLISECONDS.convert( 126 | System.nanoTime(), 127 | TimeUnit.NANOSECONDS 128 | ) + maxBatchOpenMs + 1; 129 | 130 | long t = TimeUnit.MILLISECONDS.convert( 131 | System.nanoTime(), 132 | TimeUnit.NANOSECONDS); 133 | 134 | List datas; 135 | 136 | synchronized (this) { 137 | while (!closed && (t < deadlineMs)) { 138 | t = TimeUnit.MILLISECONDS.convert( 139 | System.nanoTime(), 140 | TimeUnit.NANOSECONDS 141 | ); 142 | 143 | // zero means "wait forever", can't have that. 144 | long toWait = Math.max(1, deadlineMs - t); 145 | wait(toWait); 146 | } 147 | 148 | closed = true; 149 | datas = new ArrayList<>(this.datas); 150 | } 151 | 152 | process(datas); 153 | } catch (InterruptedException e) { 154 | // If the thread is interrupted, we should make a best effort to deliver the messages in the buffer. 155 | // This may result in duplicated messages if the thread is interrupted late in the execution of process(...) 156 | // but this is better than missing messages that might have information on an important error. 157 | process(new ArrayList<>(this.datas)); 158 | this.datas.clear(); 159 | } catch (RuntimeException e) { 160 | throw e; 161 | } catch (Error e) { 162 | throw new RuntimeException("Error encountered", e); 163 | } finally { 164 | // make a copy of the listener since it (theoretically) can be 165 | // modified from the outside. 166 | GenericSendBuffer.Listener> listener = onCompleted; 167 | if (listener != null) { 168 | listener.invoke(this); 169 | } 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/logging/JSONConfiguration.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | 5 | public interface JSONConfiguration { 6 | void configure(ObjectMapper objectMapper); 7 | } 8 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/logging/JSONLayout.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import com.fasterxml.jackson.databind.node.ObjectNode; 5 | import org.apache.log4j.Layout; 6 | import org.apache.log4j.helpers.LogLog; 7 | import org.apache.log4j.spi.LocationInfo; 8 | import org.apache.log4j.spi.LoggingEvent; 9 | import org.apache.log4j.spi.ThrowableInformation; 10 | 11 | import java.time.Instant; 12 | import java.util.Map; 13 | import java.util.Set; 14 | 15 | public class JSONLayout extends Layout { 16 | 17 | public static final String TIMESTAMP_FIELD_NAME = "timestamp"; 18 | private boolean locationInfo; 19 | private String jsonConfiguration; 20 | private ObjectMapper objectMapper = new ObjectMapper(); 21 | 22 | public JSONLayout() { 23 | this(false); 24 | } 25 | 26 | /** 27 | * Creates a layout that optionally inserts location information into log messages. 28 | * 29 | * @param locationInfo whether or not to include location information in the log messages. 
30 | */ 31 | public JSONLayout(boolean locationInfo) { 32 | this.locationInfo = locationInfo; 33 | } 34 | 35 | public String format(LoggingEvent loggingEvent) { 36 | String threadName = loggingEvent.getThreadName(); 37 | long timestamp = loggingEvent.getTimeStamp(); 38 | Map mdc = loggingEvent.getProperties(); 39 | ObjectNode event = this.objectMapper.createObjectNode(); 40 | 41 | event.put(TIMESTAMP_FIELD_NAME, Instant.ofEpochMilli(timestamp).toString()); 42 | 43 | event.put("message", loggingEvent.getRenderedMessage()); 44 | 45 | if (loggingEvent.getThrowableInformation() != null) { 46 | ObjectNode exceptionNode = objectMapper.createObjectNode(); 47 | final ThrowableInformation throwableInformation = loggingEvent.getThrowableInformation(); 48 | if (throwableInformation.getThrowable().getClass().getCanonicalName() != null) { 49 | exceptionNode.put("exception_class", throwableInformation.getThrowable().getClass().getCanonicalName()); 50 | } 51 | if (throwableInformation.getThrowable().getMessage() != null) { 52 | exceptionNode.put("exception_message", throwableInformation.getThrowable().getMessage()); 53 | } 54 | if (throwableInformation.getThrowableStrRep() != null) { 55 | String stackTrace = String.join("\n", throwableInformation.getThrowableStrRep()); 56 | exceptionNode.put("stacktrace", stackTrace); 57 | } 58 | event.replace("exception", exceptionNode); 59 | } 60 | 61 | if (locationInfo) { 62 | LocationInfo info = loggingEvent.getLocationInformation(); 63 | event.put("file", info.getFileName()); 64 | event.put("line_number", info.getLineNumber()); 65 | event.put("class", info.getClassName()); 66 | event.put("method", info.getMethodName()); 67 | } 68 | 69 | event.put("logger_name", loggingEvent.getLoggerName()); 70 | event.put("level", loggingEvent.getLevel().toString()); 71 | event.put("thread_name", threadName); 72 | 73 | // We are going to change this to promote all MDC properties to top-level properties. 74 | // If there is a name collision, we will just log a warning to LogLog. 75 | // Because of this, we need to process MDC as the last set of properties. 76 | for (Map.Entry property : (Set)mdc.entrySet()) { 77 | String key = (String)property.getKey(); 78 | if (event.has(key)) { 79 | // The field in MDC has a name conflict with the existing fields. 80 | LogLog.warn(String.format("MDC field '%s' already exists in log4j event", key)); 81 | } else { 82 | event.put(key, (String)property.getValue()); 83 | } 84 | } 85 | 86 | try { 87 | return objectMapper.writeValueAsString(event); 88 | } catch (Exception ex) { 89 | LogLog.warn("Error serializing event", ex); 90 | return null; 91 | } 92 | } 93 | 94 | public boolean ignoresThrowable() { 95 | return false; 96 | } 97 | 98 | /** 99 | * Query whether log messages include location information. 100 | * 101 | * @return true if location information is included in log messages, false otherwise. 
102 | */ 103 | public boolean getLocationInfo() { 104 | return this.locationInfo; 105 | } 106 | 107 | public void setLocationInfo(boolean locationInfo) { 108 | this.locationInfo = locationInfo; 109 | } 110 | 111 | public String getJsonConfiguration() { 112 | return this.jsonConfiguration; 113 | } 114 | 115 | public void setJsonConfiguration(String jsonConfiguration) { 116 | try { 117 | Class clazz = Class.forName(jsonConfiguration); 118 | JSONConfiguration configuration = (JSONConfiguration)clazz.newInstance(); 119 | configuration.configure(this.objectMapper); 120 | } catch (ClassNotFoundException cnfe) { 121 | LogLog.warn( 122 | String.format("Could not find JSON Configuration class: %s", jsonConfiguration), 123 | cnfe); 124 | } catch (InstantiationException | IllegalAccessException ie) { 125 | LogLog.warn( 126 | String.format("Error creating instance of JSON Configuration class: %s", jsonConfiguration), 127 | ie); 128 | } catch (Exception ex) { 129 | LogLog.warn("Unexpected error setting JSON Configuration", ex); 130 | } 131 | } 132 | 133 | public void activateOptions() { 134 | } 135 | } -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/logging/Log4jConfiguration.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging; 2 | 3 | import org.apache.log4j.PropertyConfigurator; 4 | 5 | import java.io.InputStream; 6 | 7 | public class Log4jConfiguration { 8 | public static void configure(String configFilename) { 9 | PropertyConfigurator.configure(configFilename); 10 | } 11 | 12 | public static void configure(InputStream inputStream) { 13 | PropertyConfigurator.configure(inputStream); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/logging/MDCCloseableFactory.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging; 2 | 3 | import org.slf4j.MDC; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.Optional; 8 | 9 | public class MDCCloseableFactory { 10 | private class MDCCloseable implements AutoCloseable { 11 | @SuppressWarnings("unchecked") 12 | public MDCCloseable(Map mdc) { 13 | // Log4j supports Map, but slf4j wants Map 14 | // Because of type erasure, this should be okay. 
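// The unchecked cast below only affects compile-time checking; at runtime the map is simply
// handed to the underlying MDC adapter.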
15 | MDC.setContextMap((Map)mdc); 16 | } 17 | 18 | @Override 19 | public void close() { 20 | MDC.clear(); 21 | } 22 | } 23 | 24 | private Optional> context; 25 | 26 | public MDCCloseableFactory() { 27 | this(null); 28 | } 29 | 30 | public MDCCloseableFactory(Map context) { 31 | this.context = Optional.ofNullable(context); 32 | } 33 | 34 | public AutoCloseable create(Map mdc) { 35 | // Values in mdc will override context 36 | Map newMDC = new HashMap<>(); 37 | this.context.ifPresent(c -> newMDC.putAll(c)); 38 | newMDC.putAll(mdc); 39 | return new MDCCloseable(newMDC); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/java/com/microsoft/pnp/logging/SparkPropertyEnricher.java: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.logging; 2 | 3 | import org.apache.log4j.spi.Filter; 4 | import org.apache.log4j.spi.LoggingEvent; 5 | import org.apache.spark.SparkConf; 6 | import org.apache.spark.SparkEnv; 7 | import com.microsoft.pnp.SparkInformation; 8 | 9 | import java.util.Map; 10 | 11 | import scala.collection.JavaConverters; 12 | 13 | public class SparkPropertyEnricher extends Filter { 14 | 15 | @Override 16 | public int decide(LoggingEvent loggingEvent) { 17 | // This is not how we should really do this since we aren't actually filtering, 18 | // but because Spark uses the log4j.properties configuration instead of the XML 19 | // configuration, our options are limited. 20 | 21 | // There are some things that are unavailable until a certain point 22 | // in the Spark lifecycle on the driver. We will try to get as much as we can. 23 | Map javaMap = JavaConverters 24 | .mapAsJavaMapConverter(SparkInformation.get()).asJava(); 25 | for (Map.Entry entry : javaMap.entrySet()) { 26 | loggingEvent.setProperty(entry.getKey(), entry.getValue()); 27 | } 28 | 29 | return Filter.NEUTRAL; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/com/microsoft/pnp/SparkInformation.scala: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp 2 | 3 | import org.apache.spark.SparkEnv 4 | 5 | object SparkInformation { 6 | // Spark Configuration 7 | // NOTE - In Spark versions > 2.4.0, many settings have been, or will likely be, replaced with values 8 | // in the internal config package, so these should be replaced with those. 9 | private val EXECUTOR_ID = "spark.executor.id" 10 | private val APPLICATION_ID = "spark.app.id" 11 | private val APPLICATION_NAME = "spark.app.name" 12 | 13 | // Databricks-specific 14 | private val DB_CLUSTER_ID = "spark.databricks.clusterUsageTags.clusterId" 15 | private val DB_CLUSTER_NAME = "spark.databricks.clusterUsageTags.clusterName" 16 | // This is the environment variable name set in our init script. 17 | private val DB_CLUSTER_ID_ENVIRONMENT_VARIABLE = "DB_CLUSTER_ID" 18 | private val DB_CLUSTER_NAME_ENVIRONMENT_VARIABLE = "DB_CLUSTER_NAME" 19 | 20 | def get(): Map[String, String] = { 21 | // We might want to improve this to pull valid class names from the beginning of the command 22 | val className = "^(\\S*).*$".r 23 | // The sun.java.command is valid on the Oracle and OpenJDK JVMs, so we should be okay 24 | // for Databricks 25 | val nodeType = System.getProperty("sun.java.command") match { 26 | // Most of these values come from the org/apache/spark/launcher/SparkClassCommandBuilder.java 27 | // file. 
When Spark is upgraded to new versions, this file needs to be checked. 28 | // We are basically taking the first part of the command passed to the 29 | // ${SPARK_HOME}/bin/spark-class script. We have to do this because we cannot 30 | // always get to a SparkEnv. If we don't have a match on any of these, we will 31 | // at least return the full class name. 32 | case className(c) => Some(c match { 33 | case "org.apache.spark.deploy.master.Master" => "master" 34 | case "org.apache.spark.deploy.worker.Worker" => "worker" 35 | case "org.apache.spark.executor.CoarseGrainedExecutorBackend" => "executor" 36 | case "org.apache.spark.deploy.ExternalShuffleService" => "shuffle" 37 | // The first value is what we see on Databricks when running a JAR job or 38 | // a Notebook, since Databricks wraps the usual Spark driver process. 39 | case "com.databricks.backend.daemon.driver.DriverDaemon" | 40 | "org.apache.spark.deploy.SparkSubmit" => "driver" 41 | case _ => c 42 | }) 43 | case _ => None 44 | } 45 | 46 | val sparkInfo = Option(SparkEnv.get) match { 47 | case Some(e) => { 48 | val conf = e.conf 49 | Map( 50 | "applicationId" -> conf.getOption(APPLICATION_ID), 51 | "applicationName" -> conf.getOption(APPLICATION_NAME), 52 | "clusterId" -> conf.getOption(DB_CLUSTER_ID), 53 | "clusterName" -> conf.getOption(DB_CLUSTER_NAME), 54 | "executorId" -> Option(e.executorId), 55 | "nodeType" -> nodeType 56 | ) 57 | } 58 | case None => { 59 | // If we don't have a SparkEnv, we could be on any node type, really. 60 | Map( 61 | "clusterId" -> sys.env.get(DB_CLUSTER_ID_ENVIRONMENT_VARIABLE), 62 | "clusterName" -> sys.env.get(DB_CLUSTER_NAME_ENVIRONMENT_VARIABLE), 63 | "nodeType" -> nodeType 64 | ) 65 | } 66 | } 67 | 68 | // We will remove None values and convert to Map[String, String] to make conversion 69 | // less painful.
70 | for ((k, Some(v)) <- sparkInfo ) yield k -> v 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/com/microsoft/pnp/util/TryWith.scala: -------------------------------------------------------------------------------- 1 | package com.microsoft.pnp.util 2 | 3 | import scala.util.control.NonFatal 4 | import scala.util.{Failure, Try} 5 | 6 | object TryWith { 7 | def apply[C <: AutoCloseable, R](resource: => C)(f: C => R): Try[R] = 8 | Try(resource).flatMap(resourceInstance => { 9 | try { 10 | val returnValue = f(resourceInstance) 11 | Try(resourceInstance.close()).map(_ => returnValue) 12 | } 13 | catch { 14 | case NonFatal(exceptionInFunction) => 15 | try { 16 | resourceInstance.close() 17 | Failure(exceptionInFunction) 18 | } 19 | catch { 20 | case NonFatal(exceptionInClose) => 21 | exceptionInFunction.addSuppressed(exceptionInClose) 22 | Failure(exceptionInFunction) 23 | } 24 | } 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/listeners/SparkListenerHandlers.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import java.time.Instant 4 | 5 | import org.apache.spark.scheduler._ 6 | import org.apache.spark.util.Utils 7 | 8 | trait SparkListenerHandlers { 9 | this: UnifiedSparkListenerHandler => 10 | 11 | override val logBlockUpdates = conf.getBoolean( 12 | "spark.unifiedListener.logBlockUpdates", 13 | false 14 | ) 15 | 16 | override def onApplicationEnd(event: SparkListenerApplicationEnd): Unit = logSparkListenerEvent( 17 | event, 18 | () => Instant.ofEpochMilli(event.time) 19 | ) 20 | 21 | override def onApplicationStart(event: SparkListenerApplicationStart): Unit = logSparkListenerEvent( 22 | event, 23 | () => Instant.ofEpochMilli(event.time) 24 | ) 25 | 26 | override def onBlockManagerAdded(event: SparkListenerBlockManagerAdded): Unit = logSparkListenerEvent( 27 | event, 28 | () => Instant.ofEpochMilli(event.time) 29 | ) 30 | 31 | override def onBlockManagerRemoved(event: SparkListenerBlockManagerRemoved): Unit = logSparkListenerEvent( 32 | event, 33 | () => Instant.ofEpochMilli(event.time) 34 | ) 35 | 36 | override def onBlockUpdated(event: SparkListenerBlockUpdated): Unit = { 37 | if (this.logBlockUpdates) { 38 | logSparkListenerEvent(event) 39 | } 40 | } 41 | 42 | override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = 43 | logSparkListenerEvent(redactEvent(event)) 44 | 45 | override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = logSparkListenerEvent( 46 | event, 47 | () => Instant.ofEpochMilli(event.time) 48 | ) 49 | 50 | override def onExecutorBlacklisted(event: SparkListenerExecutorBlacklisted): Unit = logSparkListenerEvent( 51 | event, 52 | () => Instant.ofEpochMilli(event.time) 53 | ) 54 | 55 | // No-op because logging every update would be overkill 56 | override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = {} 57 | 58 | override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = logSparkListenerEvent( 59 | event, 60 | () => Instant.ofEpochMilli(event.time) 61 | ) 62 | 63 | override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = logSparkListenerEvent( 64 | event, 65 | () => Instant.ofEpochMilli(event.time) 66 | ) 67 | 68 | override def onJobEnd(event: SparkListenerJobEnd): Unit = 
logSparkListenerEvent( 69 | event, 70 | () => Instant.ofEpochMilli(event.time) 71 | ) 72 | 73 | override def onJobStart(event: SparkListenerJobStart): Unit = logSparkListenerEvent( 74 | event, 75 | () => Instant.ofEpochMilli(event.time) 76 | ) 77 | 78 | override def onNodeBlacklisted(event: SparkListenerNodeBlacklisted): Unit = logSparkListenerEvent( 79 | event, 80 | () => Instant.ofEpochMilli(event.time) 81 | ) 82 | 83 | override def onNodeUnblacklisted(event: SparkListenerNodeUnblacklisted): Unit = logSparkListenerEvent( 84 | event, 85 | () => Instant.ofEpochMilli(event.time) 86 | ) 87 | 88 | override def onStageCompleted(event: SparkListenerStageCompleted): Unit = logSparkListenerEvent( 89 | event, 90 | () => Instant.ofEpochMilli(event.stageInfo.completionTime.getOrElse(Instant.now().toEpochMilli)) 91 | ) 92 | 93 | override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = logSparkListenerEvent( 94 | event, 95 | () => Instant.ofEpochMilli(event.stageInfo.submissionTime.getOrElse(Instant.now().toEpochMilli)) 96 | ) 97 | 98 | override def onTaskGettingResult(event: SparkListenerTaskGettingResult): Unit = logSparkListenerEvent(event) 99 | 100 | override def onTaskStart(event: SparkListenerTaskStart): Unit = logSparkListenerEvent( 101 | event, 102 | () => Instant.ofEpochMilli(event.taskInfo.launchTime) 103 | ) 104 | 105 | override def onTaskEnd(event: SparkListenerTaskEnd): Unit = logSparkListenerEvent( 106 | event, 107 | () => Instant.ofEpochMilli(event.taskInfo.finishTime) 108 | ) 109 | 110 | override def onUnpersistRDD(event: SparkListenerUnpersistRDD): Unit = { 111 | logSparkListenerEvent(event) 112 | } 113 | 114 | private def redactEvent(event: SparkListenerEnvironmentUpdate): SparkListenerEnvironmentUpdate = { 115 | // environmentDetails maps a string descriptor to a set of properties 116 | // Similar to: 117 | // "JVM Information" -> jvmInformation, 118 | // "Spark Properties" -> sparkProperties, 119 | // ... 120 | // where jvmInformation, sparkProperties, etc. are sequence of tuples. 121 | // We go through the various of properties and redact sensitive information from them. 
122 | val redactedProps = event.environmentDetails.map { case (name, props) => 123 | name -> Utils.redact(this.conf, props) 124 | } 125 | SparkListenerEnvironmentUpdate(redactedProps) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/listeners/StreamingListenerHandlers.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import java.time.Instant 4 | 5 | import com.fasterxml.jackson.annotation.{JsonTypeInfo, JsonUnwrapped} 6 | import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver 7 | import com.fasterxml.jackson.databind.{DatabindContext, JavaType} 8 | import org.apache.spark.internal.Logging 9 | import org.apache.spark.scheduler.SparkListenerEvent 10 | import org.apache.spark.streaming.scheduler._ 11 | 12 | import scala.reflect.ClassTag 13 | import scala.reflect.runtime.{universe => ru} 14 | 15 | private object StreamingListenerHandlers { 16 | val WrappedStreamingListenerEventClassName: String = 17 | "org.apache.spark.streaming.scheduler.StreamingListenerBus$WrappedStreamingListenerEvent" 18 | val StreamingListenerEventFieldName: String = "streamingListenerEvent" 19 | } 20 | 21 | trait StreamingListenerHandlers { 22 | this: UnifiedSparkListenerHandler with Logging => 23 | 24 | private val wrappedStreamingListenerEventClass: Class[_] = { 25 | // This is the class Spark uses internally to wrap StreamingListenerEvents so it can pass through 26 | // the LiveListenerBus. It's private, so we'll get it this way. If this class name ever changes, 27 | // this code needs to be updated!!! 28 | val clazz = Class.forName(StreamingListenerHandlers.WrappedStreamingListenerEventClassName) 29 | if (clazz == null) { 30 | throw new ClassNotFoundException( 31 | s"Error loading class: ${StreamingListenerHandlers.WrappedStreamingListenerEventClassName}") 32 | } 33 | clazz 34 | } 35 | 36 | private val streamingListenerEventField: ru.TermSymbol = { 37 | val classSymbol = ru.runtimeMirror( 38 | this.wrappedStreamingListenerEventClass.getClassLoader 39 | ).classSymbol(this.wrappedStreamingListenerEventClass) 40 | classSymbol.typeSignature.member(ru.TermName( 41 | StreamingListenerHandlers.StreamingListenerEventFieldName) 42 | ) match { 43 | case symbol: ru.SymbolApi => symbol.asTerm 44 | case null => throw new NoSuchFieldException( 45 | s"Error reflecting ${StreamingListenerHandlers.StreamingListenerEventFieldName} field") 46 | } 47 | } 48 | 49 | protected val streamingListenerEventClassTag: ClassTag[SparkListenerEvent] = 50 | ClassTag[SparkListenerEvent](this.wrappedStreamingListenerEventClass) 51 | 52 | import scala.language.implicitConversions 53 | 54 | // Unwrap the StreamingListenerEvent from the Spark-owned private, inner class 55 | protected implicit def wrappedStreamingListenerEventToStreamingListenerEvent(event: SparkListenerEvent): Option[StreamingListenerEvent] = { 56 | val instanceMirror: ru.InstanceMirror = ru.runtimeMirror( 57 | event.getClass.getClassLoader 58 | ).reflect(event) 59 | 60 | val fieldMirror = instanceMirror.reflectField(streamingListenerEventField) 61 | Some(fieldMirror.get.asInstanceOf[StreamingListenerEvent]) 62 | } 63 | 64 | // Re-wrap the StreamingListenerEvent in our wrapper. 
The wrapper contains the JSON 65 | // serialization bits to serialize StreamingListenerEvents properly so we don't have to 66 | // maintain our own ObjectMapper 67 | private implicit def streamingListenerEventToSparkListenerEvent(event: StreamingListenerEvent): SparkListenerEvent = { 68 | new StreamingListenerEventWrapper(event) 69 | } 70 | 71 | protected def onStreamingListenerEvent(event: Option[StreamingListenerEvent]): Unit = { 72 | event match { 73 | case Some(sle) => sle match { 74 | case streamingStarted: StreamingListenerStreamingStarted => 75 | this.logSparkListenerEvent( 76 | streamingStarted, 77 | () => Instant.ofEpochMilli(streamingStarted.time) 78 | ) 79 | case batchCompleted: StreamingListenerBatchCompleted => 80 | this.logSparkListenerEvent( 81 | batchCompleted, 82 | () => Instant.ofEpochMilli( 83 | batchCompleted.batchInfo.processingEndTime.getOrElse( 84 | Instant.now.toEpochMilli 85 | ) 86 | ) 87 | ) 88 | case batchStarted: StreamingListenerBatchStarted => 89 | this.logSparkListenerEvent( 90 | batchStarted, 91 | () => Instant.ofEpochMilli( 92 | batchStarted.batchInfo.processingStartTime.getOrElse( 93 | Instant.now().toEpochMilli 94 | ) 95 | ) 96 | ) 97 | case batchSubmitted: StreamingListenerBatchSubmitted => 98 | this.logSparkListenerEvent( 99 | batchSubmitted, 100 | () => Instant.ofEpochMilli(batchSubmitted.batchInfo.submissionTime) 101 | ) 102 | case outputOperationCompleted: StreamingListenerOutputOperationCompleted => 103 | this.logSparkListenerEvent( 104 | outputOperationCompleted, 105 | () => Instant.ofEpochMilli( 106 | outputOperationCompleted.outputOperationInfo.endTime.getOrElse( 107 | Instant.now.toEpochMilli 108 | ) 109 | ) 110 | ) 111 | case outputOperationStarted: StreamingListenerOutputOperationStarted => 112 | this.logSparkListenerEvent( 113 | outputOperationStarted, 114 | () => Instant.ofEpochMilli( 115 | outputOperationStarted.outputOperationInfo.startTime.getOrElse( 116 | Instant.now.toEpochMilli 117 | ) 118 | ) 119 | ) 120 | case receiverError: StreamingListenerReceiverError => 121 | this.logSparkListenerEvent(receiverError) 122 | case receiverStarted: StreamingListenerReceiverStarted => 123 | this.logSparkListenerEvent(receiverStarted) 124 | case receiverStopped: StreamingListenerReceiverStopped => 125 | this.logSparkListenerEvent(receiverStopped) 126 | } 127 | case None => this.logWarning("StreamingListenerEvent was None") 128 | } 129 | } 130 | } 131 | 132 | @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, include = JsonTypeInfo.As.PROPERTY, property = "Event") 133 | @JsonTypeIdResolver(classOf[StreamingListenerEventWrapperTypeIdResolver]) 134 | class StreamingListenerEventWrapper( 135 | @JsonUnwrapped 136 | val streamingListenerEvent: StreamingListenerEvent 137 | ) extends SparkListenerEvent 138 | 139 | class StreamingListenerEventWrapperTypeIdResolver extends com.fasterxml.jackson.databind.jsontype.TypeIdResolver { 140 | private var javaType: JavaType = _ 141 | 142 | override def init(javaType: JavaType): Unit = { 143 | this.javaType = javaType 144 | } 145 | 146 | override def idFromValue(o: Any): String = this.idFromValueAndType(o, o.getClass) 147 | 148 | override def idFromValueAndType(o: Any, aClass: Class[_]): String = { 149 | o 150 | .asInstanceOf[StreamingListenerEventWrapper] 151 | .streamingListenerEvent 152 | .getClass 153 | .getName 154 | } 155 | 156 | override def idFromBaseType(): String = throw new NotImplementedError() 157 | 158 | def typeFromId(s: String): JavaType = throw new NotImplementedError() 159 | 160 | override def 
typeFromId(databindContext: DatabindContext, s: String): JavaType = throw new NotImplementedError() 161 | 162 | override def getMechanism: JsonTypeInfo.Id = JsonTypeInfo.Id.CUSTOM 163 | 164 | def getDescForKnownTypeIds: String = throw new NotImplementedError() 165 | } 166 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/listeners/StreamingQueryListenerHandlers.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import java.time.Instant 4 | 5 | import org.apache.spark.sql.streaming.StreamingQueryListener 6 | 7 | trait StreamingQueryListenerHandlers{ 8 | this: UnifiedSparkListenerHandler => 9 | 10 | private[listeners] def onStreamingQueryListenerEvent(event: StreamingQueryListener.Event): Unit = { 11 | // Only the query progress event has a timestamp, so we'll send everything else 12 | // on through 13 | event match { 14 | case queryProgress: StreamingQueryListener.QueryProgressEvent => 15 | logSparkListenerEvent( 16 | event, 17 | () => Instant.parse(queryProgress.progress.timestamp) 18 | ) 19 | case streamingQueryListenerEvent: StreamingQueryListener.Event => 20 | logSparkListenerEvent(streamingQueryListenerEvent) 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/listeners/UnifiedSparkListener.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import java.time.Instant 4 | 5 | import com.microsoft.pnp.SparkInformation 6 | import org.apache.spark.{SparkConf, SparkException} 7 | import org.apache.spark.internal.Logging 8 | import org.apache.spark.listeners.sink.SparkListenerSink 9 | import org.apache.spark.scheduler._ 10 | import org.apache.spark.sql.streaming.StreamingQueryListener 11 | import org.apache.spark.util.JsonProtocol 12 | import org.json4s.JsonAST.JValue 13 | import org.json4s.JsonDSL._ 14 | import org.json4s.jackson.JsonMethods.{compact, render} 15 | 16 | import scala.util.control.NonFatal 17 | 18 | /** 19 | * A unified SparkListener that logs events to a configured sink. 20 | * 21 | */ 22 | class UnifiedSparkListener(override val conf: SparkConf) 23 | extends UnifiedSparkListenerHandler 24 | with Logging 25 | with SparkListenerHandlers 26 | with StreamingListenerHandlers 27 | with StreamingQueryListenerHandlers { 28 | 29 | private val listenerSink = this.createSink(this.conf) 30 | 31 | override def onOtherEvent(event: SparkListenerEvent): Unit = { 32 | // All events in Spark that are not specific to SparkListener go through 33 | // this method. The typed ListenerBus implementations intercept and forward to 34 | // their "local" listeners. 35 | // We will just handle everything here so we only have to have one listener. 36 | // The advantage is that this can be registered in extraListeners, so no 37 | // code change is required to add listener support. 
38 | event match { 39 | // We will use the ClassTag for the private wrapper class to match 40 | case this.streamingListenerEventClassTag(e) => 41 | this.onStreamingListenerEvent(e) 42 | case streamingQueryListenerEvent: StreamingQueryListener.Event => 43 | this.onStreamingQueryListenerEvent(streamingQueryListenerEvent) 44 | case sparkListenerEvent: SparkListenerEvent => if (sparkListenerEvent.logEvent) { 45 | logSparkListenerEvent(sparkListenerEvent) 46 | } 47 | } 48 | } 49 | 50 | private def createSink(conf: SparkConf): SparkListenerSink = { 51 | val sink = conf.getOption("spark.unifiedListener.sink") match { 52 | case Some(listenerSinkClassName) => listenerSinkClassName 53 | case None => throw new SparkException("spark.unifiedListener.sink setting is required") 54 | } 55 | logInfo(s"Creating listener sink: ${sink}") 56 | org.apache.spark.util.Utils.loadExtensions( 57 | classOf[SparkListenerSink], 58 | Seq(sink), 59 | conf).head 60 | } 61 | 62 | protected def logSparkListenerEvent( 63 | event: SparkListenerEvent, 64 | getTimestamp: () => Instant = 65 | () => Instant.now()): Unit = { 66 | val json = try { 67 | // Add a well-known time field. 68 | Some( 69 | JsonProtocol.sparkEventToJson(event) 70 | .merge(render( 71 | SparkInformation.get() + ("SparkEventTime" -> getTimestamp().toString) 72 | )) 73 | ) 74 | } catch { 75 | case NonFatal(e) => 76 | logError(s"Error serializing SparkListenerEvent to JSON: $event", e) 77 | None 78 | } 79 | 80 | sendToSink(json) 81 | } 82 | 83 | private[spark] def sendToSink(json: Option[JValue]): Unit = { 84 | try { 85 | json match { 86 | case Some(j) => { 87 | logDebug(s"Sending event to listener sink: ${compact(j)}") 88 | this.listenerSink.logEvent(json) 89 | } 90 | case None => { 91 | logWarning("json value was None") 92 | } 93 | } 94 | } catch { 95 | case NonFatal(e) => 96 | logError(s"Error sending to listener sink: $e") 97 | } 98 | } 99 | } 100 | 101 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/listeners/UnifiedSparkListenerHandler.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import java.time.Instant 4 | 5 | import org.apache.spark.SparkConf 6 | import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} 7 | 8 | trait UnifiedSparkListenerHandler extends SparkListener { 9 | protected def conf: SparkConf 10 | protected def logBlockUpdates: Boolean 11 | 12 | protected def logSparkListenerEvent( 13 | event: SparkListenerEvent, 14 | getTimestamp: () => Instant = () => Instant.now() 15 | ): Unit 16 | } 17 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/listeners/sink/SparkListenerSink.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners.sink 2 | 3 | import org.json4s.JsonAST.JValue 4 | 5 | trait SparkListenerSink { 6 | def logEvent(event: Option[JValue]) 7 | } 8 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/Implicits.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | object Implicits { 4 | implicit class StringExtensions(val input: String) { 5 | def isNullOrEmpty: Boolean = input == null || input.trim.isEmpty 6 | } 7 | } 8 | 
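
The UnifiedSparkListener shown above is wired entirely through Spark configuration: spark.extraListeners registers the listener itself, and spark.unifiedListener.sink names the SparkListenerSink implementation that createSink loads via loadExtensions. The following is a minimal sketch of that wiring (for example from a notebook or spark-shell); the StdoutListenerSink class is hypothetical and not part of this project, and a real deployment would point the sink setting at the project's Log Analytics sink instead.

import org.apache.spark.SparkConf
import org.apache.spark.listeners.sink.SparkListenerSink
import org.json4s.JsonAST.JValue
import org.json4s.jackson.JsonMethods.compact

// Hypothetical sink: prints each serialized event instead of shipping it anywhere.
class StdoutListenerSink(conf: SparkConf) extends SparkListenerSink {
  override def logEvent(event: Option[JValue]): Unit =
    event.foreach(j => println(compact(j)))
}

val conf = new SparkConf()
  // Registers the unified listener without any code change in the job itself.
  .set("spark.extraListeners", "org.apache.spark.listeners.UnifiedSparkListener")
  // Required setting read by createSink; the class is loaded via loadExtensions.
  .set("spark.unifiedListener.sink", classOf[StdoutListenerSink].getName)
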
-------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/MetricProxies.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import java.util.concurrent.{Callable, TimeUnit} 4 | 5 | import com.codahale.metrics._ 6 | import org.apache.spark.rpc.RpcEndpointRef 7 | 8 | import scala.reflect.ClassTag 9 | 10 | object MetricsProxiesImplicits { 11 | import scala.language.implicitConversions 12 | 13 | implicit def callable[T](f: () => T): Callable[T] = 14 | new Callable[T]() { def call() = f() } 15 | } 16 | 17 | private[metrics] object MetricsProxiesReflectionImplicits { 18 | private def getField[T: ClassTag](fieldName: String): java.lang.reflect.Field = { 19 | val field = scala.reflect.classTag[T].runtimeClass.getDeclaredField(fieldName) 20 | field.setAccessible(true) 21 | field 22 | } 23 | 24 | implicit class HistogramReflect(val histogram: Histogram) { 25 | private lazy val reservoirField = getField[Histogram]("reservoir") 26 | def getReservoirClass: Class[_ <: Reservoir] = { 27 | reservoirField.get(histogram).getClass.asInstanceOf[Class[_ <: Reservoir]] 28 | } 29 | } 30 | 31 | implicit class MeterReflect(val meter: Meter) { 32 | private lazy val clockField = getField[Meter]("clock") 33 | 34 | def getClockClass: Class[_ <: Clock] = { 35 | clockField.get(meter).getClass.asInstanceOf[Class[_ <: Clock]] 36 | } 37 | } 38 | 39 | implicit class TimerReflect(val timer: Timer) { 40 | lazy val clockField = getField[Timer]("clock") 41 | lazy val histogramField = getField[Timer]("histogram") 42 | 43 | def getClockClass: Class[_ <: Clock] = { 44 | clockField.get(timer).getClass.asInstanceOf[Class[_ <: Clock]] 45 | } 46 | 47 | def getHistogram: Histogram = { 48 | histogramField.get(timer).asInstanceOf[Histogram] 49 | } 50 | } 51 | } 52 | 53 | sealed trait MetricProxy extends Metric with Serializable { 54 | protected val metricsEndpoint: RpcEndpointRef 55 | 56 | def sendMetric[T](message: MetricMessage[T]): Unit = { 57 | metricsEndpoint.send(message) 58 | } 59 | } 60 | 61 | class CounterProxy ( 62 | override val metricsEndpoint: RpcEndpointRef, 63 | val namespace: String, 64 | val metricName: String 65 | ) extends Counter with MetricProxy { 66 | 67 | override def inc(): Unit = { 68 | inc(1) 69 | } 70 | 71 | override def inc(n: Long): Unit = { 72 | sendMetric(CounterMessage(namespace, metricName, n)) 73 | } 74 | 75 | override def dec(): Unit = { 76 | dec(1) 77 | } 78 | 79 | override def dec(n: Long): Unit = { 80 | inc(-n) 81 | } 82 | } 83 | 84 | class HistogramProxy ( 85 | override val metricsEndpoint: RpcEndpointRef, 86 | val namespace: String, 87 | val metricName: String, 88 | val reservoir: Reservoir = new ExponentiallyDecayingReservoir 89 | ) extends Histogram(reservoir) with MetricProxy { 90 | override def update(value: Long): Unit = { 91 | sendMetric(HistogramMessage(namespace, metricName, value, reservoir.getClass)) 92 | } 93 | } 94 | 95 | class MeterProxy ( 96 | override val metricsEndpoint: RpcEndpointRef, 97 | val namespace: String, 98 | val metricName: String, 99 | val clock: Clock = Clock.defaultClock 100 | ) extends Meter(clock) with MetricProxy { 101 | override def mark(n: Long): Unit = { 102 | sendMetric(MeterMessage(namespace, metricName, n, clock.getClass)) 103 | } 104 | } 105 | 106 | class TimerProxy ( 107 | override val metricsEndpoint: RpcEndpointRef, 108 | val namespace: String, 109 | val metricName: String, 110 | val reservoir: 
Reservoir = new ExponentiallyDecayingReservoir, 111 | val clock: Clock = Clock.defaultClock 112 | ) extends Timer(reservoir, clock) with MetricProxy { 113 | 114 | override def update(duration: Long, unit: TimeUnit): Unit = { 115 | sendMetric(TimerMessage( 116 | namespace, 117 | metricName, 118 | duration, 119 | unit, 120 | reservoir.getClass, 121 | clock.getClass 122 | )) 123 | } 124 | 125 | // We need to override the time(Callable) method because it bypasses the update(Long, TimeUnit) 126 | // method. 127 | override def time[T](event: Callable[T]): T = { 128 | val context = this.time() 129 | try { 130 | event.call() 131 | } finally { 132 | context.close() 133 | } 134 | } 135 | } 136 | 137 | trait SettableGauge[T] extends Gauge[T] { 138 | protected var value: T = _ 139 | def set(value: T): Unit = { 140 | this.value = value 141 | } 142 | 143 | override def getValue: T = { 144 | this.value 145 | } 146 | } 147 | 148 | class SettableGaugeProxy[T]( 149 | override val metricsEndpoint: RpcEndpointRef, 150 | val namespace: String, 151 | val metricName: String 152 | ) extends SettableGauge[T] with MetricProxy { 153 | override def set(value: T): Unit = { 154 | // We don't really need to set this, but we will, just in case 155 | sendMetric(SettableGaugeMessage[T](namespace, metricName, value)) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/MetricsSource.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import com.codahale.metrics.MetricRegistry 4 | 5 | case class MetricsSource( 6 | override val sourceName: String, 7 | override val metricRegistry: MetricRegistry 8 | ) extends org.apache.spark.metrics.source.Source 9 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/MetricsSourceBuilders.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import com.codahale.metrics._ 4 | import org.apache.spark.internal.Logging 5 | import org.apache.spark.metrics.Implicits.StringExtensions 6 | import org.apache.spark.util.RpcUtils 7 | import org.apache.spark.{SparkContext, SparkEnv, SparkException} 8 | 9 | abstract class MetricsSourceBuilder(protected val namespace: String) extends Logging { 10 | 11 | require(!namespace.isNullOrEmpty, "namespace cannot be null, empty, or only whitespace") 12 | 13 | protected val metricRegistry = new MetricRegistry 14 | 15 | def registerCounter(name: String): this.type 16 | 17 | def registerHistogram(name: String): this.type 18 | 19 | def registerMeter(name: String): this.type 20 | 21 | def registerTimer(name: String): this.type 22 | } 23 | 24 | class LocalMetricsSourceBuilder(override val namespace: String) 25 | extends MetricsSourceBuilder(namespace) { 26 | 27 | override def registerCounter(name: String): this.type = { 28 | register(name, new Counter) 29 | } 30 | 31 | override def registerHistogram(name: String): this.type = { 32 | register(name, new Histogram(new ExponentiallyDecayingReservoir)) 33 | } 34 | 35 | override def registerMeter(name: String): this.type = { 36 | register(name, new Meter) 37 | } 38 | 39 | override def registerTimer(name: String): this.type = { 40 | register(name, new Timer) 41 | } 42 | 43 | def register[T <: Metric](name: String, metric: T)(implicit ev: T <:!< MetricProxy): this.type = { 
44 | require(!name.isNullOrEmpty, "name cannot be null, empty, or only whitespace") 45 | this.metricRegistry.register[T](MetricRegistry.name(name), metric) 46 | this 47 | } 48 | 49 | private[metrics] def build(): MetricsSource = { 50 | MetricsSource(this.namespace, this.metricRegistry) 51 | } 52 | } 53 | 54 | class RemoteMetricsSourceBuilder(override val namespace: String, 55 | val endpointName: String, 56 | val sparkEnv: SparkEnv) 57 | extends MetricsSourceBuilder(namespace) { 58 | 59 | if (sparkEnv.executorId == SparkContext.DRIVER_IDENTIFIER) { 60 | throw new IllegalStateException("RemoteMetricsSourceBuilder cannot be used on a driver") 61 | } 62 | 63 | val endpointRef = try { 64 | RpcUtils.makeDriverRef(endpointName, sparkEnv.conf, sparkEnv.rpcEnv) 65 | } catch { 66 | case e: SparkException => { 67 | logError("Could not create RPC driver reference", e) 68 | throw e 69 | } 70 | } 71 | 72 | override def registerCounter(name: String): this.type = { 73 | register(name, new CounterProxy( 74 | this.endpointRef, 75 | this.namespace, 76 | name 77 | )) 78 | } 79 | 80 | override def registerHistogram(name: String): this.type = { 81 | register(name, new HistogramProxy( 82 | this.endpointRef, 83 | this.namespace, 84 | name 85 | )) 86 | } 87 | 88 | override def registerMeter(name: String): this.type = { 89 | register(name, new MeterProxy( 90 | this.endpointRef, 91 | this.namespace, 92 | name 93 | )) 94 | } 95 | 96 | override def registerTimer(name: String): this.type = { 97 | register(name, new TimerProxy( 98 | this.endpointRef, 99 | this.namespace, 100 | name 101 | )) 102 | } 103 | 104 | def register[T <: MetricProxy](name: String, metric: T): this.type = { 105 | require(!name.isNullOrEmpty, "name cannot be null, empty, or only whitespace") 106 | this.metricRegistry.register[T](MetricRegistry.name(name), metric) 107 | this 108 | } 109 | 110 | private[metrics] def build(): MetricsSource = { 111 | MetricsSource(this.namespace, this.metricRegistry) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/MetricsSystems.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import com.codahale.metrics._ 4 | import org.apache.spark.SparkException 5 | import org.apache.spark.internal.Logging 6 | import scala.collection.JavaConverters.mapAsScalaMapConverter 7 | 8 | // These will only be created on executors 9 | private[metrics] class RpcMetricsSystem( 10 | private val metricsSource: MetricsSource 11 | ) extends UserMetricsSystem with Logging { 12 | 13 | require(metricsSource != null, "metricsSource cannot be null") 14 | 15 | private val namespace = metricsSource.sourceName 16 | private val metricProxies = metricsSource.metricRegistry.getMetrics.asScala 17 | 18 | def counter(metricName: String): Counter = { 19 | getMetric[CounterProxy](metricName) 20 | } 21 | 22 | def histogram(metricName: String): Histogram = { 23 | getMetric[HistogramProxy](metricName) 24 | } 25 | 26 | def meter(metricName: String): Meter = { 27 | getMetric[MeterProxy](metricName) 28 | } 29 | 30 | def timer(metricName: String): Timer = { 31 | getMetric[TimerProxy](metricName) 32 | } 33 | 34 | def gauge[T](metricName: String): SettableGauge[T] = { 35 | getMetric[SettableGaugeProxy[T]](metricName) 36 | } 37 | 38 | private def getMetric[T <: MetricProxy](metricName: String): T = { 39 | metricProxies.get(metricName) match { 40 | case Some(metric) => { 41 | 
metric.asInstanceOf[T] 42 | } 43 | case None => throw new SparkException(s"Metric '${metricName}' in namespace ${namespace} was not found") 44 | } 45 | } 46 | } 47 | 48 | // These can be created on the driver and the executors. 49 | class LocalMetricsSystem( 50 | metricsSource: MetricsSource 51 | ) extends UserMetricsSystem { 52 | 53 | require(metricsSource != null, "metricsSource cannot be null") 54 | 55 | private val namespace = metricsSource.sourceName 56 | private lazy val metrics = metricsSource.metricRegistry.getMetrics.asScala 57 | 58 | def counter(metricName: String): Counter = { 59 | getMetric[Counter](metricName) 60 | } 61 | 62 | def histogram(metricName: String): Histogram = { 63 | getMetric[Histogram](metricName) 64 | } 65 | 66 | def meter(metricName: String): Meter = { 67 | getMetric[Meter](metricName) 68 | } 69 | 70 | def timer(metricName: String): Timer = { 71 | getMetric[Timer](metricName) 72 | } 73 | 74 | def gauge[T](metricName: String): SettableGauge[T] = { 75 | val metric = getMetric[Gauge[T]](metricName) 76 | // If we have one, but it's not a settable gauge, it will run autonomously and provide metrics. 77 | // However, this is an exception here, as the developer wants to set it. 78 | if (!(metric.isInstanceOf[SettableGauge[T]])) { 79 | throw new SparkException(s"Gauge ${metricName} does not extend SettableGauge[T]") 80 | } 81 | 82 | metric.asInstanceOf[SettableGauge[T]] 83 | } 84 | 85 | private def getMetric[T <: Metric](metricName: String): T = { 86 | metrics.get(metricName) match { 87 | case Some(metric) => { 88 | metric.asInstanceOf[T] 89 | } 90 | case None => throw new SparkException(s"Metric '${metricName}' in namespace ${namespace} was not found") 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/RpcMessages.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import com.codahale.metrics.{Clock, Reservoir} 6 | 7 | trait MetricMessage[T] { 8 | val namespace: String 9 | val metricName: String 10 | val value: T 11 | } 12 | 13 | private[metrics] case class CounterMessage( 14 | override val namespace: String, 15 | override val metricName: String, 16 | override val value: Long 17 | ) extends MetricMessage[Long] 18 | 19 | private[metrics] case class SettableGaugeMessage[T]( 20 | override val namespace: String, 21 | override val metricName: String, 22 | override val value: T 23 | ) extends MetricMessage[T] 24 | 25 | import scala.language.existentials 26 | 27 | private[metrics] case class HistogramMessage( 28 | override val namespace: String, 29 | override val metricName: String, 30 | override val value: Long, 31 | reservoirClass: Class[_ <: Reservoir] 32 | ) extends MetricMessage[Long] 33 | 34 | private[metrics] case class MeterMessage( 35 | override val namespace: String, 36 | override val metricName: String, 37 | override val value: Long, 38 | clockClass: Class[_ <: Clock] 39 | ) extends MetricMessage[Long] 40 | 41 | private[metrics] case class TimerMessage( 42 | override val namespace: String, 43 | override val metricName: String, 44 | override val value: Long, 45 | timeUnit: TimeUnit, 46 | reservoirClass: Class[_ <: Reservoir], 47 | clockClass: Class[_ <: Clock] 48 | ) extends MetricMessage[Long] 49 | -------------------------------------------------------------------------------- 
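
The message case classes above are what the executor-side proxies send over RPC, and the RpcMetricsReceiver in the next file applies them to the real Codahale metrics registered on the driver. Below is a hedged end-to-end sketch using the UserMetricsSystems entry points defined later in this module; the namespace "myapp", the metric name "rowsProcessed", and the job itself are made-up examples, not part of this repo.

import org.apache.spark.{SparkContext, SparkEnv}
import org.apache.spark.metrics.UserMetricsSystems

object RowCountingJob {
  def main(args: Array[String]): Unit = {
    val sc = SparkContext.getOrCreate()

    // Driver: register a source with a counter and start the RpcMetricsReceiver
    // endpoint that executor-side proxies will send CounterMessages to.
    UserMetricsSystems.buildReceiverMetricSystem(
      SparkEnv.get,
      builder => builder.addSource("myapp", source => source.registerCounter("rowsProcessed"))
    )

    // Executors: look up the remote system by namespace; inc() becomes a
    // CounterMessage that is applied to the driver-side Counter.
    sc.parallelize(1 to 1000).foreachPartition { partition =>
      val metrics = UserMetricsSystems.getRemoteMetricSystem(
        "myapp",
        builder => builder.registerCounter("rowsProcessed")
      )
      metrics.counter("rowsProcessed").inc(partition.size)
    }
  }
}
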
/src/spark-listeners/src/main/scala/org/apache/spark/metrics/RpcMetricsReceiver.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import com.codahale.metrics._ 6 | import org.apache.spark.SparkEnv 7 | import org.apache.spark.internal.Logging 8 | import org.apache.spark.rpc.{RpcEndpoint, RpcEnv} 9 | 10 | import scala.collection.JavaConverters.mapAsScalaMapConverter 11 | import scala.reflect.ClassTag 12 | 13 | class RpcMetricsReceiver(val sparkEnv: SparkEnv, 14 | val sources: Seq[org.apache.spark.metrics.source.Source]) 15 | extends RpcEndpoint with Logging { 16 | override val rpcEnv: RpcEnv = sparkEnv.rpcEnv 17 | 18 | private val metricsSources: Map[String, Map[String, Metric]] = 19 | sources.map( 20 | source => source.sourceName -> source.metricRegistry.getMetrics.asScala.toMap 21 | ) 22 | .toMap 23 | 24 | import MetricsProxiesReflectionImplicits._ 25 | 26 | override def receive: PartialFunction[Any, Unit] = { 27 | case CounterMessage(namespace, metricName, value) => { 28 | getMetric[Counter](namespace, metricName) match { 29 | case Some(counter) => { 30 | logDebug(s"inc(${value}) called on counter '${metricName}', in namespace '${namespace}'") 31 | counter.inc(value) 32 | } 33 | case None => logWarning(s"Counter '${metricName}' not found") 34 | } 35 | } 36 | case HistogramMessage(namespace, metricName, value, reservoirClass) => { 37 | getMetric[Histogram](namespace, metricName) match { 38 | case Some(histogram) => { 39 | val histogramReservoirClass = histogram.getReservoirClass 40 | if (histogramReservoirClass != reservoirClass) { 41 | logWarning(s"Proxy reservoir class ${reservoirClass.getCanonicalName} does not match driver reservoir class ${histogramReservoirClass.getCanonicalName}") 42 | } else { 43 | histogram.update(value) 44 | } 45 | } 46 | case None => logWarning(s"Histogram '${metricName}' not found") 47 | } 48 | } 49 | case MeterMessage(namespace, metricName, value, clockClass) => { 50 | getMetric[Meter](namespace, metricName) match { 51 | case Some(meter) => { 52 | val meterClockClass = meter.getClockClass 53 | if (meterClockClass != clockClass) { 54 | logWarning(s"Proxy meter class ${clockClass.getCanonicalName} does not match driver clock class ${meterClockClass.getCanonicalName}") 55 | } else { 56 | meter.mark(value) 57 | } 58 | } 59 | case None => logWarning(s"Meter '${metricName}' not found") 60 | } 61 | } 62 | 63 | case TimerMessage(namespace, metricName, value, unit, reservoirClass, clockClass) => { 64 | getMetric[Timer](namespace, metricName) match { 65 | case Some(timer) => { 66 | val timerClockClass = timer.getClockClass 67 | val timerReservoirClass = timer.getHistogram.getReservoirClass 68 | if (timerClockClass != clockClass) { 69 | logWarning(s"Proxy clock class ${clockClass.getCanonicalName} does not match driver clock class ${timerClockClass.getCanonicalName}") 70 | } else if (timerReservoirClass != reservoirClass) { 71 | logWarning(s"Proxy reservoir class ${reservoirClass.getCanonicalName} does not match driver reservoir class ${timerReservoirClass.getCanonicalName}") 72 | } else { 73 | // Everything looks good 74 | timer.update(value, unit) 75 | } 76 | } 77 | case None => logWarning(s"Timer '${metricName}' not found") 78 | } 79 | } 80 | case SettableGaugeMessage(namespace, metricName, value) => { 81 | getMetric[SettableGauge[Any]](namespace, metricName) match { 82 | case Some(gauge) => gauge.set(value) 83 | case None => 
logWarning(s"SettableGauge '${metricName}' not found") 84 | } 85 | } 86 | case message: Any => logWarning(s"Unsupported message type: $message") 87 | } 88 | 89 | private[metrics] def getMetric[T <: Metric](namespace: String, metricName: String)(implicit tag: ClassTag[T]): Option[T] = { 90 | metricsSources.get(namespace) match { 91 | case Some(metrics) => { 92 | metrics.get(metricName) match { 93 | case Some(metric) => { 94 | if (tag.runtimeClass.isInstance(metric)) Some(metric.asInstanceOf[T]) else None 95 | } 96 | case _ => None 97 | } 98 | } 99 | case _ => None 100 | } 101 | } 102 | } 103 | 104 | object RpcMetricsReceiver { 105 | val DefaultTimeUnit = TimeUnit.NANOSECONDS 106 | val DefaultEndpointName = "MetricsReceiver" 107 | } -------------------------------------------------------------------------------- /src/spark-listeners/src/main/scala/org/apache/spark/metrics/UserMetricsSystem.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import java.util.concurrent.ConcurrentHashMap 4 | 5 | import com.codahale.metrics._ 6 | import org.apache.spark.internal.Logging 7 | import org.apache.spark.metrics.Implicits.StringExtensions 8 | import org.apache.spark.{SparkContext, SparkEnv} 9 | 10 | import scala.collection.JavaConverters.mapAsScalaConcurrentMapConverter 11 | import scala.collection.mutable 12 | 13 | trait UserMetricsSystem { 14 | def counter(metricName: String): Counter 15 | 16 | def histogram(metricName: String): Histogram 17 | 18 | def meter(metricName: String): Meter 19 | 20 | def timer(metricName: String): Timer 21 | 22 | def gauge[T](metricName: String): SettableGauge[T] 23 | } 24 | 25 | object UserMetricsSystems extends Logging { 26 | // This is for registries local to our current environment (i.e. driver or executor) 27 | @transient private lazy val metricsSystems = 28 | new ConcurrentHashMap[String, UserMetricsSystem].asScala 29 | 30 | // This method is only for "local" (i.e. driver OR executor) metrics. 31 | // These systems can be queried and used by name. 32 | def getMetricSystem(namespace: String, create: (LocalMetricsSourceBuilder) => Unit): LocalMetricsSystem = { 33 | metricsSystems.getOrElseUpdate(namespace, { 34 | logInfo(s"Creating LocalMetricsSystem ${namespace}") 35 | val builder = new LocalMetricsSourceBuilder(namespace) 36 | create(builder) 37 | val metricsSource = builder.build 38 | // Register here for now! 39 | SparkEnv.get.metricsSystem.registerSource(metricsSource) 40 | new LocalMetricsSystem(metricsSource) 41 | }).asInstanceOf[LocalMetricsSystem] 42 | } 43 | 44 | def buildReceiverMetricSystem( 45 | sparkEnv: SparkEnv, 46 | create: (ReceiverMetricSystemBuilder) => Unit, 47 | endpointName: String = RpcMetricsReceiver.DefaultEndpointName 48 | ): Unit = { 49 | // Just to be safe, we will throw an exception if the user tries to run this on the driver 50 | if (sparkEnv.executorId != SparkContext.DRIVER_IDENTIFIER) { 51 | // We are on the driver, so this is invalid 52 | throw new IllegalStateException(s"buildReceiverMetricSystem cannot be invoked on a Spark executor") 53 | } 54 | val builder = new ReceiverMetricSystemBuilder(sparkEnv, endpointName) 55 | create(builder) 56 | builder.build 57 | } 58 | 59 | // This should only be used on the executors. 60 | // They can be queried by name as well. 
61 | def getRemoteMetricSystem( 62 | namespace: String, 63 | create: RemoteMetricsSourceBuilder => Unit, 64 | endpointName: String = RpcMetricsReceiver.DefaultEndpointName): RpcMetricsSystem = { 65 | // Just to be safe, we will throw an exception if the user tries to run this on the driver 66 | if (SparkEnv.get.executorId == SparkContext.DRIVER_IDENTIFIER) { 67 | // We are on the driver, so this is invalid 68 | throw new IllegalStateException(s"getRemoteMetricSystem cannot be invoked on a Spark driver") 69 | } 70 | metricsSystems.getOrElseUpdate( 71 | namespace, { 72 | val builder = new RemoteMetricsSourceBuilder(namespace, endpointName, SparkEnv.get) 73 | create(builder) 74 | new RpcMetricsSystem(builder.build) 75 | } 76 | ).asInstanceOf[RpcMetricsSystem] 77 | } 78 | } 79 | 80 | @annotation.implicitNotFound(msg = "Cannot prove that ${A} <:!< ${B}.") 81 | trait <:!<[A,B] 82 | object <:!< { 83 | class Impl[A, B] 84 | object Impl { 85 | implicit def nsub[A, B] : A Impl B = null 86 | implicit def nsubAmbig1[A, B>:A] : A Impl B = null 87 | implicit def nsubAmbig2[A, B>:A] : A Impl B = null 88 | } 89 | 90 | implicit def foo[A,B]( implicit e: A Impl B ): A <:!< B = null 91 | } 92 | 93 | case class ReceiverMetricSystemBuilder( 94 | val sparkEnv: SparkEnv, 95 | val endpointName: String = RpcMetricsReceiver.DefaultEndpointName 96 | ) { 97 | require(sparkEnv != null, "sparkEnv cannot be null") 98 | require(!endpointName.isNullOrEmpty, "endpointName cannot be null, empty, or only whitespace") 99 | 100 | if (sparkEnv.executorId != SparkContext.DRIVER_IDENTIFIER) { 101 | throw new IllegalStateException("ReceiverMetricSystemBuilder can only be used on a driver") 102 | } 103 | 104 | private val metricsSources = mutable.Map[String, MetricsSource]() 105 | 106 | def addSource(namespace: String, create: (LocalMetricsSourceBuilder) => Unit): ReceiverMetricSystemBuilder = { 107 | val builder = new LocalMetricsSourceBuilder(namespace) 108 | create(builder) 109 | val metricsSource = builder.build 110 | metricsSources += (metricsSource.sourceName -> metricsSource) 111 | this 112 | } 113 | 114 | def build(): Unit = { 115 | this.metricsSources.values.foreach( 116 | source => this.sparkEnv.metricsSystem.registerSource(source) 117 | ) 118 | 119 | this.sparkEnv.rpcEnv.setupEndpoint( 120 | this.endpointName, 121 | new RpcMetricsReceiver( 122 | this.sparkEnv, 123 | this.metricsSources.values.toSeq) 124 | ) 125 | } 126 | } -------------------------------------------------------------------------------- /src/spark-listeners/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the file target/unit-tests.log 2 | test.appender=console 3 | log4j.rootCategory=DEBUG, ${test.appender} 4 | log4j.appender.file=org.apache.log4j.FileAppender 5 | log4j.appender.file.append=true 6 | log4j.appender.file.file=target/unit-tests.log 7 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 9 | 10 | # Tests that launch java subprocesses can set the "test.appender" system property to 11 | # "console" to avoid having the child process's logs overwrite the unit test's 12 | # log file. 
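# Illustrative sketch only -- not part of this test configuration. It shows one way the
# logging classes earlier in this module (Log4jConfiguration, the JSON layout, and
# SparkPropertyEnricher) might be wired in a log4j.properties file. The appender name,
# the layout/filter property keys, and appender-filter support in PropertyConfigurator
# are assumptions, not a copy of the project's shipped configuration.
# log4j.appender.monitoring=org.apache.log4j.ConsoleAppender
# log4j.appender.monitoring.layout=com.microsoft.pnp.logging.JSONLayout
# log4j.appender.monitoring.layout.LocationInfo=false
# log4j.appender.monitoring.filter.spark=com.microsoft.pnp.logging.SparkPropertyEnricher
# log4j.rootCategory=INFO, monitoring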
13 | log4j.appender.console=org.apache.log4j.ConsoleAppender 14 | log4j.appender.console.target=System.err 15 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.console.layout.ConversionPattern=%t: %m%n 17 | 18 | # Ignore messages below warning level from Jetty, because it's a bit verbose 19 | log4j.logger.org.spark_project.jetty=WARN 20 | #log4j.logger.org.apache.spark=WARN 21 | log4j.logger.org.apache.spark.listeners.microsoft=DEBUG 22 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/listeners/ListenerSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import org.apache.spark.internal.Logging 4 | import org.apache.spark.listeners.sink.SparkListenerSink 5 | import org.apache.spark.scheduler.SparkListenerEvent 6 | import org.apache.spark.sql.streaming.StreamingQueryListener 7 | import org.apache.spark.streaming.scheduler.StreamingListenerEvent 8 | import org.apache.spark.{SparkConf, SparkFunSuite} 9 | import org.json4s.JsonAST.JValue 10 | import org.mockito.ArgumentCaptor 11 | import org.mockito.Mockito.{spy, times, verify} 12 | import org.scalatest.BeforeAndAfterEach 13 | 14 | import scala.reflect.ClassTag 15 | import scala.reflect.runtime.{universe => ru} 16 | 17 | class TestSparkListenerSink extends SparkListenerSink with Logging { 18 | override def logEvent(event: Option[JValue]): Unit = { 19 | logInfo(s"sendToSink called: ${event}") 20 | } 21 | } 22 | 23 | object ListenerSuite { 24 | val EPOCH_TIME = 1422981759407L 25 | val EPOCH_TIME_AS_ISO8601 = "2015-02-03T16:42:39.407Z" 26 | } 27 | 28 | class ListenerSuite extends SparkFunSuite 29 | with BeforeAndAfterEach { 30 | 31 | protected implicit val defaultFormats = org.json4s.DefaultFormats 32 | protected var listener: UnifiedSparkListener = null 33 | private var logEventCaptor: ArgumentCaptor[Option[JValue]] = null 34 | 35 | override def beforeEach(): Unit = { 36 | super.beforeEach() 37 | // We will use a mock sink 38 | val conf = new SparkConf() 39 | .set("spark.driver.allowMultipleContexts", "true") 40 | .set("spark.unifiedListener.sink", classOf[TestSparkListenerSink].getName) 41 | this.logEventCaptor = ArgumentCaptor.forClass(classOf[Option[JValue]]) 42 | this.listener = spy(new UnifiedSparkListener(conf)) 43 | } 44 | 45 | override def afterEach(): Unit = { 46 | super.afterEach() 47 | this.listener = null 48 | this.logEventCaptor = null 49 | } 50 | 51 | protected def onSparkListenerEvent[T <: SparkListenerEvent]( 52 | onEvent: T => Unit, 53 | event: T): (Option[JValue], T) = { 54 | onEvent(event) 55 | verify(this.listener, times(1)).sendToSink(this.logEventCaptor.capture) 56 | ( 57 | this.logEventCaptor.getValue, 58 | event 59 | ) 60 | } 61 | 62 | private val wrapperCtor: ru.MethodMirror = { 63 | // We need to get the wrapper class so we can wrap this the way Spark does 64 | val mirror = ru.runtimeMirror(getClass.getClassLoader) 65 | val streamingListenerBusClassSymbol = mirror.classSymbol( 66 | Class.forName("org.apache.spark.streaming.scheduler.StreamingListenerBus") 67 | ) 68 | 69 | val streamingListenerBusClassMirror = mirror.reflectClass(streamingListenerBusClassSymbol) 70 | val streamingListenerBusCtor = streamingListenerBusClassMirror 71 | .reflectConstructor( 72 | streamingListenerBusClassSymbol.typeSignature.members.filter(_.isConstructor).head.asMethod 73 | ) 74 | val streamingListenerBus = 
streamingListenerBusCtor(null) 75 | val streamingListenerBusInstanceMirror = mirror.reflect(streamingListenerBus) 76 | 77 | val wrappedStreamingListenerEventClassSymbol = mirror.classSymbol( 78 | Class.forName( 79 | StreamingListenerHandlers.WrappedStreamingListenerEventClassName 80 | ) 81 | ) 82 | 83 | val wrappedStreamingListenerEventClassSymbolCtor = wrappedStreamingListenerEventClassSymbol 84 | .typeSignature.members.filter(_.isConstructor).head.asMethod 85 | streamingListenerBusInstanceMirror.reflectClass( 86 | wrappedStreamingListenerEventClassSymbol 87 | ).reflectConstructor(wrappedStreamingListenerEventClassSymbolCtor) 88 | } 89 | 90 | // All StreamingListenerEvents go through the onOtherEvent method, so we will call directly here. 91 | protected def onStreamingListenerEvent[T <: StreamingListenerEvent](event: T): (Option[JValue], T) = { 92 | // This one is the odd one. 93 | val (json, _) = onSparkListenerEvent( 94 | this.listener.onOtherEvent, 95 | this.wrapperCtor.apply(event).asInstanceOf[SparkListenerEvent] 96 | ) 97 | ( 98 | json, 99 | event 100 | ) 101 | } 102 | 103 | protected def onStreamingQueryListenerEvent[T <: StreamingQueryListener.Event]( 104 | event: T): (Option[JValue], T) = { 105 | onSparkListenerEvent( 106 | this.listener.onOtherEvent, 107 | event 108 | ) 109 | } 110 | 111 | protected def assertEvent[T <: AnyRef]( 112 | json: Option[JValue], 113 | event: T)(implicit classTag: ClassTag[T]): org.scalatest.Assertion = { 114 | this.assertField( 115 | json, 116 | "Event", 117 | (_, value) => assert(value.extract[String] === classTag.runtimeClass.getName) 118 | ) 119 | } 120 | 121 | protected def assertSparkEventTime( 122 | json: Option[JValue], 123 | assertion: (String, JValue) => org.scalatest.Assertion): org.scalatest.Assertion = 124 | this.assertField(json, "SparkEventTime", assertion) 125 | 126 | protected def assertField( 127 | json: Option[JValue], 128 | fieldName: String, 129 | assertion: (String, JValue) => org.scalatest.Assertion): org.scalatest.Assertion = { 130 | json match { 131 | case Some(jValue) => { 132 | jValue.findField { case (n, _) => n == fieldName } match { 133 | case Some(jField) => { 134 | assertion.tupled(jField) 135 | } 136 | case None => fail(s"${fieldName} field not found") 137 | } 138 | } 139 | case None => fail("None passed to assertField") 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/listeners/LogAnalyticsStreamingListenerSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.listeners 2 | 3 | import org.apache.spark.streaming.Time 4 | import org.apache.spark.streaming.scheduler._ 5 | 6 | object LogAnalyticsStreamingListenerSuite { 7 | val streamingListenerStreamingStarted = StreamingListenerStreamingStarted( 8 | ListenerSuite.EPOCH_TIME 9 | ) 10 | val streamingListenerReceiverStarted = StreamingListenerReceiverStarted( 11 | ReceiverInfo(0, "test", true, "localhost", "0") 12 | ) 13 | val streamingListenerReceiverError = StreamingListenerReceiverError( 14 | ReceiverInfo(1, "test", true, "localhost", "1") 15 | ) 16 | val streamingListenerReceiverStopped = StreamingListenerReceiverStopped( 17 | ReceiverInfo(2, "test", true, "localhost", "2") 18 | ) 19 | 20 | private val streamIdToInputInfo = Map( 21 | 0 -> StreamInputInfo(0, 300L), 22 | 1 -> StreamInputInfo(1, 300L, Map(StreamInputInfo.METADATA_KEY_DESCRIPTION -> "test"))) 23 | 24 | val 
streamingListenerBatchSubmitted = StreamingListenerBatchSubmitted( 25 | BatchInfo(Time(1000), streamIdToInputInfo, ListenerSuite.EPOCH_TIME, None, None, Map.empty) 26 | ) 27 | 28 | val streamingListenerBatchStarted = StreamingListenerBatchStarted( 29 | BatchInfo(Time(1000), streamIdToInputInfo, 1000, Some(ListenerSuite.EPOCH_TIME), None, Map.empty) 30 | ) 31 | 32 | val streamingListenerBatchStartedNoneProcessingStartTime = StreamingListenerBatchStarted( 33 | BatchInfo(Time(1000), streamIdToInputInfo, 1000, None, None, Map.empty) 34 | ) 35 | 36 | val streamingListenerBatchCompleted = StreamingListenerBatchCompleted( 37 | BatchInfo(Time(1000L), streamIdToInputInfo, 1000, Some(2000), Some(ListenerSuite.EPOCH_TIME), Map.empty) 38 | ) 39 | 40 | val streamingListenerBatchCompletedNoneProcessingEndTime = StreamingListenerBatchCompleted( 41 | BatchInfo(Time(ListenerSuite.EPOCH_TIME), streamIdToInputInfo, 1000, Some(2000), None, Map.empty) 42 | ) 43 | 44 | val streamingListenerOutputOperationStarted = StreamingListenerOutputOperationStarted( 45 | OutputOperationInfo( 46 | Time(1000L), 47 | 0, 48 | "op1", 49 | "operation1", 50 | Some(ListenerSuite.EPOCH_TIME), 51 | None, 52 | None 53 | ) 54 | ) 55 | 56 | val streamingListenerOutputOperationStartedNoneStartTime = StreamingListenerOutputOperationStarted( 57 | OutputOperationInfo( 58 | Time(1000L), 59 | 0, 60 | "op1", 61 | "operation1", 62 | None, 63 | None, 64 | None 65 | ) 66 | ) 67 | 68 | val streamingListenerOutputOperationCompleted = StreamingListenerOutputOperationCompleted( 69 | OutputOperationInfo( 70 | Time(1000L), 71 | 0, 72 | "op1", 73 | "operation1", 74 | Some(1003L), 75 | Some(ListenerSuite.EPOCH_TIME), 76 | None 77 | )) 78 | 79 | val streamingListenerOutputOperationCompletedNoneEndTime = StreamingListenerOutputOperationCompleted( 80 | OutputOperationInfo( 81 | Time(1000L), 82 | 0, 83 | "op1", 84 | "operation1", 85 | Some(1003L), 86 | None, 87 | None 88 | )) 89 | } 90 | 91 | class LogAnalyticsStreamingListenerSuite extends ListenerSuite { 92 | test("should invoke sendToSink for StreamingListenerStreamingStarted event with full class name") { 93 | val (json, event) = this.onStreamingListenerEvent( 94 | LogAnalyticsStreamingListenerSuite.streamingListenerStreamingStarted 95 | ) 96 | 97 | this.assertEvent(json, event) 98 | } 99 | 100 | test("should invoke sendToSink for StreamingListenerReceiverStarted event with full class name") { 101 | val (json, event) = this.onStreamingListenerEvent( 102 | LogAnalyticsStreamingListenerSuite.streamingListenerReceiverStarted 103 | ) 104 | 105 | this.assertEvent(json, event) 106 | } 107 | 108 | test("should invoke sendToSink for StreamingListenerReceiverError event with full class name") { 109 | val (json, event) = this.onStreamingListenerEvent( 110 | LogAnalyticsStreamingListenerSuite.streamingListenerReceiverError 111 | ) 112 | 113 | this.assertEvent(json, event) 114 | } 115 | 116 | test("should invoke sendToSink for StreamingListenerReceiverStopped event with full class name") { 117 | val (json, event) = this.onStreamingListenerEvent( 118 | LogAnalyticsStreamingListenerSuite.streamingListenerReceiverStopped 119 | ) 120 | 121 | this.assertEvent(json, event) 122 | } 123 | 124 | test("should invoke sendToSink for StreamingListenerBatchSubmitted event with full class name") { 125 | val (json, event) = this.onStreamingListenerEvent( 126 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchSubmitted 127 | ) 128 | 129 | this.assertEvent(json, event) 130 | } 131 | 132 | test("should invoke sendToSink for 
StreamingListenerBatchStarted event with full class name") { 133 | val (json, event) = this.onStreamingListenerEvent( 134 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchStarted 135 | ) 136 | 137 | this.assertEvent(json, event) 138 | } 139 | 140 | test("should invoke sendToSink for StreamingListenerBatchCompleted event with full class name") { 141 | val (json, event) = this.onStreamingListenerEvent( 142 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchCompleted 143 | ) 144 | 145 | this.assertEvent(json, event) 146 | } 147 | 148 | test("should invoke sendToSink for StreamingListenerOutputOperationStarted event with full class name") { 149 | val (json, event) = this.onStreamingListenerEvent( 150 | LogAnalyticsStreamingListenerSuite.streamingListenerOutputOperationStarted 151 | ) 152 | 153 | this.assertEvent(json, event) 154 | } 155 | 156 | test("should invoke sendToSink for StreamingListenerOutputOperationCompleted event with full class name") { 157 | val (json, event) = this.onStreamingListenerEvent( 158 | LogAnalyticsStreamingListenerSuite.streamingListenerOutputOperationCompleted 159 | ) 160 | 161 | this.assertEvent(json, event) 162 | } 163 | 164 | test("StreamingListenerStreamingStarted should have expected SparkEventTime") { 165 | val (json, _) = this.onStreamingListenerEvent( 166 | LogAnalyticsStreamingListenerSuite.streamingListenerStreamingStarted 167 | ) 168 | this.assertSparkEventTime( 169 | json, 170 | (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 171 | ) 172 | } 173 | 174 | test("StreamingListenerBatchCompleted should have expected SparkEventTime") { 175 | val (json, _) = this.onStreamingListenerEvent( 176 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchCompleted 177 | ) 178 | this.assertSparkEventTime( 179 | json, 180 | (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 181 | ) 182 | } 183 | 184 | test("StreamingListenerBatchSubmitted should have expected SparkEventTime") { 185 | val (json, _) = this.onStreamingListenerEvent( 186 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchSubmitted 187 | ) 188 | this.assertSparkEventTime( 189 | json, 190 | (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 191 | ) 192 | } 193 | 194 | test("StreamingListenerBatchStarted should have expected SparkEventTime") { 195 | val (json, _) = this.onStreamingListenerEvent( 196 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchStarted 197 | ) 198 | this.assertSparkEventTime( 199 | json, 200 | (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 201 | ) 202 | } 203 | 204 | test("StreamingListenerBatchStarted with no processingStartTime should have SparkEventTime") { 205 | val (json, _) = this.onStreamingListenerEvent( 206 | LogAnalyticsStreamingListenerSuite.streamingListenerBatchStartedNoneProcessingStartTime 207 | ) 208 | this.assertSparkEventTime( 209 | json, 210 | (_, value) => assert(!value.extract[String].isEmpty) 211 | ) 212 | } 213 | 214 | test("StreamingListenerOutputOperationCompleted should have expected SparkEventTime") { 215 | val (json, _) = this.onStreamingListenerEvent( 216 | LogAnalyticsStreamingListenerSuite.streamingListenerOutputOperationCompleted 217 | ) 218 | this.assertSparkEventTime( 219 | json, 220 | (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 221 | ) 222 | } 223 | 224 | test("StreamingListenerOutputOperationCompleted with no endTime should have 
SparkEventTime") { 225 | val (json, _) = this.onStreamingListenerEvent( 226 | LogAnalyticsStreamingListenerSuite.streamingListenerOutputOperationCompleted 227 | ) 228 | this.assertSparkEventTime( 229 | json, 230 | (_, value) => assert(!value.extract[String].isEmpty) 231 | ) 232 | } 233 | 234 | test("StreamingListenerOutputOperationStarted should have expected SparkEventTime") { 235 | val (json, _) = this.onStreamingListenerEvent( 236 | LogAnalyticsStreamingListenerSuite.streamingListenerOutputOperationStarted 237 | ) 238 | this.assertSparkEventTime( 239 | json, 240 | (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 241 | ) 242 | } 243 | 244 | test("StreamingListenerOutputOperationStarted with no endTime should have SparkEventTime") { 245 | val (json, _) = this.onStreamingListenerEvent( 246 | LogAnalyticsStreamingListenerSuite.streamingListenerOutputOperationStarted 247 | ) 248 | this.assertSparkEventTime( 249 | json, 250 | (_, value) => assert(!value.extract[String].isEmpty) 251 | ) 252 | } 253 | 254 | test("StreamingListenerReceiverError should have SparkEventTime") { 255 | val (json, _) = this.onStreamingListenerEvent( 256 | LogAnalyticsStreamingListenerSuite.streamingListenerReceiverError 257 | ) 258 | this.assertSparkEventTime( 259 | json, 260 | (_, value) => assert(!value.extract[String].isEmpty) 261 | ) 262 | } 263 | 264 | test("StreamingListenerReceiverStarted should have SparkEventTime") { 265 | val (json, _) = this.onStreamingListenerEvent( 266 | LogAnalyticsStreamingListenerSuite.streamingListenerReceiverStarted 267 | ) 268 | this.assertSparkEventTime( 269 | json, 270 | (_, value) => assert(!value.extract[String].isEmpty) 271 | ) 272 | } 273 | 274 | test("StreamingListenerReceiverStopped should have SparkEventTime") { 275 | val (json, _) = this.onStreamingListenerEvent( 276 | LogAnalyticsStreamingListenerSuite.streamingListenerReceiverStopped 277 | ) 278 | this.assertSparkEventTime( 279 | json, 280 | (_, value) => assert(!value.extract[String].isEmpty) 281 | ) 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/metrics/CustomMetricsSystemSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import org.apache.spark._ 4 | import org.apache.spark.rpc.RpcEnv 5 | import org.mockito.AdditionalAnswers 6 | import org.mockito.ArgumentMatchers.{any, argThat} 7 | import org.mockito.Mockito.{mock, times, verify, when} 8 | import org.scalatest.BeforeAndAfterEach 9 | 10 | import scala.reflect.ClassTag 11 | 12 | object MetricsSystemsSuite { 13 | val MetricNamespace = "testmetrics" 14 | val DriverMetricNamespace = "testdrivermetrics" 15 | val ExecutorMetricNamespace = "testexecutormetrics" 16 | val CustomMetricNamespace = "custommetrics" 17 | val CounterName = "testcounter" 18 | val HistogramName = "testhistogram" 19 | val MeterName = "testmeter" 20 | val TimerName = "testtimer" 21 | val SettableGaugeName = "testsettablegauge" 22 | 23 | val NamespaceFieldName = "namespace" 24 | val EndpointNameFieldName = "endpointName" 25 | 26 | val GaugeName = "testrandomgauge" 27 | val InvalidCounterName = "invalidcounter" 28 | } 29 | 30 | class MetricsSystemsSuite extends SparkFunSuite 31 | with BeforeAndAfterEach 32 | with LocalSparkContext { 33 | 34 | private var env: SparkEnv = null 35 | private var rpcEnv: RpcEnv = null 36 | 37 | import TestImplicits._ 38 | 39 | override def 
beforeEach(): Unit = { 40 | super.beforeEach 41 | val conf = new SparkConf() 42 | .setMaster("local[2]") 43 | .setAppName("test") 44 | .set("spark.dynamicAllocation.testing", "true") 45 | .set("spark.driver.allowMultipleContexts", "true") 46 | sc = new SparkContext(conf) 47 | env = mock(classOf[SparkEnv]) 48 | rpcEnv = mock(classOf[RpcEnv]) 49 | when(env.conf).thenReturn(conf) 50 | when(env.rpcEnv).thenReturn(rpcEnv) 51 | SparkEnv.set(env) 52 | //when(sc.env).thenReturn(env) 53 | } 54 | 55 | override def afterEach(): Unit = { 56 | super.afterEach 57 | sc=null 58 | env = null 59 | rpcEnv = null 60 | } 61 | 62 | test("getMetricsSystem registers a MetricsSource and returns a LocalMetricsSystem on driver node") { 63 | val envMetricsSystem = mock(classOf[org.apache.spark.metrics.MetricsSystem]) 64 | when(env.metricsSystem).thenReturn(envMetricsSystem) 65 | when(env.executorId).thenReturn(SparkContext.DRIVER_IDENTIFIER) 66 | val metricsSystem = UserMetricsSystems.getMetricSystem( 67 | MetricsSystemsSuite.DriverMetricNamespace, 68 | (builder) => { 69 | builder.registerCounter(MetricsSystemsSuite.CounterName) 70 | } 71 | ) 72 | 73 | assert(metricsSystem !== null) 74 | verify(envMetricsSystem, times(1)).registerSource( 75 | argThat((source: org.apache.spark.metrics.source.Source) => source.metricRegistry.counter( 76 | MetricsSystemsSuite.CounterName 77 | ) != null)) 78 | } 79 | 80 | test("getMetricsSystem registers a MetricsSource and returns a LocalMetricsSystem on executor node") { 81 | val envMetricsSystem = mock(classOf[org.apache.spark.metrics.MetricsSystem]) 82 | when(env.metricsSystem).thenReturn(envMetricsSystem) 83 | when(env.executorId).thenReturn("0") 84 | val metricsSystem = UserMetricsSystems.getMetricSystem( 85 | MetricsSystemsSuite.ExecutorMetricNamespace, 86 | (builder) => { 87 | builder.registerCounter(MetricsSystemsSuite.CounterName) 88 | } 89 | ) 90 | 91 | assert(metricsSystem !== null) 92 | verify(envMetricsSystem, times(1)).registerSource( 93 | argThat((source: org.apache.spark.metrics.source.Source) => source.metricRegistry.counter( 94 | MetricsSystemsSuite.CounterName 95 | ) != null)) 96 | } 97 | 98 | def spyLambda[T <: AnyRef](realObj: T)(implicit classTag: ClassTag[T]): T = mock( 99 | classTag.runtimeClass.asInstanceOf[Class[T]], 100 | AdditionalAnswers.delegatesTo(realObj)) 101 | 102 | test("buildReceiverMetricsSystem succeeds when invoked on a driver") { 103 | val lambda = spyLambda((builder: ReceiverMetricSystemBuilder) => {}) 104 | when(env.executorId).thenReturn(SparkContext.DRIVER_IDENTIFIER) 105 | UserMetricsSystems.buildReceiverMetricSystem( 106 | env, 107 | lambda 108 | ) 109 | 110 | verify(lambda, times(1)).apply(any(classOf[ReceiverMetricSystemBuilder])) 111 | } 112 | 113 | test("buildReceiverMetricsSystem throws an IllegalStateException when invoked on an executor") { 114 | when(env.executorId).thenReturn("0") 115 | val caught = intercept[IllegalStateException] { 116 | UserMetricsSystems.buildReceiverMetricSystem( 117 | env, 118 | (builder) => {} 119 | ) 120 | } 121 | 122 | assert(caught !== null) 123 | assert(caught.getMessage == "buildReceiverMetricSystem cannot be invoked on a Spark executor") 124 | } 125 | 126 | test("getRemoteMetricsSystem throws an IllegalStateException when invoked on a driver") { 127 | when(env.executorId).thenReturn(SparkContext.DRIVER_IDENTIFIER) 128 | val caught = intercept[IllegalStateException] { 129 | UserMetricsSystems.getRemoteMetricSystem( 130 | MetricsSystemsSuite.MetricNamespace, 131 | (builder) => {} 132 | ) 133 | } 134 | 135 
| assert(caught !== null) 136 | assert(caught.getMessage == "getRemoteMetricSystem cannot be invoked on a Spark driver") 137 | } 138 | 139 | test("getRemoteMetricsSystem succeeds when invoked on an executor") { 140 | val lambda = spyLambda((builder: RemoteMetricsSourceBuilder) => {}) 141 | when(env.executorId).thenReturn("0") 142 | val metricSystem = UserMetricsSystems.getRemoteMetricSystem( 143 | MetricsSystemsSuite.MetricNamespace, 144 | lambda 145 | ) 146 | 147 | assert(metricSystem !== null) 148 | verify(lambda, times(1)).apply(any(classOf[RemoteMetricsSourceBuilder])) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/metrics/MetricProxiesSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import com.codahale.metrics.{Clock, ExponentiallyDecayingReservoir, UniformReservoir} 6 | import org.apache.spark.SparkFunSuite 7 | import org.apache.spark.rpc.RpcEndpointRef 8 | import org.mockito.ArgumentMatchers._ 9 | import org.mockito.Mockito._ 10 | import org.scalatest.BeforeAndAfterEach 11 | import TestUtils._ 12 | 13 | 14 | 15 | 16 | object MetricProxiesSuite { 17 | val MetricNamespace = "testmetrics" 18 | val CounterName = "testcounter" 19 | val HistogramName = "testhistogram" 20 | val MeterName = "testmeter" 21 | val TimerName = "testtimer" 22 | val SettableGaugeName = "testsettablegauge" 23 | } 24 | 25 | class MetricProxiesSuite extends SparkFunSuite 26 | with BeforeAndAfterEach { 27 | 28 | import TestImplicits._ 29 | 30 | private var rpcMetricsReceiverRef: RpcEndpointRef = null 31 | 32 | val clockClazz = loadOneOf("com.codahale.metrics.jvm.CpuTimeClock", "com.codahale.metrics.Clock$CpuTimeClock").get 33 | 34 | override def beforeEach(): Unit = { 35 | super.beforeEach 36 | this.rpcMetricsReceiverRef = mock(classOf[RpcEndpointRef]) 37 | } 38 | 39 | override def afterEach(): Unit = { 40 | super.afterEach 41 | this.rpcMetricsReceiverRef = null 42 | } 43 | 44 | test("CounterProxy calls sendMetric with a CounterMessage for inc()") { 45 | val proxy = new CounterProxy( 46 | this.rpcMetricsReceiverRef, 47 | MetricProxiesSuite.MetricNamespace, 48 | MetricProxiesSuite.CounterName 49 | ) 50 | proxy.inc() 51 | verify(this.rpcMetricsReceiverRef).send(argThat((message: CounterMessage) => message.value === 1)) 52 | } 53 | 54 | test("CounterProxy calls sendMetric with a CounterMessage for inc(Long)") { 55 | val value = 12345L 56 | val proxy = new CounterProxy( 57 | this.rpcMetricsReceiverRef, 58 | MetricProxiesSuite.MetricNamespace, 59 | MetricProxiesSuite.CounterName 60 | ) 61 | proxy.inc(value) 62 | verify(this.rpcMetricsReceiverRef).send(argThat((message: CounterMessage) => message.value === value)) 63 | } 64 | 65 | test("CounterProxy calls sendMetric with a CounterMessage for dec()") { 66 | val proxy = new CounterProxy( 67 | this.rpcMetricsReceiverRef, 68 | MetricProxiesSuite.MetricNamespace, 69 | MetricProxiesSuite.CounterName 70 | ) 71 | proxy.dec() 72 | verify(this.rpcMetricsReceiverRef).send(argThat((message: CounterMessage) => message.value === -1)) 73 | } 74 | 75 | test("CounterProxy calls sendMetric with a CounterMessage for dec(Long)") { 76 | val value = 12345L 77 | val proxy = new CounterProxy( 78 | this.rpcMetricsReceiverRef, 79 | MetricProxiesSuite.MetricNamespace, 80 | MetricProxiesSuite.CounterName 81 | ) 82 | proxy.dec(value) 83 | 
verify(this.rpcMetricsReceiverRef).send(argThat((message: CounterMessage) => message.value === -value)) 84 | } 85 | 86 | test("HistogramProxy calls sendMetric with a HistogramMessage for update(Int)") { 87 | val value: Integer = 12345 88 | val proxy = new HistogramProxy( 89 | this.rpcMetricsReceiverRef, 90 | MetricProxiesSuite.MetricNamespace, 91 | MetricProxiesSuite.HistogramName 92 | ) 93 | proxy.update(value) 94 | verify(this.rpcMetricsReceiverRef).send(argThat((message: HistogramMessage) => message.value === value.toLong)) 95 | } 96 | 97 | test("HistogramProxy calls sendMetric with a HistogramMessage for update(Long)") { 98 | val value = 12345L 99 | val proxy = new HistogramProxy( 100 | this.rpcMetricsReceiverRef, 101 | MetricProxiesSuite.MetricNamespace, 102 | MetricProxiesSuite.HistogramName 103 | ) 104 | proxy.update(value) 105 | verify(this.rpcMetricsReceiverRef).send(argThat((message: HistogramMessage) => message.value === value)) 106 | } 107 | 108 | test("HistogramProxy calls sendMetric with a HistogramMessage for update(Long) and non-default reservoir") { 109 | val value = 12345L 110 | val proxy = new HistogramProxy( 111 | this.rpcMetricsReceiverRef, 112 | MetricProxiesSuite.MetricNamespace, 113 | MetricProxiesSuite.HistogramName, 114 | new UniformReservoir) 115 | proxy.update(value) 116 | verify(this.rpcMetricsReceiverRef).send(argThat( 117 | (message: HistogramMessage) => message.value === value && message.reservoirClass === classOf[UniformReservoir])) 118 | } 119 | 120 | test("MeterProxy calls sendMetric with a MeterMessage for mark()") { 121 | val proxy = new MeterProxy( 122 | this.rpcMetricsReceiverRef, 123 | MetricProxiesSuite.MetricNamespace, 124 | MetricProxiesSuite.MeterName) 125 | proxy.mark() 126 | verify(this.rpcMetricsReceiverRef).send(argThat((message: MeterMessage) => message.value === 1)) 127 | } 128 | 129 | test("MeterProxy calls sendMetric with a MeterMessage for mark(Long)") { 130 | val value = 12345L 131 | val proxy = new MeterProxy( 132 | this.rpcMetricsReceiverRef, 133 | MetricProxiesSuite.MetricNamespace, 134 | MetricProxiesSuite.MeterName) 135 | proxy.mark(value) 136 | verify(this.rpcMetricsReceiverRef).send(argThat((message: MeterMessage) => message.value === value)) 137 | } 138 | 139 | test("MeterProxy calls sendMetric with a MeterMessage for mark(Long) and non-default clock") { 140 | val value = 12345L 141 | val proxy = new MeterProxy( 142 | this.rpcMetricsReceiverRef, 143 | MetricProxiesSuite.MetricNamespace, 144 | MetricProxiesSuite.MeterName, 145 | clockClazz.newInstance().asInstanceOf[Clock]) 146 | proxy.mark(value) 147 | verify(this.rpcMetricsReceiverRef).send(argThat( 148 | (message: MeterMessage) => message.value === value && message.clockClass === clockClazz)) 149 | } 150 | 151 | test("TimerProxy calls sendMetric with a TimerMessage for update(Long, TimeUnit)") { 152 | val value = 12345L 153 | val proxy = new TimerProxy( 154 | this.rpcMetricsReceiverRef, 155 | MetricProxiesSuite.MetricNamespace, 156 | MetricProxiesSuite.TimerName 157 | ) 158 | 159 | proxy.update(value, TimeUnit.SECONDS) 160 | verify(this.rpcMetricsReceiverRef).send(argThat( 161 | (message: TimerMessage) => message.value === value && message.timeUnit === TimeUnit.SECONDS)) 162 | } 163 | 164 | test("TimerProxy calls sendMetric with a TimerMessage for time(Callable)") { 165 | val clock = mock(classOf[Clock]) 166 | // Make our clock return different values on successive calls so we can verify the measured duration. 167 | // The internal Meter inside the Timer calls getTick() in its constructor, so an extra return value is needed.
168 | // Spark 3 calls getTick() once more, so a further value is added to the list. 169 | when(clock.getTick()).thenReturn(1000, 1000, 2000, 3000) 170 | val proxy = new TimerProxy( 171 | this.rpcMetricsReceiverRef, 172 | MetricProxiesSuite.MetricNamespace, 173 | MetricProxiesSuite.TimerName, 174 | new ExponentiallyDecayingReservoir, 175 | clock 176 | ) 177 | 178 | import MetricsProxiesImplicits.callable 179 | proxy.time(() => Thread.sleep(100)) 180 | verify(this.rpcMetricsReceiverRef).send(argThat( 181 | (message: TimerMessage) => message.value === 1000 && message.timeUnit === TimeUnit.NANOSECONDS)) 182 | } 183 | 184 | test("TimerProxy calls sendMetric with a TimerMessage for update(Long, TimeUnit) and non-default reservoir and clock") { 185 | val value = 12345L 186 | val proxy = new TimerProxy( 187 | this.rpcMetricsReceiverRef, 188 | MetricProxiesSuite.MetricNamespace, 189 | MetricProxiesSuite.TimerName, 190 | new UniformReservoir, 191 | clockClazz.newInstance().asInstanceOf[Clock] 192 | ) 193 | 194 | proxy.update(value, TimeUnit.SECONDS) 195 | verify(this.rpcMetricsReceiverRef).send(argThat( 196 | (message: TimerMessage) => message.value === value && 197 | message.timeUnit === TimeUnit.SECONDS && 198 | message.reservoirClass === classOf[UniformReservoir] && 199 | message.clockClass === clockClazz)) 200 | } 201 | 202 | test("SettableGaugeProxy calls sendMetric with a SettableGaugeMessage for set(Long)") { 203 | val value = 12345L 204 | val proxy = new SettableGaugeProxy[Long]( 205 | this.rpcMetricsReceiverRef, 206 | MetricProxiesSuite.MetricNamespace, 207 | MetricProxiesSuite.SettableGaugeName 208 | ) 209 | proxy.set(value) 210 | verify(this.rpcMetricsReceiverRef).send(argThat((message: SettableGaugeMessage[Long]) => message.value === value)) 211 | } 212 | 213 | test("SettableGauge getValue returns same value as set(T)") { 214 | val value = 12345L 215 | val settableGauge = new SettableGauge[Long] {} 216 | settableGauge.set(value) 217 | assert(settableGauge.getValue === value) 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/metrics/MetricsSystemsSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import com.codahale.metrics._ 4 | import org.apache.spark._ 5 | import org.mockito.Mockito.mock 6 | import org.scalatest.BeforeAndAfterEach 7 | 8 | //object MetricsSystemsSuite { 9 | // val MetricNamespace = "testmetrics" 10 | // val CounterName = "testcounter" 11 | // val HistogramName = "testhistogram" 12 | // val MeterName = "testmeter" 13 | // val TimerName = "testtimer" 14 | // val SettableGaugeName = "testsettablegauge" 15 | // val GaugeName = "testrandomgauge" 16 | // val InvalidCounterName = "invalidcounter" 17 | //} 18 | 19 | class LocalMetricsSystemsSuite extends SparkFunSuite 20 | with BeforeAndAfterEach { 21 | 22 | private var counter: Counter = null 23 | private var histogram: Histogram = null 24 | private var meter: Meter = null 25 | private var timer: Timer = null 26 | private var settableGauge: SettableGauge[Long] = null 27 | private var gauge: Gauge[Long] = null 28 | private var metricsSource: MetricsSource = null 29 | 30 | override def beforeEach(): Unit = { 31 | super.beforeEach 32 | this.counter = mock(classOf[Counter]) 33 | this.histogram = mock(classOf[Histogram]) 34 | this.meter = mock(classOf[Meter]) 35 | this.timer = mock(classOf[Timer]) 36
| this.settableGauge = mock(classOf[SettableGauge[Long]]) 37 | this.gauge = mock(classOf[Gauge[Long]]) 38 | val metricRegistry = new MetricRegistry() 39 | metricRegistry.register(MetricsSystemsSuite.CounterName, this.counter) 40 | metricRegistry.register(MetricsSystemsSuite.HistogramName, this.histogram) 41 | metricRegistry.register(MetricsSystemsSuite.MeterName, this.meter) 42 | metricRegistry.register(MetricsSystemsSuite.TimerName, this.timer) 43 | metricRegistry.register(MetricsSystemsSuite.SettableGaugeName, this.settableGauge) 44 | metricRegistry.register(MetricsSystemsSuite.GaugeName, this.gauge) 45 | this.metricsSource = new MetricsSource(MetricsSystemsSuite.MetricNamespace, metricRegistry) 46 | } 47 | 48 | override def afterEach(): Unit = { 49 | super.afterEach 50 | this.counter = null 51 | this.histogram = null 52 | this.meter = null 53 | this.timer = null 54 | this.settableGauge = null 55 | this.gauge = null 56 | } 57 | 58 | test("metricsSource cannot be null") { 59 | val caught = intercept[IllegalArgumentException] { 60 | val metricsSystem = new LocalMetricsSystem(null) 61 | } 62 | 63 | assert(caught !== null) 64 | assert(caught.getMessage.contains("metricsSource cannot be null")) 65 | } 66 | 67 | test("counter() returns named counter") { 68 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 69 | val metric = metricsSystem.counter(MetricsSystemsSuite.CounterName) 70 | assert(metric !== null) 71 | } 72 | 73 | test("histogram() returns named histogram") { 74 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 75 | val metric = metricsSystem.histogram(MetricsSystemsSuite.HistogramName) 76 | assert(metric !== null) 77 | } 78 | 79 | test("meter() returns named meter") { 80 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 81 | val metric = metricsSystem.meter(MetricsSystemsSuite.MeterName) 82 | assert(metric !== null) 83 | } 84 | 85 | test("timer() returns named timer") { 86 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 87 | val metric = metricsSystem.timer(MetricsSystemsSuite.TimerName) 88 | assert(metric !== null) 89 | } 90 | 91 | test("gauge() returns named gauge") { 92 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 93 | val metric = metricsSystem.gauge(MetricsSystemsSuite.SettableGaugeName) 94 | assert(metric !== null) 95 | } 96 | 97 | test("gauge() throws a SparkException for a gauge that does not inherit from SettableGauge[T]") { 98 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 99 | val caught = intercept[SparkException] { 100 | val metric = metricsSystem.gauge(MetricsSystemsSuite.GaugeName) 101 | } 102 | 103 | assert(caught !== null) 104 | assert(caught.getMessage.contains("does not extend SettableGauge[T]")) 105 | } 106 | 107 | test("counter() throws a SparkException for a counter that does not exist") { 108 | val metricsSystem = new LocalMetricsSystem(this.metricsSource) 109 | val caught = intercept[SparkException] { 110 | val metric = metricsSystem.counter(MetricsSystemsSuite.InvalidCounterName) 111 | } 112 | 113 | assert(caught !== null) 114 | assert(caught.getMessage.contains("was not found")) 115 | } 116 | } 117 | 118 | class RpcMetricsSystemsSuite extends SparkFunSuite 119 | with BeforeAndAfterEach { 120 | 121 | private var counter: CounterProxy = null 122 | private var histogram: HistogramProxy = null 123 | private var meter: MeterProxy = null 124 | private var timer: TimerProxy = null 125 | private var settableGauge: SettableGaugeProxy[Long] = null 126 | private var 
metricsSource: MetricsSource = null 127 | 128 | override def beforeEach(): Unit = { 129 | super.beforeEach 130 | this.counter = mock(classOf[CounterProxy]) 131 | this.histogram = mock(classOf[HistogramProxy]) 132 | this.meter = mock(classOf[MeterProxy]) 133 | this.timer = mock(classOf[TimerProxy]) 134 | this.settableGauge = mock(classOf[SettableGaugeProxy[Long]]) 135 | val metricRegistry = new MetricRegistry() 136 | metricRegistry.register(MetricsSystemsSuite.CounterName, this.counter) 137 | metricRegistry.register(MetricsSystemsSuite.HistogramName, this.histogram) 138 | metricRegistry.register(MetricsSystemsSuite.MeterName, this.meter) 139 | metricRegistry.register(MetricsSystemsSuite.TimerName, this.timer) 140 | metricRegistry.register(MetricsSystemsSuite.SettableGaugeName, this.settableGauge) 141 | this.metricsSource = new MetricsSource(MetricsSystemsSuite.MetricNamespace, metricRegistry) 142 | } 143 | 144 | override def afterEach(): Unit = { 145 | super.afterEach 146 | this.counter = null 147 | this.histogram = null 148 | this.meter = null 149 | this.timer = null 150 | this.settableGauge = null 151 | } 152 | 153 | test("metricsSource cannot be null") { 154 | val caught = intercept[IllegalArgumentException] { 155 | val metricsSystem = new RpcMetricsSystem(null) 156 | } 157 | 158 | assert(caught !== null) 159 | assert(caught.getMessage.contains("metricsSource cannot be null")) 160 | } 161 | 162 | test("counter() returns named counter") { 163 | val metricsSystem = new RpcMetricsSystem(this.metricsSource) 164 | val metric = metricsSystem.counter(MetricsSystemsSuite.CounterName) 165 | assert(metric !== null) 166 | } 167 | 168 | test("histogram() returns named histogram") { 169 | val metricsSystem = new RpcMetricsSystem(this.metricsSource) 170 | val metric = metricsSystem.histogram(MetricsSystemsSuite.HistogramName) 171 | assert(metric !== null) 172 | } 173 | 174 | test("meter() returns named meter") { 175 | val metricsSystem = new RpcMetricsSystem(this.metricsSource) 176 | val metric = metricsSystem.meter(MetricsSystemsSuite.MeterName) 177 | assert(metric !== null) 178 | } 179 | 180 | test("timer() returns named timer") { 181 | val metricsSystem = new RpcMetricsSystem(this.metricsSource) 182 | val metric = metricsSystem.timer(MetricsSystemsSuite.TimerName) 183 | assert(metric !== null) 184 | } 185 | 186 | test("gauge() returns named gauge") { 187 | val metricsSystem = new RpcMetricsSystem(this.metricsSource) 188 | val metric = metricsSystem.gauge(MetricsSystemsSuite.SettableGaugeName) 189 | assert(metric !== null) 190 | } 191 | 192 | test("counter() throws a SparkException for a counter that does not exist") { 193 | val metricsSystem = new RpcMetricsSystem(this.metricsSource) 194 | val caught = intercept[SparkException] { 195 | val metric = metricsSystem.counter(MetricsSystemsSuite.InvalidCounterName) 196 | } 197 | 198 | assert(caught !== null) 199 | assert(caught.getMessage.contains("was not found")) 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/metrics/ReceiverMetricSystemBuilderSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import org.apache.spark._ 4 | import org.apache.spark.rpc.{RpcEndpoint, RpcEndpointRef, RpcEnv} 5 | import org.mockito.ArgumentMatchers 6 | import org.mockito.ArgumentMatchers.{any, argThat} 7 | import org.mockito.Mockito._ 8 | import org.scalatest.BeforeAndAfterEach 9 
| 10 | object ReceiverMetricSystemBuilderSuite { 11 | val MetricNamespace = "testmetrics" 12 | val CounterName = "testcounter" 13 | val EndpointName = "testendpoint" 14 | } 15 | 16 | class ReceiverMetricSystemBuilderSuite extends SparkFunSuite 17 | with BeforeAndAfterEach { 18 | import TestImplicits._ 19 | 20 | private var env: SparkEnv = null 21 | private var rpcEnv: RpcEnv = null 22 | 23 | override def beforeEach(): Unit = { 24 | super.beforeEach 25 | val conf = new SparkConf() 26 | .setMaster("local[2]") 27 | .setAppName("test") 28 | .set("spark.dynamicAllocation.testing", "true") 29 | .set("spark.driver.allowMultipleContexts", "true") 30 | env = mock(classOf[SparkEnv]) 31 | rpcEnv = mock(classOf[RpcEnv]) 32 | when(env.executorId).thenReturn(SparkContext.DRIVER_IDENTIFIER) 33 | when(env.conf).thenReturn(conf) 34 | when(env.rpcEnv).thenReturn(rpcEnv) 35 | SparkEnv.set(env) 36 | } 37 | 38 | override def afterEach(): Unit = { 39 | super.afterEach 40 | env = null 41 | rpcEnv = null 42 | } 43 | 44 | test("sparkEnv cannot be null") { 45 | val caught = intercept[IllegalArgumentException] { 46 | val builder = new ReceiverMetricSystemBuilder(null) 47 | } 48 | 49 | assert(caught !== null) 50 | assert(caught.getMessage.contains("sparkEnv cannot be null")) 51 | } 52 | 53 | test("endpointName cannot be null") { 54 | val caught = intercept[IllegalArgumentException] { 55 | val builder = new ReceiverMetricSystemBuilder(env, null) 56 | } 57 | 58 | assert(caught !== null) 59 | assert(caught.getMessage.contains("endpointName cannot be null, empty, or only whitespace")) 60 | } 61 | 62 | test("endpointName cannot be empty") { 63 | val caught = intercept[IllegalArgumentException] { 64 | val builder = new ReceiverMetricSystemBuilder(env, "") 65 | } 66 | 67 | assert(caught !== null) 68 | assert(caught.getMessage.contains("endpointName cannot be null, empty, or only whitespace")) 69 | } 70 | 71 | test("endpointName cannot be only whitespace") { 72 | val caught = intercept[IllegalArgumentException] { 73 | val builder = new ReceiverMetricSystemBuilder(env, " ") 74 | } 75 | 76 | assert(caught !== null) 77 | assert(caught.getMessage.contains("endpointName cannot be null, empty, or only whitespace")) 78 | } 79 | 80 | test("ReceiverMetricSystemBuilder cannot be used on executors") { 81 | when(env.executorId).thenReturn("0") 82 | val caught = intercept[IllegalStateException] { 83 | val builder = new ReceiverMetricSystemBuilder(env, RpcMetricsReceiver.DefaultEndpointName) 84 | } 85 | 86 | assert(caught !== null) 87 | assert(caught.getMessage.contains("ReceiverMetricSystemBuilder can only be used on a driver")) 88 | } 89 | 90 | test("build() registers one metrics source") { 91 | val metricsSystem = mock(classOf[org.apache.spark.metrics.MetricsSystem]) 92 | when(env.metricsSystem).thenReturn(metricsSystem) 93 | when(rpcEnv.setupEndpoint(any[String], any[RpcEndpoint])).thenReturn(mock(classOf[RpcEndpointRef])) 94 | 95 | val builder = new ReceiverMetricSystemBuilder(env, ReceiverMetricSystemBuilderSuite.EndpointName) 96 | builder.addSource(ReceiverMetricSystemBuilderSuite.MetricNamespace, builder => { 97 | builder.registerCounter(ReceiverMetricSystemBuilderSuite.CounterName) 98 | }) 99 | builder.build 100 | ///verify(this.rpcMetricsReceiverRef).send(argThat((message: CounterMessage) => message.value === 1)) 101 | verify(metricsSystem, times(1)).registerSource( 102 | argThat((source: org.apache.spark.metrics.source.Source) => source.metricRegistry.counter( 103 | ReceiverMetricSystemBuilderSuite.CounterName 104 | ) != null)) 
105 | verify(rpcEnv, times(1)).setupEndpoint(ArgumentMatchers.eq( 106 | ReceiverMetricSystemBuilderSuite.EndpointName), any[RpcMetricsReceiver] 107 | ) 108 | } 109 | } -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/metrics/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.metrics 2 | 3 | import org.mockito.ArgumentMatcher 4 | 5 | import scala.reflect.ClassTag 6 | import scala.util.Try 7 | 8 | object TestImplicits { 9 | 10 | import scala.language.implicitConversions 11 | 12 | implicit def matcher[T](f: (T) => Boolean): ArgumentMatcher[T] = new ArgumentMatcher[T] { 13 | override def matches(argument: T): Boolean = f(argument) 14 | } 15 | 16 | implicit class ListImplicits[T](lst: List[T]) { 17 | def insertAt(index: Int, objs: T*): List[T] = { 18 | val (a, b) = lst.splitAt(index) 19 | a ::: objs.toList ::: b 20 | } 21 | 22 | } 23 | 24 | } 25 | 26 | object TestUtils { 27 | def getField[T: ClassTag](fieldName: String): java.lang.reflect.Field = { 28 | val field = scala.reflect.classTag[T].runtimeClass.getDeclaredField(fieldName) 29 | field.setAccessible(true) 30 | field 31 | } 32 | 33 | def newInstance[T](clazz: Class[T], args: Any*): Option[T] = { 34 | val argslst = args.map(_.asInstanceOf[Object]) 35 | val res = clazz.getConstructors 36 | .toStream 37 | .filter(_.getParameterTypes.length == args.size) 38 | val res2 = res 39 | .map(c => Try { 40 | c.newInstance(argslst: _*) 41 | }) 42 | .flatMap(_.toOption) 43 | .headOption 44 | .map(_.asInstanceOf[T]) 45 | res2 46 | } 47 | 48 | def loadOneOf(clazzPaths: String*): Option[Class[_]] = 49 | clazzPaths 50 | .toStream 51 | .map(cl => Try { 52 | Class.forName(cl) 53 | }) 54 | .find(_.isSuccess) 55 | .map(_.get) 56 | } 57 | -------------------------------------------------------------------------------- /src/spark-listeners/src/test/scala/org/apache/spark/sql/streaming/LogAnalyticsStreamingQueryListenerSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.streaming 2 | 3 | import java.util.UUID 4 | 5 | import org.apache.spark.listeners.ListenerSuite 6 | import org.apache.spark.metrics.TestImplicits._ 7 | import org.apache.spark.metrics.TestUtils._ 8 | import org.apache.spark.sql.Row 9 | import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} 10 | import org.scalatest.BeforeAndAfterEach 11 | 12 | import scala.collection.JavaConversions.mapAsJavaMap 13 | 14 | object LogAnalyticsStreamingQueryListenerSuite { 15 | //Spark3 requires 1 more argument than spark 2.4 16 | val queryStartedEvent = 17 | newInstance(classOf[QueryStartedEvent], UUID.randomUUID, UUID.randomUUID, "name") 18 | .orElse(newInstance(classOf[QueryStartedEvent], UUID.randomUUID, UUID.randomUUID, "name", (System.currentTimeMillis() / 1000).toString)) 19 | .get 20 | 21 | val queryTerminatedEvent = new QueryTerminatedEvent(UUID.randomUUID, UUID.randomUUID, None) 22 | val queryProgressEvent = { 23 | // v3.0.1-: StateOperatorProgress: (numRowsTotal: Long, numRowsUpdated: Long, memoryUsedBytes: Long, customMetrics: java.util.Map[String,Long]) 24 | // v3.1.* : StateOperatorProgress: (numRowsTotal: Long, numRowsUpdated: Long, memoryUsedBytes: Long, numRowsDroppedByWatermark: Long, customMetrics: java.util.Map[String,Long]) 25 | // v3.2.0+: StateOperatorProgress: (operatorName: String, numRowsTotal: 
Long, numRowsUpdated: Long, allUpdatesTimeMs: Long, numRowsRemoved: Long, allRemovalsTimeMs: Long, commitTimeMs: Long, memoryUsedBytes: Long, numRowsDroppedByWatermark: Long, numShufflePartitions: Long, numStateStoreInstances: Long, customMetrics: java.util.Map[String,Long]) 26 | val v30argsStateOperatorProgress = List[Any](0, 1, 2, new java.util.HashMap()) 27 | val v31argsStateOperatorProgress = List[Any](0, 1, 2, 3, new java.util.HashMap()) 28 | val v32argsStateOperatorProgress = List[Any]("operatorName", 0, 1, 10, 2, 10, 10, 3, 4, 5, 6, new java.util.HashMap()) 29 | 30 | val stateOperatorProgress = newInstance(classOf[StateOperatorProgress], v30argsStateOperatorProgress:_*) 31 | .orElse(newInstance(classOf[StateOperatorProgress], v31argsStateOperatorProgress:_*)) 32 | .orElse(newInstance(classOf[StateOperatorProgress], v32argsStateOperatorProgress:_*)) 33 | .get 34 | 35 | // v3.1.2-: SourceProgress: (description: String, startOffset: String, endOffset: String, numInputRows: Long, inputRowsPerSecond: Double, processedRowsPerSecond: Double) 36 | // v3.2.0+: SourceProgress: (description: String, startOffset: String, endOffset: String, latestOffset: String, numInputRows: Long, inputRowsPerSecond: Double, processedRowsPerSecond: Double, metrics: java.util.Map[String,String]) 37 | val v31argsSourceProgress = List[Any]("source", "123", "456", 678, Double.NaN, Double.NegativeInfinity) 38 | val v32argsSourceProgress = List[Any]("source", "123", "456", "789", 1000, Double.NaN, Double.NegativeInfinity, new java.util.HashMap()) 39 | 40 | val spark2args = List[Any]( 41 | UUID.randomUUID, 42 | UUID.randomUUID, 43 | "test", 44 | ListenerSuite.EPOCH_TIME_AS_ISO8601, 45 | 2L, 46 | mapAsJavaMap(Map("total" -> 0L)), 47 | mapAsJavaMap(Map.empty[String, String]), 48 | Array(stateOperatorProgress), 49 | Array( 50 | newInstance(classOf[SourceProgress], v31argsSourceProgress:_*) 51 | .orElse(newInstance(classOf[SourceProgress], v32argsSourceProgress:_*)) 52 | .get 53 | ), 54 | new SinkProgress("sink") 55 | ) 56 | 57 | val spark3args = spark2args.insertAt(4, 1234L) ::: List(mapAsJavaMap(Map[String, Row]())) 58 | 59 | val streamingQueryProgress = newInstance(classOf[StreamingQueryProgress], spark2args:_*) 60 | .orElse(newInstance(classOf[StreamingQueryProgress], spark3args:_*)) 61 | .get 62 | 63 | new QueryProgressEvent(streamingQueryProgress) 64 | } 65 | } 66 | 67 | class LogAnalyticsStreamingQueryListenerSuite extends ListenerSuite 68 | with BeforeAndAfterEach { 69 | 70 | test("should invoke sendToSink for QueryStartedEvent with full class name") { 71 | val (json, event) = this.onStreamingQueryListenerEvent( 72 | LogAnalyticsStreamingQueryListenerSuite.queryStartedEvent 73 | ) 74 | 75 | this.assertEvent(json, event) 76 | } 77 | 78 | test("should invoke sendToSink for QueryTerminatedEvent with full class name") { 79 | val (json, event) = this.onStreamingQueryListenerEvent( 80 | LogAnalyticsStreamingQueryListenerSuite.queryTerminatedEvent 81 | ) 82 | 83 | this.assertEvent(json, event) 84 | } 85 | 86 | test("should invoke sendToSink for QueryProgressEvent with full class name") { 87 | val (json, event) = this.onStreamingQueryListenerEvent( 88 | LogAnalyticsStreamingQueryListenerSuite.queryProgressEvent 89 | ) 90 | 91 | this.assertEvent(json, event) 92 | } 93 | 94 | test("QueryProgressEvent should have expected SparkEventTime") { 95 | val (json, _) = this.onStreamingQueryListenerEvent( 96 | LogAnalyticsStreamingQueryListenerSuite.queryProgressEvent 97 | ) 98 | 99 | this.assertSparkEventTime( 100 | json, 101 | (_, 
value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601) 102 | ) 103 | } 104 | 105 | test("QueryStartedEvent should have SparkEventTime") { 106 | val (json, _) = this.onStreamingQueryListenerEvent( 107 | LogAnalyticsStreamingQueryListenerSuite.queryStartedEvent 108 | ) 109 | this.assertSparkEventTime( 110 | json, 111 | (_, value) => assert(!value.extract[String].isEmpty) 112 | ) 113 | } 114 | 115 | test("QueryTerminatedEvent should have SparkEventTime") { 116 | val (json, _) = this.onStreamingQueryListenerEvent( 117 | LogAnalyticsStreamingQueryListenerSuite.queryTerminatedEvent 118 | ) 119 | this.assertSparkEventTime( 120 | json, 121 | (_, value) => assert(!value.extract[String].isEmpty) 122 | ) 123 | } 124 | } 125 | --------------------------------------------------------------------------------
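Note on usage: the metrics suites above exercise the UserMetricsSystems entry points end to end. The sketch below is a minimal illustration of that pattern, inferred from the calls the tests make; it assumes a Spark job with the spark-listeners jar on the classpath, the import path is assumed from the package layout above, and the namespace and counter names ("samplemetrics", "rowsprocessed") are illustrative only.

import org.apache.spark.metrics.UserMetricsSystems

// Build a metrics system for an application-chosen namespace. As MetricsSystemsSuite
// verifies, this registers a MetricsSource with the node's Spark MetricsSystem.
val metricSystem = UserMetricsSystems.getMetricSystem("samplemetrics", (builder) => {
  builder.registerCounter("rowsprocessed")
})

// Look up the registered counter and increment it. As LocalMetricsSystemsSuite shows,
// counter() throws a SparkException if the requested name was never registered.
metricSystem.counter("rowsprocessed").inc()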