├── .dockerignore
├── .gitignore
├── .scalafmt.conf
├── Dockerfile
├── LICENSE
├── README.md
├── benchmarks
│   ├── arm64
│   │   ├── README.md
│   │   ├── charts
│   │   │   ├── tpcds-benchmark
│   │   │   │   ├── Chart.yaml
│   │   │   │   ├── templates
│   │   │   │   │   ├── _helpers.tpl
│   │   │   │   │   └── sparkapplication.yaml
│   │   │   │   └── values.yaml
│   │   │   └── tpcds-data-generation
│   │   │       ├── Chart.yaml
│   │   │       ├── templates
│   │   │       │   ├── _helpers.tpl
│   │   │       │   └── sparkapplication.yaml
│   │   │       └── values.yaml
│   │   └── terraform
│   │       └── alicloud
│   │           ├── main.tf
│   │           ├── provider.tf
│   │           ├── terraform.tfvars
│   │           ├── user_data.sh
│   │           └── variables.tf
│   ├── hadoop-aliyun
│   │   ├── README.md
│   │   ├── charts
│   │   │   └── tpcds-benchmark
│   │   │       ├── Chart.yaml
│   │   │       ├── templates
│   │   │       │   ├── _helpers.tpl
│   │   │       │   └── sparkapplication.yaml
│   │   │       └── values.yaml
│   │   ├── result.csv
│   │   └── terraform
│   │       └── alicloud
│   │           ├── main.tf
│   │           ├── provider.tf
│   │           ├── terraform.tfvars
│   │           ├── user_data.sh
│   │           └── variables.tf
│   └── jindosdk
│       ├── README.md
│       ├── charts
│       │   ├── tpcds-benchmark
│       │   │   ├── Chart.yaml
│       │   │   ├── templates
│       │   │   │   ├── _helpers.tpl
│       │   │   │   └── sparkapplication.yaml
│       │   │   └── values.yaml
│       │   └── tpcds-data-generation
│       │       ├── Chart.yaml
│       │       ├── templates
│       │       │   ├── _helpers.tpl
│       │       │   └── sparkapplication.yaml
│       │       └── values.yaml
│       ├── result.csv
│       └── terraform
│           └── alicloud
│               ├── main.tf
│               ├── provider.tf
│               ├── terraform.tfvars
│               ├── user_data.sh
│               └── variables.tf
├── build.sbt
├── charts
│   ├── tpcds-benchmark
│   │   ├── Chart.yaml
│   │   ├── templates
│   │   │   ├── _helpers.tpl
│   │   │   └── sparkapplication.yaml
│   │   └── values.yaml
│   └── tpcds-data-generation
│       ├── Chart.yaml
│       ├── templates
│       │   ├── _helpers.tpl
│       │   └── sparkapplication.yaml
│       └── values.yaml
├── docs
│   ├── benchmark
│   │   ├── apache-spark-vs-emr-spark.md
│   │   ├── hadoop-aliyun
│   │   │   ├── cloud-disk-read-write-bps.png
│   │   │   ├── cloud-disk-read-write-iops.png
│   │   │   ├── cpu-usage.png
│   │   │   ├── index.md
│   │   │   ├── memory-usage.png
│   │   │   ├── network-bandwidth.png
│   │   │   ├── oss-bandwidth-usage.png
│   │   │   └── system-load.png
│   │   ├── setup.md
│   │   ├── spark-on-ack-arm64
│   │   │   ├── cloud-disk-read-write-bps.png
│   │   │   ├── cloud-disk-read-write-iops.png
│   │   │   ├── cpu-usage.png
│   │   │   ├── index.md
│   │   │   ├── memory-usage.png
│   │   │   ├── network-bandwidth.png
│   │   │   ├── oss-bandwidth-usage.png
│   │   │   ├── result.csv
│   │   │   └── system-load.png
│   │   ├── spark-on-ack
│   │   │   ├── cloud-disk-read-write-bps.png
│   │   │   ├── cloud-disk-read-write-iops.png
│   │   │   ├── cpu-usage.png
│   │   │   ├── index.md
│   │   │   ├── memory-usage.png
│   │   │   ├── network-bandwidth.png
│   │   │   ├── oss-bandwidth-usage.png
│   │   │   ├── result.csv
│   │   │   └── system-load.png
│   │   ├── spark-on-ecs-vs-on-ack
│   │   │   └── index.md
│   │   ├── spark-on-x86-vs-on-arm64
│   │   │   ├── cost_comparison.png
│   │   │   ├── index.md
│   │   │   └── performance_comparison.png
│   │   ├── tpcds-benchmark.md
│   │   └── tpcds-data-generation.md
│   ├── bestpractice
│   │   ├── emrspark-ess-jindofs.md
│   │   ├── emrspark-ess.md
│   │   ├── emrspark-jindofs.md
│   │   ├── emrspark.md
│   │   └── generate-data.md
│   ├── img
│   │   ├── alluxio-overview.jpg
│   │   ├── alluxio_capacity.png
│   │   ├── apache-spark-per-10t.jpg
│   │   ├── apache-spark-total-10t.jpg
│   │   ├── create_ack_cluster.jpeg
│   │   ├── emr-spark-ess-jindofs-per-1t.jpg
│   │   ├── emr-spark-ess-jindofs-total-1t.jpg
│   │   ├── emr-spark-jindofs-per-1t.jpg
│   │   ├── emr-spark-jindofs-total-1t.jpg
│   │   ├── emr-spark-rss-per-10t.jpg
│   │   ├── emr-spark-rss-total-10t.jpg
│   │   ├── get_spark_history_svc.jpeg
│   │   ├── get_sparkapplication_id.jpeg
│   │   ├── install_spark_history.jpeg
│   │   ├── install_spark_operator.jpeg
│   │   ├── jindofs.png
│   │   ├── localhost_spark_ui.jpeg
│   │   ├── mount_disk.jpeg
│   │   ├── port-forward_svc.jpg
│   │   ├── spark_vs_alluxio.jpg
│   │   ├── sparkapplication_svc.jpg
│   │   └── tpcds_per_query.jpeg
│   ├── performance
│   │   ├── emr-spark.md
│   │   ├── jindofs.md
│   │   ├── oss.md
│   │   ├── remote-shuffle-service.md
│   │   ├── serverless-spark
│   │   │   ├── index.md
│   │   │   └── serverless-spark.jpeg
│   │   └── spark-operator.md
│   └── quickstart
│       ├── benchmark_code.md
│       ├── benchmark_env.md
│       ├── benchmark_result.md
│       ├── benchmark_steps.md
│       └── debugging_guide.md
├── kubernetes
│   ├── alluxio
│   │   └── config.yaml
│   ├── emr
│   │   ├── jar
│   │   │   └── spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar
│   │   ├── tpcds-benchmark-with-emrspark-ess-jindofs.yaml
│   │   ├── tpcds-benchmark-with-emrspark-ess.yaml
│   │   ├── tpcds-benchmark-with-emrspark-jindofs.yaml
│   │   ├── tpcds-benchmark-with-emrspark.yaml
│   │   └── tpcds-data-generation.yaml
│   └── spark
│       ├── tpcds-benchmark-with-alluxio.yaml
│       ├── tpcds-benchmark.yaml
│       └── tpcds-data-generation.yaml
├── lib
│   └── spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar
├── project
│   ├── build.properties
│   └── plugins.sbt
├── src
│   └── main
│       └── scala
│           └── com
│               └── aliyun
│                   └── ack
│                       └── spark
│                           └── tpcds
│                               ├── Benchmark.scala
│                               └── DataGeneration.scala
└── terraform
    └── alicloud
        ├── datasources.tf
        ├── graph.svg
        ├── main.tf
        ├── modules
        │   ├── celeborn
        │   │   ├── main.tf
        │   │   ├── master_user_data.sh
        │   │   ├── variables.tf
        │   │   └── worker_user_data.sh
        │   ├── cs
        │   │   ├── main.tf
        │   │   ├── outputs.tf
        │   │   └── variables.tf
        │   ├── ecs
        │   │   ├── main.tf
        │   │   ├── outputs.tf
        │   │   └── variables.tf
        │   ├── fluid
        │   │   ├── main.tf
        │   │   └── variables.tf
        │   ├── oss
        │   │   ├── main.tf
        │   │   ├── outputs.tf
        │   │   └── variables.tf
        │   ├── resource-manager
        │   │   ├── main.tf
        │   │   ├── outputs.tf
        │   │   └── variables.tf
        │   ├── spark
        │   │   ├── main.tf
        │   │   ├── user_data.sh
        │   │   ├── user_data_arm64.sh
        │   │   └── variables.tf
        │   └── vpc
        │       ├── main.tf
        │       ├── outputs.tf
        │       └── variables.tf
        ├── provider.tf
        ├── root.tf
        ├── terraform.tfvars
        └── variables.tf
/.dockerignore:
--------------------------------------------------------------------------------
1 | .bsp/
2 | .git/
3 | .metals/
4 | .vscode/
5 | benchmarks/
6 | charts/
7 | docs/
8 | kubernetes/
9 | target/
10 | terraform/
11 | .dockerignore
12 | .gitignore
13 | .scalafmt.conf
14 | Dockerfile
15 | LICENSE
16 | README.md
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Class files
2 | target/
3 |
4 | # SBT
5 | .metals/
6 | .bsp/
7 |
8 | # Benchmark
9 | benchmark/*/values.yaml
10 | config/benchmark.properties
11 | config/values.yaml
12 |
13 | # Terraform
14 | .terraform/
15 | .terraform.lock.hcl
16 | .terraform.tfstate.lock.info
17 | terraform.tfstate
18 | terraform.tfstate.backup
19 |
20 | # Various IDEs
21 | .idea/
22 | .vscode/
23 |
24 | # Mac OS
25 | .DS_Store
26 |
--------------------------------------------------------------------------------
/.scalafmt.conf:
--------------------------------------------------------------------------------
1 | version = "3.7.15"
2 | runner.dialect = scala212
--------------------------------------------------------------------------------
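The formatter is pinned to scalafmt 3.7.15 with the Scala 2.12 dialect, matching the project's Scala 2.12 build. Assuming the sbt-scalafmt plugin is wired up in project/plugins.sbt (its contents are not shown here), formatting the sources would be a one-liner:

    # Format all Scala sources (assumes sbt-scalafmt is enabled in project/plugins.sbt)
    sbt scalafmtAll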
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG SPARK_IMAGE=spark:3.5.5
2 |
3 | ARG SBT_IMAGE=sbtscala/scala-sbt:eclipse-temurin-jammy-17.0.10_7_1.10.4_2.12.20
4 |
5 | FROM debian:bullseye-slim AS tpcds-kit-builder
6 |
7 | ENV GIT_COMMIT_ID=1b7fb7529edae091684201fab142d956d6afd881
8 |
9 | WORKDIR /app
10 |
11 | RUN set -eux && \
12 | apt-get update && \
13 | apt-get install -y gcc make flex bison byacc git
14 |
15 | RUN set -eux && \
16 | git clone https://github.com/databricks/tpcds-kit.git && \
17 | cd tpcds-kit && \
18 | git checkout ${GIT_COMMIT_ID} && \
19 | cd tools && \
20 | make OS=LINUX
21 |
22 | FROM ${SBT_IMAGE} AS benchmark-builder
23 |
24 | WORKDIR /app
25 |
26 | COPY . .
27 |
28 | RUN set -eux && \
29 | sbt assembly
30 |
31 | FROM ${SPARK_IMAGE}
32 |
33 | COPY --from=tpcds-kit-builder /app/tpcds-kit/tools /opt/tpcds-kit/tools
34 |
35 | COPY --from=benchmark-builder /app/target/scala-2.12/*.jar /opt/spark/jars/
36 |
37 | COPY lib /opt/spark/jars/
38 |
--------------------------------------------------------------------------------
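The Dockerfile is a three-stage build: a Debian stage compiles the TPC-DS kit tools at a pinned commit, an sbt stage assembles the benchmark jar, and the final stage layers both (plus the bundled lib/ jar) onto the Spark base image. A minimal local build might look like the sketch below; the target tag is illustrative, and the --build-arg flag just overrides the default declared at the top of the Dockerfile:

    docker build \
      --build-arg SPARK_IMAGE=spark:3.5.5 \
      -t spark-tpcds-benchmark:local \
      .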
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Aliyun (Alibaba Cloud) Container Service
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/benchmarks/arm64/charts/tpcds-benchmark/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-benchmark
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/benchmarks/arm64/charts/tpcds-benchmark/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-benchmark.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-benchmark.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-benchmark.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-benchmark.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-benchmark.chart" . }}
38 | {{ include "tpcds-benchmark.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-benchmark.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-benchmark.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-benchmark.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-benchmark.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
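These are the stock Helm helper templates: names are truncated to 63 characters to stay within the Kubernetes DNS label limit, and fullname collapses to the bare release name whenever the release contains the chart name. To see what they resolve to, the chart can be rendered offline; the release name below is illustrative, and rendering assumes the chart's values cover everything its templates reference:

    # "benchmark" does not contain the chart name, so tpcds-benchmark.fullname
    # renders as "benchmark-tpcds-benchmark"
    helm template benchmark benchmarks/arm64/charts/tpcds-benchmark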
/benchmarks/arm64/charts/tpcds-benchmark/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for tpcds-benchmark.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS endpoint
22 | endpoint: oss-cn-beijing-internal.aliyuncs.com
23 |
24 | benchmark:
25 | # -- Scale factor
26 | scaleFactor: 3072
27 | # -- Number of iterations
28 | numIterations: 1
29 | # -- Whether to optimize queries
30 | optimizeQueries: false
31 | # -- Filter queries, will run all if empty
32 | queries: []
33 | # - q70-v2.4
34 | # - q82-v2.4
35 | # - q64-v2.4
36 |
--------------------------------------------------------------------------------
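With these defaults in place, a run only needs the environment-specific values overridden at install time; the rest (iterations, query filter) can stay as-is or be set the same way. A sketch with an illustrative release name and bucket:

    helm install tpcds-benchmark benchmarks/arm64/charts/tpcds-benchmark \
      --set oss.bucket=my-benchmark-bucket \
      --set oss.endpoint=oss-cn-beijing-internal.aliyuncs.com \
      --set 'benchmark.queries={q70-v2.4,q82-v2.4,q64-v2.4}'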
/benchmarks/arm64/charts/tpcds-data-generation/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-data-generation
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/benchmarks/arm64/charts/tpcds-data-generation/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-data-generation.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-data-generation.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-data-generation.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-data-generation.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-data-generation.chart" . }}
38 | {{ include "tpcds-data-generation.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-data-generation.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-data-generation.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-data-generation.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-data-generation.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/benchmarks/arm64/charts/tpcds-data-generation/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for tpcds-data-generation.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS endpoint
22 | endpoint: oss-cn-beijing-internal.aliyuncs.com
23 |
24 | benchmark:
25 | # -- Scale factor
26 | scaleFactor: 3072
27 | # -- Number of partitions
28 | numPartitions: 640
29 |
--------------------------------------------------------------------------------
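Data generation follows the same pattern: scaleFactor is the TPC-DS scale factor (roughly the dataset size in GB, so 3072 is about 3 TB) and numPartitions controls how many generation tasks run in parallel. A minimal sketch, again with an illustrative bucket:

    helm install tpcds-data-generation benchmarks/arm64/charts/tpcds-data-generation \
      --set oss.bucket=my-benchmark-bucket \
      --set benchmark.scaleFactor=3072 \
      --set benchmark.numPartitions=640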
/benchmarks/arm64/terraform/alicloud/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_providers {
3 | alicloud = {
4 | source = "hashicorp/alicloud"
5 | version = "1.223.2"
6 | }
7 | }
8 |
9 | required_version = ">= 1.8.0"
10 | }
11 |
12 | resource "random_string" "suffix" {
13 | length = 16
14 | lower = true
15 | upper = false
16 | special = false
17 | }
18 |
19 | resource "alicloud_resource_manager_resource_group" "default" {
20 | resource_group_name = "rg-${random_string.suffix.result}"
21 | display_name = "rg-${random_string.suffix.result}"
22 | }
23 |
24 | resource "alicloud_vpc" "default" {
25 | vpc_name = "vpc-${random_string.suffix.result}"
26 | cidr_block = "192.168.0.0/16"
27 | resource_group_id = alicloud_resource_manager_resource_group.default.id
28 | }
29 |
30 | resource "alicloud_vswitch" "default" {
31 | vswitch_name = "vsw-${random_string.suffix.result}"
32 | cidr_block = "192.168.0.0/24"
33 | vpc_id = alicloud_vpc.default.id
34 | zone_id = var.zone_id
35 | }
36 |
37 | resource "alicloud_security_group" "default" {
38 | name = "sg-${random_string.suffix.result}"
39 | vpc_id = alicloud_vpc.default.id
40 | resource_group_id = alicloud_resource_manager_resource_group.default.id
41 | security_group_type = "normal"
42 | }
43 |
44 | resource "alicloud_security_group_rule" "default" {
45 | type = "ingress"
46 | ip_protocol = "all"
47 | port_range = "-1/-1"
48 | cidr_ip = "192.168.0.0/16"
49 | security_group_id = alicloud_security_group.default.id
50 | priority = 1
51 | }
52 |
53 | resource "alicloud_security_group_rule" "icmp" {
54 | type = "ingress"
55 | ip_protocol = "icmp"
56 | port_range = "-1/-1"
57 | cidr_ip = "0.0.0.0/0"
58 | security_group_id = alicloud_security_group.default.id
59 | priority = 1
60 | }
61 |
62 | resource "alicloud_cs_managed_kubernetes" "default" {
63 | name = "ack-${random_string.suffix.result}"
64 | timezone = "Asia/Shanghai"
65 | version = "1.32.1-aliyun.1"
66 |
67 | worker_vswitch_ids = [alicloud_vswitch.default.id]
68 | pod_vswitch_ids = [alicloud_vswitch.default.id]
69 | service_cidr = "172.16.0.0/16"
70 |
71 | addons {
72 | name = "terway-eniip"
73 | }
74 |
75 | proxy_mode = "ipvs"
76 | cluster_domain = "cluster.local"
77 | deletion_protection = false
78 | cluster_spec = "ack.pro.small"
79 | load_balancer_spec = "slb.s1.small"
80 | new_nat_gateway = true
81 | slb_internet_enabled = true
82 | resource_group_id = alicloud_resource_manager_resource_group.default.id
83 | security_group_id = alicloud_security_group.default.id
84 | }
85 |
86 | resource "alicloud_cs_kubernetes_node_pool" "spark-master" {
87 | node_pool_name = "spark-master"
88 | cluster_id = alicloud_cs_managed_kubernetes.default.id
89 | vswitch_ids = [alicloud_vswitch.default.id]
90 | desired_size = var.spark.master.instance_count
91 | instance_types = [var.spark.master.instance_type]
92 | image_type = "AliyunLinux3"
93 | system_disk_category = "cloud_essd"
94 | system_disk_size = 40
95 | system_disk_performance_level = "PL1"
96 |
97 | labels {
98 | key = "spark.tpcds.benchmark/role"
99 | value = "spark-master"
100 | }
101 |
102 | resource_group_id = alicloud_resource_manager_resource_group.default.id
103 | security_group_ids = [alicloud_security_group.default.id]
104 | }
105 |
106 | resource "alicloud_cs_kubernetes_node_pool" "spark-worker" {
107 | node_pool_name = "spark-worker"
108 | cluster_id = alicloud_cs_managed_kubernetes.default.id
109 | vswitch_ids = [alicloud_vswitch.default.id]
110 | desired_size = var.spark.worker.instance_count
111 | instance_types = [var.spark.worker.instance_type]
112 | image_type = "AliyunLinux3Arm64"
113 | system_disk_category = "cloud_essd"
114 | system_disk_size = 40
115 | system_disk_performance_level = "PL1"
116 | data_disks {
117 | category = "cloud_essd"
118 | size = 300
119 | performance_level = "PL1"
120 | device = "/dev/vdb"
121 | }
122 | data_disks {
123 | category = "cloud_essd"
124 | size = 300
125 | performance_level = "PL1"
126 | device = "/dev/vdc"
127 | }
128 | data_disks {
129 | category = "cloud_essd"
130 | size = 300
131 | performance_level = "PL1"
132 | device = "/dev/vdd"
133 | }
134 | data_disks {
135 | category = "cloud_essd"
136 | size = 300
137 | performance_level = "PL1"
138 | device = "/dev/vde"
139 | }
140 | data_disks {
141 | category = "cloud_essd"
142 | size = 300
143 | performance_level = "PL1"
144 | device = "/dev/vdf"
145 | }
146 | data_disks {
147 | category = "cloud_essd"
148 | size = 300
149 | performance_level = "PL1"
150 | device = "/dev/vdg"
151 | }
152 | data_disks {
153 | category = "cloud_essd"
154 | size = 40
155 | performance_level = "PL1"
156 | device = "/dev/vdh"
157 | }
158 |
159 | labels {
160 | key = "spark.tpcds.benchmark/role"
161 | value = "spark-worker"
162 | }
163 |
164 | taints {
165 | key = "spark.tpcds.benchmark/role"
166 | value = "spark-worker"
167 | effect = "NoSchedule"
168 | }
169 |
170 | kubelet_configuration {
171 | eviction_hard = {
172 | "imagefs.available" = "5%"
173 | "memory.available" = "100Mi"
174 | "nodefs.available" = "5%"
175 | "nodefs.inodesFree" = "5%"
176 | }
177 | system_reserved = {
178 | cpu = "300m"
179 | memory = "600Mi"
180 | pid = "1000"
181 | }
182 | kube_reserved = {
183 | cpu = "300m"
184 | memory = "600Mi"
185 | pid = "1000"
186 | }
187 | }
188 |
189 | user_data = base64encode(file("user_data.sh"))
190 |
191 | resource_group_id = alicloud_resource_manager_resource_group.default.id
192 | security_group_ids = [alicloud_security_group.default.id]
193 | }
194 |
--------------------------------------------------------------------------------
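End to end, this file provisions a resource group, VPC and vSwitch, security group rules, an ACK Pro cluster, and two node pools: an x86 spark-master pool and an arm64 spark-worker pool carrying six 300 GiB ESSD data disks plus one 40 GiB disk. The standard workflow applies (terraform init also fetches the hashicorp/random provider that random_string needs); credentials come from the CLI profile named in provider.tf:

    cd benchmarks/arm64/terraform/alicloud
    terraform init
    terraform plan
    terraform apply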
/benchmarks/arm64/terraform/alicloud/provider.tf:
--------------------------------------------------------------------------------
1 | provider "alicloud" {
2 | region = var.region
3 | profile = var.profile
4 | }
5 |
--------------------------------------------------------------------------------
/benchmarks/arm64/terraform/alicloud/terraform.tfvars:
--------------------------------------------------------------------------------
1 | region = "cn-beijing"
2 |
3 | zone_id = "cn-beijing-i"
4 |
5 | profile = "default"
6 |
7 | spark = {
8 | master = {
9 | instance_count = 1
10 | instance_type = "ecs.g7.2xlarge"
11 | }
12 | worker = {
13 | instance_count = 6
14 | instance_type = "ecs.g7.8xlarge"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/benchmarks/arm64/terraform/alicloud/user_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Print each command before executing it
4 | set -ex
5 |
6 | # Install parted
7 | yum install -y parted e2fsprogs
8 |
9 | # Create a new partition on each data disk
10 | disks=(/dev/nvme1n1 /dev/nvme2n1 /dev/nvme3n1 /dev/nvme4n1 /dev/nvme5n1 /dev/nvme6n1)
11 | for disk in ${disks[@]}; do
12 | parted ${disk} mklabel gpt
13 | parted ${disk} mkpart primary 1 100%
14 | parted ${disk} align-check optimal 1
15 | done
16 | partprobe
17 |
18 | # Create a filesystem on each partition
19 | for disk in ${disks[@]}; do
20 | mkfs -t xfs ${disk}p1
21 | done
22 |
23 | # Mount the partitions
24 | cp /etc/fstab /etc/fstab.bak
25 | n=${#disks[@]}
26 | for ((i = 0; i < n; i++)); do
27 | dir="/mnt/disk$(($i + 1))"
28 | mkdir -p ${dir}
29 | echo "$(blkid ${disks[i]}p1 | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
30 | done
31 | mount -a
32 |
33 | # Make the mount points writable by all users
34 | chmod a+w /mnt/disk*
35 |
--------------------------------------------------------------------------------
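This variant addresses the data disks by their NVMe names (/dev/nvme1n1 and up, with a p1 partition suffix), whereas the hadoop-aliyun copy of the script uses virtio names (/dev/vdb and up, suffix 1); which naming a node sees depends on the ECS instance family. A quick post-boot sanity check might be:

    # Confirm six xfs partitions exist and are mounted under /mnt/disk1..6
    lsblk -o NAME,SIZE,FSTYPE,MOUNTPOINT
    df -h /mnt/disk*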
/benchmarks/arm64/terraform/alicloud/variables.tf:
--------------------------------------------------------------------------------
1 | variable "region" {
2 | type = string
3 | default = "cn-beijing"
4 | }
5 |
6 | variable "zone_id" {
7 | type = string
8 | default = "cn-beijing-i"
9 | }
10 |
11 | variable "profile" {
12 | type = string
13 | default = "default"
14 | }
15 |
16 | variable "spark" {
17 | type = object({
18 | master = object({
19 | instance_count = number
20 | instance_type = string
21 | })
22 | worker = object({
23 | instance_count = number
24 | instance_type = string
25 | })
26 | })
27 | default = {
28 | master = {
29 | instance_count = 0
30 | instance_type = "ecs.g7.2xlarge"
31 | }
32 | worker = {
33 | instance_count = 0
34 | instance_type = "ecs.g7.8xlarge"
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/charts/tpcds-benchmark/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-benchmark
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/charts/tpcds-benchmark/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-benchmark.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-benchmark.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-benchmark.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-benchmark.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-benchmark.chart" . }}
38 | {{ include "tpcds-benchmark.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-benchmark.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-benchmark.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-benchmark.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-benchmark.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/charts/tpcds-benchmark/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for tpcds-benchmark.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS endpoint
22 | endpoint: oss-cn-beijing-internal.aliyuncs.com
23 |
24 | benchmark:
25 | # -- Scale factor
26 | scaleFactor: 3072
27 | # -- Number of iterations
28 | numIterations: 1
29 | # -- Whether to optimize queries
30 | optimizeQueries: false
31 | # -- Filter queries, will run all if empty
32 | queries: []
33 | # - q70-v2.4
34 | # - q82-v2.4
35 | # - q64-v2.4
36 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/result.csv:
--------------------------------------------------------------------------------
1 | q1-v2.4,7.555827733,14.645017924,9.525067462200001,2.5904191714618894
2 | q10-v2.4,10.139219717,12.947744009,10.972181106399999,1.0039544386190118
3 | q11-v2.4,55.874544235,57.542876171,56.6287968262,0.5430662413947229
4 | q12-v2.4,4.579125717,5.402954092,4.9307222384,0.32301686392230966
5 | q13-v2.4,15.9073942,18.265080114,17.072347103000002,0.9933099727270132
6 | q14a-v2.4,119.177384819,129.495269281,123.5429690906,3.786778786711343
7 | q14b-v2.4,103.46409887899999,115.289697081,109.85192228479998,4.662932511628337
8 | q15-v2.4,13.842170409,17.986607644,15.4219717892,1.3734498605155039
9 | q16-v2.4,43.282444291,55.276480839,47.590108713199996,4.59185208768297
10 | q17-v2.4,14.042981717,14.980429148999999,14.637328397000001,0.3634001139334673
11 | q18-v2.4,28.403695799,41.949688296,32.238505677199996,4.921754787967268
12 | q19-v2.4,7.363309527,8.245975126,7.656795179400001,0.30514480884743084
13 | q2-v2.4,25.816357012,32.470428279,28.547582622199997,2.451879133915654
14 | q20-v2.4,6.086647,6.4193623209999995,6.273996961,0.12623830909269768
15 | q21-v2.4,2.378381887,3.4156040450000003,2.8758012694,0.39138118136789785
16 | q22-v2.4,16.086610273,17.14673522,16.5761784312,0.3809763520523912
17 | q23a-v2.4,258.076754195,269.193093559,265.27780513740004,3.973645169102721
18 | q23b-v2.4,368.528432661,388.671088098,380.94798291300003,7.663851230049627
19 | q24a-v2.4,189.671067115,209.15216272299998,196.59965459739996,7.0735138307511125
20 | q24b-v2.4,169.089763371,182.15292864999998,175.2570714872,4.5769447104419365
21 | q25-v2.4,11.874169405,13.685109596,12.6089382824,0.6396215210419964
22 | q26-v2.4,9.310247432,12.234403261999999,10.9519195662,1.167272378992625
23 | q27-v2.4,11.289865549,15.073064023,13.2927417792,1.2331188089959368
24 | q28-v2.4,96.97509236500001,125.295863472,112.94800023280001,9.869950063745005
25 | q29-v2.4,38.803619543,41.97551136,40.068913475,1.2826696130533022
26 | q3-v2.4,5.364783305,7.824145384,6.2223974736,0.8462572739417858
27 | q30-v2.4,20.625805219999997,21.593344244,20.9982609472,0.3287445731381358
28 | q31-v2.4,17.829719669,25.059305644,21.779922139,2.3771383205792267
29 | q32-v2.4,2.720048041,4.683839247,3.3433007876000005,0.6961096711707798
30 | q33-v2.4,9.704455745999999,12.813910995,11.487099950600001,1.0967935667484785
31 | q34-v2.4,7.898450826,9.110205018,8.256833764800001,0.4484369296701056
32 | q35-v2.4,24.445621609000003,28.419166817,26.066038887200005,1.3988650902774291
33 | q36-v2.4,10.442542739,14.228895264999998,12.410845556599998,1.2762512908986128
34 | q37-v2.4,20.072886561,25.434238496000003,22.857843468,1.9864712650677976
35 | q38-v2.4,30.999390854,31.964420292,31.2859972066,0.34561495021578303
36 | q39a-v2.4,6.8627511519999995,7.2298869009999995,7.0608150454,0.11908449974127251
37 | q39b-v2.4,5.610674997,6.385491304,5.9222366724,0.26585784635081794
38 | q4-v2.4,367.93584179799996,374.496266095,371.539061803,2.3286806342391855
39 | q40-v2.4,16.463025893999998,21.682886973000002,19.2418352114,1.6764054241600101
40 | q41-v2.4,1.05442466,1.5361814,1.2747248748,0.19715515022243038
41 | q42-v2.4,2.554909318,3.899367612,3.4005177458000007,0.4700603955042605
42 | q43-v2.4,8.078254149000001,9.896694641,8.957477204000002,0.6635069337947319
43 | q44-v2.4,41.550018738,54.162684629000005,48.663437501,4.2148260123451955
44 | q45-v2.4,14.805159845,20.566858976,16.8483266144,1.9615425240330668
45 | q46-v2.4,13.708602385,15.891152766,14.930230506799997,0.7300563130353975
46 | q47-v2.4,14.537253275,17.721562365999997,15.533307980199998,1.1445041779908376
47 | q48-v2.4,14.502060713,17.395740532,15.9916758498,0.9875399045826255
48 | q49-v2.4,29.651472364,42.489712072,37.601138269,4.444309640017966
49 | q5-v2.4,37.334653149,40.200471311,39.2507282382,0.9886498881274202
50 | q50-v2.4,101.391678285,105.781127081,102.69716413560002,1.5935547273864317
51 | q51-v2.4,18.693100599,22.018999121,20.032070217199998,1.2816532821197753
52 | q52-v2.4,2.28741606,4.112365464000001,3.1125799768,0.5962140676132831
53 | q53-v2.4,6.969523639,10.692510120000001,8.7085242732,1.199973805766159
54 | q54-v2.4,11.412873501,13.927249072,12.6033706528,0.8285475418649881
55 | q55-v2.4,2.476024336,3.89929411,3.2573972826000004,0.46950323781500314
56 | q56-v2.4,11.167401115,12.48719738,12.0896413296,0.49039838179588596
57 | q57-v2.4,10.011080384000001,11.792459323,11.037184577,0.5917755073183687
58 | q58-v2.4,4.376534702,7.203354707000001,5.953215469,1.0254708580280585
59 | q59-v2.4,23.156396418,34.160966369,28.4156941152,3.49225350518341
60 | q6-v2.4,13.070614896999999,15.60744374,14.014995768599999,0.9282019042221119
61 | q60-v2.4,14.731191304,19.66390691,16.5136571402,1.7881210242320205
62 | q61-v2.4,8.821308386,11.460828377,9.676813126599999,0.9203215490588001
63 | q62-v2.4,10.415500262,17.370040453,13.605939325200001,2.32137023353903
64 | q63-v2.4,7.415368265,10.95750388,8.6614520724,1.3092332760032435
65 | q64-v2.4,131.346003199,164.305281001,142.6887317056,11.592601856013589
66 | q65-v2.4,38.243325244,39.455820337,38.797347355599996,0.4272797273899433
67 | q66-v2.4,12.808035632,17.160159465,15.0615725896,1.4475748377297115
68 | q67-v2.4,769.775210296,781.8728767829999,775.7869474142,4.714861010478136
69 | q68-v2.4,8.771130701,10.705161347999999,9.3756038822,0.7047911441610056
70 | q69-v2.4,9.775576563000001,11.114773268999999,10.5136436122,0.5489375903128211
71 | q7-v2.4,13.023328125,16.290491273,14.659974144200001,1.144137952058827
72 | q70-v2.4,18.568223587000002,22.762795158,20.132875812800002,1.5265775159110402
73 | q71-v2.4,6.317815402,9.284696618,7.462921685,1.0245048254410207
74 | q72-v2.4,34.545024402,37.855211897000004,35.791231272000005,1.1375600062074522
75 | q73-v2.4,4.625113768,4.803249523000001,4.7231101598,0.06935726775623359
76 | q74-v2.4,47.742458582,48.627326292999996,48.020538134800006,0.32562188219485755
77 | q75-v2.4,83.773148871,87.936735725,85.45101943979999,1.5109531589349858
78 | q76-v2.4,47.902195574,53.092793132,50.0153055606,1.8751372767480663
79 | q77-v2.4,4.625710815000001,5.525386221,5.0158455474,0.3263215865681226
80 | q78-v2.4,140.072784853,148.793414521,144.359776035,3.7877318101211377
81 | q79-v2.4,8.043859199,10.584506912,8.750632112400002,0.9458295548172388
82 | q8-v2.4,7.488463216,8.756565388999999,8.202961499,0.4802299213424071
83 | q80-v2.4,37.678019479,43.77925282,39.8464992736,2.242299127605053
84 | q81-v2.4,17.389926685,19.717641134,18.400887212199997,0.810545298607923
85 | q82-v2.4,37.872316255,39.366411694,38.380020683000005,0.5355719013008041
86 | q83-v2.4,2.23556385,3.01051841,2.6742351806,0.2563610271711477
87 | q84-v2.4,11.58943321,16.406073601,13.5190641438,1.712521432316087
88 | q85-v2.4,17.730514398,25.712928753,19.6707546448,3.040351179347042
89 | q86-v2.4,4.818967488999999,8.152198697,5.7807977128,1.2028729164093217
90 | q87-v2.4,30.755230952999998,34.140598065999995,31.881264702599992,1.201093326422785
91 | q88-v2.4,90.14547673999999,114.130656328,96.31127402519999,9.030468283157418
92 | q89-v2.4,8.612939995000001,11.532649208,9.692841781800002,0.9873480720382047
93 | q9-v2.4,79.609407603,87.886195604,83.26658918460001,3.1943463401434364
94 | q90-v2.4,11.343873734999999,15.817066508,12.580913439799998,1.635053035123793
95 | q91-v2.4,4.004272509,5.583233218999999,4.7409098592,0.5312560634570823
96 | q92-v2.4,2.679482601,3.689624003,3.0633189884,0.34324438124740586
97 | q93-v2.4,169.17162040300002,178.823478963,176.0718832048,3.604740942204125
98 | q94-v2.4,32.831807897,41.108575898000005,36.379850151599996,3.053098719484592
99 | q95-v2.4,91.738810352,95.363992544,93.6360985476,1.4433886352403469
100 | q96-v2.4,14.032909249000001,19.409014901,16.278186813599998,2.1187887248160058
101 | q97-v2.4,40.717438293,42.967118715999995,42.0139412096,0.8044572221752353
102 | q98-v2.4,7.213829552,9.159006702000001,7.983071198199999,0.682698666912065
103 | q99-v2.4,14.984824776,18.425158197000002,16.483918421400002,1.268318536867176
104 | ss_max-v2.4,28.861745663,35.437128162,31.9152317112,2.2989497172157667
105 |
--------------------------------------------------------------------------------
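Each row carries the query name and four statistics; judging by the values (the third number always lies between the first two, and the fourth is small), the layout is plausibly min, max, mean, and standard deviation of the per-iteration runtimes in seconds. Under that assumption, the total mean runtime can be tallied with awk:

    # Sum the assumed mean column (4th field) across all queries
    awk -F, '{ total += $4 } END { printf "total mean runtime: %.1f s\n", total }' result.csv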
/benchmarks/hadoop-aliyun/terraform/alicloud/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_providers {
3 | alicloud = {
4 | source = "hashicorp/alicloud"
5 | version = "1.223.2"
6 | }
7 | }
8 |
9 | required_version = ">= 1.8.0"
10 | }
11 |
12 | resource "random_string" "suffix" {
13 | length = 16
14 | lower = true
15 | upper = false
16 | special = false
17 | }
18 |
19 | resource "alicloud_resource_manager_resource_group" "default" {
20 | resource_group_name = "rg-${random_string.suffix.result}"
21 | display_name = "rg-${random_string.suffix.result}"
22 | }
23 |
24 | resource "alicloud_vpc" "default" {
25 | vpc_name = "vpc-${random_string.suffix.result}"
26 | cidr_block = "192.168.0.0/16"
27 | resource_group_id = alicloud_resource_manager_resource_group.default.id
28 | }
29 |
30 | resource "alicloud_vswitch" "default" {
31 | vswitch_name = "vsw-${random_string.suffix.result}"
32 | cidr_block = "192.168.0.0/24"
33 | vpc_id = alicloud_vpc.default.id
34 | zone_id = var.zone_id
35 | }
36 |
37 | resource "alicloud_security_group" "default" {
38 | name = "sg-${random_string.suffix.result}"
39 | vpc_id = alicloud_vpc.default.id
40 | resource_group_id = alicloud_resource_manager_resource_group.default.id
41 | security_group_type = "normal"
42 | }
43 |
44 | resource "alicloud_security_group_rule" "default" {
45 | type = "ingress"
46 | ip_protocol = "all"
47 | port_range = "-1/-1"
48 | cidr_ip = "192.168.0.0/16"
49 | security_group_id = alicloud_security_group.default.id
50 | priority = 1
51 | }
52 |
53 | resource "alicloud_security_group_rule" "icmp" {
54 | type = "ingress"
55 | ip_protocol = "icmp"
56 | port_range = "-1/-1"
57 | cidr_ip = "0.0.0.0/0"
58 | security_group_id = alicloud_security_group.default.id
59 | priority = 1
60 | }
61 |
62 | resource "alicloud_cs_managed_kubernetes" "default" {
63 | name = "ack-${random_string.suffix.result}"
64 | timezone = "Asia/Shanghai"
65 | version = "1.32.1-aliyun.1"
66 |
67 | worker_vswitch_ids = [alicloud_vswitch.default.id]
68 | pod_vswitch_ids = [alicloud_vswitch.default.id]
69 | service_cidr = "172.16.0.0/16"
70 |
71 | addons {
72 | name = "terway-eniip"
73 | }
74 |
75 | proxy_mode = "ipvs"
76 | cluster_domain = "cluster.local"
77 | deletion_protection = false
78 | cluster_spec = "ack.pro.small"
79 | load_balancer_spec = "slb.s1.small"
80 | new_nat_gateway = true
81 | slb_internet_enabled = true
82 | resource_group_id = alicloud_resource_manager_resource_group.default.id
83 | security_group_id = alicloud_security_group.default.id
84 | }
85 |
86 | resource "alicloud_cs_kubernetes_node_pool" "spark-master" {
87 | node_pool_name = "spark-master"
88 | cluster_id = alicloud_cs_managed_kubernetes.default.id
89 | vswitch_ids = [alicloud_vswitch.default.id]
90 | desired_size = var.spark.master.instance_count
91 | instance_types = [var.spark.master.instance_type]
92 | image_type = "AliyunLinux3"
93 | system_disk_category = "cloud_essd"
94 | system_disk_size = 40
95 | system_disk_performance_level = "PL1"
96 |
97 | labels {
98 | key = "spark.tpcds.benchmark/role"
99 | value = "spark-master"
100 | }
101 |
102 | resource_group_id = alicloud_resource_manager_resource_group.default.id
103 | security_group_ids = [alicloud_security_group.default.id]
104 | }
105 |
106 | resource "alicloud_cs_kubernetes_node_pool" "spark-worker" {
107 | node_pool_name = "spark-worker"
108 | cluster_id = alicloud_cs_managed_kubernetes.default.id
109 | vswitch_ids = [alicloud_vswitch.default.id]
110 | desired_size = var.spark.worker.instance_count
111 | instance_types = [var.spark.worker.instance_type]
112 | image_type = "AliyunLinux3"
113 | system_disk_category = "cloud_essd"
114 | system_disk_size = 40
115 | system_disk_performance_level = "PL1"
116 | data_disks {
117 | category = "cloud_essd"
118 | size = 300
119 | performance_level = "PL1"
120 | device = "/dev/vdb"
121 | }
122 | data_disks {
123 | category = "cloud_essd"
124 | size = 300
125 | performance_level = "PL1"
126 | device = "/dev/vdc"
127 | }
128 | data_disks {
129 | category = "cloud_essd"
130 | size = 300
131 | performance_level = "PL1"
132 | device = "/dev/vdd"
133 | }
134 | data_disks {
135 | category = "cloud_essd"
136 | size = 300
137 | performance_level = "PL1"
138 | device = "/dev/vde"
139 | }
140 | data_disks {
141 | category = "cloud_essd"
142 | size = 300
143 | performance_level = "PL1"
144 | device = "/dev/vdf"
145 | }
146 | data_disks {
147 | category = "cloud_essd"
148 | size = 300
149 | performance_level = "PL1"
150 | device = "/dev/vdg"
151 | }
152 | data_disks {
153 | category = "cloud_essd"
154 | size = 40
155 | performance_level = "PL1"
156 | device = "/dev/vdh"
157 | }
158 |
159 | labels {
160 | key = "spark.tpcds.benchmark/role"
161 | value = "spark-worker"
162 | }
163 |
164 | taints {
165 | key = "spark.tpcds.benchmark/role"
166 | value = "spark-worker"
167 | effect = "NoSchedule"
168 | }
169 |
170 | kubelet_configuration {
171 | eviction_hard = {
172 | "imagefs.available" = "5%"
173 | "memory.available" = "100Mi"
174 | "nodefs.available" = "5%"
175 | "nodefs.inodesFree" = "5%"
176 | }
177 | system_reserved = {
178 | cpu = "300m"
179 | memory = "600Mi"
180 | pid = "1000"
181 | }
182 | kube_reserved = {
183 | cpu = "300m"
184 | memory = "600Mi"
185 | pid = "1000"
186 | }
187 | }
188 |
189 | user_data = base64encode(file("user_data.sh"))
190 |
191 | resource_group_id = alicloud_resource_manager_resource_group.default.id
192 | security_group_ids = [alicloud_security_group.default.id]
193 | }
194 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/terraform/alicloud/provider.tf:
--------------------------------------------------------------------------------
1 | provider "alicloud" {
2 | region = var.region
3 | profile = var.profile
4 | }
5 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/terraform/alicloud/terraform.tfvars:
--------------------------------------------------------------------------------
1 | region = "cn-beijing"
2 |
3 | zone_id = "cn-beijing-i"
4 |
5 | profile = "default"
6 |
7 | spark = {
8 | master = {
9 | instance_count = 1
10 | instance_type = "ecs.g7.2xlarge"
11 | }
12 | worker = {
13 | instance_count = 6
14 | instance_type = "ecs.g7.8xlarge"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/terraform/alicloud/user_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Print each command before executing it
4 | set -ex
5 |
6 | # Install parted
7 | yum install -y parted e2fsprogs
8 |
9 | # Create a new partition on each data disk
10 | disks=(/dev/vdb /dev/vdc /dev/vdd /dev/vde /dev/vdf /dev/vdg)
11 | for disk in ${disks[@]}; do
12 | parted ${disk} mklabel gpt
13 | parted ${disk} mkpart primary 1 100%
14 | parted ${disk} align-check optimal 1
15 | done
16 | partprobe
17 |
18 | # Create a filesystem on each partition
19 | for disk in ${disks[@]}; do
20 | mkfs -t xfs ${disk}1
21 | done
22 |
23 | # Mount the partitions
24 | cp /etc/fstab /etc/fstab.bak
25 | n=${#disks[@]}
26 | for ((i = 0; i < n; i++)); do
27 | dir="/mnt/disk$(($i + 1))"
28 | mkdir -p ${dir}
29 | echo "$(blkid ${disks[i]}1 | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
30 | chmod g+w ${dir}
31 | done
32 | mount -a
33 |
34 | # Make the mount points writable by all users
35 | chmod a+w /mnt/disk*
36 |
--------------------------------------------------------------------------------
/benchmarks/hadoop-aliyun/terraform/alicloud/variables.tf:
--------------------------------------------------------------------------------
1 | variable "region" {
2 | type = string
3 | default = "cn-beijing"
4 | }
5 |
6 | variable "zone_id" {
7 | type = string
8 | default = "cn-beijing-i"
9 | }
10 |
11 | variable "profile" {
12 | type = string
13 | default = "default"
14 | }
15 |
16 | variable "spark" {
17 | type = object({
18 | master = object({
19 | instance_count = number
20 | instance_type = string
21 | })
22 | worker = object({
23 | instance_count = number
24 | instance_type = string
25 | })
26 | })
27 | default = {
28 | master = {
29 | instance_count = 0
30 | instance_type = "ecs.g7.2xlarge"
31 | }
32 | worker = {
33 | instance_count = 0
34 | instance_type = "ecs.g7.8xlarge"
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/charts/tpcds-benchmark/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-benchmark
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/charts/tpcds-benchmark/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-benchmark.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-benchmark.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-benchmark.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-benchmark.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-benchmark.chart" . }}
38 | {{ include "tpcds-benchmark.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-benchmark.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-benchmark.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-benchmark.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-benchmark.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/charts/tpcds-benchmark/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for tpcds-benchmark.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS endpoint
22 | endpoint: oss-cn-beijing-internal.aliyuncs.com
23 |
24 | benchmark:
25 | # -- Scale factor
26 | scaleFactor: 3072
27 | # -- Number of iterations
28 | numIterations: 1
29 | # -- Whether to optimize queries
30 | optimizeQueries: false
31 | # -- Filter queries, will run all if empty
32 | queries: []
33 | # - q70-v2.4
34 | # - q82-v2.4
35 | # - q64-v2.4
36 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/charts/tpcds-data-generation/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-data-generation
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/charts/tpcds-data-generation/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-data-generation.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-data-generation.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-data-generation.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-data-generation.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-data-generation.chart" . }}
38 | {{ include "tpcds-data-generation.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-data-generation.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-data-generation.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-data-generation.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-data-generation.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/charts/tpcds-data-generation/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for tpcds-data-generation.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS endpoint
22 | endpoint: oss-cn-beijing-internal.aliyuncs.com
23 |
24 | benchmark:
25 | # -- Scale factor
26 | scaleFactor: 3072
27 | # -- Number of partitions
28 | numPartitions: 640
29 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/result.csv:
--------------------------------------------------------------------------------
1 | q1-v2.4,5.82740906,12.178573564,7.1879224572,2.4960589575459804
2 | q10-v2.4,7.573033074,8.666177659,8.1316966384,0.3648029760376505
3 | q11-v2.4,53.436323165000005,53.921298953,53.75794683080001,0.17454812541879453
4 | q12-v2.4,3.947420554,5.6403560420000005,4.431623394400001,0.6216755464731617
5 | q13-v2.4,12.143909611,13.038156586,12.6490554062,0.3300995714054239
6 | q14a-v2.4,107.46807310999999,115.669366161,111.85752393800001,2.8042210551273055
7 | q14b-v2.4,105.816832083,110.095723619,107.19382418820001,1.5379012340740084
8 | q15-v2.4,13.701433589,14.150182401,13.954124861,0.14472680646740804
9 | q16-v2.4,36.780885457,40.265582759,37.7290644368,1.2985744150482166
10 | q17-v2.4,12.995115227,13.452064576000001,13.1343180432,0.16627461371977903
11 | q18-v2.4,26.913184947,28.227103931000002,27.487764137,0.521758556010464
12 | q19-v2.4,6.53442605,6.785478434,6.6804468676,0.09669073284863788
13 | q2-v2.4,21.132480734,22.656578223,21.6629419568,0.5803333423209112
14 | q20-v2.4,5.325541791,5.768802061000001,5.512311510799999,0.17759409301795656
15 | q21-v2.4,1.870814209,2.354133757,2.0752153428,0.16786013784580675
16 | q22-v2.4,15.361660048,15.842826313,15.610420318,0.16316416909917944
17 | q23a-v2.4,252.781698925,267.126780039,263.6303611626,5.445611471905185
18 | q23b-v2.4,362.414191738,396.677759852,380.3411332984,11.85351761507152
19 | q24a-v2.4,168.934801095,195.761364316,179.9287417506,9.510202007542013
20 | q24b-v2.4,156.003102491,186.871001702,165.02367737039998,11.386752754897792
21 | q25-v2.4,9.959931248,10.710937722,10.389834249200002,0.2773901160922852
22 | q26-v2.4,6.968146132,9.120252821,7.9289348678,0.7467140720959528
23 | q27-v2.4,8.532492410000001,10.062598792000001,9.309868940800001,0.5251773595287177
24 | q28-v2.4,79.790235462,86.565475439,83.5998490682,2.7366682823821984
25 | q29-v2.4,35.326786917,37.729033465,36.606352364399996,0.8259770016569948
26 | q3-v2.4,3.6432516919999998,6.147688761,4.305347787600001,0.9331086197419158
27 | q30-v2.4,18.220981156999997,18.633722778,18.437182831999998,0.14174993480267611
28 | q31-v2.4,15.942112084,21.284957720999998,18.4043419582,1.9913891609695626
29 | q32-v2.4,2.345385271,2.577304551,2.4563204285999998,0.07893259517224921
30 | q33-v2.4,8.450874475,12.325880425,10.5759360036,1.4506385441266296
31 | q34-v2.4,5.186600594,5.957797243,5.4886637674,0.2539189080514718
32 | q35-v2.4,20.837288122999997,24.231901855,22.1871265838,1.1473412689581974
33 | q36-v2.4,7.836719654,9.303440971,8.411142502,0.5375508429923208
34 | q37-v2.4,18.839476587,23.085603574,20.425648218,1.6288495424610643
35 | q38-v2.4,29.27162852,29.888590269,29.704503160799998,0.22100625676104002
36 | q39a-v2.4,5.6715128990000006,5.987813289,5.8983753884,0.11670704181381068
37 | q39b-v2.4,5.124887764,5.544479965000001,5.3199175924,0.13795567629304478
38 | q4-v2.4,351.110730348,356.019829643,354.574727248,1.803913576819102
39 | q40-v2.4,11.522964602,11.855520104,11.665169434000001,0.12009998943452786
40 | q41-v2.4,0.630769625,0.734893715,0.690501993,0.045762016697730806
41 | q42-v2.4,1.5774040230000002,2.108183575,1.8526063143999998,0.1974445726816084
42 | q43-v2.4,5.243902481,5.536814871,5.4029682634,0.12019622763754292
43 | q44-v2.4,28.837908760999998,34.309312426999995,31.282211627200002,1.8384245102982972
44 | q45-v2.4,13.153637891,13.628053518,13.339359959800001,0.17041278652280728
45 | q46-v2.4,11.287654004,12.271047093,11.758188709399999,0.3187610867133229
46 | q47-v2.4,13.463470857,14.189145694,13.7540722738,0.2650707791268198
47 | q48-v2.4,11.241675956,12.103887507,11.707699290599999,0.325604968552439
48 | q49-v2.4,17.834184225,20.899016822,19.4238399256,1.0754022056717856
49 | q5-v2.4,34.621159285000005,36.314129427,35.1836529778,0.626921312924483
50 | q50-v2.4,95.74525486499999,103.6536799,100.2474700934,2.555714600741113
51 | q51-v2.4,15.991921506999999,16.623763728999997,16.232791363,0.2499447379507674
52 | q52-v2.4,1.7025136520000002,1.887806007,1.7745731660000001,0.06949580697355572
53 | q53-v2.4,5.1523663619999995,5.8181278579999995,5.4547989575999996,0.23320098695816197
54 | q54-v2.4,9.884381209999999,10.559274303,10.2496401932,0.23028904786036253
55 | q55-v2.4,1.722851433,1.854796877,1.8043138242,0.04516697302902219
56 | q56-v2.4,9.618347347,12.203963006,11.2766603764,0.9722058751923929
57 | q57-v2.4,8.914394118,9.394239197000001,9.094486610199999,0.18040418586569554
58 | q58-v2.4,3.500587732,3.7713736840000003,3.6416817162,0.09703387195726251
59 | q59-v2.4,20.758752511999997,21.683708199,21.1298770402,0.32775226611819375
60 | q6-v2.4,12.440352381999999,15.804344041,13.647992317200002,1.1629592307863936
61 | q60-v2.4,13.255234117,13.908884819999999,13.479432537200001,0.23210091359158994
62 | q61-v2.4,6.61329201,7.086961272,6.7748774466,0.17045367659258243
63 | q62-v2.4,7.156209132,7.642842674000001,7.4162772407999995,0.1618788158931069
64 | q63-v2.4,4.832625473,5.236620013,5.0496996798,0.15297990775471795
65 | q64-v2.4,118.185557968,122.85405472,120.3819681836,1.6459305459978475
66 | q65-v2.4,37.528430785999994,38.122415229,37.7123882112,0.20992940071828223
67 | q66-v2.4,11.574929771999999,12.738605019000001,11.8822441496,0.4340046024026489
68 | q67-v2.4,754.5656415010001,785.364204246,771.5511458138001,10.811958985534648
69 | q68-v2.4,7.227422299,7.7298664509999995,7.4512599586,0.2250363828920852
70 | q69-v2.4,7.1655399509999995,7.585426471,7.4151779448,0.1463373047409165
71 | q7-v2.4,9.218510941,30.921400187,14.3854956262,8.305414189617764
72 | q70-v2.4,13.038384813,14.662040791999999,13.803610042399999,0.6790784435658018
73 | q71-v2.4,4.49366801,5.534065178,4.958342104,0.3485496889032831
74 | q72-v2.4,30.596248559,32.154659074,31.222147883800005,0.5634306317924787
75 | q73-v2.4,4.058459936,4.237710261999999,4.1268822094,0.06837608437373675
76 | q74-v2.4,43.92400146,45.089497551,44.507426736,0.40051477172070366
77 | q75-v2.4,70.99690424,77.740359476,73.7622416966,2.5833038102991885
78 | q76-v2.4,37.639858732,44.782194468,41.615109209799996,3.054873881024291
79 | q77-v2.4,2.924383521,3.658697241,3.2924847838000004,0.24097283466005903
80 | q78-v2.4,135.53521893,138.923424414,137.4993615154,1.5130299533763367
81 | q79-v2.4,6.585374795,7.010601788,6.7634720192,0.1431610726607435
82 | q8-v2.4,7.054331802,8.213357267,7.4826677602,0.40186409283060637
83 | q80-v2.4,30.762710629,33.956214714000005,31.763478078600002,1.1364042478378251
84 | q81-v2.4,17.026213071999997,30.736642845,24.8111300872,6.311979062312886
85 | q82-v2.4,36.560933301,38.963618909,38.080525601199994,1.0161681076399023
86 | q83-v2.4,1.556373215,2.012291026,1.8411018434,0.16810816431476194
87 | q84-v2.4,7.54376091,8.449147638000001,7.9804698894,0.32813136466517406
88 | q85-v2.4,11.305392544,12.246462002,11.6826919822,0.3126755402485902
89 | q86-v2.4,4.166293958,7.4550414609999995,4.846635666799999,1.3044220575400889
90 | q87-v2.4,29.857144283,30.197245811000002,30.0568481086,0.12204064209977733
91 | q88-v2.4,68.946333353,73.469319152,70.9938738528,1.50355082387917
92 | q89-v2.4,6.465980558999999,7.482841177999999,7.048671743599999,0.37705910005996873
93 | q9-v2.4,65.27047729499999,70.363230309,68.0142375148,1.7138701157064395
94 | q90-v2.4,7.551024351,7.847182861,7.724840075,0.14168198677742588
95 | q91-v2.4,2.9543571280000003,3.161031375,3.0627617616,0.08722093797597069
96 | q92-v2.4,1.734802363,2.452249051,1.9558326864000002,0.25509095562300277
97 | q93-v2.4,165.97376033700002,175.536953304,170.9042379306,3.7117345343598487
98 | q94-v2.4,27.420068029,29.638806560000003,28.471682324800003,0.8848546965648024
99 | q95-v2.4,85.688196242,88.987488021,87.09022427260001,1.2108144847560522
100 | q96-v2.4,10.45255504,11.140827458,10.739148343600002,0.2511779065303994
101 | q97-v2.4,39.057097029999994,39.838978937,39.6167914878,0.28567950252043145
102 | q98-v2.4,6.654428135,7.631984391,7.311427204199999,0.3560569231980692
103 | q99-v2.4,10.458687404,11.871515145,11.1029979198,0.49911583952624683
104 | ss_max-v2.4,24.555110707,26.187189824,25.5329044538,0.6290656580040886
105 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/terraform/alicloud/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_providers {
3 | alicloud = {
4 | source = "hashicorp/alicloud"
5 | version = "1.223.2"
6 | }
7 | }
8 |
9 | required_version = ">= 1.8.0"
10 | }
11 |
12 | resource "random_string" "suffix" {
13 | length = 16
14 | lower = true
15 | upper = false
16 | special = false
17 | }
18 |
19 | resource "alicloud_resource_manager_resource_group" "default" {
20 | resource_group_name = "rg-${random_string.suffix.result}"
21 | display_name = "rg-${random_string.suffix.result}"
22 | }
23 |
24 | resource "alicloud_vpc" "default" {
25 | vpc_name = "vpc-${random_string.suffix.result}"
26 | cidr_block = "192.168.0.0/16"
27 | resource_group_id = alicloud_resource_manager_resource_group.default.id
28 | }
29 |
30 | resource "alicloud_vswitch" "default" {
31 | vswitch_name = "vsw-${random_string.suffix.result}"
32 | cidr_block = "192.168.0.0/24"
33 | vpc_id = alicloud_vpc.default.id
34 | zone_id = var.zone_id
35 | }
36 |
37 | resource "alicloud_security_group" "default" {
38 | name = "sg-${random_string.suffix.result}"
39 | vpc_id = alicloud_vpc.default.id
40 | resource_group_id = alicloud_resource_manager_resource_group.default.id
41 | security_group_type = "normal"
42 | }
43 |
44 | resource "alicloud_security_group_rule" "default" {
45 | type = "ingress"
46 | ip_protocol = "all"
47 | port_range = "-1/-1"
48 | cidr_ip = "192.168.0.0/16"
49 | security_group_id = alicloud_security_group.default.id
50 | priority = 1
51 | }
52 |
53 | resource "alicloud_security_group_rule" "icmp" {
54 | type = "ingress"
55 | ip_protocol = "icmp"
56 | port_range = "-1/-1"
57 | cidr_ip = "0.0.0.0/0"
58 | security_group_id = alicloud_security_group.default.id
59 | priority = 1
60 | }
61 |
62 | resource "alicloud_cs_managed_kubernetes" "default" {
63 | name = "ack-${random_string.suffix.result}"
64 | timezone = "Asia/Shanghai"
65 | version = "1.32.1-aliyun.1"
66 |
67 | worker_vswitch_ids = [alicloud_vswitch.default.id]
68 | pod_vswitch_ids = [alicloud_vswitch.default.id]
69 | service_cidr = "172.16.0.0/16"
70 |
71 | addons {
72 | name = "terway-eniip"
73 | }
74 |
75 | proxy_mode = "ipvs"
76 | cluster_domain = "cluster.local"
77 | deletion_protection = false
78 | cluster_spec = "ack.pro.small"
79 | load_balancer_spec = "slb.s1.small"
80 | new_nat_gateway = true
81 | slb_internet_enabled = true
82 | resource_group_id = alicloud_resource_manager_resource_group.default.id
83 | security_group_id = alicloud_security_group.default.id
84 | }
85 |
86 | resource "alicloud_cs_kubernetes_node_pool" "spark-master" {
87 | node_pool_name = "spark-master"
88 | cluster_id = alicloud_cs_managed_kubernetes.default.id
89 | vswitch_ids = [alicloud_vswitch.default.id]
90 | desired_size = var.spark.master.instance_count
91 | instance_types = [var.spark.master.instance_type]
92 | image_type = "AliyunLinux3"
93 | system_disk_category = "cloud_essd"
94 | system_disk_size = 40
95 | system_disk_performance_level = "PL1"
96 |
97 | labels {
98 | key = "spark.tpcds.benchmark/role"
99 | value = "spark-master"
100 | }
101 |
102 | resource_group_id = alicloud_resource_manager_resource_group.default.id
103 | security_group_ids = [alicloud_security_group.default.id]
104 | }
105 |
106 | resource "alicloud_cs_kubernetes_node_pool" "spark-worker" {
107 | node_pool_name = "spark-worker"
108 | cluster_id = alicloud_cs_managed_kubernetes.default.id
109 | vswitch_ids = [alicloud_vswitch.default.id]
110 | desired_size = var.spark.worker.instance_count
111 | instance_types = [var.spark.worker.instance_type]
112 | image_type = "AliyunLinux3"
113 | system_disk_category = "cloud_essd"
114 | system_disk_size = 40
115 | system_disk_performance_level = "PL1"
116 | data_disks {
117 | category = "cloud_essd"
118 | size = 300
119 | performance_level = "PL1"
120 | device = "/dev/vdb"
121 | }
122 | data_disks {
123 | category = "cloud_essd"
124 | size = 300
125 | performance_level = "PL1"
126 | device = "/dev/vdc"
127 | }
128 | data_disks {
129 | category = "cloud_essd"
130 | size = 300
131 | performance_level = "PL1"
132 | device = "/dev/vdd"
133 | }
134 | data_disks {
135 | category = "cloud_essd"
136 | size = 300
137 | performance_level = "PL1"
138 | device = "/dev/vde"
139 | }
140 | data_disks {
141 | category = "cloud_essd"
142 | size = 300
143 | performance_level = "PL1"
144 | device = "/dev/vdf"
145 | }
146 | data_disks {
147 | category = "cloud_essd"
148 | size = 300
149 | performance_level = "PL1"
150 | device = "/dev/vdg"
151 | }
152 | data_disks {
153 | category = "cloud_essd"
154 | size = 40
155 | performance_level = "PL1"
156 | device = "/dev/vdh"
157 | }
158 |
159 | labels {
160 | key = "spark.tpcds.benchmark/role"
161 | value = "spark-worker"
162 | }
163 |
164 | taints {
165 | key = "spark.tpcds.benchmark/role"
166 | value = "spark-worker"
167 | effect = "NoSchedule"
168 | }
169 |
170 | kubelet_configuration {
171 | eviction_hard = {
172 | "imagefs.available" = "5%"
173 | "memory.available" = "100Mi"
174 | "nodefs.available" = "5%"
175 | "nodefs.inodesFree" = "5%"
176 | }
177 | system_reserved = {
178 | cpu = "300m"
179 | memory = "600Mi"
180 | pid = "1000"
181 | }
182 | kube_reserved = {
183 | cpu = "300m"
184 | memory = "600Mi"
185 | pid = "1000"
186 | }
187 | }
188 |
189 | user_data = base64encode(file("user_data.sh"))
190 |
191 | resource_group_id = alicloud_resource_manager_resource_group.default.id
192 | security_group_ids = [alicloud_security_group.default.id]
193 | }
194 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/terraform/alicloud/provider.tf:
--------------------------------------------------------------------------------
1 | provider "alicloud" {
2 | region = var.region
3 | profile = var.profile
4 | }
5 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/terraform/alicloud/terraform.tfvars:
--------------------------------------------------------------------------------
1 | region = "cn-beijing"
2 |
3 | zone_id = "cn-beijing-i"
4 |
5 | profile = "default"
6 |
7 | spark = {
8 | master = {
9 | instance_count = 1
10 | instance_type = "ecs.g7.2xlarge"
11 | }
12 | worker = {
13 | instance_count = 6
14 | instance_type = "ecs.g7.8xlarge"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/terraform/alicloud/user_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
 3 | # Print each command as it runs and exit on the first error
4 | set -ex
5 |
 6 | # Install parted and e2fsprogs
7 | yum install -y parted e2fsprogs
8 |
 9 | # Create a new GPT partition on each data disk
10 | disks=(/dev/vdb /dev/vdc /dev/vdd /dev/vde /dev/vdf /dev/vdg)
11 | for disk in ${disks[@]}; do
12 | parted ${disk} mklabel gpt
13 | parted ${disk} mkpart primary 1 100%
14 | parted ${disk} align-check optimal 1
15 | done
16 | partprobe
17 |
18 | # Create an XFS file system on each partition
19 | for disk in ${disks[@]}; do
20 | mkfs -t xfs ${disk}1
21 | done
22 |
23 | # Mount the partitions and persist them in /etc/fstab
24 | cp /etc/fstab /etc/fstab.bak
25 | n=${#disks[@]}
26 | for ((i = 0; i < n; i++)); do
27 | dir="/mnt/disk$(($i + 1))"
28 | mkdir -p ${dir}
29 | echo "$(blkid ${disks[i]}1 | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
30 | chmod g+w ${dir}
31 | done
32 | mount -a
33 |
34 | # Make the mount points writable by all users
35 | chmod a+w /mnt/disk*
36 |
--------------------------------------------------------------------------------
/benchmarks/jindosdk/terraform/alicloud/variables.tf:
--------------------------------------------------------------------------------
1 | variable "region" {
2 | type = string
3 | default = "cn-beijing"
4 | }
5 |
6 | variable "zone_id" {
7 | type = string
8 | default = "cn-beijing-i"
9 | }
10 |
11 | variable "profile" {
12 | type = string
13 | default = "default"
14 | }
15 |
16 | variable "spark" {
17 | type = object({
18 | master = object({
19 | instance_count = number
20 | instance_type = string
21 | })
22 | worker = object({
23 | instance_count = number
24 | instance_type = string
25 | })
26 | })
27 | default = {
28 | master = {
29 | instance_count = 0
30 | instance_type = "ecs.g7.2xlarge"
31 | }
32 | worker = {
33 | instance_count = 0
34 | instance_type = "ecs.g7.8xlarge"
35 | }
36 | }
37 | }
38 |
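Taken together, main.tf, provider.tf, variables.tf, and terraform.tfvars form a self-contained root module. A typical workflow to provision the cluster from the repository root (a sketch, assuming Alibaba Cloud credentials are configured under the `default` CLI profile referenced in terraform.tfvars) would be:

```shell
terraform -chdir=benchmarks/jindosdk/terraform/alicloud init
terraform -chdir=benchmarks/jindosdk/terraform/alicloud plan
terraform -chdir=benchmarks/jindosdk/terraform/alicloud apply
```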
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | ThisBuild / organization := "com.aliyun.ack"
2 | ThisBuild / version := "0.1"
3 | ThisBuild / scalaVersion := "2.12.20"
4 |
5 | val sparkVersion = "3.5.5"
6 |
7 | lazy val benchmark = (project in file("."))
8 | .settings(
9 | name := "spark-tpcds-benchmark",
10 | libraryDependencies ++= Seq(
11 | "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
12 | "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
13 | "com.github.scopt" %% "scopt" % "4.1.0"
14 | ),
15 | javacOptions ++= Seq("-source", "1.8", "-target", "1.8")
16 | )
17 |
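The Spark dependencies above are marked `provided`, so they are supplied by the Spark runtime at execution time; building the benchmark jar itself should then be a plain package step (a sketch: with these settings sbt would emit `target/scala-2.12/spark-tpcds-benchmark_2.12-0.1.jar`, and how that jar and the scopt dependency are staged into the container image is presumably handled by the repository's Dockerfile):

```shell
sbt clean package
```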
--------------------------------------------------------------------------------
/charts/tpcds-benchmark/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-benchmark
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/charts/tpcds-benchmark/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-benchmark.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-benchmark.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-benchmark.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-benchmark.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-benchmark.chart" . }}
38 | {{ include "tpcds-benchmark.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-benchmark.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-benchmark.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-benchmark.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-benchmark.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/charts/tpcds-benchmark/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for tpcds-benchmark.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS region
22 | region: cn-beijing
23 | # -- OSS endpoint
24 | endpoint: oss-cn-beijing-internal.aliyuncs.com
25 |
26 | benchmark:
27 | # -- Scale factor
28 | scaleFactor: 3072
29 | # -- Number of iterations
30 | numIterations: 1
31 | # -- Whether to optimize queries
32 | optimizeQueries: false
33 | # -- Filter queries, will run all if empty
34 | queries: []
35 | # - q70-v2.4
36 | # - q82-v2.4
37 | # - q64-v2.4
38 |
39 | # -- Specifies which SDK to use when accessing OSS.
40 | # Available options are:
41 | # 1. hadoop-aliyun (Ref: https://apache.github.io/hadoop/hadoop-aliyun/tools/hadoop-aliyun/index.html)
42 | # 2. hadoop-aws (Ref: https://apache.github.io/hadoop/hadoop-aws/tools/hadoop-aws/index.html)
43 | # 3. jindoSDK (Ref: https://github.com/aliyun/alibabacloud-jindodata)
44 | sdk: hadoop-aliyun
45 |
--------------------------------------------------------------------------------
/charts/tpcds-data-generation/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: tpcds-data-generation
3 | description: A Helm chart for Kubernetes
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/charts/tpcds-data-generation/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "tpcds-data-generation.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "tpcds-data-generation.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "tpcds-data-generation.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "tpcds-data-generation.labels" -}}
37 | helm.sh/chart: {{ include "tpcds-data-generation.chart" . }}
38 | {{ include "tpcds-data-generation.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "tpcds-data-generation.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "tpcds-data-generation.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "tpcds-data-generation.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "tpcds-data-generation.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/charts/tpcds-data-generation/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for tpcds-data-generation.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | image:
6 | # -- Image registry
7 | registry: registry-cn-beijing-vpc.ack.aliyuncs.com
8 | # -- Image repository
9 | repository: ack-demo/spark-tpcds-benchmark
10 | # -- Image tag
11 | tag: 3.5.3-0.1
12 | # -- Image pull policy
13 | pullPolicy: IfNotPresent
14 | # -- Image pull secrets
15 | pullSecrets: []
16 | # - name: pull-secret
17 |
18 | oss:
19 | # -- OSS bucket
20 | bucket: example-bucket
21 | # -- OSS region
22 | region: cn-beijing
23 | # -- OSS endpoint
24 | endpoint: oss-cn-beijing-internal.aliyuncs.com
25 |
26 | benchmark:
27 | # -- Scale factor
28 | scaleFactor: 3072
29 | # -- Number of partitions
30 | numPartitions: 640
31 |
32 | # -- Specifies which SDK to use when accessing OSS.
33 | # Available options are:
34 | # 1. hadoop-aliyun (Ref: https://apache.github.io/hadoop/hadoop-aliyun/tools/hadoop-aliyun/index.html)
35 | # 2. hadoop-aws (Ref: https://apache.github.io/hadoop/hadoop-aws/tools/hadoop-aws/index.html)
36 | # 3. jindoSDK (Ref: https://github.com/aliyun/alibabacloud-jindodata)
37 | sdk: hadoop-aliyun
38 |
--------------------------------------------------------------------------------
/docs/benchmark/apache-spark-vs-emr-spark.md:
--------------------------------------------------------------------------------
 1 | # Apache Spark vs. EMR Spark on ACK Performance Comparison
 2 |
 3 | ## Overview
 4 |
 5 | This article runs TPC-DS benchmarks of the same scale with Apache Spark and with Alibaba Cloud EMR Spark in the same ACK cluster environment, and compares the performance of the two.
 6 |
 7 | ## Cluster Environment
 8 |
 9 | | Cluster configuration | Value |
10 | | --------------- | ------------------------------------------------------------ |
11 | | Cluster type | ACK standard cluster |
12 | | Cluster version | 1.16.9-aliyun.1 |
13 | | ECS instances | Instance type: ecs.d1ne.6xlarge<br>OS: CentOS 7.7 64-bit<br>CPU: 24 cores<br>Memory: 96 GB<br>Data disks: 12 x 5500 GB HDD |
14 | | Number of worker nodes | 20 |
15 |
16 | ## Comparison Results
17 |
18 | ### Apache Spark vs EMR Spark
19 |
20 | Test data: 10 TB
21 |
22 | 
23 |
24 | 
25 |
26 | On the 10 TB dataset, EMR Spark delivered roughly a 57% performance improvement over community Apache Spark. For the detailed test procedure, see [Running Spark workloads with EMR Spark](../bestpractice/emrspark.md).
27 |
28 | ### EMR Spark vs EMR Spark + Remote Shuffle Service
29 |
30 | Test data: 10 TB
31 |
32 | 
33 |
34 | 
35 |
36 | On the 10 TB dataset, adding the Remote Shuffle Service yielded roughly a 16% performance improvement over plain EMR Spark. For the detailed test procedure, see [Running Spark workloads with EMR Spark + Remote Shuffle Service](../bestpractice/emrspark-ess.md).
37 |
38 | ### EMR Spark vs EMR Spark + JindoFS
39 |
40 | Test data: 1 TB
41 |
42 | 
43 |
44 | 
45 |
46 | On the 1 TB dataset, using JindoFS as a distributed data cache yielded roughly a 15% performance improvement over plain EMR Spark. For the detailed test procedure, see [Running Spark workloads with EMR Spark + JindoFS](../bestpractice/emrspark-jindofs.md).
47 |
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/cloud-disk-read-write-bps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/cloud-disk-read-write-bps.png
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/cloud-disk-read-write-iops.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/cloud-disk-read-write-iops.png
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/cpu-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/cpu-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/memory-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/memory-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/network-bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/network-bandwidth.png
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/oss-bandwidth-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/oss-bandwidth-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/hadoop-aliyun/system-load.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/hadoop-aliyun/system-load.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/cloud-disk-read-write-bps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/cloud-disk-read-write-bps.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/cloud-disk-read-write-iops.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/cloud-disk-read-write-iops.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/cpu-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/cpu-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/memory-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/memory-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/network-bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/network-bandwidth.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/oss-bandwidth-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/oss-bandwidth-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/result.csv:
--------------------------------------------------------------------------------
1 | q1-v2.4,6.403341799,14.113337988,8.110346962000001,3.0072773026535176
2 | q10-v2.4,7.925076748,9.735050749,8.859851016599999,0.7062512866520814
3 | q11-v2.4,44.772593332999996,46.987956649,46.0210590318,0.7955303035720936
4 | q12-v2.4,4.779693126,5.655751446,5.0701750946,0.31502307952753555
5 | q13-v2.4,13.565382688,16.944541015,15.043409411999999,1.1478876927498043
6 | q14a-v2.4,127.972405122,133.39763662899998,130.08566996379997,1.9644635099712715
7 | q14b-v2.4,111.962040462,123.435222735,117.2949632782,4.122875578276983
8 | q15-v2.4,13.654991439,14.299390541000001,14.026876698399999,0.25479388092175814
9 | q16-v2.4,38.160023408,51.282346682000004,44.041532853,4.39200820233433
10 | q17-v2.4,12.382950515000001,13.585545535,12.922711180200002,0.3867699635053326
11 | q18-v2.4,29.764483285999997,32.580894582,30.8709355578,0.9551782311484887
12 | q19-v2.4,7.575141244,8.487050711,8.0599446862,0.3015586914436512
13 | q2-v2.4,24.847467029,42.722474430000005,30.238407568400003,6.455989615682667
14 | q20-v2.4,6.07467122,6.521158145999999,6.3502311806,0.16749299927607597
15 | q21-v2.4,2.334248386,3.041132433,2.624342529,0.30168316814964863
16 | q22-v2.4,15.791765379000001,17.470257275,16.3519068954,0.6050697722586454
17 | q23a-v2.4,247.19072309700002,269.194406393,256.7901344234,9.14143306577228
18 | q23b-v2.4,347.095614555,376.91161961499995,359.68113510374997,10.835390097514555
19 | q24a-v2.4,170.73195806,222.394821357,186.81856552999997,18.850993884899363
20 | q24b-v2.4,154.569191914,181.776721361,161.57589051999997,10.191832092747783
21 | q25-v2.4,10.168618281999999,11.310371117999999,10.797841795,0.418828419556337
22 | q26-v2.4,8.32073711,10.909086046,9.5687395584,0.868934197904754
23 | q27-v2.4,9.160393658,11.338790632,10.12739899,0.7975963914623747
24 | q28-v2.4,86.43398751999999,100.024091722,93.70992886719998,4.530918546869185
25 | q29-v2.4,34.694480544,37.394205241,35.6780936814,1.102404252659388
26 | q3-v2.4,4.425652285,10.658408525999999,6.0468644542,2.3246590138935876
27 | q30-v2.4,22.221355283,24.362323517,23.5632527726,0.7897931942834879
28 | q31-v2.4,15.535605863,22.435048117999997,19.1734882166,2.7754359387002783
29 | q32-v2.4,2.7240765799999997,4.294419565,3.4110461104,0.5180495245529745
30 | q33-v2.4,9.486729407999999,14.0507404,10.8410765628,1.6684470542531107
31 | q34-v2.4,5.754013076000001,6.858750014,6.3262469768,0.36247159301883486
32 | q35-v2.4,22.054084172,28.678958477000002,25.5516858866,2.131843967920238
33 | q36-v2.4,9.201601921,11.342347661,10.1660546476,0.8033960435433924
34 | q37-v2.4,20.614719791,25.339465841000003,22.7586753862,1.6267379678394953
35 | q38-v2.4,24.363691602,25.894881488,25.013908631,0.56493746506244
36 | q39a-v2.4,7.2125916100000005,7.898016800000001,7.573020051199999,0.23697983984591808
37 | q39b-v2.4,6.18982408,6.457127251,6.3307544906,0.11155595117662326
38 | q4-v2.4,411.27097534399996,416.429732382,413.1030275622,1.7518852154912812
39 | q40-v2.4,12.701440823,15.116922094,13.6072663976,0.9370664149170012
40 | q41-v2.4,0.735660971,1.064742505,0.8516507424,0.11769377387532427
41 | q42-v2.4,2.1424351539999997,2.566336106,2.3445844576,0.1730397418357677
42 | q43-v2.4,5.221034044,5.895489853,5.578208038800001,0.2675375069381337
43 | q44-v2.4,35.767253492,43.123901767,39.088050853000006,2.7321370388020187
44 | q45-v2.4,14.676387136999999,16.24020423,15.256181076999999,0.6489453313215297
45 | q46-v2.4,14.427242047,15.689932881,15.280348465200001,0.45794322281169697
46 | q47-v2.4,15.574771598,16.561673385,16.024442170999997,0.34720748229917753
47 | q48-v2.4,13.14376774,14.165969597,13.582910882799998,0.3534627105889629
48 | q49-v2.4,24.847529014,27.807120036,25.949745603400004,1.0226374444989852
49 | q5-v2.4,37.217741593,40.841633301,38.1758336198,1.3637240880115
50 | q50-v2.4,88.902628371,106.73569835,97.013035085,6.729219006565264
51 | q51-v2.4,17.650206145,18.522226133,18.0863460476,0.3173514020221161
52 | q52-v2.4,2.1186154459999997,2.22236456,2.1760548741999997,0.04471532999239851
53 | q53-v2.4,5.541361597,6.945895214,6.1308230004,0.47765772015250924
54 | q54-v2.4,10.480562544,11.998553545,11.145047851,0.5785241999873934
55 | q55-v2.4,2.282213192,2.596218903,2.4718316440000003,0.11371205184911885
56 | q56-v2.4,9.013889321,13.735663051,10.562323225399998,1.6578829837835554
57 | q57-v2.4,10.846739192,11.813498036999999,11.3235967424,0.30606111764504235
58 | q58-v2.4,5.285776542,6.052705576,5.7267195286,0.2901917190211392
59 | q59-v2.4,24.296451883000003,26.350496043,24.998931687200002,0.7095267391199042
60 | q6-v2.4,15.253336406,16.982870718999997,16.129227191,0.7343692217040048
61 | q60-v2.4,13.171913549000001,18.007604105,15.6198705214,1.7316221019179017
62 | q61-v2.4,7.9207158259999995,8.870970827999999,8.4566962112,0.3511242043446138
63 | q62-v2.4,8.165779923,11.188606775,9.520968088999998,1.0797726459511297
64 | q63-v2.4,5.2532056009999994,6.687694218,6.1321477966,0.49987278890075076
65 | q64-v2.4,131.18216769100002,139.66165989400002,136.3903266718,3.6949907245897045
66 | q65-v2.4,39.527864126,41.438105649,40.6470150722,0.73790726794108
67 | q66-v2.4,11.944895418,16.266170765000002,13.356976803,1.5262260356762227
68 | q67-v2.4,824.834365725,953.592219898,876.5785078716001,43.60044480825489
69 | q68-v2.4,8.018602137,9.789883804999999,8.833794428000001,0.6619565055625116
70 | q69-v2.4,8.086222853,9.677931814999999,8.6095748212,0.5901133398471722
71 | q7-v2.4,10.655323532,17.72042745,12.908784262200001,2.53689258899848
72 | q70-v2.4,12.524132598,14.411998091,13.339655970799999,0.6203014490996702
73 | q71-v2.4,4.796424706,6.902401469,6.1352114306,0.8666732619665505
74 | q72-v2.4,30.185360558,33.343284462,31.847113491400002,1.2618025257467707
75 | q73-v2.4,4.07596665,4.758130047,4.394250324,0.23437617102591404
76 | q74-v2.4,39.609209111,42.034777242,40.538453946800004,0.9273946012601438
77 | q75-v2.4,76.175883508,84.964818563,79.1853127072,3.661016550087643
78 | q76-v2.4,48.110222857000004,60.20403243,52.6974528608,4.4656675887183015
79 | q77-v2.4,3.657823893,4.6782597500000005,4.0992353078,0.35978757135058226
80 | q78-v2.4,135.273596329,144.133638319,138.5148938698,3.2484652059360704
81 | q79-v2.4,7.339861827,8.515751003,7.8078936262,0.42491863604184127
82 | q8-v2.4,7.966001344,10.23591579,8.684960683,0.8252855195166873
83 | q80-v2.4,33.401593257,38.877170373,35.237511009,1.9686386175709432
84 | q81-v2.4,19.417714241000002,46.429227286,36.519808876,9.4715331274212
85 | q82-v2.4,37.396584625,41.270269618,39.6436047382,1.6097228363399385
86 | q83-v2.4,2.184723509,3.33329588,2.665223494,0.372673475182942
87 | q84-v2.4,8.53208389,9.846927667,9.2276907376,0.518592084106568
88 | q85-v2.4,13.002436506,16.702057365,14.891505778200003,1.1800328961216877
89 | q86-v2.4,4.904867805,5.439357235,5.148247554599999,0.17843809022695464
90 | q87-v2.4,24.393585114,25.127197267,24.558069942,0.28488626684885726
91 | q88-v2.4,78.7644847,91.614683828,84.54430226180001,4.242549769900448
92 | q89-v2.4,7.45227157,9.86460246,8.311342793200001,0.8722823283576866
93 | q9-v2.4,78.2763318,91.73873975,84.7820893802,5.341610603371891
94 | q90-v2.4,9.018264886,12.035711667,9.8776809608,1.1016814318457697
95 | q91-v2.4,3.309663625,5.368012049,3.8723665273999996,0.7635696335475226
96 | q92-v2.4,2.275226487,3.165737054,2.548989435,0.3205299610638202
97 | q93-v2.4,189.061058163,193.990356962,191.68693537579998,2.023929208422184
98 | q94-v2.4,29.167269231,34.975495122,31.3409928716,2.01367221037295
99 | q95-v2.4,72.104501149,76.888610978,74.05870401920001,1.7079831665403682
100 | q96-v2.4,11.44562275,14.913310049,12.8495213302,1.313787446313863
101 | q97-v2.4,38.64428042,53.627609378,42.0062587314,5.819342651112627
102 | q98-v2.4,6.97448467,8.024952473,7.5863038148,0.3467894862651928
103 | q99-v2.4,12.60409641,15.364135885,13.7726032862,0.9690467806801478
104 | ss_max-v2.4,23.568913382999998,26.611204045999997,24.978771977199997,1.0616156616432846
105 |
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack-arm64/system-load.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack-arm64/system-load.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/cloud-disk-read-write-bps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/cloud-disk-read-write-bps.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/cloud-disk-read-write-iops.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/cloud-disk-read-write-iops.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/cpu-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/cpu-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/memory-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/memory-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/network-bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/network-bandwidth.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/oss-bandwidth-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/oss-bandwidth-usage.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/result.csv:
--------------------------------------------------------------------------------
1 | q1-v2.4,5.82740906,12.178573564,7.1879224572,2.4960589575459804
2 | q10-v2.4,7.573033074,8.666177659,8.1316966384,0.3648029760376505
3 | q11-v2.4,53.436323165000005,53.921298953,53.75794683080001,0.17454812541879453
4 | q12-v2.4,3.947420554,5.6403560420000005,4.431623394400001,0.6216755464731617
5 | q13-v2.4,12.143909611,13.038156586,12.6490554062,0.3300995714054239
6 | q14a-v2.4,107.46807310999999,115.669366161,111.85752393800001,2.8042210551273055
7 | q14b-v2.4,105.816832083,110.095723619,107.19382418820001,1.5379012340740084
8 | q15-v2.4,13.701433589,14.150182401,13.954124861,0.14472680646740804
9 | q16-v2.4,36.780885457,40.265582759,37.7290644368,1.2985744150482166
10 | q17-v2.4,12.995115227,13.452064576000001,13.1343180432,0.16627461371977903
11 | q18-v2.4,26.913184947,28.227103931000002,27.487764137,0.521758556010464
12 | q19-v2.4,6.53442605,6.785478434,6.6804468676,0.09669073284863788
13 | q2-v2.4,21.132480734,22.656578223,21.6629419568,0.5803333423209112
14 | q20-v2.4,5.325541791,5.768802061000001,5.512311510799999,0.17759409301795656
15 | q21-v2.4,1.870814209,2.354133757,2.0752153428,0.16786013784580675
16 | q22-v2.4,15.361660048,15.842826313,15.610420318,0.16316416909917944
17 | q23a-v2.4,252.781698925,267.126780039,263.6303611626,5.445611471905185
18 | q23b-v2.4,362.414191738,396.677759852,380.3411332984,11.85351761507152
19 | q24a-v2.4,168.934801095,195.761364316,179.9287417506,9.510202007542013
20 | q24b-v2.4,156.003102491,186.871001702,165.02367737039998,11.386752754897792
21 | q25-v2.4,9.959931248,10.710937722,10.389834249200002,0.2773901160922852
22 | q26-v2.4,6.968146132,9.120252821,7.9289348678,0.7467140720959528
23 | q27-v2.4,8.532492410000001,10.062598792000001,9.309868940800001,0.5251773595287177
24 | q28-v2.4,79.790235462,86.565475439,83.5998490682,2.7366682823821984
25 | q29-v2.4,35.326786917,37.729033465,36.606352364399996,0.8259770016569948
26 | q3-v2.4,3.6432516919999998,6.147688761,4.305347787600001,0.9331086197419158
27 | q30-v2.4,18.220981156999997,18.633722778,18.437182831999998,0.14174993480267611
28 | q31-v2.4,15.942112084,21.284957720999998,18.4043419582,1.9913891609695626
29 | q32-v2.4,2.345385271,2.577304551,2.4563204285999998,0.07893259517224921
30 | q33-v2.4,8.450874475,12.325880425,10.5759360036,1.4506385441266296
31 | q34-v2.4,5.186600594,5.957797243,5.4886637674,0.2539189080514718
32 | q35-v2.4,20.837288122999997,24.231901855,22.1871265838,1.1473412689581974
33 | q36-v2.4,7.836719654,9.303440971,8.411142502,0.5375508429923208
34 | q37-v2.4,18.839476587,23.085603574,20.425648218,1.6288495424610643
35 | q38-v2.4,29.27162852,29.888590269,29.704503160799998,0.22100625676104002
36 | q39a-v2.4,5.6715128990000006,5.987813289,5.8983753884,0.11670704181381068
37 | q39b-v2.4,5.124887764,5.544479965000001,5.3199175924,0.13795567629304478
38 | q4-v2.4,351.110730348,356.019829643,354.574727248,1.803913576819102
39 | q40-v2.4,11.522964602,11.855520104,11.665169434000001,0.12009998943452786
40 | q41-v2.4,0.630769625,0.734893715,0.690501993,0.045762016697730806
41 | q42-v2.4,1.5774040230000002,2.108183575,1.8526063143999998,0.1974445726816084
42 | q43-v2.4,5.243902481,5.536814871,5.4029682634,0.12019622763754292
43 | q44-v2.4,28.837908760999998,34.309312426999995,31.282211627200002,1.8384245102982972
44 | q45-v2.4,13.153637891,13.628053518,13.339359959800001,0.17041278652280728
45 | q46-v2.4,11.287654004,12.271047093,11.758188709399999,0.3187610867133229
46 | q47-v2.4,13.463470857,14.189145694,13.7540722738,0.2650707791268198
47 | q48-v2.4,11.241675956,12.103887507,11.707699290599999,0.325604968552439
48 | q49-v2.4,17.834184225,20.899016822,19.4238399256,1.0754022056717856
49 | q5-v2.4,34.621159285000005,36.314129427,35.1836529778,0.626921312924483
50 | q50-v2.4,95.74525486499999,103.6536799,100.2474700934,2.555714600741113
51 | q51-v2.4,15.991921506999999,16.623763728999997,16.232791363,0.2499447379507674
52 | q52-v2.4,1.7025136520000002,1.887806007,1.7745731660000001,0.06949580697355572
53 | q53-v2.4,5.1523663619999995,5.8181278579999995,5.4547989575999996,0.23320098695816197
54 | q54-v2.4,9.884381209999999,10.559274303,10.2496401932,0.23028904786036253
55 | q55-v2.4,1.722851433,1.854796877,1.8043138242,0.04516697302902219
56 | q56-v2.4,9.618347347,12.203963006,11.2766603764,0.9722058751923929
57 | q57-v2.4,8.914394118,9.394239197000001,9.094486610199999,0.18040418586569554
58 | q58-v2.4,3.500587732,3.7713736840000003,3.6416817162,0.09703387195726251
59 | q59-v2.4,20.758752511999997,21.683708199,21.1298770402,0.32775226611819375
60 | q6-v2.4,12.440352381999999,15.804344041,13.647992317200002,1.1629592307863936
61 | q60-v2.4,13.255234117,13.908884819999999,13.479432537200001,0.23210091359158994
62 | q61-v2.4,6.61329201,7.086961272,6.7748774466,0.17045367659258243
63 | q62-v2.4,7.156209132,7.642842674000001,7.4162772407999995,0.1618788158931069
64 | q63-v2.4,4.832625473,5.236620013,5.0496996798,0.15297990775471795
65 | q64-v2.4,118.185557968,122.85405472,120.3819681836,1.6459305459978475
66 | q65-v2.4,37.528430785999994,38.122415229,37.7123882112,0.20992940071828223
67 | q66-v2.4,11.574929771999999,12.738605019000001,11.8822441496,0.4340046024026489
68 | q67-v2.4,754.5656415010001,785.364204246,771.5511458138001,10.811958985534648
69 | q68-v2.4,7.227422299,7.7298664509999995,7.4512599586,0.2250363828920852
70 | q69-v2.4,7.1655399509999995,7.585426471,7.4151779448,0.1463373047409165
71 | q7-v2.4,9.218510941,30.921400187,14.3854956262,8.305414189617764
72 | q70-v2.4,13.038384813,14.662040791999999,13.803610042399999,0.6790784435658018
73 | q71-v2.4,4.49366801,5.534065178,4.958342104,0.3485496889032831
74 | q72-v2.4,30.596248559,32.154659074,31.222147883800005,0.5634306317924787
75 | q73-v2.4,4.058459936,4.237710261999999,4.1268822094,0.06837608437373675
76 | q74-v2.4,43.92400146,45.089497551,44.507426736,0.40051477172070366
77 | q75-v2.4,70.99690424,77.740359476,73.7622416966,2.5833038102991885
78 | q76-v2.4,37.639858732,44.782194468,41.615109209799996,3.054873881024291
79 | q77-v2.4,2.924383521,3.658697241,3.2924847838000004,0.24097283466005903
80 | q78-v2.4,135.53521893,138.923424414,137.4993615154,1.5130299533763367
81 | q79-v2.4,6.585374795,7.010601788,6.7634720192,0.1431610726607435
82 | q8-v2.4,7.054331802,8.213357267,7.4826677602,0.40186409283060637
83 | q80-v2.4,30.762710629,33.956214714000005,31.763478078600002,1.1364042478378251
84 | q81-v2.4,17.026213071999997,30.736642845,24.8111300872,6.311979062312886
85 | q82-v2.4,36.560933301,38.963618909,38.080525601199994,1.0161681076399023
86 | q83-v2.4,1.556373215,2.012291026,1.8411018434,0.16810816431476194
87 | q84-v2.4,7.54376091,8.449147638000001,7.9804698894,0.32813136466517406
88 | q85-v2.4,11.305392544,12.246462002,11.6826919822,0.3126755402485902
89 | q86-v2.4,4.166293958,7.4550414609999995,4.846635666799999,1.3044220575400889
90 | q87-v2.4,29.857144283,30.197245811000002,30.0568481086,0.12204064209977733
91 | q88-v2.4,68.946333353,73.469319152,70.9938738528,1.50355082387917
92 | q89-v2.4,6.465980558999999,7.482841177999999,7.048671743599999,0.37705910005996873
93 | q9-v2.4,65.27047729499999,70.363230309,68.0142375148,1.7138701157064395
94 | q90-v2.4,7.551024351,7.847182861,7.724840075,0.14168198677742588
95 | q91-v2.4,2.9543571280000003,3.161031375,3.0627617616,0.08722093797597069
96 | q92-v2.4,1.734802363,2.452249051,1.9558326864000002,0.25509095562300277
97 | q93-v2.4,165.97376033700002,175.536953304,170.9042379306,3.7117345343598487
98 | q94-v2.4,27.420068029,29.638806560000003,28.471682324800003,0.8848546965648024
99 | q95-v2.4,85.688196242,88.987488021,87.09022427260001,1.2108144847560522
100 | q96-v2.4,10.45255504,11.140827458,10.739148343600002,0.2511779065303994
101 | q97-v2.4,39.057097029999994,39.838978937,39.6167914878,0.28567950252043145
102 | q98-v2.4,6.654428135,7.631984391,7.311427204199999,0.3560569231980692
103 | q99-v2.4,10.458687404,11.871515145,11.1029979198,0.49911583952624683
104 | ss_max-v2.4,24.555110707,26.187189824,25.5329044538,0.6290656580040886
105 |
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ack/system-load.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-ack/system-load.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-ecs-vs-on-ack/index.md:
--------------------------------------------------------------------------------
 1 | # Spark on ACK Benchmark: Spark on ECS vs. Spark on ACK
 2 |
 3 | This article runs a TPC-DS benchmark with a scale factor of 3072 on an ECS cluster and an ACK cluster of the same size.
 4 |
 5 | ## Benchmark Environment
 6 |
 7 | The benchmark environment used in this article is as follows:
 8 |
 9 | | **Cluster type** | ACK Pro |
10 | | ------------ | ---------------------------------------------- |
11 | | **K8s version** | 1.26.3-aliyun.1 |
12 | | **Region** | China North 2 (Beijing) |
13 | | **Instance type** | ecs.g8y.8xlarge (32 vCPU + 128 GB) |
14 | | **Node count** | 1 master node + 6 worker nodes |
15 | | **OS** | Alibaba Cloud Linux 3.2104 LTS 64-bit, ARM edition |
16 | | **Local storage** | 6 x 300 GB ESSD PL1 cloud disks mounted on each worker node |
17 |
18 | Notes:
19 |
20 | - The master node is used only to schedule the driver pod; executor pods are not scheduled on it.
21 | - The worker nodes are used to schedule the executor pods.
22 |
23 | ## Benchmark Procedure
24 |
25 | 1. Create the benchmark environment. For details, see [Setting up the benchmark environment](../setup-env/index.md).
26 | 2. Generate the test data. For details, see [Generating the benchmark dataset](../../../charts/tpcds-data-generation/README.md).
27 | 3. Run the benchmark. For details, see [Running the TPC-DS benchmark](../../../charts/tpcds-benchmark/README.md).
28 |
29 | ## Benchmark Configuration
30 |
31 | During the benchmark phase, 60 executor pods are scheduled in total: 10 on each worker node, each allocated 3 CPU cores and 12 GB of memory (9 GB on-heap + 3 GB off-heap). Each node therefore has a CPU request of 30, and a memory request and limit of 120 GB. The sketch below expresses this sizing as Spark configuration.
32 |
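A minimal spark-submit sketch with this sizing (illustrative only; the benchmark is actually submitted through the Helm chart, and the API server address and jar path here are placeholders):

```shell
spark-submit \
  --master k8s://https://<API_SERVER>:6443 \
  --deploy-mode cluster \
  --conf spark.kubernetes.namespace=spark \
  --conf spark.executor.instances=60 \
  --conf spark.executor.cores=3 \
  --conf spark.executor.memory=9g \
  --conf spark.executor.memoryOverhead=3g \
  local:///path/to/spark-tpcds-benchmark.jar
```
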
33 | ## Benchmark Results
34 |
35 | This benchmark ran the 3 TB (SF=3072) TPC-DS suite on ECS and ACK clusters with the same number and specification of instances. The results are as follows:
36 |
37 | - Spark on ECS was run 3 times with an average runtime of 4786 seconds; Spark on ACK was run 5 times with an average runtime of 4758.6 seconds, about 0.5% lower. Given the normal run-to-run variance in query times, the two environments can be considered to deliver essentially the same query performance.
38 |
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-x86-vs-on-arm64/cost_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-x86-vs-on-arm64/cost_comparison.png
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-x86-vs-on-arm64/index.md:
--------------------------------------------------------------------------------
 1 | # Spark on ACK Benchmark: x86_64 vs. arm64 Architecture
 2 |
 3 | To compare the performance and cost of Spark on ACK on the x86_64 and arm64 architectures, this benchmark created x86 and arm64 ACK clusters of the same scale, with identical node counts, vCPU counts, and memory sizes, and with the same number and size of attached cloud disks. The main difference is that the x86 cluster uses ECS instances from the g7 instance family, while the arm64 cluster uses the g8y instance family.
 4 |
 5 | ## Benchmark Environment
 6 |
 7 | The test environment used in this benchmark is as follows:
 8 |
 9 | | | **x86** | **arm64** |
10 | | ------------ | ------------------------------------------------------------ | ----------------------------------------------------------- |
11 | | **Cluster type** | ACK Pro | ACK Pro |
12 | | **K8s version** | 1.26.3-aliyun.1 | 1.26.3-aliyun.1 |
13 | | **Region** | China North 2 (Beijing) | China North 2 (Beijing) |
14 | | **Instance type** | ecs.g7.8xlarge (32 vCPU + 128 GB) | ecs.g8y.8xlarge (32 vCPU + 128 GB) |
15 | | **Node count** | 1 master node (g7.2xlarge)<br>6 worker nodes (g7.8xlarge) | 1 master node (g8y.2xlarge)<br>6 worker nodes (g8y.8xlarge) |
16 | | **OS** | Alibaba Cloud Linux 3.2104 LTS 64-bit | Alibaba Cloud Linux 3.2104 LTS 64-bit, ARM edition |
17 | | **Image ID** | aliyun_3_x64_20G_alibase_20230727.vhd | aliyun_3_arm64_20G_alibase_20230515.vhd |
18 |
19 | ## Test Dataset
20 |
21 | The dataset used in this benchmark has a scale factor of SF=3072 and 640 data partitions.
22 |
23 | ## Test Results
24 |
25 | The x86/arm64 performance comparison is shown in the figure below:
26 |
27 | 
28 |
29 | The x86/arm64 cost comparison is shown in the figure below:
30 |
31 | 
32 |
33 | The results of the 6-node, 3 TB benchmark are as follows:
34 |
35 | - Data generation: arm64 took `3.67%` less time than x86 and cost `26.35%` less in ECS fees.
36 | - End-to-end queries: arm64 took `5.11%` less time than x86 and cost `27.45%` less in ECS fees.
37 |
38 | Notes:
39 |
40 | - ECS cost is computed as `total cost = total query time x cluster unit price`; see the worked example after these notes.
41 | - The same job parameters were used on both architectures and were not tuned per architecture, so the numbers do not represent peak performance; they are intended only for comparing performance and cost across the two architectures.
42 |
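A worked example of the cost formula, derived purely from the percentages above rather than from a price list: for end-to-end queries, the arm64 cost ratio is 1 - 0.2745 = 0.7255 while the time ratio is 1 - 0.0511 = 0.9489, so the implied cluster unit-price ratio is 0.7255 / 0.9489 ≈ 0.765. The data-generation figures imply the same ratio, (1 - 0.2635) / (1 - 0.0367) ≈ 0.765, i.e., both results are consistent with the g8y cluster's hourly price being roughly 23.5% lower than the g7 cluster's.
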
--------------------------------------------------------------------------------
/docs/benchmark/spark-on-x86-vs-on-arm64/performance_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/benchmark/spark-on-x86-vs-on-arm64/performance_comparison.png
--------------------------------------------------------------------------------
/docs/benchmark/tpcds-benchmark.md:
--------------------------------------------------------------------------------
 1 | # Running the TPC-DS Benchmark
 2 |
 3 | This article describes how to run the TPC-DS benchmark.
 4 |
 5 | ## Prerequisites
 6 |
 7 | - [Git](https://git-scm.com/), [Docker](https://www.docker.com/), [kubectl](https://kubernetes.io/docs/reference/kubectl/), and [Helm 3](https://helm.sh/) are installed on your local machine.
 8 | - The ossutil tool is installed on your local machine. For details, see [Install ossutil](https://help.aliyun.com/zh/oss/developer-reference/install-ossutil).
 9 | - The benchmark environment has been set up. For details, see [Setting up the Spark on ACK benchmark environment](setup.md).
10 | - The TPC-DS benchmark dataset has been generated and uploaded to OSS. For details, see [Generating the TPC-DS test dataset](tpcds-data-generation.md).
11 |
12 | ## Submitting the benchmark job
13 |
14 | 1. Run the following commands to set the benchmark job parameters:
15 |
16 | ```shell
17 |    # Scale factor
18 | SCALE_FACTOR=3072
19 | ```
20 |
21 | 2. Run the following command to submit the benchmark job:
22 |
23 | ```shell
24 | helm install tpcds-benchmark charts/tpcds-benchmark \
25 | --namespace spark \
26 | --create-namespace \
27 | --set image.registry=${IMAGE_REGISTRY} \
28 | --set image.repository=${IMAGE_REPOSITORY} \
29 | --set image.tag=${IMAGE_TAG} \
30 | --set oss.bucket=${OSS_BUCKET} \
31 | --set oss.endpoint=${OSS_ENDPOINT} \
32 | --set benchmark.scaleFactor=${SCALE_FACTOR} \
33 | --set benchmark.numIterations=1
34 | ```
35 |
36 |    You can pass additional flags of the form `--set key=value` to configure the benchmark; for the supported options, see `charts/tpcds-benchmark/values.yaml`.
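
   For example, to run only a few selected queries instead of the full suite (an illustrative sketch; `benchmark.queries` and the other keys are documented in `charts/tpcds-benchmark/values.yaml`):

   ```shell
   helm upgrade tpcds-benchmark charts/tpcds-benchmark \
     --namespace spark \
     --reuse-values \
     --set "benchmark.queries={q70-v2.4,q82-v2.4,q64-v2.4}"
   ```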
37 |
38 | 3. Run the following command to watch the benchmark job status in real time:
39 |
40 | ```shell
41 | kubectl get -n spark -w sparkapplication tpcds-benchmark-${SCALE_FACTOR}gb
42 | ```
43 |
44 | 4. Run the following command to tail the driver pod logs:
45 |
46 | ```shell
47 | kubectl logs -n spark -f tpcds-benchmark-${SCALE_FACTOR}gb-driver
48 | ```
49 |
50 | ## Viewing the benchmark results
51 |
52 | 1. Run the following command to list the benchmark output:
53 |
54 | ```shell
55 | ossutil ls -s oss://${OSS_BUCKET}/spark/result/tpcds/${SCALE_FACTOR}gb/
56 | ```
57 |
58 |    The expected output is as follows:
59 |
60 | ```shell
61 | oss://example-bucket/spark/result/tpcds/SF=3072/
62 | oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/
63 | oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/_SUCCESS
64 | oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/part-00000-80c681de-ae8d-4449-b647-5e3d373edef1-c000.json
65 | oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/summary.csv/
66 | oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/summary.csv/_SUCCESS
67 | oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/summary.csv/part-00000-5a5d1e4a-3fe0-43a1-8248-3259af4f10a7-c000.csv
68 | Object Number is: 7
69 |
70 | 0.172532(s) elapsed
71 | ```
72 |
73 | 2. Run the following command to download the benchmark result from OSS and save it locally as `result.csv`:
74 |
75 | ```shell
76 | ossutil cp oss://example-bucket/spark/result/tpcds/SF=3072/timestamp=1716901969870/summary.csv/part-00000-5a5d1e4a-3fe0-43a1-8248-3259af4f10a7-c000.csv result.csv
77 | ```
78 |
79 | 3. Run the following command to view the benchmark results:
80 |
81 | ```shell
82 | cat result.csv
83 | ```
84 |
85 |    The expected output is as follows (partially omitted):
86 |
87 | ```shell
88 | q1-v2.4,13.169382888,13.169382888,13.169382888,0.0
89 | q10-v2.4,9.502788331,9.502788331,9.502788331,0.0
90 | q11-v2.4,57.161809588,57.161809588,57.161809588,0.0
91 | q12-v2.4,5.344221526999999,5.344221526999999,5.344221526999999,0.0
92 | q13-v2.4,16.183193874,16.183193874,16.183193874,0.0
93 | q14a-v2.4,121.433786224,121.433786224,121.433786224,0.0
94 | q14b-v2.4,112.871190193,112.871190193,112.871190193,0.0
95 | q15-v2.4,14.63114106,14.63114106,14.63114106,0.0
96 | q16-v2.4,47.082124609,47.082124609,47.082124609,0.0
97 | q17-v2.4,14.320191869,14.320191869,14.320191869,0.0
98 | q18-v2.4,30.619759895999998,30.619759895999998,30.619759895999998,0.0
99 | q19-v2.4,7.874492828999999,7.874492828999999,7.874492828999999,0.0
100 | q2-v2.4,34.106892226999996,34.106892226999996,34.106892226999996,0.0
101 | q20-v2.4,6.1991251609999996,6.1991251609999996,6.1991251609999996,0.0
102 | ...
103 | ```
104 |
105 |    The output has five columns: query name, minimum runtime (seconds), maximum runtime (seconds), average runtime (seconds), and standard deviation (seconds). Because this example ran only a single iteration, the minimum/maximum/average times are identical and the standard deviation is 0.
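
   As a quick aggregate, you can sum the per-query averages to approximate the end-to-end runtime (an illustrative one-liner assuming the five-column CSV layout described above):

   ```shell
   # Sum the 4th column (average runtime in seconds) across all queries
   awk -F, '{ sum += $4 } END { printf "total average runtime: %.1f s\n", sum }' result.csv
   ```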
106 |
107 | ## Cleaning up
108 |
109 | 1. Run the following command to delete the benchmark job:
110 |
111 | ```shell
112 | helm uninstall -n spark tpcds-benchmark
113 | ```
114 |
115 | 2. Run the following command to delete the PVC:
116 |
117 | ```shell
118 | kubectl delete -f oss-pvc.yaml
119 | ```
120 |
121 | 3. Run the following command to delete the PV:
122 |
123 | ```shell
124 | kubectl delete -f oss-pv.yaml
125 | ```
126 |
127 | 4. Run the following command to delete the Secret:
128 |
129 | ```shell
130 | kubectl delete -f oss-secret.yaml
131 | ```
132 |
133 | 5. If you no longer need the bucket created in this example, run the following command to delete the OSS bucket:
134 |
135 | ```shell
136 | ossutil rm oss://${OSS_BUCKET} -b
137 | ```
138 |
139 |    Note:
140 |
141 |    - Deleting an OSS bucket is irreversible; proceed with caution to avoid data loss.
142 |
143 | 6. Destroy the benchmark cluster environment:
144 |
145 | ```shell
146 | terraform -chdir=terraform/alicloud destroy
147 | ```
148 |
--------------------------------------------------------------------------------
/docs/benchmark/tpcds-data-generation.md:
--------------------------------------------------------------------------------
1 | # TPC-DS Test Dataset Generation
2 |
3 | This document describes how to generate the dataset required by the TPC-DS benchmark.
4 |
5 | ## Prerequisites
6 |
7 | - [Git](https://git-scm.com/), [Docker](https://www.docker.com/), [kubectl](https://kubernetes.io/docs/reference/kubectl/), and [Helm 3](https://helm.sh/) are installed on your local machine.
8 | - The ossutil tool is installed on your local machine. For details, see [Install ossutil](https://help.aliyun.com/zh/oss/developer-reference/install-ossutil).
9 | - The benchmark environment is set up. For details, see [Set up the Spark on ACK benchmark environment](setup.md).
10 |
11 | ## Submitting the data generation job
12 |
13 | 1. Run the following commands to set the data generation job parameters (the image variables assumed by step 2 are noted after this step):
14 |
15 | ```shell
16 | # Scale factor (GB)
17 | SCALE_FACTOR=3072
18 |
19 | # Number of partitions
20 | NUM_PARTITIONS=640
21 | ```
22 |
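Step 2 below also expects the image variables from the environment setup. A reminder with placeholder values only; use the registry, repository, and tag from your own [setup](setup.md):

```shell
# Hypothetical placeholders; replace with your own image coordinates.
IMAGE_REGISTRY=registry.example.com
IMAGE_REPOSITORY=example/spark-tpcds
IMAGE_TAG=latest
```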
23 | 2. Run the following command to submit the data generation job:
24 |
25 | ```shell
26 | helm install tpcds-data-generation charts/tpcds-data-generation \
27 | --namespace spark \
28 | --create-namespace \
29 | --set image.registry=${IMAGE_REGISTRY} \
30 | --set image.repository=${IMAGE_REPOSITORY} \
31 | --set image.tag=${IMAGE_TAG} \
32 | --set oss.bucket=${OSS_BUCKET} \
33 | --set oss.endpoint=${OSS_ENDPOINT} \
34 | --set benchmark.scaleFactor=${SCALE_FACTOR} \
35 | --set benchmark.numPartitions=${NUM_PARTITIONS}
36 | ```
37 |
38 | 3. Run the following command to watch the Spark job status in real time:
39 |
40 | ```shell
41 | kubectl get -n spark -w sparkapplication tpcds-data-generation-${SCALE_FACTOR}gb
42 | ```
43 |
44 | 4. Run the following command to stream the Driver logs:
45 |
46 | ```shell
47 | kubectl logs -n spark -f tpcds-data-generation-${SCALE_FACTOR}gb-driver
48 | ```
49 |
50 | ## Viewing the dataset
51 |
52 | After the job completes, run the following command to list the directory structure of the generated dataset (a disk-usage sketch follows the expected output):
53 |
54 | ```shell
55 | ossutil ls -d oss://${OSS_BUCKET}/spark/data/tpcds/${SCALE_FACTOR}gb/
56 | ```
57 |
58 | Expected output:
59 |
60 | ```text
61 | oss://example-bucket/spark/data/tpcds/SF=3072/
62 | oss://example-bucket/spark/data/tpcds/SF=3072/call_center/
63 | oss://example-bucket/spark/data/tpcds/SF=3072/catalog_page/
64 | oss://example-bucket/spark/data/tpcds/SF=3072/catalog_returns/
65 | oss://example-bucket/spark/data/tpcds/SF=3072/catalog_sales/
66 | oss://example-bucket/spark/data/tpcds/SF=3072/customer/
67 | oss://example-bucket/spark/data/tpcds/SF=3072/customer_address/
68 | oss://example-bucket/spark/data/tpcds/SF=3072/customer_demographics/
69 | oss://example-bucket/spark/data/tpcds/SF=3072/date_dim/
70 | oss://example-bucket/spark/data/tpcds/SF=3072/household_demographics/
71 | oss://example-bucket/spark/data/tpcds/SF=3072/income_band/
72 | oss://example-bucket/spark/data/tpcds/SF=3072/inventory/
73 | oss://example-bucket/spark/data/tpcds/SF=3072/item/
74 | oss://example-bucket/spark/data/tpcds/SF=3072/promotion/
75 | oss://example-bucket/spark/data/tpcds/SF=3072/reason/
76 | oss://example-bucket/spark/data/tpcds/SF=3072/ship_mode/
77 | oss://example-bucket/spark/data/tpcds/SF=3072/store/
78 | oss://example-bucket/spark/data/tpcds/SF=3072/store_returns/
79 | oss://example-bucket/spark/data/tpcds/SF=3072/store_sales/
80 | oss://example-bucket/spark/data/tpcds/SF=3072/time_dim/
81 | oss://example-bucket/spark/data/tpcds/SF=3072/warehouse/
82 | oss://example-bucket/spark/data/tpcds/SF=3072/web_page/
83 | oss://example-bucket/spark/data/tpcds/SF=3072/web_returns/
84 | oss://example-bucket/spark/data/tpcds/SF=3072/web_sales/
85 | oss://example-bucket/spark/data/tpcds/SF=3072/web_site/
86 | Object and Directory Number is: 25
87 |
88 | 0.446278(s) elapsed
89 | ```
90 |
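To gauge the dataset's total size, recent ossutil versions provide a `du` command; a sketch, assuming your ossutil supports the `--block-size` flag:

```shell
# Summarize the storage used by the generated dataset, in GB.
ossutil du oss://${OSS_BUCKET}/spark/data/tpcds/${SCALE_FACTOR}gb/ --block-size GB
```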
--------------------------------------------------------------------------------
/docs/bestpractice/emrspark-ess.md:
--------------------------------------------------------------------------------
1 | This document describes how to run Spark jobs on ACK and optimize performance with EMR Spark Core and Remote Shuffle Service.
2 |
3 | ### Prerequisites
4 | - An ACK standard cluster with 20 worker nodes of the big-data instance type ecs.d1ne.6xlarge.
5 | - Alibaba Cloud OSS with a bucket created; use it to replace the OSS settings in the YAML files.
6 | - 10 TB of TPC-DS data generated and stored in Alibaba Cloud OSS. For details, see [Generating data](./generate-data.md).
7 |
8 | ### Environment preparation
9 | - **Mount disks on the worker nodes**
10 |
11 | Each ecs.d1ne.6xlarge instance ships with 12 5500 GB HDD data disks, which must be mounted before use. Mount them as follows; a verification sketch comes after the commands.
12 |
13 | ```shell
14 | wget https://shilei-tpc-ds.oss-cn-beijing.aliyuncs.com/tools/mount.tgz
15 | tar -xzvf mount.tgz
16 | cd mount/
17 | ./mount
18 | # SSH password: enter the SSH password here; the disks are then mounted automatically
19 | ```
20 |
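Once the script finishes, you can check that the data disks are mounted, for example:

```shell
# Each worker node should show its 12 data disks mounted under /mnt.
df -h | grep /mnt
```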
21 | - **Install ack-spark-operator**
22 |
23 | Installing the ack-spark-operator component lets you use the ACK Spark Operator to simplify job submission.
24 |
25 | 1). Log in to the Container Service console.
26 |
27 | 2). In the left-side navigation pane of the console, choose **Marketplace > App Catalog**.
28 |
29 | 3). On the **App Catalog** page, find and click **ack-spark-operator**.
30 |
31 | 4). On the right side of the **App Catalog - ack-spark-operator** page, click **Create**.
32 |
33 | - **Install ack-spark-history-server** (optional)
34 |
35 | ACK Spark History Server records the logs and events produced while Spark runs jobs and provides a UI, which helps with troubleshooting.
36 |
37 | When creating the **ack-spark-history-server** component, configure the OSS settings on the **Parameters** tab; they are used to store the Spark history data.
38 |
39 | 1). Log in to the Container Service console.
40 |
41 | 2). In the left-side navigation pane of the console, choose **Marketplace > App Catalog**.
42 |
43 | 3). On the **App Catalog** page, find and click **ack-spark-history-server**.
44 |
45 | 4). On the right side of the **App Catalog - ack-spark-history-server** page, click **Create**.
46 |
47 | - **Deploy remote-shuffle-service**
48 |
49 | For the remote-shuffle-service installation instructions, contact us through the DingTalk group.
50 |
51 |
52 | ### Submitting the Spark job
53 |
54 | ```yaml
55 | apiVersion: "sparkoperator.k8s.io/v1beta2"
56 | kind: SparkApplication
57 | metadata:
58 | name: tpcds-benchmark-emrspark-ess-10t
59 | namespace: default
60 | spec:
61 | type: Scala
62 | mode: cluster
63 | image: "" # contact us through the DingTalk group to obtain the image
64 | imagePullPolicy: Always
65 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
66 | mainApplicationFile: "oss:///jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
67 | arguments:
68 | - "--dataset_location"
69 | - "oss:///datasets/"
70 | - "--output_location"
71 | - "oss:///outputs/ack-pr-10t-emr-with-ess"
72 | - "--iterations"
73 | - "1"
74 | - "--shuffle_partitions"
75 | - "1000"
76 | - "--scale_factor"
77 | - "10000"
78 | - "--regenerate_dataset"
79 | - "false"
80 | - "--regenerate_metadata"
81 | - "false"
82 | - "--only_generate_data_and_meta"
83 | - "false"
84 | - "--format"
85 | - "parquet"
86 | - "--query_exclude_list"
87 | - "q14a,q14b,q67"
88 | sparkVersion: 2.4.5
89 | restartPolicy:
90 | type: Never
91 | hadoopConf:
92 | "fs.oss.impl": "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"
93 | "fs.oss.endpoint": ""
94 | "fs.oss.accessKeyId": ""
95 | "fs.oss.accessKeySecret": ""
96 | hive.metastore.uris: thrift://service-hive-metastore.default:9083
97 | hive.metastore.client.socket.timeout: 600s
98 | sparkConf:
99 | spark.eventLog.enabled: "true"
100 | spark.eventLog.dir: "oss:///spark/eventlogs"
101 | spark.driver.extraJavaOptions: "-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
102 | spark.driver.maxResultSize: 40g
103 | spark.executor.extraJavaOptions: "-XX:MaxDirectMemorySize=6g -XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
104 | spark.locality.wait.node: "0"
105 | spark.locality.wait.process: "0"
106 | spark.locality.wait.rack: "0"
107 | spark.locality.wait: "0"
108 | spark.memory.fraction: "0.8"
109 | spark.memory.offHeap.enabled: "false"
110 | spark.memory.offHeap.size: "17179869184"
111 | spark.sql.adaptive.bloomFilterJoin.enabled: "false"
112 | spark.sql.adaptive.enabled: "false"
113 | spark.sql.analyze.column.async.delay: "200"
114 | spark.sql.auto.reused.cte.enabled: "true"
115 | spark.sql.broadcastTimeout: "3600"
116 | spark.sql.columnVector.offheap.enabled: "false"
117 | spark.sql.crossJoin.enabled: "true"
118 | spark.sql.delete.optimizeInSubquery: "true"
119 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
120 | spark.sql.dynamic.runtime.filter.enabled: "true"
121 | spark.sql.dynamic.runtime.filter.exact.enabled: "true"
122 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
123 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
124 | spark.sql.files.openCostInBytes: "34108864"
125 | spark.sql.inMemoryColumnarStorage.compressed: "true"
126 | spark.sql.join.preferNativeJoin: "false"
127 | spark.sql.native.codecache: "true"
128 | spark.sql.native.codegen.wholeStage: "false"
129 | spark.sql.native.nativewrite: "false"
130 | spark.sql.pkfk.optimize.enable: "true"
131 | spark.sql.pkfk.riJoinElimination: "true"
132 | spark.sql.shuffle.partitions: "1000"
133 | spark.sql.simplifyDecimal.enabled: "true"
134 | spark.sql.sources.parallelPartitionDiscovery.parallelism: "432"
135 | spark.sql.sources.parallelPartitionDiscovery.threshold: "32"
136 | spark.shuffle.reduceLocality.enabled: "false"
137 | spark.shuffle.service.enabled: "true"
138 | spark.dynamicAllocation.enabled: "false"
139 | spark.shuffle.manager: org.apache.spark.shuffle.ess.EssShuffleManager
140 | spark.ess.master.host: emr-rss-master.spark-rss
141 | spark.ess.master.port: "9099"
142 | spark.ess.push.data.buffer.size: 64k
143 | spark.ess.push.data.max.inflight: "2048"
144 | spark.ess.rpc.io.clientThreads: "8"
145 | spark.ess.rpc.io.serverThreads: "8"
146 | spark.ess.data.io.clientThreads: "8"
147 | spark.ess.data.io.numConnectionsPerPeer: "8"
148 | driver:
149 | cores: 15
150 | coreLimit: 15000m
151 | memory: 50g
152 | labels:
153 | version: 2.4.5
154 | serviceAccount: spark
155 | env:
156 | - name: TZ
157 | value: "Asia/Shanghai"
158 | executor:
159 | cores: 4
160 | coreLimit: 6000m
161 | instances: 20
162 | memory: 24g
163 | memoryOverhead: 10g
164 | deleteOnTermination: false
165 | labels:
166 | version: 2.4.5
167 | env:
168 | - name: TZ
169 | value: "Asia/Shanghai"
170 | ```
171 | The complete YAML file is available at [tpcds-benchmark-with-emrspark-ess](../../kubernetes/emr/tpcds-benchmark-with-emrspark-ess.yaml). The jar referenced by spec.mainApplicationFile can be
172 | [downloaded here](../../kubernetes/emr/jar/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar) and uploaded to your own OSS bucket, as sketched below.
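A hypothetical upload command, assuming ossutil is configured and `example-bucket` stands in for your bucket name:

```shell
# Upload the benchmark jar so spec.mainApplicationFile can reference it from OSS.
ossutil cp spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar oss://example-bucket/jars/
```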
--------------------------------------------------------------------------------
/docs/bestpractice/generate-data.md:
--------------------------------------------------------------------------------
1 | This document describes how to generate test data on ACK with EMR Spark and TPC-DS.
2 |
3 | ### Prerequisites
4 | - An ACK standard cluster with 20 worker nodes of the big-data instance type ecs.d1ne.6xlarge.
5 | - Alibaba Cloud OSS with a bucket created; use it to replace the OSS settings in the YAML files.
6 |
7 | ### Environment preparation
8 |
9 | - **Install ack-spark-operator**
10 |
11 | Installing the ack-spark-operator component lets you use the ACK Spark Operator to simplify job submission.
12 |
13 | 1). Log in to the Container Service console.
14 |
15 | 2). In the left-side navigation pane of the console, choose **Marketplace > App Catalog**.
16 |
17 | 3). On the **App Catalog** page, find and click **ack-spark-operator**.
18 |
19 | 4). On the right side of the **App Catalog - ack-spark-operator** page, click **Create**.
20 |
21 | - **Install ack-spark-history-server** (optional)
22 |
23 | ACK Spark History Server records the logs and events produced while Spark runs jobs and provides a UI, which helps with troubleshooting.
24 |
25 | When creating the **ack-spark-history-server** component, configure the OSS settings on the **Parameters** tab; they are used to store the Spark history data.
26 |
27 | 1). Log in to the Container Service console.
28 |
29 | 2). In the left-side navigation pane of the console, choose **Marketplace > App Catalog**.
30 |
31 | 3). On the **App Catalog** page, find and click **ack-spark-history-server**.
32 |
33 | 4). On the right side of the **App Catalog - ack-spark-history-server** page, click **Create**.
34 |
35 |
36 | ### Submitting the Spark job
37 |
38 | ```yaml
39 | apiVersion: "sparkoperator.k8s.io/v1beta2"
40 | kind: SparkApplication
41 | metadata:
42 | name: tpcds-data-generation-10t
43 | namespace: default
44 | spec:
45 | type: Scala
46 | mode: cluster
47 | image: registry.cn-beijing.aliyuncs.com/zf-spark/spark-2.4.5:for-tpc-ds-2
48 | imagePullPolicy: Always
49 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
50 | mainApplicationFile: "oss:///jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
51 | arguments:
52 | - "--dataset_location"
53 | - "oss:///datasets/"
54 | - "--output_location"
55 | - "oss:///outputs/ack-pr-10t-emr"
56 | - "--iterations"
57 | - "1"
58 | - "--shuffle_partitions"
59 | - "1000"
60 | - "--scale_factor"
61 | - "10000" # size of the generated data; the default unit is GB
62 | - "--regenerate_dataset"
63 | - "true"
64 | - "--regenerate_metadata"
65 | - "true"
66 | - "--only_generate_data_and_meta"
67 | - "true"
68 | - "--format"
69 | - "parquet"
70 | sparkVersion: 2.4.5
71 | restartPolicy:
72 | type: Never
73 | sparkConf:
74 | spark.eventLog.enabled: "true"
75 | spark.eventLog.dir: "oss:///spark/eventlogs"
76 | spark.driver.extraJavaOptions: "-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
77 | spark.driver.maxResultSize: 40g
78 | spark.executor.extraJavaOptions: "-XX:MaxDirectMemorySize=32g -XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
79 | spark.locality.wait.node: "0"
80 | spark.locality.wait.process: "0"
81 | spark.locality.wait.rack: "0"
82 | spark.locality.wait: "0"
83 | spark.memory.fraction: "0.8"
84 | spark.memory.offHeap.enabled: "false"
85 | spark.memory.offHeap.size: "17179869184"
86 | spark.sql.adaptive.bloomFilterJoin.enabled: "false"
87 | spark.sql.adaptive.enabled: "false"
88 | spark.sql.analyze.column.async.delay: "200"
89 | spark.sql.auto.reused.cte.enabled: "true"
90 | spark.sql.broadcastTimeout: "3600"
91 | spark.sql.columnVector.offheap.enabled: "false"
92 | spark.sql.crossJoin.enabled: "true"
93 | spark.sql.delete.optimizeInSubquery: "true"
94 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
95 | spark.sql.dynamic.runtime.filter.enabled: "true"
96 | spark.sql.dynamic.runtime.filter.exact.enabled: "true"
97 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
98 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
99 | spark.sql.files.openCostInBytes: "34108864"
100 | spark.sql.inMemoryColumnarStorage.compressed: "true"
101 | spark.sql.join.preferNativeJoin: "false"
102 | spark.sql.native.codecache: "true"
103 | spark.sql.native.codegen.wholeStage: "false"
104 | spark.sql.native.nativewrite: "false"
105 | spark.sql.pkfk.optimize.enable: "true"
106 | spark.sql.pkfk.riJoinElimination: "true"
107 | spark.sql.shuffle.partitions: "1000"
108 | spark.sql.simplifyDecimal.enabled: "true"
109 | spark.sql.sources.parallelPartitionDiscovery.parallelism: "432"
110 | spark.sql.sources.parallelPartitionDiscovery.threshold: "32"
111 | spark.shuffle.reduceLocality.enabled: "false"
112 | spark.shuffle.service.enabled: "true"
113 | spark.dynamicAllocation.enabled: "false"
114 | driver:
115 | cores: 15
116 | coreLimit: 15000m
117 | memory: 30g
118 | labels:
119 | version: 2.4.5
120 | serviceAccount: spark
121 | env:
122 | - name: TZ
123 | value: "Asia/Shanghai"
124 | executor:
125 | cores: 8
126 | coreLimit: 8000m
127 | instances: 20
128 | memory: 24g
129 | labels:
130 | version: 2.4.5
131 | env:
132 | - name: TZ
133 | value: "Asia/Shanghai"
134 | ```
135 | The complete YAML file is available at [tpcds-data-generation](../../kubernetes/emr/tpcds-data-generation.yaml). The jar referenced by spec.mainApplicationFile can be
136 | [downloaded here](../../kubernetes/emr/jar/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar) and uploaded to your own OSS bucket. A submission sketch follows.
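With the placeholders filled in, the job can be submitted with kubectl; a minimal sketch, assuming the manifest above is saved as `tpcds-data-generation.yaml`:

```shell
# Submit the SparkApplication and watch its state until completion.
kubectl apply -f tpcds-data-generation.yaml
kubectl get sparkapplication tpcds-data-generation-10t -w
```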
--------------------------------------------------------------------------------
/docs/img/alluxio-overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/alluxio-overview.jpg
--------------------------------------------------------------------------------
/docs/img/alluxio_capacity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/alluxio_capacity.png
--------------------------------------------------------------------------------
/docs/img/apache-spark-per-10t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/apache-spark-per-10t.jpg
--------------------------------------------------------------------------------
/docs/img/apache-spark-total-10t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/apache-spark-total-10t.jpg
--------------------------------------------------------------------------------
/docs/img/create_ack_cluster.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/create_ack_cluster.jpeg
--------------------------------------------------------------------------------
/docs/img/emr-spark-ess-jindofs-per-1t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/emr-spark-ess-jindofs-per-1t.jpg
--------------------------------------------------------------------------------
/docs/img/emr-spark-ess-jindofs-total-1t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/emr-spark-ess-jindofs-total-1t.jpg
--------------------------------------------------------------------------------
/docs/img/emr-spark-jindofs-per-1t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/emr-spark-jindofs-per-1t.jpg
--------------------------------------------------------------------------------
/docs/img/emr-spark-jindofs-total-1t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/emr-spark-jindofs-total-1t.jpg
--------------------------------------------------------------------------------
/docs/img/emr-spark-rss-per-10t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/emr-spark-rss-per-10t.jpg
--------------------------------------------------------------------------------
/docs/img/emr-spark-rss-total-10t.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/emr-spark-rss-total-10t.jpg
--------------------------------------------------------------------------------
/docs/img/get_spark_history_svc.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/get_spark_history_svc.jpeg
--------------------------------------------------------------------------------
/docs/img/get_sparkapplication_id.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/get_sparkapplication_id.jpeg
--------------------------------------------------------------------------------
/docs/img/install_spark_history.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/install_spark_history.jpeg
--------------------------------------------------------------------------------
/docs/img/install_spark_operator.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/install_spark_operator.jpeg
--------------------------------------------------------------------------------
/docs/img/jindofs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/jindofs.png
--------------------------------------------------------------------------------
/docs/img/localhost_spark_ui.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/localhost_spark_ui.jpeg
--------------------------------------------------------------------------------
/docs/img/mount_disk.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/mount_disk.jpeg
--------------------------------------------------------------------------------
/docs/img/port-forward_svc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/port-forward_svc.jpg
--------------------------------------------------------------------------------
/docs/img/spark_vs_alluxio.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/spark_vs_alluxio.jpg
--------------------------------------------------------------------------------
/docs/img/sparkapplication_svc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/sparkapplication_svc.jpg
--------------------------------------------------------------------------------
/docs/img/tpcds_per_query.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/img/tpcds_per_query.jpeg
--------------------------------------------------------------------------------
/docs/performance/emr-spark.md:
--------------------------------------------------------------------------------
1 | EMR Spark is a big data processing solution that runs on the Alibaba Cloud platform. It builds on open-source Apache Spark with extensive improvements to performance, functionality, and stability, plus substantial work on integration with Alibaba Cloud's base services. Its core technologies include:
2 |
3 | - SparkSQL transactions, supporting update and delete statements.
4 | - SQL constraints such as PK, FK, and NOT NULL, applied during SQL optimization.
5 | - Relational Cache: materialized views for SparkSQL.
6 | - A multi-tenant, highly available SparkSQL JDBC Server.
7 | - A partial list of SparkSQL performance optimizations:
8 |   - Runtime Filter support.
9 |   - Adaptive Execution, which adjusts job behavior at runtime.
10 |   - Further improvements to CBO Join Reorder, including a genetic algorithm.
11 |   - An optimized shuffle pipeline with asynchronous, non-blocking shuffle IO.
--------------------------------------------------------------------------------
/docs/performance/jindofs.md:
--------------------------------------------------------------------------------
1 | The separation of compute and storage has become a trend in cloud computing. Before it, the prevailing architecture coupled compute and storage, which causes problems. When scaling out, compute capacity and storage capacity rarely need to grow in step, yet a coupled architecture cannot scale one independently of the other. When scaling in, manual intervention may be required, after which data must be re-synchronized across nodes; synchronizing multiple replicas risks data loss. A compute-storage-separated architecture solves these problems and lets users think only about the cluster's compute capacity, though it introduces network latency for data reads and writes.
2 |
3 | JindoFS is a cloud-native file system that combines OSS with local storage. It serves as the new-generation storage system of the E-MapReduce product, providing efficient and reliable storage for the compute layers above it.
4 |
5 | JindoFS offers two storage modes: block mode (Block) and cache mode (Cache).
6 |
7 | JindoFS uses a heterogeneous multi-replica mechanism across local storage and OSS. The Storage Service provides data storage: OSS is the primary storage backend, guaranteeing high data reliability, while local storage holds redundant replicas that accelerate reads. JindoFS metadata is managed by the local Namespace Service, which keeps metadata operations fast (comparable to HDFS metadata performance).
8 |
9 | 
--------------------------------------------------------------------------------
/docs/performance/oss.md:
--------------------------------------------------------------------------------
1 | When running Spark jobs, user data often lives on OSS. For scenarios with many executors and small files, the following settings can improve performance:
2 | ```yaml
3 | fs.oss.paging.maximum: 1000
4 | fs.oss.multipart.download.threads: 32
5 | fs.oss.max.total.tasks: 256
6 | fs.oss.connection.maximum: 2048
7 | ```
8 | More options are documented in [hadoop-aliyun](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html). One way to apply the settings is sketched below.
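For instance, these Hadoop options can be passed to a job through Spark's `spark.hadoop.` prefix; a sketch where the class and jar are placeholders:

```shell
# Each hadoop-aliyun option becomes spark.hadoop.<key> on the Spark side.
spark-submit \
  --class com.example.YourApp \
  --conf spark.hadoop.fs.oss.paging.maximum=1000 \
  --conf spark.hadoop.fs.oss.multipart.download.threads=32 \
  --conf spark.hadoop.fs.oss.max.total.tasks=256 \
  --conf spark.hadoop.fs.oss.connection.maximum=2048 \
  your-application.jar
```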
--------------------------------------------------------------------------------
/docs/performance/remote-shuffle-service.md:
--------------------------------------------------------------------------------
1 | TODO
--------------------------------------------------------------------------------
/docs/performance/serverless-spark/index.md:
--------------------------------------------------------------------------------
1 | # Serverless Spark
2 |
3 | ## 什么是 Serverless Spark?
4 |
5 | [Elastic Container Instance (ECI)](https://help.aliyun.com/ack/serverless-kubernetes/user-guide/eci-pod/) provides a basic container Pod runtime for Kubernetes. By dynamically scheduling the Driver and Executor Pods of a Spark job onto ECI, you get serverless Spark job execution. Each container instance is strongly isolated by a lightweight virtualization security sandbox, so instances do not affect one another.
6 |
7 | 
8 |
9 | Running Spark jobs on ECI has the following advantages:
10 |
11 | - **Huge capacity**: a cluster can obtain up to 20,000 Pods of capacity without extra configuration or advance capacity planning.
12 | - **Second-level elasticity**: thousands of Pods can be created in a very short time to deliver large amounts of compute, without worrying about Pod creation latency during business peaks.
13 | - **Cost savings**: ECI Pods are created on demand and billed by usage, so idle resources are not wasted; Spot instances and mixed instance types are supported to cut costs further.
14 |
15 | ## How do I run Spark jobs on ECI?
16 |
17 | ECI nodes carry a specific label and taint, so running Spark jobs on ECI only requires the following additions to the Driver or Executor Pod. First, add `type: virtual-kubelet` to the nodeSelector so the Pod is scheduled onto ECI. Second, add a toleration for the ECI taint. A complete SparkApplication example follows the snippet:
18 |
19 | ```yaml
20 | nodeSelector:
21 | type: virtual-kubelet
22 |
23 | tolerations:
24 | - key: virtual-kubelet.io/provider
25 | operator: Equal
26 | value: alibabacloud
27 | effect: NoSchedule
28 | ```
29 |
30 | ```yaml
31 | apiVersion: sparkoperator.k8s.io/v1beta2
32 | kind: SparkApplication
33 | metadata:
34 | name: spark-pi
35 | spec:
36 | type: Scala
37 | mode: cluster
38 | image: apache/spark:3.5.0
39 | mainClass: org.apache.spark.examples.SparkPi
40 | mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar
41 | sparkVersion: 3.5.0
42 | driver:
43 | cores: 1
44 | coreLimit: 1200m
45 | memory: 512m
46 | serviceAccount: spark
47 | annotations:
48 | k8s.aliyun.com/eci-use-specs: "2-4Gi"
49 | nodeSelector:
50 | type: virtual-kubelet
51 | tolerations:
52 | - key: virtual-kubelet.io/provider
53 | operator: Equal
54 | value: alibabacloud
55 | effect: NoSchedule
56 | executor:
57 | instances: 2
58 | cores: 2
59 | memory: 4g
60 | annotations:
61 | k8s.aliyun.com/eci-use-specs: "2-4Gi"
62 | nodeSelector:
63 | type: virtual-kubelet
64 | tolerations:
65 | - key: virtual-kubelet.io/provider
66 | operator: Equal
67 | value: alibabacloud
68 | effect: NoSchedule
69 | ```
70 |
71 | In the example above, the annotation `k8s.aliyun.com/eci-use-specs: "2-4Gi"` requests an ECI Pod with 2 vCPUs and 4 GiB of memory. Note that not every vCPU/memory combination is supported; for details, see [Create an ECI Pod by specifying vCPU and memory](https://help.aliyun.com/ack/serverless-kubernetes/user-guide/specify-cpu-and-memory-specifications-to-create-an-elastic-container-instance/).
72 |
73 | There are several ways to specify the ECI Pod's specification, such as specifying vCPU and memory, or specifying an ECS instance type.
74 |
76 |
77 | ## Accelerating image pulls with ImageCache
78 |
79 | ECI supports an image cache feature that lets Kubernetes users accelerate image pulls and speed up Pod creation. Concretely, ECI provides a cluster-scoped CRD named ImageCache; for detailed usage, see [Use image caches to accelerate Pod creation](https://help.aliyun.com/ack/serverless-kubernetes/user-guide/use-image-caches-to-accelerate-the-creation-of-pods/). Existing caches can be listed as sketched below.
80 |
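If the CRD is installed, existing caches can be listed with kubectl; a sketch (the exact resource name may vary across versions):

```shell
# List the cluster-scoped ImageCache resources created so far.
kubectl get imagecache
```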
81 | Sticking with the Spark job above, which uses the community image `apache/spark:3.5.0`, let's compare image pull speed before and after using ImageCache. Before using it, inspect the driver pod's events:
82 |
83 | ```shell
84 | $ kubectl describe pod spark-pi-driver
85 | ...
86 | Events:
87 | Type Reason Age From Message
88 | ---- ------ ---- ---- -------
89 | Normal Scheduled 24m default-scheduler Successfully assigned spark-operator/spark-pi-driver to virtual-kubelet-cn-beijing-i
90 | Normal UserInstanceTypeSpec 24m EciService [eci.containergroup]The user-specified instanceType for current eci instance is 2.0-4.0Gi
91 | Warning ImageCacheMissed 24m EciService [eci.imagecache]Missed image cache.
92 | Normal ImageCacheAutoCreated 24m EciService [eci.imagecache]Image cache imc-2ze5hdcnngenmwc1jmwf is auto created
93 | Normal Pulling 24m kubelet Pulling image "apache/spark:3.5.0"
94 | Normal Pulled 23m kubelet Successfully pulled image "apache/spark:3.5.0" in 1m41.289s (1m41.289s including waiting)
95 | Normal Created 23m kubelet Created container spark-kubernetes-driver
96 | Normal Started 23m kubelet Started container spark-kubernetes-driver
97 | ```
98 |
99 | The events show that the ImageCache was missed and a new ImageCache with ID `imc-2ze5hdcnngenmwc1jmwf` was created automatically. Next, add the following annotation to the Driver and Executor to pin that image cache explicitly:
100 |
101 | ```yaml
102 | annotations:
103 | k8s.aliyun.com/eci-image-snapshot-id: imc-2ze5hdcnngenmwc1jmwf
104 | ```
105 |
106 | Run the job again and observe how fast the driver pod pulls the image:
107 |
108 | ```shell
109 | $ kubectl describe pod spark-pi-driver
110 | ...
111 | Events:
112 | Type Reason Age From Message
113 | ---- ------ ---- ---- -------
114 | Normal Scheduled 13s default-scheduler Successfully assigned spark-operator/spark-pi-driver to virtual-kubelet-cn-beijing-i
115 | Normal UserInstanceTypeSpec 24s EciService [eci.containergroup]The user-specified instanceType for current eci instance is 2.0-4.0Gi
116 | Normal SuccessfulHitImageCache 23s EciService [eci.imagecache]Successfully hit image cache imc-2ze5hdcnngenmwc1jmwf, eci will be scheduled with this image cache.
117 | Normal Pulled 4s kubelet Container image "apache/spark:3.5.0" already present on machine
118 | Normal Created 4s kubelet Created container spark-kubernetes-driver
119 | Normal Started 3s kubelet Started container spark-kubernetes-driver
120 | ```
121 |
122 | The driver pod events show that the image cache was hit, so the image did not need to be pulled again.
123 |
124 | In practice, you can add the following annotation to the Driver/Executor Pods to create and match image caches automatically, without pinning a cache ID:
125 |
126 | ```yaml
127 | annotations:
128 | k8s.aliyun.com/eci-image-cache: "true"
129 | ```
130 |
--------------------------------------------------------------------------------
/docs/performance/serverless-spark/serverless-spark.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/docs/performance/serverless-spark/serverless-spark.jpeg
--------------------------------------------------------------------------------
/docs/performance/spark-operator.md:
--------------------------------------------------------------------------------
1 | Spark on Kubernetes Operator lets users run Spark applications on Kubernetes in the same generic way as other workloads. To make Spark run better on Kubernetes, we have also made several optimizations to the Spark Operator:
2 |
3 | - Compared with the blocking, serial scheduling in the community Spark Operator, the ACK version supports non-blocking parallel scheduling with throughput up to 350 Pods/s, so Spark jobs are scheduled onto nodes quickly.
4 | - Enhanced support in the Spark kernel for Kubernetes-native capabilities such as Tolerations, Labels, and Node Name.
5 | - Dynamic allocation support in the Spark kernel, improving resource utilization by up to 30%.
6 | - Support for running Spark jobs with a custom scheduler.
--------------------------------------------------------------------------------
/docs/quickstart/benchmark_code.md:
--------------------------------------------------------------------------------
1 | 1. [Environment setup](benchmark_env.md)
2 | 2. [Test code development](benchmark_code.md)
3 | 3. [Spark on ACK benchmark](benchmark_steps.md)
4 | 4. [Result analysis](benchmark_result.md)
5 | 5. [Troubleshooting](debugging_guide.md)
6 |
7 | *Note: for convenience, a prebuilt image (registry.cn-beijing.aliyuncs.com/yukong/ack-spark-benchmark:1.0.0) is provided and can be used directly.*
8 |
9 | ### Preparation
10 |
11 | The test code depends on two Databricks tools: the tpcds test package and the test data generation tool tpcds-kit.
12 |
13 | #### 1) Build the tpcds dependency jar
14 |
15 | Databricks' tpcds package: https://github.com/databricks/spark-sql-perf
16 |
17 | ```shell
18 | git clone https://github.com/databricks/spark-sql-perf.git
19 | cd spark-sql-perf && sbt package
20 | ```
21 |
22 | This produces the jar spark-sql-perf_2.11-0.5.1-SNAPSHOT.jar, which the test project uses as a dependency.
23 |
24 |
25 |
26 | #### 2) Compile tpcds-kit
27 |
28 | The standard TPC-DS test data generation tool: https://github.com/databricks/tpcds-kit
29 |
30 | ```shell
31 | git clone https://github.com/davies/tpcds-kit.git
32 | yum install gcc gcc-c++ bison flex cmake ncurses-devel
33 | cd tpcds-kit/tools
34 | cp Makefile.suite Makefile # copy Makefile.suite to Makefile
35 | make
36 | # Verify the build
37 | ./dsqgen --help
38 | ```
39 |
40 | The build produces binary executables; this experiment mainly relies on two of them: dsdgen (data generation) and dsqgen (query generation). A local smoke test of dsdgen is sketched below.
41 |
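Before wiring dsdgen into Spark, it can be smoke-tested locally; a sketch run from the tools/ directory, with `/tmp/tpcds-data` as an arbitrary output path:

```shell
# Generate a 1 GB dataset locally to confirm the build works.
mkdir -p /tmp/tpcds-data
./dsdgen -scale 1 -dir /tmp/tpcds-data
```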
42 | ### Writing the code
43 |
44 | #### 1) Data generation
45 |
46 | DataGeneration.scala
47 |
48 | ```scala
49 | package com.aliyun.spark.benchmark.tpcds
50 |
51 | import com.databricks.spark.sql.perf.tpcds.TPCDSTables
52 | import org.apache.log4j.{Level, LogManager}
53 | import org.apache.spark.sql.SparkSession
54 |
55 | import scala.util.Try
56 |
57 | object DataGeneration {
58 | def main(args: Array[String]) {
59 | val tpcdsDataDir = args(0)
60 | val dsdgenDir = args(1)
61 | val format = Try(args(2).toString).getOrElse("parquet")
62 | val scaleFactor = Try(args(3).toString).getOrElse("1")
63 | val genPartitions = Try(args(4).toInt).getOrElse(100)
64 | val partitionTables = Try(args(5).toBoolean).getOrElse(false)
65 | val clusterByPartitionColumns = Try(args(6).toBoolean).getOrElse(false)
66 | val onlyWarn = Try(args(7).toBoolean).getOrElse(false)
67 |
68 | println(s"DATA DIR is $tpcdsDataDir")
69 | println(s"Tools dsdgen executable located in $dsdgenDir")
70 | println(s"Scale factor is $scaleFactor GB")
71 |
72 | val spark = SparkSession
73 | .builder
74 | .appName(s"TPCDS Generate Data $scaleFactor GB")
75 | .getOrCreate()
76 |
77 | if (onlyWarn) {
78 | println(s"Only WARN")
79 | LogManager.getLogger("org").setLevel(Level.WARN)
80 | }
81 |
82 | val tables = new TPCDSTables(spark.sqlContext,
83 | dsdgenDir = dsdgenDir,
84 | scaleFactor = scaleFactor,
85 | useDoubleForDecimal = false,
86 | useStringForDate = false)
87 |
88 | println(s"Generating TPCDS data")
89 |
90 | tables.genData(
91 | location = tpcdsDataDir,
92 | format = format,
93 | overwrite = true, // overwrite the data that is already there
94 | partitionTables = partitionTables, // create the partitioned fact tables
95 | clusterByPartitionColumns = clusterByPartitionColumns, // shuffle to get partitions coalesced into single files.
96 | filterOutNullPartitionValues = false, // true to filter out the partition with NULL key value
97 | tableFilter = "", // "" means generate all tables
98 | numPartitions = genPartitions) // how many dsdgen partitions to run - number of input tasks.
99 |
100 | println(s"Data generated at $tpcdsDataDir")
101 |
102 | spark.stop()
103 | }
104 | }
105 | ```
106 |
107 | #### 2) Running the queries
108 |
109 | BenchmarkSQL.scala
110 |
111 | ```scala
112 | package com.aliyun.spark.benchmark.tpcds
113 |
114 | import com.databricks.spark.sql.perf.tpcds.{TPCDS, TPCDSTables}
115 | import org.apache.spark.sql.SparkSession
116 | import org.apache.spark.sql.functions._
117 | import org.apache.spark.sql.functions.col
118 | import org.apache.log4j.{Level, LogManager}
119 | import scala.util.Try
120 |
121 | object BenchmarkSQL {
122 | def main(args: Array[String]) {
123 | val tpcdsDataDir = args(0)
124 | val resultLocation = args(1)
125 | val dsdgenDir = args(2)
126 | val format = Try(args(3).toString).getOrElse("parquet")
127 | val scaleFactor = Try(args(4).toString).getOrElse("1")
128 | val iterations = args(5).toInt
129 | val optimizeQueries = Try(args(6).toBoolean).getOrElse(false)
130 | val filterQueries = Try(args(7).toString).getOrElse("")
131 | val onlyWarn = Try(args(8).toBoolean).getOrElse(false)
132 |
133 | val databaseName = "tpcds_db"
134 | val timeout = 24*60*60
135 |
136 | println(s"DATA DIR is $tpcdsDataDir")
137 |
138 | val spark = SparkSession
139 | .builder
140 | .appName(s"TPCDS SQL Benchmark $scaleFactor GB")
141 | .getOrCreate()
142 |
143 | if (onlyWarn) {
144 | println(s"Only WARN")
145 | LogManager.getLogger("org").setLevel(Level.WARN)
146 | }
147 |
148 | val tables = new TPCDSTables(spark.sqlContext,
149 | dsdgenDir = dsdgenDir,
150 | scaleFactor = scaleFactor,
151 | useDoubleForDecimal = false,
152 | useStringForDate = false)
153 |
154 | if (optimizeQueries) {
155 | Try {
156 | spark.sql(s"create database $databaseName")
157 | }
158 | tables.createExternalTables(tpcdsDataDir, format, databaseName, overwrite = true, discoverPartitions = true)
159 | tables.analyzeTables(databaseName, analyzeColumns = true)
160 | spark.conf.set("spark.sql.cbo.enabled", "true")
161 | } else {
162 | tables.createTemporaryTables(tpcdsDataDir, format)
163 | }
164 |
165 | val tpcds = new TPCDS(spark.sqlContext)
166 |
167 | var query_filter : Seq[String] = Seq()
168 | if (!filterQueries.isEmpty) {
169 | println(s"Running only queries: $filterQueries")
170 | query_filter = filterQueries.split(",").toSeq
171 | }
172 |
173 | val filtered_queries = query_filter match {
174 | case Seq() => tpcds.tpcds2_4Queries
175 | case _ => tpcds.tpcds2_4Queries.filter(q => query_filter.contains(q.name))
176 | }
177 |
178 | // Start experiment
179 | val experiment = tpcds.runExperiment(
180 | filtered_queries,
181 | iterations = iterations,
182 | resultLocation = resultLocation,
183 | forkThread = true)
184 |
185 | experiment.waitForFinish(timeout)
186 |
187 | // Collect general results
188 | val resultPath = experiment.resultPath
189 | println(s"Reading result at $resultPath")
190 | val specificResultTable = spark.read.json(resultPath)
191 | specificResultTable.show()
192 |
193 | // Summarize results
194 | val result = specificResultTable
195 | .withColumn("result", explode(col("results")))
196 | .withColumn("executionSeconds", col("result.executionTime")/1000)
197 | .withColumn("queryName", col("result.name"))
198 | result.select("iteration", "queryName", "executionSeconds").show()
199 | println(s"Final results at $resultPath")
200 |
201 | val aggResults = result.groupBy("queryName").agg(
202 | callUDF("percentile", col("executionSeconds").cast("long"), lit(0.5)).as('medianRuntimeSeconds),
203 | callUDF("min", col("executionSeconds").cast("long")).as('minRuntimeSeconds),
204 | callUDF("max", col("executionSeconds").cast("long")).as('maxRuntimeSeconds)
205 | ).orderBy(col("queryName"))
206 | aggResults.repartition(1).write.csv(s"$resultPath/summary.csv")
207 | aggResults.show(105)
208 |
209 | spark.stop()
210 | }
211 | }
212 | ```
213 |
214 | ### Building the image
215 |
216 | After compiling the test code into a jar, package it together with the other dependency jars into an image for the tests. The Dockerfile is as follows, and a build sketch appears after it:
217 |
218 | ```dockerfile
219 | FROM registry.cn-hangzhou.aliyuncs.com/acs/spark:ack-2.4.5-f757ab6
220 | RUN mkdir -p /opt/spark/jars
221 | RUN mkdir -p /tmp/tpcds-kit
222 | COPY ./target/scala-2.11/spark-tpcds-assembly-0.1.jar /opt/spark/jars/
223 | COPY ./lib/*.jar /opt/spark/jars/
224 | COPY ./tpcds-kit/tools.tar.gz /tmp/tpcds-kit/
225 | RUN cd /tmp/tpcds-kit/ && tar -xzvf tools.tar.gz
226 | ```
227 |
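A hypothetical build-and-push sequence; the tag is a placeholder, so substitute your own registry:

```shell
# Build the benchmark image and push it to your registry.
docker build -t registry.example.com/ack-spark-benchmark:1.0.0 .
docker push registry.example.com/ack-spark-benchmark:1.0.0
```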
--------------------------------------------------------------------------------
/docs/quickstart/benchmark_env.md:
--------------------------------------------------------------------------------
1 | 1. [Environment setup](benchmark_env.md)
2 | 2. [Test code development](benchmark_code.md)
3 | 3. [Spark on ACK benchmark](benchmark_steps.md)
4 | 4. [Result analysis](benchmark_result.md)
5 | 5. [Troubleshooting](debugging_guide.md)
6 |
7 | ## Environment preparation
8 |
9 | ### 1) ACK cluster
10 |
11 | Create an ACK standard dedicated cluster with 20 worker nodes, using the big-data network-enhanced instance type ecs.d1ne.6xlarge.
12 |
13 | 
14 |
15 |
16 |
17 | Each ecs.d1ne.6xlarge comes with 12 5 TB HDD data disks, which must be partitioned, formatted, and mounted. For the procedure, see [Partition and format a Linux data disk](https://help.aliyun.com/document_detail/34377.html?spm=a2c4g.11174283.6.813.4be652feB9omRD#title-f8r-9od-yn9). After formatting and mounting, run `df -h`; you should see mounts like the following. The 12 paths under /mnt are used later by Alluxio.
18 |
19 | 
20 |
21 |
22 |
23 | ### 2) OSS
24 |
25 | Activate OSS in the Alibaba Cloud OSS console and create a bucket to hold the data generated by tpc-ds, the test results, and the logs produced during testing. In this experiment the bucket is named cloudnativeai.
26 |
27 |
28 |
29 | ### 3) Install ack-spark-operator
30 |
31 | In the ACK console, find ack-spark-operator under **Marketplace** - **App Catalog** and click the **Create** button on the right to install the Spark operator.
32 |
33 | 
34 |
35 | After installation, run the following command to check that it succeeded:
36 |
37 | ```shell
38 | kubectl get deployment ack-spark-operator -n spark-operator
39 | ```
40 |
41 |
42 |
43 | ### 4) Install ack-spark-history-server
44 |
45 | 
46 |
47 | ack-spark-history-server records the logs and events produced while Spark runs jobs and provides a UI, which helps with troubleshooting.
48 |
49 | Under **Marketplace** - **App Catalog**, find ack-spark-history-server, configure the OSS settings on the parameters page to store the Spark history data, and click the **Create** button on the right to install the Spark history server.
50 |
51 | ```yaml
52 | oss:
53 | enableOSS: true # enable OSS storage for Spark history data
54 | # Please input your accessKeyId
55 | alibabaCloudAccessKeyId: ""
56 | # Please input your accessKeySecret
57 | alibabaCloudAccessKeySecret: ""
58 | # oss bucket endpoint such as oss-cn-beijing.aliyuncs.com
59 | alibabaCloudOSSEndpoint: ""
60 | # oss file path such as oss://bucket-name/path
61 | eventsDir: "oss://cloudnativeai/spark/spark-events"
62 | ```
63 |
64 | After installation, run the following command to check that it succeeded:
65 |
66 | ```shell
67 | kubectl get service ack-spark-history-server -n {YOUR-NAMESPACE}
68 | ```
69 |
70 |
71 |
72 | ### 5) Install Alluxio
73 |
74 | Alluxio is installed into ACK with helm. First download Alluxio:
75 |
76 | ```shell
77 | wget http://kubeflow.oss-cn-beijing.aliyuncs.com/alluxio-0.6.8.tgz
78 | tar -xvf alluxio-0.6.8.tgz
79 | ```
80 |
81 | Then create a config.yaml file in the directory alongside alluxio. The key settings are as follows:
82 |
83 | ```yaml
84 | # Site properties for all the components
85 | properties:
86 | fs.oss.accessKeyId: YOUR-ACCESS-KEY-ID
87 | fs.oss.accessKeySecret: YOUR-ACCESS-KEY-SECRET
88 | fs.oss.endpoint: oss-cn-beijing-internal.aliyuncs.com
89 | alluxio.master.mount.table.root.ufs: oss://cloudnativeai/
90 | alluxio.master.persistence.blacklist: .staging,_temporary
91 | alluxio.security.stale.channel.purge.interval: 365d
92 | alluxio.user.metrics.collection.enabled: 'true'
93 | alluxio.user.short.circuit.enabled: 'true'
94 | alluxio.user.file.write.tier.default: 1
95 | alluxio.user.block.size.bytes.default: 64MB #default 64MB
96 | alluxio.user.file.writetype.default: CACHE_THROUGH
97 | alluxio.user.file.metadata.load.type: ONCE
98 | alluxio.user.file.readtype.default: CACHE
99 | #alluxio.worker.allocator.class: alluxio.worker.block.allocator.MaxFreeAllocator
100 | alluxio.worker.allocator.class: alluxio.worker.block.allocator.RoundRobinAllocator
101 | alluxio.worker.file.buffer.size: 128MB
102 | alluxio.worker.evictor.class: alluxio.worker.block.evictor.LRUEvictor
103 | alluxio.job.master.client.threads: 5000
104 | alluxio.job.worker.threadpool.size: 300
105 | ```
106 |
107 | Replace the OSS accessKey, endpoint, and related settings with your own.
108 |
109 | ```yaml
110 | tieredstore:
111 | levels:
112 | - level: 0
113 | alias: HDD
114 | mediumtype: HDD-0,HDD-1,HDD-2,HDD-3,HDD-4,HDD-5,HDD-6,HDD-7,HDD-8,HDD-9,HDD-10,HDD-11
115 | path: /mnt/disk1,/mnt/disk2,/mnt/disk3,/mnt/disk4,/mnt/disk5,/mnt/disk6,/mnt/disk7,/mnt/disk8,/mnt/disk9,/mnt/disk10,/mnt/disk11,/mnt/disk12
116 | type: hostPath
117 | quota: 1024G,1024G,1024G,1024G,1024G,1024G,1024G,1024G,1024G,1024G,1024G,1024G
118 | high: 0.95
119 | low: 0.7
120 | ```
121 |
122 | The mediumtype and path entries under tieredstore correspond to the data disks mounted on the ACK worker nodes.
123 |
124 | For the complete configuration file, see [config.yaml](../../kubernetes/alluxio/config.yaml). Download config.yaml and adjust the settings, label the worker nodes in the ACK cluster with "alluxio=true" (a sketch follows), and then install Alluxio with helm.
125 |
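Labeling a worker node can be done with kubectl; `<your-worker-node>` is a placeholder for each node's name:

```shell
# Alluxio components are scheduled only onto nodes carrying this label.
kubectl label node <your-worker-node> alluxio=true
```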
126 | Install Alluxio:
127 |
128 | ```shell
129 | helm install -f config.yaml -n alluxio alluxio alluxio
130 | ```
131 |
132 | After installation, run the following command to check that it succeeded:
133 |
134 | ```shell
135 | kubectl get pod -n alluxio
136 | ```
137 |
138 | Then exec into the Alluxio master and check whether the data disks were mounted successfully:
139 |
140 | ```shell
141 | kubectl exec -it alluxio-master-0 -n alluxio -- /bin/bash
142 |
143 | ./bin/alluxio fsadmin report capacity
144 | ```
145 |
146 | If every worker node shows its mounted data disks, Alluxio is installed and configured correctly.
147 |
148 | 
--------------------------------------------------------------------------------
/docs/quickstart/benchmark_result.md:
--------------------------------------------------------------------------------
1 | 1. [Environment setup](benchmark_env.md)
2 | 2. [Test code development](benchmark_code.md)
3 | 3. [Spark on ACK benchmark](benchmark_steps.md)
4 | 4. [Result analysis](benchmark_result.md)
5 | 5. [Troubleshooting](debugging_guide.md)
6 |
7 | ## Benchmark environment
8 |
9 | ### Hardware configuration
10 |
11 | - **ACK cluster**
12 |
13 | | Cluster type | ACK standard dedicated cluster |
14 | | -------------- | ---------------------------------------------------- |
15 | | ECS instances | Instance type: ecs.d1ne.6xlarge<br>Aliyun Linux 2.1903<br>CPU: 24 cores, memory: 96 GB<br>Data disks: 5500 GB HDD x 12 |
16 | | Worker node count | 20 |
17 |
18 |
19 |
20 | ### Software configuration
21 |
22 | - **Software versions**
23 |
24 | spark version: 2.4.5
25 |
26 | alluxio version: 2.3.0
27 |
28 | - **Spark configuration**
29 |
30 | | spark.driver.cores | 5 |
31 | | -------------------------- | ----- |
32 | | spark.driver.memory (MB) | 20480 |
33 | | spark.executor.cores | 7 |
34 | | spark.executor.memory (MB) | 20480 |
35 | | spark.executor.instances | 20 |
36 |
37 | ## Benchmark results
38 |
39 | ### Spark with vs. without Alluxio
40 |
41 | 
42 |
43 | Total query runtime:
44 |
45 |
46 |
47 | | | Total (minutes) |
48 | | ---------------- | ---------- |
49 | | Spark with OSS | 180 |
50 | | Spark with Alluxio Cold | 145 |
51 | | Spark with Alluxio Warm | 137 |
52 |
53 |
54 |
55 | 
--------------------------------------------------------------------------------
/docs/quickstart/debugging_guide.md:
--------------------------------------------------------------------------------
1 | 1. [Environment setup](benchmark_env.md)
2 | 2. [Test code development](benchmark_code.md)
3 | 3. [Spark on ACK benchmark](benchmark_steps.md)
4 | 4. [Result analysis](benchmark_result.md)
5 | 5. [Troubleshooting](debugging_guide.md)
6 |
7 | During testing, misconfigured parameters (memory set too small, for example) can easily cause OOM and similar failures. There are two ways to investigate such problems, illustrated here with a run of tpcds-query-runner-with-alluxio.
8 |
9 | ## spark-ui
10 |
11 | While a job is running, spark-ui shows each SQL query's execution in real time. Access it as follows:
12 |
13 | Run `kubectl get services`; you will see information like the following:
14 |
15 | 
16 |
17 | The service tpcds-benchmark-sql-ui-svc is the one backing spark-ui.
18 |
19 | Run the following command:
20 |
21 | ```shell
22 | kubectl port-forward svc/tpcds-benchmark-sql-ui-svc 4040:4040
23 | ```
24 |
25 | You should see the port-forward take effect.
26 |
27 | Then open localhost:4040 in a browser to view job execution in spark-ui.
30 |
31 | 
32 |
33 |
34 |
35 | ## ack-spark-history-server
36 |
37 | After a job ends, use this approach to view historical data:
38 |
39 | Run the following command to get the SparkApplication ID:
40 |
41 | ```shell
42 | kubectl get sparkapplication tpcds-benchmark-sql -o yaml
43 | ```
44 |
45 | 
46 |
47 | The sparkApplicationId can be read from the figure above.
48 |
49 | Then get the endpoint of ack-spark-history-server:
50 |
51 | ```shell
52 | kubectl get service ack-spark-history-server
53 | ```
54 |
55 | 
56 |
57 | Then open the EXTERNAL-IP and port shown above in a browser to see the history of all Spark jobs, and use the sparkApplicationId from the previous step to locate the matching record. A port-forward alternative is sketched below.
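A hypothetical port-forward alternative, assuming the chart exposes the default Spark history port 18080:

```shell
# Access the history server locally at http://localhost:18080.
kubectl port-forward service/ack-spark-history-server 18080:18080
```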
--------------------------------------------------------------------------------
/kubernetes/emr/jar/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/kubernetes/emr/jar/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar
--------------------------------------------------------------------------------
/kubernetes/emr/tpcds-benchmark-with-emrspark-ess-jindofs.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-benchmark-emrspark-ess-jindofs-1t
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/zf-spark/spark-2.4.5:for-tpc-ds-2
10 | imagePullPolicy: Always
11 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
12 | mainApplicationFile: "jfs://default/jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
13 | arguments:
14 | - "--dataset_location"
15 | - "jfs://default/datasets/"
16 | - "--output_location"
17 | - "jfs://default/results-1t/"
18 | - "--iterations"
19 | - "1"
20 | - "--shuffle_partitions"
21 | - "1000"
22 | - "--scale_factor"
23 | - "1000"
24 | - "--regenerate_dataset"
25 | - "false"
26 | - "--regenerate_metadata"
27 | - "false"
28 | - "--only_generate_data_and_meta"
29 | - "false"
30 | - "--db_suffix"
31 | - "cluster_180405"
32 | - "--query_exclude_list"
33 | - "q23a,q23b,q24a,q24b,q77"
34 | - "--format"
35 | - "parquet"
36 | sparkVersion: 2.4.5
37 | restartPolicy:
38 | type: Never
39 | sparkConf:
40 | spark.driver.extraLibraryPath: /opt/spark/lib/native
41 | spark.executor.extraLibraryPath: /opt/spark/lib/native
42 | # CBO (cost-based optimizer) settings
43 | spark.sql.cbo.enabled: "true"
44 | spark.sql.cbo.joinReorder.enabled: "true"
45 | spark.sql.cbo.joinReorder.dp.star.filter: "false"
46 | spark.sql.cbo.joinReorder.dp.threshold: "12"
47 | spark.sql.cbo.outerJoinReorder.enabled: "true"
48 | # Runtime filter (RF) settings
49 | spark.sql.dynamic.runtime.filter.enabled: "true"
50 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
51 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
52 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
53 | spark.sql.emr.fileindex.enabled: "false"
54 | spark.sql.intersect.groupby.placement: "true"
55 | spark.sql.extract.common.conjunct.filter: "true"
56 | spark.sql.infer.filter.from.joincondition: "true"
57 | spark.dynamicAllocation.enabled: "false"
58 | spark.ess.master.host: emr-rss-master.spark-rss
59 | spark.ess.master.port: "9099"
60 | spark.ess.rpc.io.clientThreads: "8"
61 | spark.ess.data.io.clientThreads: "8"
62 | spark.ess.data.io.numConnectionsPerPeer: "8"
63 | spark.ess.data.io.mode: NIO
64 | spark.shuffle.manager: org.apache.spark.shuffle.ess.EssShuffleManager
65 | spark.sql.uncorrelated.scalar.subquery.preexecution.enabled: "true"
66 | driver:
67 | cores: 5
68 | coreLimit: 5000m
69 | memory: 20g
70 | labels:
71 | version: 2.4.5
72 | serviceAccount: spark
73 | env:
74 | - name: TZ
75 | value: "Asia/Shanghai"
76 | - name: CLIENT_NAMESPACE_RPC_ADDRESS
77 | value: jindofs-master.jindofs:8101
78 | - name: CLIENT_STORAGE_RPC_PORT
79 | value: "6101"
80 | - name: CLIENT_STORAGE_RPC_HOST
81 | valueFrom:
82 | fieldRef:
83 | fieldPath: status.hostIP
84 | - name: JFS_CACHE_DATA_CACHE_ENABLE
85 | value: "1"
86 | executor:
87 | cores: 7
88 | coreLimit: 7000m
89 | instances: 20
90 | memory: 20g
91 | memoryOverhead: 6g
92 | labels:
93 | version: 2.4.5
94 | env:
95 | - name: SPARKLOGENV
96 | value: spark-executor
97 | - name: TZ
98 | value: "Asia/Shanghai"
99 | - name: CLIENT_NAMESPACE_RPC_ADDRESS
100 | value: jindofs-master.jindofs:8101
101 | - name: CLIENT_STORAGE_RPC_PORT
102 | value: "6101"
103 | - name: CLIENT_STORAGE_RPC_HOST
104 | valueFrom:
105 | fieldRef:
106 | fieldPath: status.hostIP
107 | - name: JFS_CACHE_DATA_CACHE_ENABLE
108 | value: "1"
109 |
--------------------------------------------------------------------------------
/kubernetes/emr/tpcds-benchmark-with-emrspark-ess.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-benchmark-emrspark-ess-10t
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/zf-spark/spark-2.4.5:for-tpc-ds-2
10 | imagePullPolicy: Always
11 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
12 | mainApplicationFile: "oss:///jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
13 | arguments:
14 | - "--dataset_location"
15 | - "oss:///datasets/"
16 | - "--output_location"
17 | - "oss:///outputs/ack-pr-10t-emr-with-ess"
18 | - "--iterations"
19 | - "1"
20 | - "--shuffle_partitions"
21 | - "1000"
22 | - "--scale_factor"
23 | - "10000"
24 | - "--regenerate_dataset"
25 | - "false"
26 | - "--regenerate_metadata"
27 | - "false"
28 | - "--only_generate_data_and_meta"
29 | - "false"
30 | - "--format"
31 | - "parquet"
32 | - "--query_exclude_list"
33 | - "q14a,q14b,q67"
34 | sparkVersion: 2.4.5
35 | restartPolicy:
36 | type: Never
37 | hadoopConf:
38 | "fs.oss.impl": "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"
39 | "fs.oss.endpoint": ""
40 | "fs.oss.accessKeyId": ""
41 | "fs.oss.accessKeySecret": ""
42 | hive.metastore.uris: thrift://service-hive-metastore.default:9083
43 | hive.metastore.client.socket.timeout: 600s
44 | sparkConf:
45 | spark.eventLog.enabled: "true"
46 | spark.eventLog.dir: "oss:///spark/eventlogs"
47 | spark.driver.extraJavaOptions: "-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
48 | spark.driver.maxResultSize: 40g
49 | spark.executor.extraJavaOptions: "-XX:MaxDirectMemorySize=6g -XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
50 | spark.locality.wait.node: "0"
51 | spark.locality.wait.process: "0"
52 | spark.locality.wait.rack: "0"
53 | spark.locality.wait: "0"
54 | spark.memory.fraction: "0.8"
55 | spark.memory.offHeap.enabled: "false"
56 | spark.memory.offHeap.size: "17179869184"
57 | spark.sql.adaptive.bloomFilterJoin.enabled: "false"
58 | spark.sql.adaptive.enabled: "false"
59 | spark.sql.analyze.column.async.delay: "200"
60 | spark.sql.auto.reused.cte.enabled: "true"
61 | spark.sql.broadcastTimeout: "3600"
62 | spark.sql.columnVector.offheap.enabled: "false"
63 | spark.sql.crossJoin.enabled: "true"
64 | spark.sql.delete.optimizeInSubquery: "true"
65 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
66 | spark.sql.dynamic.runtime.filter.enabled: "true"
67 | spark.sql.dynamic.runtime.filter.exact.enabled: "true"
68 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
69 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
70 | spark.sql.files.openCostInBytes: "34108864"
71 | spark.sql.inMemoryColumnarStorage.compressed: "true"
72 | spark.sql.join.preferNativeJoin: "false"
73 | spark.sql.native.codecache: "true"
74 | spark.sql.native.codegen.wholeStage: "false"
75 | spark.sql.native.nativewrite: "false"
76 | spark.sql.pkfk.optimize.enable: "true"
77 | spark.sql.pkfk.riJoinElimination: "true"
78 | spark.sql.shuffle.partitions: "1000"
79 | spark.sql.simplifyDecimal.enabled: "true"
80 | spark.sql.sources.parallelPartitionDiscovery.parallelism: "432"
81 | spark.sql.sources.parallelPartitionDiscovery.threshold: "32"
82 | spark.shuffle.reduceLocality.enabled: "false"
83 | spark.shuffle.service.enabled: "true"
84 | spark.dynamicAllocation.enabled: "false"
85 | spark.shuffle.manager: org.apache.spark.shuffle.ess.EssShuffleManager
86 | spark.ess.master.host: emr-rss-master.spark-rss
87 | spark.ess.master.port: "9099"
88 | spark.ess.push.data.buffer.size: 64k
89 | spark.ess.push.data.max.inflight: "2048"
90 | spark.ess.rpc.io.clientThreads: "8"
91 | spark.ess.rpc.io.serverThreads: "8"
92 | spark.ess.data.io.clientThreads: "8"
93 | spark.ess.data.io.numConnectionsPerPeer: "8"
94 | driver:
95 | cores: 15
96 | coreLimit: 15000m
97 | memory: 50g
98 | labels:
99 | version: 2.4.5
100 | serviceAccount: spark
101 | env:
102 | - name: TZ
103 | value: "Asia/Shanghai"
104 | executor:
105 | cores: 4
106 | coreLimit: 6000m
107 | instances: 20
108 | memory: 24g
109 | memoryOverhead: 10g
110 | deleteOnTermination: false
111 | labels:
112 | version: 2.4.5
113 | env:
114 | - name: TZ
115 | value: "Asia/Shanghai"
--------------------------------------------------------------------------------
/kubernetes/emr/tpcds-benchmark-with-emrspark-jindofs.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-benchmark-emrspark-ess-1t
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/zf-spark/spark-2.4.5:for-tpc-ds-2
10 | imagePullPolicy: Always
11 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
12 | mainApplicationFile: "oss:///jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
13 | arguments:
14 | - "--dataset_location"
15 | - "oss:///datasets/"
16 | - "--output_location"
17 | - "oss:///outputs/ack-pr-10t-emr-with-ess"
18 | - "--iterations"
19 | - "1"
20 | - "--shuffle_partitions"
21 | - "1000"
22 | - "--scale_factor"
23 | - "10000"
24 | - "--regenerate_dataset"
25 | - "false"
26 | - "--regenerate_metadata"
27 | - "false"
28 | - "--only_generate_data_and_meta"
29 | - "false"
30 | - "--format"
31 | - "parquet"
32 | - "--query_exclude_list"
33 | - "q14a,q14b,q67"
34 | sparkVersion: 2.4.5
35 | restartPolicy:
36 | type: Never
37 | hadoopConf:
38 | hive.metastore.uris: thrift://service-hive-metastore.default:9083
39 | hive.metastore.client.socket.timeout: 600s
40 | sparkConf:
41 | spark.eventLog.enabled: "true"
42 | spark.eventLog.dir: "oss:///spark/eventlogs"
43 | spark.driver.extraJavaOptions: "-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
44 | spark.driver.maxResultSize: 40g
45 | spark.executor.extraJavaOptions: "-XX:MaxDirectMemorySize=6g -XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
46 | spark.locality.wait.node: "0"
47 | spark.locality.wait.process: "0"
48 | spark.locality.wait.rack: "0"
49 | spark.locality.wait: "0"
50 | spark.memory.fraction: "0.8"
51 | spark.memory.offHeap.enabled: "false"
52 | spark.memory.offHeap.size: "17179869184"
53 | spark.sql.adaptive.bloomFilterJoin.enabled: "false"
54 | spark.sql.adaptive.enabled: "false"
55 | spark.sql.analyze.column.async.delay: "200"
56 | spark.sql.auto.reused.cte.enabled: "true"
57 | spark.sql.broadcastTimeout: "3600"
58 | spark.sql.columnVector.offheap.enabled: "false"
59 | spark.sql.crossJoin.enabled: "true"
60 | spark.sql.delete.optimizeInSubquery: "true"
61 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
62 | spark.sql.dynamic.runtime.filter.enabled: "true"
63 | spark.sql.dynamic.runtime.filter.exact.enabled: "true"
64 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
65 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
66 | spark.sql.files.openCostInBytes: "34108864"
67 | spark.sql.inMemoryColumnarStorage.compressed: "true"
68 | spark.sql.join.preferNativeJoin: "false"
69 | spark.sql.native.codecache: "true"
70 | spark.sql.native.codegen.wholeStage: "false"
71 | spark.sql.native.nativewrite: "false"
72 | spark.sql.pkfk.optimize.enable: "true"
73 | spark.sql.pkfk.riJoinElimination: "true"
74 | spark.sql.shuffle.partitions: "1000"
75 | spark.sql.simplifyDecimal.enabled: "true"
76 | spark.sql.sources.parallelPartitionDiscovery.parallelism: "432"
77 | spark.sql.sources.parallelPartitionDiscovery.threshold: "32"
78 | spark.shuffle.reduceLocality.enabled: "false"
79 | spark.shuffle.service.enabled: "true"
80 | spark.dynamicAllocation.enabled: "false"
81 | spark.local.dir: /mnt/diskb/spark-data,/mnt/diskc/spark-data,/mnt/diskd/spark-data,/mnt/diske/spark-data,/mnt/diskf/spark-data,/mnt/diskg/spark-data,/mnt/diskh/spark-data,/mnt/diski/spark-data,/mnt/diskj/spark-data,/mnt/diskk/spark-data,/mnt/diskl/spark-data,/mnt/diskm/spark-data
82 | spark.shuffle.manager: org.apache.spark.shuffle.sort.SortShuffleManager
83 | volumes:
84 | - name: diskb
85 | hostPath:
86 | path: /mnt/diskb
87 | type: Directory
88 | - name: diskc
89 | hostPath:
90 | path: /mnt/diskc
91 | type: Directory
92 | - name: diskd
93 | hostPath:
94 | path: /mnt/diskd
95 | type: Directory
96 | - name: diske
97 | hostPath:
98 | path: /mnt/diske
99 | type: Directory
100 | - name: diskf
101 | hostPath:
102 | path: /mnt/diskf
103 | type: Directory
104 | - name: diskg
105 | hostPath:
106 | path: /mnt/diskg
107 | type: Directory
108 | - name: diskh
109 | hostPath:
110 | path: /mnt/diskh
111 | type: Directory
112 | - name: diski
113 | hostPath:
114 | path: /mnt/diski
115 | type: Directory
116 | - name: diskj
117 | hostPath:
118 | path: /mnt/diskj
119 | type: Directory
120 | - name: diskk
121 | hostPath:
122 | path: /mnt/diskk
123 | type: Directory
124 | - name: diskl
125 | hostPath:
126 | path: /mnt/diskl
127 | type: Directory
128 | - name: diskm
129 | hostPath:
130 | path: /mnt/diskm
131 | type: Directory
132 | driver:
133 | cores: 15
134 | coreLimit: 15000m
135 | memory: 50g
136 | labels:
137 | version: 2.4.5
138 | serviceAccount: spark
139 | env:
140 | - name: TZ
141 | value: "Asia/Shanghai"
142 | executor:
143 | cores: 4
144 | coreLimit: 6000m
145 | instances: 20
146 | memory: 24g
147 | memoryOverhead: 10g
148 | deleteOnTermination: false
149 | labels:
150 | version: 2.4.5
151 | env:
152 | - name: TZ
153 | value: "Asia/Shanghai"
154 | volumeMounts:
155 | - mountPath: /mnt/diskb
156 | name: diskb
157 | - mountPath: /mnt/diskc
158 | name: diskc
159 | - mountPath: /mnt/diskd
160 | name: diskd
161 | - mountPath: /mnt/diske
162 | name: diske
163 | - mountPath: /mnt/diskf
164 | name: diskf
165 | - mountPath: /mnt/diskg
166 | name: diskg
167 | - mountPath: /mnt/diskh
168 | name: diskh
169 | - mountPath: /mnt/diski
170 | name: diski
171 | - mountPath: /mnt/diskj
172 | name: diskj
173 | - mountPath: /mnt/diskk
174 | name: diskk
175 | - mountPath: /mnt/diskl
176 | name: diskl
177 | - mountPath: /mnt/diskm
178 | name: diskm
--------------------------------------------------------------------------------
/kubernetes/emr/tpcds-benchmark-with-emrspark.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-benchmark-emrspark-10t
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/zf-spark/spark-2.4.5:for-tpc-ds-2
10 | imagePullPolicy: Always
11 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
12 | mainApplicationFile: "oss:///jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
13 | arguments:
14 | - "--dataset_location"
15 | - "oss:///datasets/"
16 | - "--output_location"
17 | - "oss:///outputs/ack-pr-10t-emr"
18 | - "--iterations"
19 | - "1"
20 | - "--shuffle_partitions"
21 | - "1000"
22 | - "--scale_factor"
23 | - "10000"
24 | - "--regenerate_dataset"
25 | - "false"
26 | - "--regenerate_metadata"
27 | - "false"
28 | - "--only_generate_data_and_meta"
29 | - "false"
30 | - "--format"
31 | - "parquet"
32 | - "--query_exclude_list"
33 | - "q14a,q14b,q67"
34 | sparkVersion: 2.4.5
35 | restartPolicy:
36 | type: Never
37 | hadoopConf:
38 | "fs.oss.impl": "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"
39 | "fs.oss.endpoint": ""
40 | "fs.oss.accessKeyId": ""
41 | "fs.oss.accessKeySecret": ""
42 | hive.metastore.uris: thrift://service-hive-metastore.default:9083
43 | hive.metastore.client.socket.timeout: 600s
44 | sparkConf:
45 | spark.eventLog.enabled: "true"
46 | spark.eventLog.dir: "oss:///spark/eventlogs"
47 | spark.driver.extraJavaOptions: "-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
48 | spark.driver.maxResultSize: 40g
49 | spark.executor.extraJavaOptions: "-XX:MaxDirectMemorySize=6g -XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
50 | spark.locality.wait.node: "0"
51 | spark.locality.wait.process: "0"
52 | spark.locality.wait.rack: "0"
53 | spark.locality.wait: "0"
54 | spark.memory.fraction: "0.8"
55 | spark.memory.offHeap.enabled: "false"
56 | spark.memory.offHeap.size: "17179869184"
57 | spark.sql.adaptive.bloomFilterJoin.enabled: "false"
58 | spark.sql.adaptive.enabled: "false"
59 | spark.sql.analyze.column.async.delay: "200"
60 | spark.sql.auto.reused.cte.enabled: "true"
61 | spark.sql.broadcastTimeout: "3600"
62 | spark.sql.columnVector.offheap.enabled: "false"
63 | spark.sql.crossJoin.enabled: "true"
64 | spark.sql.delete.optimizeInSubquery: "true"
65 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
66 | spark.sql.dynamic.runtime.filter.enabled: "true"
67 | spark.sql.dynamic.runtime.filter.exact.enabled: "true"
68 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
69 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
70 | spark.sql.files.openCostInBytes: "34108864"
71 | spark.sql.inMemoryColumnarStorage.compressed: "true"
72 | spark.sql.join.preferNativeJoin: "false"
73 | spark.sql.native.codecache: "true"
74 | spark.sql.native.codegen.wholeStage: "false"
75 | spark.sql.native.nativewrite: "false"
76 | spark.sql.pkfk.optimize.enable: "true"
77 | spark.sql.pkfk.riJoinElimination: "true"
78 | spark.sql.shuffle.partitions: "1000"
79 | spark.sql.simplifyDecimal.enabled: "true"
80 | spark.sql.sources.parallelPartitionDiscovery.parallelism: "432"
81 | spark.sql.sources.parallelPartitionDiscovery.threshold: "32"
82 | spark.shuffle.reduceLocality.enabled: "false"
83 | spark.shuffle.service.enabled: "false"
84 | spark.dynamicAllocation.enabled: "false"
85 | spark.local.dir: /mnt/diskb/spark-data,/mnt/diskc/spark-data,/mnt/diskd/spark-data,/mnt/diske/spark-data,/mnt/diskf/spark-data,/mnt/diskg/spark-data,/mnt/diskh/spark-data,/mnt/diski/spark-data,/mnt/diskj/spark-data,/mnt/diskk/spark-data,/mnt/diskl/spark-data,/mnt/diskm/spark-data
86 | spark.shuffle.manager: org.apache.spark.shuffle.sort.SortShuffleManager
87 | volumes:
88 | - name: diskb
89 | hostPath:
90 | path: /mnt/diskb
91 | type: Directory
92 | - name: diskc
93 | hostPath:
94 | path: /mnt/diskc
95 | type: Directory
96 | - name: diskd
97 | hostPath:
98 | path: /mnt/diskd
99 | type: Directory
100 | - name: diske
101 | hostPath:
102 | path: /mnt/diske
103 | type: Directory
104 | - name: diskf
105 | hostPath:
106 | path: /mnt/diskf
107 | type: Directory
108 | - name: diskg
109 | hostPath:
110 | path: /mnt/diskg
111 | type: Directory
112 | - name: diskh
113 | hostPath:
114 | path: /mnt/diskh
115 | type: Directory
116 | - name: diski
117 | hostPath:
118 | path: /mnt/diski
119 | type: Directory
120 | - name: diskj
121 | hostPath:
122 | path: /mnt/diskj
123 | type: Directory
124 | - name: diskk
125 | hostPath:
126 | path: /mnt/diskk
127 | type: Directory
128 | - name: diskl
129 | hostPath:
130 | path: /mnt/diskl
131 | type: Directory
132 | - name: diskm
133 | hostPath:
134 | path: /mnt/diskm
135 | type: Directory
136 | driver:
137 | cores: 15
138 | coreLimit: 15000m
139 | memory: 50g
140 | labels:
141 | version: 2.4.5
142 | serviceAccount: spark
143 | env:
144 | - name: TZ
145 | value: "Asia/Shanghai"
146 | executor:
147 | cores: 4
148 | coreLimit: 6000m
149 | instances: 20
150 | memory: 24g
151 | memoryOverhead: 10g
152 | deleteOnTermination: false
153 | labels:
154 | version: 2.4.5
155 | env:
156 | - name: TZ
157 | value: "Asia/Shanghai"
158 | volumeMounts:
159 | - mountPath: /mnt/diskb
160 | name: diskb
161 | - mountPath: /mnt/diskc
162 | name: diskc
163 | - mountPath: /mnt/diskd
164 | name: diskd
165 | - mountPath: /mnt/diske
166 | name: diske
167 | - mountPath: /mnt/diskf
168 | name: diskf
169 | - mountPath: /mnt/diskg
170 | name: diskg
171 | - mountPath: /mnt/diskh
172 | name: diskh
173 | - mountPath: /mnt/diski
174 | name: diski
175 | - mountPath: /mnt/diskj
176 | name: diskj
177 | - mountPath: /mnt/diskk
178 | name: diskk
179 | - mountPath: /mnt/diskl
180 | name: diskl
181 | - mountPath: /mnt/diskm
182 | name: diskm
--------------------------------------------------------------------------------
/kubernetes/emr/tpcds-data-generation.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-data-generation-10t
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/zf-spark/spark-2.4.5:for-tpc-ds-2
10 | imagePullPolicy: Always
11 | mainClass: com.databricks.spark.sql.perf.tpcds.TPCDS_Standalone
12 | mainApplicationFile: "oss:///jars/spark-sql-perf-assembly-0.5.0-SNAPSHOT.jar"
13 | arguments:
14 | - "--dataset_location"
15 | - "oss:///datasets/"
16 | - "--output_location"
17 | - "oss:///outputs/ack-pr-10t-emr"
18 | - "--iterations"
19 | - "1"
20 | - "--shuffle_partitions"
21 | - "1000"
22 | - "--scale_factor"
23 | - "10000"
24 | - "--regenerate_dataset"
25 | - "true"
26 | - "--regenerate_metadata"
27 | - "true"
28 | - "--only_generate_data_and_meta"
29 | - "true"
30 | - "--format"
31 | - "parquet"
32 | sparkVersion: 2.4.5
33 | restartPolicy:
34 | type: Never
35 | sparkConf:
36 | spark.eventLog.enabled: "true"
37 | spark.eventLog.dir: "oss:///spark/eventlogs"
38 | spark.driver.extraJavaOptions: "-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
39 | spark.driver.maxResultSize: 40g
40 | spark.executor.extraJavaOptions: "-XX:MaxDirectMemorySize=32g -XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
41 | spark.locality.wait.node: "0"
42 | spark.locality.wait.process: "0"
43 | spark.locality.wait.rack: "0"
44 | spark.locality.wait: "0"
45 | spark.memory.fraction: "0.8"
46 | spark.memory.offHeap.enabled: "false"
47 | spark.memory.offHeap.size: "17179869184"
48 | spark.sql.adaptive.bloomFilterJoin.enabled: "false"
49 | spark.sql.adaptive.enabled: "false"
50 | spark.sql.analyze.column.async.delay: "200"
51 | spark.sql.auto.reused.cte.enabled: "true"
52 | spark.sql.broadcastTimeout: "3600"
53 | spark.sql.columnVector.offheap.enabled: "false"
54 | spark.sql.crossJoin.enabled: "true"
55 | spark.sql.delete.optimizeInSubquery: "true"
56 | spark.sql.dynamic.runtime.filter.bbf.enabled: "false"
57 | spark.sql.dynamic.runtime.filter.enabled: "true"
58 | spark.sql.dynamic.runtime.filter.exact.enabled: "true"
59 | spark.sql.dynamic.runtime.filter.table.size.lower.limit: "1069547520"
60 | spark.sql.dynamic.runtime.filter.table.size.upper.limit: "5368709120"
61 | spark.sql.files.openCostInBytes: "34108864"
62 | spark.sql.inMemoryColumnarStorage.compressed: "true"
63 | spark.sql.join.preferNativeJoin: "false"
64 | spark.sql.native.codecache: "true"
65 | spark.sql.native.codegen.wholeStage: "false"
66 | spark.sql.native.nativewrite: "false"
67 | spark.sql.pkfk.optimize.enable: "true"
68 | spark.sql.pkfk.riJoinElimination: "true"
69 | spark.sql.shuffle.partitions: "1000"
70 | spark.sql.simplifyDecimal.enabled: "true"
71 | spark.sql.sources.parallelPartitionDiscovery.parallelism: "432"
72 | spark.sql.sources.parallelPartitionDiscovery.threshold: "32"
73 | spark.shuffle.reduceLocality.enabled: "false"
74 | spark.shuffle.service.enabled: "true"
75 | spark.dynamicAllocation.enabled: "false"
76 | driver:
77 | cores: 15
78 | coreLimit: 15000m
79 | memory: 30g
80 | labels:
81 | version: 2.4.5
82 | serviceAccount: spark
83 | env:
84 | - name: TZ
85 | value: "Asia/Shanghai"
86 | executor:
87 | cores: 8
88 | coreLimit: 8000m
89 | instances: 20
90 | memory: 24g
91 | labels:
92 | version: 2.4.5
93 | env:
94 | - name: TZ
95 | value: "Asia/Shanghai"
--------------------------------------------------------------------------------
/kubernetes/spark/tpcds-benchmark-with-alluxio.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-benchmark-with-alluxio
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/yukong/ack-spark-benchmark:1.0.0
10 | imagePullPolicy: Always
11 | sparkVersion: 2.4.5
12 | mainClass: com.aliyun.spark.benchmark.tpcds.BenchmarkSQL
13 | mainApplicationFile: "local:///opt/spark/jars/ack-spark-benchmark-assembly-0.1.jar"
14 | arguments:
15 | # TPC-DS data location
16 | - "alluxio://alluxio-master-0.alluxio.svc.cluster.local:19998/spark/data/tpc-ds-data/1000g"
17 | # Results location
18 | - "oss:///spark/result/tpcds-benchmark-result-1000g-with-alluxio"
19 | # Path to kit in the docker image
20 | - "/tmp/tpcds-kit/tools"
21 | # Data Format
22 | - "parquet"
23 | # Scale factor (in GB)
24 | - "1000"
25 | # Number of iterations
26 | - "1"
27 | # Optimize queries
28 | - "false"
29 | # Filter queries to run; runs all if empty, e.g. "q70-v2.4,q82-v2.4,q64-v2.4"
30 | - ""
31 | # Logging set to WARN
32 | - "true"
33 | hostNetwork: true
34 | dnsPolicy: ClusterFirstWithHostNet
35 | restartPolicy:
36 | type: Never
37 | timeToLiveSeconds: 86400
38 | hadoopConf:
39 | # OSS
40 | "fs.oss.impl": "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"
41 | "fs.oss.endpoint": ""
42 | "fs.oss.accessKeyId": ""
43 | "fs.oss.accessKeySecret": ""
44 | # OSS performance best practice
45 | "fs.oss.paging.maximum": 1000
46 | "fs.oss.multipart.download.threads": 32
47 | "fs.oss.max.total.tasks": 256
48 | "fs.oss.connection.maximum": 2048
49 | sparkConf:
50 | "spark.kubernetes.allocation.batch.size": "200"
51 | "spark.sql.adaptive.join.enabled": "true"
52 | "spark.eventLog.enabled": "true"
53 | "spark.eventLog.dir": "oss:///spark/spark-events"
54 | volumes:
55 | - name: "spark-local-dir-1"
56 | hostPath:
57 | path: "/mnt/disk1"
58 | type: Directory
59 | - name: "spark-local-dir-2"
60 | hostPath:
61 | path: "/mnt/disk2"
62 | type: Directory
63 | - name: "spark-local-dir-3"
64 | hostPath:
65 | path: "/mnt/disk3"
66 | type: Directory
67 | - name: "spark-local-dir-4"
68 | hostPath:
69 | path: "/mnt/disk4"
70 | type: Directory
71 | - name: "spark-local-dir-5"
72 | hostPath:
73 | path: "/mnt/disk5"
74 | type: Directory
75 | - name: "spark-local-dir-6"
76 | hostPath:
77 | path: "/mnt/disk6"
78 | type: Directory
79 | - name: "spark-local-dir-7"
80 | hostPath:
81 | path: "/mnt/disk7"
82 | type: Directory
83 | - name: "spark-local-dir-8"
84 | hostPath:
85 | path: "/mnt/disk8"
86 | type: Directory
87 | - name: "spark-local-dir-9"
88 | hostPath:
89 | path: "/mnt/disk9"
90 | type: Directory
91 | - name: "spark-local-dir-10"
92 | hostPath:
93 | path: "/mnt/disk10"
94 | type: Directory
95 | - name: "spark-local-dir-11"
96 | hostPath:
97 | path: "/mnt/disk11"
98 | type: Directory
99 | - name: "spark-local-dir-12"
100 | hostPath:
101 | path: "/mnt/disk12"
102 | type: Directory
103 | driver:
104 | cores: 5
105 | memory: "20480m"
106 | labels:
107 | version: 2.4.5
108 | spark-app: spark-tpcds
109 | role: driver
110 | serviceAccount: spark
111 | executor:
112 | cores: 7
113 | instances: 20
114 | memory: "20480m"
115 | memoryOverhead: "8g"
116 | labels:
117 | version: 2.4.5
118 | role: executor
119 | volumeMounts:
120 | - name: "spark-local-dir-1"
121 | mountPath: "/mnt/disk1"
122 | - name: "spark-local-dir-2"
123 | mountPath: "/mnt/disk2"
124 | - name: "spark-local-dir-3"
125 | mountPath: "/mnt/disk3"
126 | - name: "spark-local-dir-4"
127 | mountPath: "/mnt/disk4"
128 | - name: "spark-local-dir-5"
129 | mountPath: "/mnt/disk5"
130 | - name: "spark-local-dir-6"
131 | mountPath: "/mnt/disk6"
132 | - name: "spark-local-dir-7"
133 | mountPath: "/mnt/disk7"
134 | - name: "spark-local-dir-8"
135 | mountPath: "/mnt/disk8"
136 | - name: "spark-local-dir-9"
137 | mountPath: "/mnt/disk9"
138 | - name: "spark-local-dir-10"
139 | mountPath: "/mnt/disk10"
140 | - name: "spark-local-dir-11"
141 | mountPath: "/mnt/disk11"
142 | - name: "spark-local-dir-12"
143 | mountPath: "/mnt/disk12"
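Note: a quick way to sanity-check the dataset referenced above is to read one table straight from Alluxio in a spark-shell. This is a minimal sketch, assuming the Alluxio client is on the classpath and the per-table directory layout that the spark-sql-perf data generator produces (store_sales is one of the standard TPC-DS fact tables); the endpoint is copied from the manifest:

    // Sketch: confirm the Alluxio-backed dataset is readable before benchmarking.
    // Assumes a <dataset_location>/<table> layout; adjust the table name as needed.
    val storeSales = spark.read.parquet(
      "alluxio://alluxio-master-0.alluxio.svc.cluster.local:19998/spark/data/tpc-ds-data/1000g/store_sales")
    storeSales.printSchema()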
--------------------------------------------------------------------------------
/kubernetes/spark/tpcds-benchmark.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-benchmark
5 | namespace: default
6 | spec:
7 | type: Scala
8 | mode: cluster
9 | image: registry.cn-beijing.aliyuncs.com/yukong/ack-spark-benchmark:1.0.0
10 | imagePullPolicy: Always
11 | sparkVersion: 2.4.5
12 | mainClass: com.aliyun.spark.benchmark.tpcds.BenchmarkSQL
13 | mainApplicationFile: "local:///opt/spark/jars/ack-spark-benchmark-assembly-0.1.jar"
14 | arguments:
15 | # TPC-DS data location
16 | - "oss:///spark/data/tpc-ds-data/1000g"
17 | # Results location
18 | - "oss:///spark/result/tpcds-benchmark-result-1000g"
19 | # Path to kit in the docker image
20 | - "/tmp/tpcds-kit/tools"
21 | # Data Format
22 | - "parquet"
23 | # Scale factor (in GB)
24 | - "1000"
25 | # Number of iterations
26 | - "1"
27 | # Optimize queries
28 | - "false"
29 | # Filter queries to run; runs all if empty, e.g. "q70-v2.4,q82-v2.4,q64-v2.4"
30 | - ""
31 | # Logging set to WARN
32 | - "true"
33 | hostNetwork: true
34 | dnsPolicy: ClusterFirstWithHostNet
35 | restartPolicy:
36 | type: Never
37 | timeToLiveSeconds: 86400
38 | hadoopConf:
39 | # OSS
40 | "fs.oss.impl": "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"
41 | "fs.oss.endpoint": ""
42 | "fs.oss.accessKeyId": ""
43 | "fs.oss.accessKeySecret": ""
44 | # OSS performance best practice
45 | "fs.oss.paging.maximum": 1000
46 | "fs.oss.multipart.download.threads": 32
47 | "fs.oss.max.total.tasks": 256
48 | "fs.oss.connection.maximum": 2048
49 | sparkConf:
50 | "spark.kubernetes.allocation.batch.size": "200"
51 | "spark.sql.adaptive.join.enabled": "true"
52 | "spark.eventLog.enabled": "true"
53 | "spark.eventLog.dir": "oss:///spark/spark-events"
54 | driver:
55 | cores: 5
56 | memory: "20480m"
57 | labels:
58 | version: 2.4.5
59 | spark-app: spark-tpcds
60 | role: driver
61 | serviceAccount: spark
62 | executor:
63 | cores: 7
64 | instances: 20
65 | memory: "20480m"
66 | memoryOverhead: "8g"
67 | labels:
68 | version: 2.4.5
69 | role: executor
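Note: the nine positional arguments above are consumed by com.aliyun.spark.benchmark.tpcds.BenchmarkSQL, whose source is not included in this repository, so the exact parameter names are unknown. A purely hypothetical sketch of how such a main class would unpack the argument list, in the order the inline comments document:

    // Hypothetical unpacking; every name below is illustrative only.
    val Array(dataDir, resultDir, toolsDir, format, scaleFactor,
      iterations, optimizeQueries, queryFilter, onlyWarn) = args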
--------------------------------------------------------------------------------
/kubernetes/spark/tpcds-data-generation.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "sparkoperator.k8s.io/v1beta2"
2 | kind: SparkApplication
3 | metadata:
4 | name: tpcds-data-generation
5 | namespace: default
6 | spec:
7 | type: Scala
8 | image: registry.cn-beijing.aliyuncs.com/yukong/ack-spark-benchmark:1.0.0
9 | sparkVersion: 2.4.5
10 | mainClass: com.aliyun.spark.benchmark.tpcds.DataGeneration
11 | mainApplicationFile: "local:///opt/spark/jars/ack-spark-benchmark-assembly-0.1.jar"
12 | mode: cluster
13 | arguments:
14 | # TPC-DS data location
15 | - "oss:///spark/data/tpc-ds-data/1000g"
16 | # Path to kit in the docker image
17 | - "/tmp/tpcds-kit/tools"
18 | # Data Format
19 | - "parquet"
20 | # Scale factor (in GB)
21 | - "100000"
22 | # Number of dsdgen partitions to run (number of input tasks)
23 | - "100"
24 | # Create the partitioned fact tables
25 | - "false"
26 | # Shuffle to get partitions coalesced into single files.
27 | - "false"
28 | # Logging set to WARN
29 | - "true"
30 | hadoopConf:
31 | # OSS
32 | "fs.oss.impl": "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"
33 | "fs.oss.endpoint": ""
34 | "fs.oss.accessKeyId": ""
35 | "fs.oss.accessKeySecret": ""
36 | # OSS performance best practice
37 | "fs.oss.paging.maximum": 1000
38 | "fs.oss.multipart.download.threads": 32
39 | "fs.oss.max.total.tasks": 256
40 | "fs.oss.connection.maximum": 2048
41 | sparkConf:
42 | "spark.kubernetes.allocation.batch.size": "100"
43 | "spark.sql.adaptive.enabled": "true"
44 | "spark.eventLog.enabled": "true"
45 | "spark.eventLog.dir": "oss:///spark/spark-events"
46 | driver:
47 | cores: 6
48 | memory: "20480m"
49 | serviceAccount: spark
50 | executor:
51 | instances: 20
52 | cores: 8
53 | memory: "61440m"
54 | memoryOverhead: 2g
55 | restartPolicy:
56 | type: Never
--------------------------------------------------------------------------------
/lib/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/benchmark-for-spark/6cb2181a1db5659465922218d15f8483e7aa7360/lib/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.9.3
2 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1")
2 |
--------------------------------------------------------------------------------
/src/main/scala/com/aliyun/ack/spark/tpcds/Benchmark.scala:
--------------------------------------------------------------------------------
1 | package com.aliyun.ack.spark.tpcds
2 |
3 | import scala.util.Try
4 |
5 | import com.databricks.spark.sql.perf.tpcds.{TPCDS, TPCDSTables}
6 | import org.apache.spark.sql.SparkSession
7 | import org.apache.spark.sql.functions._
8 | import org.apache.spark.sql.functions.col
9 | import org.apache.log4j.{Level, LogManager}
10 | import org.apache.spark.sql.types.DoubleType
11 | import scopt.OParser
12 |
13 | case class BenchmarkConfig(
14 | tpcdsDataPath: String = "",
15 | outputPath: String = "",
16 | dsdgenPath: String = "/opt/tpcds-kit/tools",
17 | format: String = "parquet",
18 | scaleFactor: Int = 1,
19 | iterations: Int = 1,
20 | optimizeQueries: Boolean = false,
21 | queries: String = "",
22 | onlyWarn: Boolean = false
23 | )
24 |
25 | object Benchmark {
26 |
27 | def main(args: Array[String]): Unit = {
28 | val builder = OParser.builder[BenchmarkConfig]
29 |
30 | val parser = {
31 | import builder._
32 | OParser.sequence(
33 | programName("Benchmark"),
34 | opt[String]("data")
35 | .required()
36 | .valueName("")
37 | .action((x, c) => c.copy(tpcdsDataPath = x))
38 | .text("path of tpcds data"),
39 | opt[String]("result")
40 | .required()
41 | .valueName("")
42 | .action((x, c) => c.copy(outputPath = x))
43 | .text("path of benchmark result"),
44 | opt[String]("dsdgen")
45 | .optional()
46 | .valueName("")
47 | .action((x, c) => c.copy(dsdgenPath = x))
48 | .text("path of tpcds-kit tools"),
49 | opt[String]("format")
50 | .valueName("")
51 | .action((x, c) => c.copy(format = x))
52 | .text("data format"),
53 | opt[Int]("scale-factor")
54 | .optional()
55 | .valueName("")
56 | .action((x, c) => c.copy(scaleFactor = x))
57 | .text("scale factor of tpcds data (in GB)"),
58 | opt[Int]("iterations")
59 | .optional()
60 | .action((x, c) => c.copy(iterations = x))
61 | .text("number of iterations"),
62 | opt[Unit]("optimize-queries")
63 | .optional()
64 | .action((_, c) => c.copy(optimizeQueries = true))
65 | .text("whether to optimize queries"),
66 | opt[String]("queries")
67 | .optional()
68 | .action((x, c) => c.copy(queries = x))
69 | .text("queries to execute(empty means all queries)"),
70 | opt[Unit]("only-warn")
71 | .optional()
72 | .action((_, c) => c.copy(onlyWarn = true))
73 | .text("set logging level to warning")
74 | )
75 | }
76 |
77 | val option = OParser.parse(parser, args, BenchmarkConfig())
78 | if (option.isEmpty) {
79 | System.exit(1)
80 | }
81 | val config = option.get
82 | val databaseName = "tpcds_db"
83 | val timeout = 24 * 60 * 60
84 |
85 | println(s"DATA DIR is ${config.tpcdsDataPath}")
86 |
87 | val spark = SparkSession.builder
88 | .appName(s"TPCDS SQL Benchmark ${config.scaleFactor} GB")
89 | .getOrCreate()
90 |
91 | if (config.onlyWarn) {
92 | println(s"Only WARN")
93 | LogManager.getLogger("org").setLevel(Level.WARN)
94 | }
95 |
96 | val tables = new TPCDSTables(
97 | spark.sqlContext,
98 | dsdgenDir = config.dsdgenPath,
99 | scaleFactor = config.scaleFactor.toString,
100 | useDoubleForDecimal = false,
101 | useStringForDate = false
102 | )
103 |
104 | if (config.optimizeQueries) {
105 | Try {
106 | spark.sql(s"create database $databaseName")
107 | }
108 | tables.createExternalTables(
109 | config.tpcdsDataPath,
110 | config.format,
111 | databaseName,
112 | overwrite = true,
113 | discoverPartitions = true
114 | )
115 | tables.analyzeTables(databaseName, analyzeColumns = true)
116 | spark.conf.set("spark.sql.cbo.enabled", "true")
117 | } else {
118 | tables.createTemporaryTables(config.tpcdsDataPath, config.format)
119 | }
120 |
121 | val tpcds = new TPCDS(spark.sqlContext)
122 |
123 | var query_filter: Seq[String] = Seq()
124 | if (!config.queries.isEmpty) {
125 | println(s"Running only queries: ${config.queries}")
126 | query_filter = config.queries.split(",").toSeq
127 | }
128 |
129 | val filtered_queries = query_filter match {
130 | case Seq() => tpcds.tpcds2_4Queries
131 | case _ => tpcds.tpcds2_4Queries.filter(q => query_filter.contains(q.name))
132 | }
133 |
134 | // Start experiment
135 | val experiment = tpcds.runExperiment(
136 | filtered_queries,
137 | iterations = config.iterations,
138 | resultLocation = config.outputPath,
139 | forkThread = true
140 | )
141 |
142 | experiment.waitForFinish(timeout)
143 |
144 | // Collect general results
145 | val resultPath = experiment.resultPath
146 | println(s"Reading result at ${resultPath}")
147 | val specificResultTable = spark.read.json(resultPath)
148 | specificResultTable.show()
149 |
150 | // Summarize results
151 | val result = specificResultTable
152 | .withColumn("result", explode(col("results")))
153 | .withColumn("executionSeconds", col("result.executionTime") / 1000)
154 | .withColumn("queryName", col("result.name"))
155 | result.select("iteration", "queryName", "executionSeconds").show()
156 |
157 | val aggResults = result
158 | .groupBy("queryName")
159 | .agg(
160 | min("executionSeconds").cast(DoubleType).as("MinRuntimeInSeconds"),
161 | max("executionSeconds").cast(DoubleType).as("MaxRuntimeInSeconds"),
162 | mean("executionSeconds").cast(DoubleType).as("MeanRuntimeInSeconds"),
163 | stddev_pop("executionSeconds")
164 | .cast(DoubleType)
165 | .as("StandardDeviationInSeconds")
166 | )
167 | .orderBy("queryName")
168 |
169 | aggResults
170 | .repartition(1)
171 | .write
172 | .csv(s"${resultPath}/summary.csv")
173 |
174 | aggResults.show(105)
175 |
176 | spark.stop()
177 | }
178 | }
179 |
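Note: the summary.csv written by Benchmark has no header row. A minimal sketch for loading it back and surfacing the slowest queries, with spark and resultPath in scope as in main (column names follow the aggregation above):

    // Sketch: read the header-less summary and rank queries by mean runtime.
    import org.apache.spark.sql.functions.desc
    val summary = spark.read
      .option("inferSchema", "true")
      .csv(s"$resultPath/summary.csv")
      .toDF("queryName", "MinRuntimeInSeconds", "MaxRuntimeInSeconds",
        "MeanRuntimeInSeconds", "StandardDeviationInSeconds")
    summary.orderBy(desc("MeanRuntimeInSeconds")).show(10)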
--------------------------------------------------------------------------------
/src/main/scala/com/aliyun/ack/spark/tpcds/DataGeneration.scala:
--------------------------------------------------------------------------------
1 | package com.aliyun.ack.spark.tpcds
2 |
3 | import scala.util.Try
4 |
5 | import com.databricks.spark.sql.perf.tpcds.TPCDSTables
6 | import org.apache.log4j.{Level, LogManager}
7 | import org.apache.spark.sql.SparkSession
8 | import scopt.OParser
9 |
10 | case class DataGenerationConfig(
11 | outputPath: String = "",
12 | dsdgenPath: String = "/opt/tpcds-kit/tools",
13 | format: String = "parquet",
14 | scaleFactor: Int = 1,
15 | partitionTable: Boolean = false,
16 | numPartitions: Int = 1,
17 | coalesced: Boolean = false,
18 | onlyWarn: Boolean = false
19 | )
20 |
21 | object DataGeneration {
22 |
23 | def main(args: Array[String]): Unit = {
24 |
25 | val builder = OParser.builder[DataGenerationConfig]
26 |
27 | val parser = {
28 | import builder._
29 | OParser.sequence(
30 | programName("DataGeneration"),
31 | opt[String]("output")
32 | .required()
33 | .valueName("")
34 | .action((x, c) => c.copy(outputPath = x))
35 | .text("output path of tpcds data"),
36 | opt[String]("dsdgen")
37 | .optional()
38 | .valueName("")
39 | .action((x, c) => c.copy(dsdgenPath = x))
40 | .text("path of tpcds-kit tools"),
41 | opt[String]("format")
42 | .optional()
43 | .valueName("")
44 | .action((x, c) => c.copy(format = x))
45 | .text("data format"),
46 | opt[Int]("scale-factor")
47 | .optional()
48 | .valueName("")
49 | .action((x, c) => c.copy(scaleFactor = x))
50 | .text("scale factor of tpcds data (in GB)"),
51 | opt[Unit]("create-partitions")
52 | .action((_, c) => c.copy(partitionTable = true))
53 | .optional()
54 | .text("whether to optimize queries"),
55 | opt[Int]("num-partitions")
56 | .optional()
57 | .action((x, c) => c.copy(numPartitions = x))
58 | .text("number of partitions"),
59 | opt[Unit]("coalesced")
60 | .optional()
61 | .action((_, c) => c.copy(coalesced = true))
62 | .text(
63 | "whether to shuffle to get partitions coalesced into single files"
64 | ),
65 | opt[Unit]("only-warn")
66 | .optional()
67 | .action((_, c) => c.copy(onlyWarn = true))
68 | .text("set logging level to warning")
69 | )
70 | }
71 |
72 | val option = OParser.parse(parser, args, DataGenerationConfig())
73 | if (option.isEmpty) {
74 | System.exit(1)
75 | }
76 | val config = option.get
77 |
78 | println(s"DATA DIR is ${config.outputPath}")
79 | println(s"Tools dsdgen executable located in ${config.dsdgenPath}")
80 | println(s"Scale factor is ${config.scaleFactor} GB")
81 |
82 | val spark = SparkSession.builder
83 | .appName(s"TPCDS Generate Data ${config.scaleFactor} GB")
84 | .getOrCreate()
85 |
86 | if (config.onlyWarn) {
87 | println(s"Only WARN")
88 | LogManager.getLogger("org").setLevel(Level.WARN)
89 | }
90 |
91 | val tables = new TPCDSTables(
92 | spark.sqlContext,
93 | dsdgenDir = config.dsdgenPath,
94 | scaleFactor = config.scaleFactor.toString,
95 | useDoubleForDecimal = false,
96 | useStringForDate = false
97 | )
98 |
99 | println(s"Generating TPCDS data")
100 |
101 | tables.genData(
102 | location = config.outputPath,
103 | format = config.format,
104 | overwrite = true, // overwrite the data that is already there
105 | partitionTables =
106 | config.partitionTable, // create the partitioned fact tables
107 | clusterByPartitionColumns =
108 | config.coalesced, // shuffle to get partitions coalesced into single files.
109 | filterOutNullPartitionValues =
110 | false, // true to filter out the partition with NULL key value
111 | tableFilter = "", // "" means generate all tables
112 | numPartitions =
113 | config.numPartitions // how many dsdgen partitions to run - number of input tasks.
114 | )
115 |
116 | println(s"Data generated at ${config.outputPath}")
117 |
118 | spark.stop()
119 | }
120 | }
121 |
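Note: for local experimentation (e.g. from a spark-shell with the assembly jar on the classpath), the parser above can be driven directly; a minimal sketch using the named flags it defines (the bucket name is a placeholder):

    // Sketch: invoke DataGeneration with the scopt flags defined above.
    com.aliyun.ack.spark.tpcds.DataGeneration.main(Array(
      "--output", "oss://my-bucket/spark/data/tpc-ds-data/1000g", // placeholder bucket
      "--dsdgen", "/opt/tpcds-kit/tools",
      "--format", "parquet",
      "--scale-factor", "1000",
      "--num-partitions", "100",
      "--only-warn"
    ))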
--------------------------------------------------------------------------------
/terraform/alicloud/datasources.tf:
--------------------------------------------------------------------------------
1 | data "alicloud_cs_kubernetes_addons" "default" {
2 | cluster_id = module.cs.cluster_id
3 | }
4 |
5 | data "alicloud_cs_cluster_credential" "default" {
6 | cluster_id = module.cs.cluster_id
7 | output_file = "~/.kube/config"
8 | }
9 |
--------------------------------------------------------------------------------
/terraform/alicloud/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_providers {
3 | alicloud = {
4 | source = "aliyun/alicloud"
5 | version = "1.223.2"
6 | }
7 | }
8 |
9 | required_version = ">= 1.8.0"
10 | }
11 |
12 | resource "random_string" "suffix" {
13 | length = 16
14 | lower = true
15 | upper = false
16 | special = false
17 | }
18 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/celeborn/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_cs_kubernetes_node_pool" "celeborn-master" {
2 | node_pool_name = "celeborn-master"
3 | cluster_id = var.cluster_id
4 | vswitch_ids = var.vswitch_ids
5 | desired_size = var.master_instance_count
6 | instance_types = [var.master_instance_type]
7 | image_type = "AliyunLinux3"
8 | system_disk_category = "cloud_essd"
9 | system_disk_size = 40
10 | system_disk_performance_level = "PL1"
11 |
12 | data_disks {
13 | category = "cloud_essd"
14 | size = 300
15 | performance_level = "PL1"
16 | device = "/dev/vdb"
17 | }
18 |
19 | data_disks {
20 | category = "cloud_essd"
21 | size = 40
22 | performance_level = "PL1"
23 | device = "/dev/vdc"
24 | }
25 |
26 | labels {
27 | key = "celeborn.apache.org/role"
28 | value = "master"
29 | }
30 |
31 | taints {
32 | key = "celeborn.apache.org/role"
33 | value = "master"
34 | effect = "NoSchedule"
35 | }
36 |
37 | user_data = base64encode(file("${path.module}/master_user_data.sh"))
38 |
39 | resource_group_id = var.resource_group_id
40 | security_group_ids = [var.security_group_id]
41 | }
42 |
43 | resource "alicloud_cs_kubernetes_node_pool" "celeborn-worker" {
44 | node_pool_name = "celeborn-worker"
45 | cluster_id = var.cluster_id
46 | vswitch_ids = var.vswitch_ids
47 | desired_size = var.worker_instance_count
48 | instance_types = [var.worker_instance_type]
49 | image_type = "AliyunLinux3"
50 | system_disk_category = "cloud_essd"
51 | system_disk_size = 40
52 | system_disk_performance_level = "PL1"
53 |
54 | labels {
55 | key = "celeborn.apache.org/role"
56 | value = "worker"
57 | }
58 |
59 | taints {
60 | key = "celeborn.apache.org/role"
61 | value = "worker"
62 | effect = "NoSchedule"
63 | }
64 |
65 | user_data = base64encode(file("${path.module}/worker_user_data.sh"))
66 |
67 | resource_group_id = var.resource_group_id
68 | security_group_ids = [var.security_group_id]
69 | }
70 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/celeborn/master_user_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -eux
4 |
5 | yum install -y parted e2fsprogs
6 |
7 | output=$(fdisk -l | awk '/^Disk \//' | grep -o -E '/dev/(vd[a-z]|xvd[a-z]|nvme[0-9]n1)')
8 | disks=()
9 | while IFS= read -r line; do
10 | disks+=("$line")
11 | done <<<"$output"
12 |
13 | n=${#disks[@]}
14 |
15 | # Create one primary partition for every disk except for the first and the last one.
16 | for ((i = 1; i < n - 1; i++)); do
17 | disk="${disks[i]}"
18 | parted "${disk}" mklabel gpt
19 | parted "${disk}" mkpart primary 1 100%
20 | parted "${disk}" align-check optimal 1
21 | done
22 | partprobe
23 |
24 | # Create an XFS file system on the first partition of each partitioned disk.
25 | for ((i = 1; i < n - 1; i++)); do
26 | disk="${disks[i]}"
27 | if [[ ${disk} =~ "/dev/nvme" ]]; then
28 | mkfs -t xfs "${disk}p1"
29 | elif [[ ${disk} =~ "/dev/vd" ]]; then
30 | mkfs -t xfs "${disk}1"
31 | elif [[ ${disk} =~ "/dev/xvd" ]]; then
32 | mkfs -t xfs "${disk}1"
33 | fi
34 | done
35 |
36 | # Mount file systems to /mnt/disk1, /mnt/disk2, etc.
37 | cp /etc/fstab /etc/fstab.bak
38 |
39 | for ((i = 1; i < n - 1; i++)); do
40 | dir="/mnt/disk$i"
41 | mkdir -p ${dir}
42 | if [[ ${disks[i]} =~ "/dev/nvme" ]]; then
43 | echo "$(blkid "${disks[i]}p1" | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
44 | elif [[ ${disks[i]} =~ "/dev/vd" ]]; then
45 | echo "$(blkid "${disks[i]}1" | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
46 | elif [[ ${disks[i]} =~ "/dev/xvd" ]]; then
47 | echo "$(blkid "${disks[i]}1" | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
48 | fi
49 | done
50 |
51 | mount -a
52 |
53 | chmod a+w /mnt/disk*
54 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/celeborn/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
6 | variable "cluster_id" {
7 | type = string
8 | description = "The id of managed Kubernetes cluster."
9 | }
10 |
11 | variable "vswitch_ids" {
12 | type = list(string)
13 | description = "The list of vswitch id."
14 | }
15 |
16 | variable "master_instance_count" {
17 | type = number
18 | description = "Instance count of Celeborn master node pool."
19 | }
20 |
21 | variable "master_instance_type" {
22 | type = string
23 | description = "Instance type of Celeborn master node pool."
24 | default = "ecs.g7.2xlarge"
25 | }
26 |
27 | variable "worker_instance_count" {
28 | type = number
29 | description = "Instance count of Celeborn worker node pool."
30 | }
31 |
32 | variable "worker_instance_type" {
33 | type = string
34 | description = "Instance type of Celeborn worker node pool."
35 | default = "ecs.i4.8xlarge"
36 | }
37 |
38 | variable "resource_group_id" {
39 | type = string
40 | description = "The id of resource group."
41 | }
42 |
43 | variable "security_group_id" {
44 | type = string
45 | description = "The id of security group."
46 | }
47 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/celeborn/worker_user_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -eux
4 |
5 | yum install -y parted e2fsprogs
6 |
7 | output=$(fdisk -l | awk '/^Disk \//' | grep -o -E '/dev/nvme[0-9]n1')
8 | disks=()
9 | while IFS= read -r line; do
10 | disks+=("$line")
11 | done <<<"$output"
12 |
13 | n=${#disks[@]}
14 |
15 | # Create one primary partition for every disk.
16 | for ((i = 0; i < n; i++)); do
17 | disk="${disks[i]}"
18 | parted "${disk}" mklabel gpt
19 | parted "${disk}" mkpart primary 1 100%
20 | parted "${disk}" align-check optimal 1
21 | done
22 | partprobe
23 |
24 | # Create an XFS file system on the first partition of every disk.
25 | for ((i = 0; i < n; i++)); do
26 | disk="${disks[i]}"
27 | if [[ ${disk} =~ "/dev/nvme" ]]; then
28 | mkfs -t xfs "${disk}p1"
29 | elif [[ ${disk} =~ "/dev/vd" ]]; then
30 | mkfs -t xfs "${disk}1"
31 | elif [[ ${disk} =~ "/dev/xvd" ]]; then
32 | mkfs -t xfs "${disk}1"
33 | fi
34 | done
35 |
36 | # Mount file systems to /mnt/disk1, /mnt/disk2, etc.
37 | cp /etc/fstab /etc/fstab.bak
38 |
39 | for ((i = 0; i < n; i++)); do
40 | dir="/mnt/disk$((i + 1))"
41 | mkdir -p ${dir}
42 | if [[ ${disks[i]} =~ "/dev/nvme" ]]; then
43 | echo "$(blkid "${disks[i]}p1" | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
44 | elif [[ ${disks[i]} =~ "/dev/vd" ]]; then
45 | echo "$(blkid "${disks[i]}1" | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
46 | elif [[ ${disks[i]} =~ "/dev/xvd" ]]; then
47 | echo "$(blkid "${disks[i]}1" | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
48 | fi
49 | done
50 |
51 | mount -a
52 |
53 | chmod a+w /mnt/disk*
54 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/cs/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_cs_managed_kubernetes" "default" {
2 | name = "ack-${var.suffix}"
3 | timezone = "Asia/Shanghai"
4 | version = "1.32.1-aliyun.1"
5 |
6 | worker_vswitch_ids = var.worker_vswitch_ids
7 | pod_vswitch_ids = var.pod_vswitch_ids
8 | service_cidr = "172.16.0.0/16"
9 | addons {
10 | name = "terway-eniip"
11 | }
12 |
13 | proxy_mode = "ipvs"
14 | cluster_domain = "cluster.local"
15 | deletion_protection = false
16 | cluster_spec = "ack.pro.small"
17 | load_balancer_spec = "slb.s1.small"
18 | new_nat_gateway = true
19 | slb_internet_enabled = true
20 | resource_group_id = var.resource_group_id
21 | security_group_id = var.security_group_id
22 | }
23 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/cs/outputs.tf:
--------------------------------------------------------------------------------
1 | output "cluster_id" {
2 | value = alicloud_cs_managed_kubernetes.default.id
3 | }
4 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/cs/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
6 | variable "worker_vswitch_ids" {
7 | type = list(string)
8 | description = "The id list of worker vswitch."
9 | }
10 |
11 | variable "pod_vswitch_ids" {
12 | type = list(string)
13 | description = "The id list of pod vswitch."
14 | }
15 |
16 | variable "resource_group_id" {
17 | type = string
18 | description = "The id of resource group."
19 | }
20 |
21 | variable "security_group_id" {
22 | type = string
23 | description = "The id of security group."
24 | }
25 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/ecs/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_security_group" "default" {
2 | name = "sg-${var.suffix}"
3 | vpc_id = var.vpc_id
4 | resource_group_id = var.resource_group_id
5 | security_group_type = var.security_group_type
6 | }
7 |
8 | resource "alicloud_security_group_rule" "default" {
9 | type = "ingress"
10 | ip_protocol = "all"
11 | port_range = "-1/-1"
12 | cidr_ip = "192.168.0.0/16"
13 | security_group_id = alicloud_security_group.default.id
14 | priority = 1
15 | }
16 |
17 | resource "alicloud_security_group_rule" "icmp" {
18 | type = "ingress"
19 | ip_protocol = "icmp"
20 | port_range = "-1/-1"
21 | cidr_ip = "0.0.0.0/0"
22 | security_group_id = alicloud_security_group.default.id
23 | priority = 1
24 | }
25 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/ecs/outputs.tf:
--------------------------------------------------------------------------------
1 | output "security_group_id" {
2 | value = alicloud_security_group.default.id
3 | }
4 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/ecs/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
6 | variable "vpc_id" {
7 | type = string
8 | description = "The id of the vpc."
9 | }
10 |
11 | variable "resource_group_id" {
12 | type = string
13 | description = "The id of the resource group."
14 | }
15 |
16 | variable "security_group_type" {
17 | type = string
18 | description = "The type of the security group."
19 | default = "normal"
20 | }
21 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/fluid/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_cs_kubernetes_node_pool" "fluid" {
2 | node_pool_name = "np-fluid-${var.suffix}"
3 | cluster_id = var.cluster_id
4 | vswitch_ids = var.vswitch_ids
5 | desired_size = var.instance_count
6 | instance_types = [var.instance_type]
7 | image_type = "AliyunLinux3"
8 | system_disk_category = "cloud_essd"
9 | system_disk_size = 40
10 | system_disk_performance_level = "PL1"
11 |
12 | labels {
13 | key = "benchmark.node.role"
14 | value = "fluid"
15 | }
16 |
17 | resource_group_id = var.resource_group_id
18 | security_group_ids = [var.security_group_id]
19 | }
20 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/fluid/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
6 | variable "cluster_id" {
7 | type = string
8 | description = "The id of managed kubernetes cluster."
9 | }
10 |
11 | variable "vswitch_ids" {
12 | type = list(string)
13 | }
14 |
15 | variable "instance_count" {
16 | type = number
17 | }
18 |
19 | variable "instance_type" {
20 | type = string
21 | }
22 |
23 | variable "resource_group_id" {
24 | type = string
25 | description = "The id of resource group."
26 | }
27 |
28 | variable "security_group_id" {
29 | type = string
30 | description = "The id of security group."
31 | }
32 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/oss/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_oss_bucket" "default" {
2 | bucket = "bucket-${var.suffix}"
3 | acl = "private"
4 | storage_class = "Standard"
5 | redundancy_type = "LRS"
6 | }
7 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/oss/outputs.tf:
--------------------------------------------------------------------------------
1 | output "id" {
2 | value = alicloud_oss_bucket.default.id
3 | description = "The name of the bucket."
4 | }
5 |
6 | output "extranet_endpoint" {
7 | value = alicloud_oss_bucket.default.extranet_endpoint
8 | description = "The extranet access endpoint of the bucket."
9 | }
10 |
11 | output "intranet_endpoint" {
12 | value = alicloud_oss_bucket.default.intranet_endpoint
13 | description = "The intranet access endpoint of the bucket."
14 | }
15 |
16 | output "location" {
17 | value = alicloud_oss_bucket.default.location
18 | description = "The location of the bucket."
19 | }
20 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/oss/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/resource-manager/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_resource_manager_resource_group" "default" {
2 | resource_group_name = "rg-${var.suffix}"
3 | display_name = "rg-${var.suffix}"
4 | }
5 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/resource-manager/outputs.tf:
--------------------------------------------------------------------------------
1 | output "resource_group_id" {
2 | value = alicloud_resource_manager_resource_group.default.id
3 | }
4 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/resource-manager/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/spark/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_cs_kubernetes_node_pool" "spark-master" {
2 | node_pool_name = "spark-master"
3 | cluster_id = var.cluster_id
4 | vswitch_ids = var.vswitch_ids
5 | instance_types = [var.master_instance_type]
6 | image_type = "AliyunLinux3"
7 | system_disk_category = "cloud_essd"
8 | system_disk_size = 40
9 | system_disk_performance_level = "PL1"
10 |
11 | labels {
12 | key = "spark.tpcds.benchmark/role"
13 | value = "spark-master"
14 | }
15 |
16 | desired_size = var.master_instance_count
17 | resource_group_id = var.resource_group_id
18 | security_group_ids = [var.security_group_id]
19 | }
20 |
21 | resource "alicloud_cs_kubernetes_node_pool" "spark-worker" {
22 | node_pool_name = "spark-worker"
23 | cluster_id = var.cluster_id
24 | vswitch_ids = var.vswitch_ids
25 | desired_size = var.worker_instance_count
26 | instance_types = [var.worker_instance_type]
27 | image_type = "AliyunLinux3"
28 | system_disk_category = "cloud_essd"
29 | system_disk_size = 40
30 | system_disk_performance_level = "PL1"
31 | data_disks {
32 | category = "cloud_essd"
33 | size = 300
34 | performance_level = "PL1"
35 | device = "/dev/vdb"
36 | }
37 | data_disks {
38 | category = "cloud_essd"
39 | size = 300
40 | performance_level = "PL1"
41 | device = "/dev/vdc"
42 | }
43 | data_disks {
44 | category = "cloud_essd"
45 | size = 300
46 | performance_level = "PL1"
47 | device = "/dev/vdd"
48 | }
49 | data_disks {
50 | category = "cloud_essd"
51 | size = 300
52 | performance_level = "PL1"
53 | device = "/dev/vde"
54 | }
55 | data_disks {
56 | category = "cloud_essd"
57 | size = 300
58 | performance_level = "PL1"
59 | device = "/dev/vdf"
60 | }
61 | data_disks {
62 | category = "cloud_essd"
63 | size = 300
64 | performance_level = "PL1"
65 | device = "/dev/vdg"
66 | }
67 | data_disks {
68 | category = "cloud_essd"
69 | size = 40
70 | performance_level = "PL1"
71 | device = "/dev/vdh"
72 | }
73 |
74 | labels {
75 | key = "spark.tpcds.benchmark/role"
76 | value = "spark-worker"
77 | }
78 |
79 | taints {
80 | key = "spark.tpcds.benchmark/role"
81 | value = "spark-worker"
82 | effect = "NoSchedule"
83 | }
84 |
85 | kubelet_configuration {
86 | eviction_hard = {
87 | "imagefs.available" = "5%"
88 | "memory.available" = "100Mi"
89 | "nodefs.available" = "5%"
90 | "nodefs.inodesFree" = "5%"
91 | }
92 | system_reserved = {
93 | cpu = "300m"
94 | memory = "600Mi"
95 | pid = "1000"
96 | }
97 | kube_reserved = {
98 | cpu = "300m"
99 | memory = "600Mi"
100 | pid = "1000"
101 | }
102 | }
103 |
104 | user_data = base64encode(file("${path.module}/user_data.sh"))
105 |
106 | resource_group_id = var.resource_group_id
107 | security_group_ids = [var.security_group_id]
108 | }
109 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/spark/user_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Print commands as they are executed
4 | set -ex
5 |
6 | # Install parted
7 | yum install -y parted e2fsprogs
8 |
9 | # Create a new partition on each data disk
10 | disks=(/dev/vdb /dev/vdc /dev/vdd /dev/vde /dev/vdf /dev/vdg)
11 | for disk in ${disks[@]}; do
12 | parted ${disk} mklabel gpt
13 | parted ${disk} mkpart primary 1 100%
14 | parted ${disk} align-check optimal 1
15 | done
16 | partprobe
17 |
18 | # Create a file system on each partition
19 | for disk in ${disks[@]}; do
20 | mkfs -t xfs ${disk}1
21 | done
22 |
23 | # Mount the partitions
24 | cp /etc/fstab /etc/fstab.bak
25 | n=${#disks[@]}
26 | for ((i = 0; i < n; i++)); do
27 | dir="/mnt/disk$(($i + 1))"
28 | mkdir -p ${dir}
29 | echo "$(blkid ${disks[i]}1 | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
30 | chmod g+w ${dir}
31 | done
32 | mount -a
33 |
34 | # Make the mount points writable
35 | chmod a+w /mnt/disk*
36 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/spark/user_data_arm64.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Print commands as they are executed
4 | set -ex
5 |
6 | # Install parted
7 | yum install -y parted e2fsprogs
8 |
9 | # Create a new partition on each data disk
10 | disks=(/dev/nvme1n1 /dev/nvme2n1 /dev/nvme3n1 /dev/nvme4n1 /dev/nvme5n1 /dev/nvme6n1)
11 | for disk in ${disks[@]}; do
12 | parted ${disk} mklabel gpt
13 | parted ${disk} mkpart primary 1 100%
14 | parted ${disk} align-check optimal 1
15 | done
16 | partprobe
17 |
18 | # Create a file system on each partition
19 | for disk in ${disks[@]}; do
20 | mkfs -t xfs ${disk}p1
21 | done
22 |
23 | # Mount the partitions
24 | cp /etc/fstab /etc/fstab.bak
25 | n=${#disks[@]}
26 | for ((i = 0; i < n; i++)); do
27 | dir="/mnt/disk$(($i + 1))"
28 | mkdir -p ${dir}
29 | echo "$(blkid ${disks[i]}p1 | awk '{print $2}' | sed 's/\"//g') ${dir} xfs defaults 0 0" >>/etc/fstab
30 | done
31 | mount -a
32 |
33 | # Make the mount points writable
34 | chmod a+w /mnt/disk*
35 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/spark/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
6 | variable "cluster_id" {
7 | type = string
8 | description = "The id of managed kubernetes cluster."
9 | }
10 |
11 | variable "vswitch_ids" {
12 | type = list(string)
13 | }
14 |
15 | variable "master_instance_count" {
16 | type = number
17 | default = 1
18 | }
19 |
20 | variable "master_instance_type" {
21 | type = string
22 | }
23 |
24 | variable "worker_instance_count" {
25 | type = number
26 | default = 1
27 | }
28 |
29 | variable "worker_instance_type" {
30 | type = string
31 | }
32 |
33 | variable "resource_group_id" {
34 | type = string
35 | description = "The id of resource group."
36 | }
37 |
38 | variable "security_group_id" {
39 | type = string
40 | description = "The id of security group."
41 | }
42 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/vpc/main.tf:
--------------------------------------------------------------------------------
1 | resource "alicloud_vpc" "default" {
2 | vpc_name = "vpc-${var.suffix}"
3 | cidr_block = "192.168.0.0/16"
4 | resource_group_id = var.resource_group_id
5 | }
6 |
7 | resource "alicloud_vswitch" "default" {
8 | vswitch_name = "vsw-${var.suffix}"
9 | cidr_block = "192.168.0.0/24"
10 | vpc_id = alicloud_vpc.default.id
11 | zone_id = var.zone_id
12 | }
13 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/vpc/outputs.tf:
--------------------------------------------------------------------------------
1 | output "vpc_id" {
2 | value = alicloud_vpc.default.id
3 | }
4 |
5 | output "vswitch_id" {
6 | value = alicloud_vswitch.default.id
7 | }
8 |
--------------------------------------------------------------------------------
/terraform/alicloud/modules/vpc/variables.tf:
--------------------------------------------------------------------------------
1 | variable "suffix" {
2 | type = string
3 | description = "The suffix of name."
4 | }
5 |
6 | variable "zone_id" {
7 | type = string
8 | description = "The AZ for the VSwitch."
9 | }
10 |
11 | variable "resource_group_id" {
12 | type = string
13 | description = "The id of the resource group."
14 | }
15 |
--------------------------------------------------------------------------------
/terraform/alicloud/provider.tf:
--------------------------------------------------------------------------------
1 | provider "alicloud" {
2 | region = var.region
3 | profile = var.profile
4 | }
5 |
--------------------------------------------------------------------------------
/terraform/alicloud/root.tf:
--------------------------------------------------------------------------------
1 | # Create resource group
2 | module "resource_manager" {
3 | source = "./modules/resource-manager"
4 | suffix = random_string.suffix.id
5 | }
6 |
7 | # Create VPC and vswitch
8 | module "vpc" {
9 | source = "./modules/vpc"
10 | suffix = random_string.suffix.id
11 | zone_id = var.zone_id
12 | resource_group_id = module.resource_manager.resource_group_id
13 | }
14 |
15 | # Create security group
16 | module "ecs" {
17 | source = "./modules/ecs"
18 | suffix = random_string.suffix.id
19 | vpc_id = module.vpc.vpc_id
20 | resource_group_id = module.resource_manager.resource_group_id
21 | }
22 |
23 | # module "oss" {
24 | # source = "./modules/oss"
25 | # suffix = random_string.suffix.id
26 | # }
27 |
28 | # Create ACK
29 | module "cs" {
30 | source = "./modules/cs"
31 | suffix = random_string.suffix.id
32 | worker_vswitch_ids = [module.vpc.vswitch_id]
33 | pod_vswitch_ids = [module.vpc.vswitch_id]
34 | resource_group_id = module.resource_manager.resource_group_id
35 | security_group_id = module.ecs.security_group_id
36 | }
37 |
38 | # Create node pool for spark
39 | module "spark" {
40 | source = "./modules/spark"
41 | suffix = random_string.suffix.id
42 | cluster_id = module.cs.cluster_id
43 | vswitch_ids = [module.vpc.vswitch_id]
44 | master_instance_count = var.spark_master_instance_count
45 | master_instance_type = var.spark_master_instance_type
46 | worker_instance_count = var.spark_worker_instance_count
47 | worker_instance_type = var.spark_worker_instance_type
48 | resource_group_id = module.resource_manager.resource_group_id
49 | security_group_id = module.ecs.security_group_id
50 | }
51 |
52 | # Create node pool for celeborn
53 | module "celeborn" {
54 | source = "./modules/celeborn"
55 | suffix = random_string.suffix.id
56 | cluster_id = module.cs.cluster_id
57 | vswitch_ids = [module.vpc.vswitch_id]
58 | master_instance_count = var.celeborn_master_instance_count
59 | master_instance_type = var.celeborn_master_instance_type
60 | worker_instance_count = var.celeborn_worker_instance_count
61 | worker_instance_type = var.celeborn_worker_instance_type
62 | resource_group_id = module.resource_manager.resource_group_id
63 | security_group_id = module.ecs.security_group_id
64 | }
65 |
66 | # Create node pool for fluid
67 | module "fluid" {
68 | source = "./modules/fluid"
69 | suffix = random_string.suffix.id
70 | cluster_id = module.cs.cluster_id
71 | vswitch_ids = [module.vpc.vswitch_id]
72 | instance_count = var.fluid_instance_count
73 | instance_type = var.fluid_instance_type
74 | resource_group_id = module.resource_manager.resource_group_id
75 | security_group_id = module.ecs.security_group_id
76 | }
77 |
--------------------------------------------------------------------------------
/terraform/alicloud/terraform.tfvars:
--------------------------------------------------------------------------------
1 | # Alicloud
2 | profile = "default"
3 | zone_id = "cn-beijing-i"
4 |
5 | # Spark master node pool
6 | spark_master_instance_count = 0
7 | spark_master_instance_type = "ecs.g7.4xlarge"
8 |
9 | # Spark worker node pool
10 | spark_worker_instance_count = 0
11 | spark_worker_instance_type = "ecs.g7.8xlarge"
12 |
13 | # Celeborn master node pool
14 | celeborn_master_instance_count = 0
15 | celeborn_master_instance_type = "ecs.g8i.2xlarge"
16 |
17 | # Celeborn worker node pool
18 | celeborn_worker_instance_count = 0
19 | celeborn_worker_instance_type = "ecs.i4.8xlarge"
20 |
21 | # Fluid node pool
22 | fluid_instance_count = 0
23 | fluid_instance_type = "ecs.i3.2xlarge"
24 |
--------------------------------------------------------------------------------
/terraform/alicloud/variables.tf:
--------------------------------------------------------------------------------
1 | variable "profile" {
2 | type = string
3 | default = "default"
4 | }
5 |
6 | variable "region" {
7 | type = string
8 | default = "cn-beijing"
9 | }
10 |
11 | variable "zone_id" {
12 | type = string
13 | default = "cn-beijing-i"
14 | }
15 |
16 | variable "bucket_name" {
17 | type = string
18 | description = "The name of bucket."
19 | default = "ack-spark-benchmark"
20 | }
21 |
22 | # Spark
23 | variable "spark_master_instance_count" {
24 | type = number
25 | }
26 |
27 | variable "spark_master_instance_type" {
28 | type = string
29 | }
30 |
31 | variable "spark_worker_instance_count" {
32 | type = number
33 | }
34 |
35 | variable "spark_worker_instance_type" {
36 | type = string
37 | }
38 |
39 | # Celeborn
40 | variable "celeborn_master_instance_count" {
41 | type = number
42 | description = "Instance count of Celeborn master node pool."
43 | }
44 |
45 | variable "celeborn_master_instance_type" {
46 | type = string
47 | description = "Instance type of Celeborn master node pool."
48 | default = ""
49 | }
50 |
51 | variable "celeborn_worker_instance_count" {
52 | type = number
53 | description = "Instance count of Celeborn worker node pool."
54 | }
55 |
56 | variable "celeborn_worker_instance_type" {
57 | type = string
58 | description = "Instance type of Celeborn worker node pool."
59 | }
60 |
61 | # Fluid
62 | variable "fluid_instance_count" {
63 | type = number
64 | }
65 |
66 | variable "fluid_instance_type" {
67 | type = string
68 | }
69 |
--------------------------------------------------------------------------------