├── .buildkite
│   └── hooks
│       └── pre-exit
├── .gitignore
├── LICENSE
├── README.md
├── create-instance.sh
├── discord.sh
├── dos-report.sh
├── influx_data.sh
├── main.sh
├── prepare-envs.sh
├── print-log.sh
├── simple_test.sh
├── slack.sh
├── start-build-dependency.sh
├── start-dos-test.sh
├── start-upload-logs.sh
└── utils.sh

/.buildkite/hooks/pre-exit:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -e
4 | 
5 | (
6 | echo "~~~ safety check for machines"
7 | if [[ "$KEEP_INSTANCES" == "true" ]]; then
8 | echo "(skip): KEEP_INSTANCES = $KEEP_INSTANCES"
9 | exit 0
10 | fi
11 | 
12 | if [ ! -e instance_ip.out ]; then
13 | echo "(skip): instance_ip.out doesn't exist"
14 | exit 0
15 | fi
16 | 
17 | read -a instance_name ret_create.out # will be used for parsing ip and name
36 | gce_create_exit="$?"
37 | # testing
38 | # ret_create="--- name: mango-bencher-tester-221219-07-44-58 nat_ip: 34.83.208.239"
39 | # echo $ret_create > ret_create.out
40 | # gce_create_exit=0
41 | if [[ $gce_create_exit -eq 0 || $gce_create_exit == "0" ]];then
42 | instance_zone+=("$zone")
43 | sship=$(sed 's/^.*nat_ip: //g' ret_create.out)
44 | instance_ip+=("$sship")
45 | gc_name=$(sed 's/^.*--- name: //g' ret_create.out | sed 's/ nat_ip:.*//g')
46 | instance_name+=("$gc_name")
47 | else
48 | exit $gce_create_exit
49 | fi
50 | }
51 | 
52 | function create_machines() {
53 | instance_ip=()
54 | instance_name=()
55 | instance_zone=()
56 | for _ in $(seq 1 "$1")
57 | do
58 | if [[ $count -ge ${#available_zone[@]} ]];then
59 | count=0
60 | fi
61 | zone=${available_zone[$count]}
62 | create_gce "$zone"
63 | (( count+=1 )) || true
64 | echo "gc instance is created in $zone"
65 | 
66 | # always update `.out` files to keep the state up-to-date.
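# Note: the three `.out` state files below are rewritten on every loop
# iteration (not just once after the loop), so that the
# .buildkite/hooks/pre-exit safety check above always sees the instances
# created so far, e.g. if the build is cancelled or fails partway through
# machine creation.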
67 | echo "${instance_ip[@]}" > instance_ip.out
68 | echo "${instance_name[@]}" > instance_name.out
69 | echo "${instance_zone[@]}" > instance_zone.out
70 | 
71 | sleep $create_interval # avoid too quick build
72 | done
73 | }
74 | 
75 | function append_machines() {
76 | for _ in $(seq 1 "$1")
77 | do
78 | if [[ $count -ge ${#available_zone[@]} ]];then
79 | count=0
80 | fi
81 | zone=${available_zone[$count]}
82 | create_gce "$zone" "append"
83 | (( count+=1 )) || true
84 | echo "gc instance is created in $zone"
85 | sleep $create_interval # avoid too quick build
86 | done
87 | echo "${instance_ip[@]}" > instance_ip.out
88 | echo "${instance_name[@]}" > instance_name.out
89 | echo "${instance_zone[@]}" > instance_zone.out
90 | }
91 | 
92 | 
93 | function delete_machines(){
94 | echo ----- stage: remove gc instances ------
95 | echo instance_name : "${instance_name[@]}"
96 | echo instance_zone : "${instance_zone[@]}"
97 | for idx in "${!instance_name[@]}"
98 | do
99 | gcloud compute instances delete --quiet "${instance_name[$idx]}" --zone="${instance_zone[$idx]}"
100 | done
101 | 
102 | rm instance_ip.out
103 | rm instance_name.out
104 | rm instance_zone.out
105 | }
--------------------------------------------------------------------------------
/discord.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | discord_bot_name="bench-tps dos ${test_type}"
4 | discord_avatar_url="$DISCORD_AVATAR_URL"
5 | [[ -z "$DISCORD_WEBHOOK" ]]&&echo "ERROR : DISCORD_WEBHOOK=$DISCORD_WEBHOOK"&&exit 1
6 | 
7 | # give discord_txt a value to send to discord channel via webhook
8 | function discord_send(){
9 | curl -H "Content-Type: application/json" -H "Expect: application/json" -X POST "${DISCORD_WEBHOOK}" -d "${discord_txt}" 2>/dev/null
10 | }
11 | 
12 | printf -v test_config '**Test Configuration:**\\n```%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n```' \
13 | "test-type = $test_type" "client = $client" "branch = $SOLANA_BUILD_BRANCH" "commit = $git_commit" \
14 | "cluster version = $cluster_version" "bench-tps-clients = $num_clients" "read-client-keys = $client_keypair_path" \
15 | "duration = $duration" "tx_count = $tx_count" "thread_batch_sleep_ms = $thread_batch_sleep_ms" "durable_nonce = $USE_DURABLE_NONCE"
16 | 
17 | # Construct Discord Result_Details Report
18 | printf -v time_range 'time range: %s ~ %s' \
19 | "$(date --rfc-3339=seconds -u -d @$start_time)" "$(date --rfc-3339=seconds -u -d @$stop_time)"
20 | printf -v slot_range '%s\\n%s' \
21 | "$start_slot_txt" "$end_slot_txt"
22 | printf -v s_tx_count '%s\\n%s\\n%s\\n%s' \
23 | "$mean_tx_count_txt" "$max_tx_count_txt" "$p90_tx_count_txt" "$p99_tx_count_txt"
24 | printf -v s_tower_vote_dist '%s\\n%s\\n%s\\n%s' \
25 | "$mean_tower_vote_distance_txt" "$max_tower_vote_distance_txt" "$p90_tower_vote_distance_txt" "$p99_tower_vote_distance_txt"
26 | printf -v s_optimistic_slot_elapsed '%s\\n%s\\n%s\\n%s' \
27 | "$mean_optimistic_slot_elapsed_txt" "$max_optimistic_slot_elapsed_txt" "$p90_optimistic_slot_elapsed_txt" "$p99_optimistic_slot_elapsed_txt"
28 | printf -v s_ct_stats_block_cost '%s\\n%s\\n%s\\n%s' \
29 | "$mean_ct_stats_block_cost_txt" "$max_ct_stats_block_cost_txt" "$p90_ct_stats_block_cost_txt" "$p99_ct_stats_block_cost_txt"
30 | printf -v s_ct_stats_tx_count '%s\\n%s\\n%s\\n%s' \
31 | "$mean_mean_ct_stats_tx_count_txt" "$max_mean_ct_stats_tx_count_txt" "$p90_mean_ct_stats_tx_count_txt" "$p99_mean_ct_stats_tx_count_txt"
32 | printf -v s_ct_stats_number_of_accts 
'%s\\n%s\\n%s\\n%s' \ 33 | "$mean_ct_stats_num_of_accts_txt" "$max_ct_stats_num_of_accts_txt" "$p90_ct_stats_num_of_accts_txt" "$p99_ct_stats_num_of_accts_txt" 34 | printf -v blocks_fill '%s\\n%s\\n%s\\n%s\\n%s' \ 35 | "$total_blocks_txt" "$blocks_fill_50_txt" "$blocks_fill_90_txt" "$blocks_fill_50_percent_txt" "$blocks_fill_90_percent_txt" 36 | printf -v skip_rate '%s\\n%s\\n%s\\n%s\\n' \ 37 | "$mean_skip_rate_txt" "$max_skip_rate_txt" "$skip_rate_90_txt" "$mean_skip_rate_b4_test_txt" 38 | 39 | printf -v buildkite_link '%s' "[Buildkite]($BUILDKITE_BUILD_URL)" 40 | printf -v grafana_link '%s' "[Grafana]($gf_url)" 41 | # compose report without link 42 | printf -v test_report '%s %s\\n%s\\n**Test Details:**\\n```%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n%s\\n```' \ 43 | "$grafana_link" "$buildkite_link" \ 44 | "$test_config" "$time_range" "$slot_range" \ 45 | "$s_tx_count" "$s_tower_vote_dist" "$s_optimistic_slot_elapsed" \ 46 | "$s_ct_stats_block_cost" "$s_ct_stats_tx_count" "$s_ct_stats_number_of_accts" "$blocks_fill" "$skip_rate" 47 | 48 | # compose discord message 49 | d_username="\"username\": \"${discord_bot_name}\"" 50 | d_content="\"content\": \"${test_report}\"" 51 | d_avatar="\"avatar_url\": \"${discord_avatar_url}\"" 52 | discord_txt="{${d_avatar},${d_username},${d_content}}" 53 | -------------------------------------------------------------------------------- /dos-report.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ## solana-bench-tps config 3 | set -ex 4 | # read env 5 | source "env-artifact.sh" 6 | source "utils.sh" 7 | # check ENV 8 | # no env , exit 9 | [[ ! $START_TIME ]]&& echo START_TIME env not found&&exit 1 10 | [[ ! $START_TIME2 ]]&& echo START_TIME2 env not found&&exit 1 11 | [[ ! $STOP_TIME ]]&& echo STOP_TIME env not found&&exit 1 12 | [[ ! $STOP_TIME2 ]]&& echo STOP_TIME2 env not found&&exit 1 13 | [[ ! $INFLUX_TOKEN ]]&& echo INFLUX_TOKEN env not found&&exit 1 14 | [[ ! $INFLUX_HOST ]]&& echo INFLUX_HOST env not found&&exit 1 15 | [[ ! $REPORT_BUCKET ]] && echo REPORT_BUCKET env not found&&exit 1 16 | # set large data set 17 | [[ ! $LARGE_DATA_SET ]] && LARGE_DATA_SET="false" 18 | if [[ -n $BUILDKITE_BUILD_URL ]] ; then 19 | BUILD_BUTTON_TEXT="Build Kite Job" 20 | else 21 | BUILD_BUTTON_TEXT="Build URL not defined" 22 | BUILDKITE_BUILD_URL="https://buildkite.com/solana-labs/" 23 | fi 24 | ## setup window interval for query 25 | window_interval="10s" 26 | window_interval_long="10s" 27 | oversize_window=$(echo "${DURATION}+300" | bc) 28 | printf -v oversize_window "%ss" "$oversize_window" 29 | if [[ "$LARGE_DATA_SET" == "true" ]];then 30 | [[ ! "$INFLUX_WINDOW_INTERVAL" ]] && INFLUX_WINDOW_INTERVAL="10m" 31 | [[ ! 
"$INFLUX_WINDOW_INTERVAL_LONG" ]] && INFLUX_WINDOW_INTERVAL_LONG="30m" 32 | window_interval=$INFLUX_WINDOW_INTERVAL 33 | window_interval_long=$INFLUX_WINDOW_INTERVAL_LONG 34 | oversize_window="12h" 35 | fi 36 | 37 | ## Configuration 38 | test_type=$TEST_TYPE 39 | client="tpu" 40 | [[ "$USE_TPU_CLIENT" == "false" ]] && client="rpc" 41 | git_commit=$SOLANA_GIT_COMMIT 42 | cluster_version=$CLUSTER_VERSION 43 | num_clients=$NUM_CLIENT 44 | client_keypair_path="keypair-configs/$KEYPAIR_FILE" 45 | duration=$DURATION 46 | tx_count=$TX_COUNT 47 | thread_batch_sleep_ms=$THREAD_BATCH_SLEEP_MS 48 | API_V2_HOST="${INFLUX_HOST}/api/v2/query" 49 | HEADER_AUTH="Authorization: Token ${INFLUX_TOKEN}" 50 | CURL_TIMEOUT=12 51 | start_time=$START_TIME 52 | start_time2=$START_TIME2 53 | stop_time=$STOP_TIME 54 | stop_time2=$STOP_TIME2 55 | 56 | ## make sure 57 | source utils.sh 58 | source influx_data.sh 59 | 60 | query(){ 61 | local retry=0 62 | for retry in 0 1 2 63 | do 64 | if [[ $retry -gt 0 ]];then 65 | printf "start retry:%s\n%s\n" $retry 66 | sleep 2 67 | fi 68 | if [[ -z "$1" ]];then 69 | echo "query command is empty!" 70 | echo "$1" 71 | fi 72 | curl --connect-timeout ${CURL_TIMEOUT} --request POST \ 73 | "${API_V2_HOST}" \ 74 | --header "${HEADER_AUTH}" \ 75 | --header 'Accept: application/csv' \ 76 | --header 'Content-type: application/vnd.flux' \ 77 | --data "$1" > query.result 78 | local n=0 79 | local arr=() 80 | local line 81 | while IFS= read -r line 82 | do 83 | if [[ ${#line} -gt 1 ]];then # last line is empty but length=1 84 | arr+=("$line") 85 | let n=n+1 86 | fi 87 | done < query.result 88 | 89 | if [[ $n -gt 1 ]]; then 90 | printf "%s\n" "valid return" 91 | break 92 | else # empty or error 93 | printf "*retry:%s\nquery error:%s\n" $retry ${arr[0]} 94 | fi 95 | done 96 | } 97 | 98 | for f in "${!FLUX[@]}" 99 | do 100 | echo "----FLUX ($count) $f----" 101 | echo "${FLUX[$f]}" 102 | done 103 | 104 | declare -A FLUX_RESULT # collect results 105 | for f in "${!FLUX[@]}" 106 | do 107 | 108 | if [[ -z "${FLUX[${f}]}" ]];then 109 | printf "***%s %s\n%s\n" $f "is return zero-length" ${FLUX[${f}]} 110 | fi 111 | query "${FLUX[${f}]}" 112 | if [[ -f 'query.result' ]];then 113 | 114 | FLUX_RESULT[${f}]="`cat query.result`" 115 | printf "%s %s\n" $f ${FLUX_RESULT[${f}]} 116 | else 117 | printf "%s%s\n" "$f" "no query.result" 118 | fi 119 | sleep 1 120 | done 121 | 122 | ## For debug , printout each result of 123 | # for r in "${!FLUX_RESULT[@]}" 124 | # do 125 | # result=${FLUX_RESULT[${r}]} 126 | # echo "---- $r result ----" 127 | # echo "$result" 128 | # echo "-----$r end-------" 129 | # done 130 | 131 | ## result should be like this 132 | ## ,result,table,_value 133 | ## ,_result,0,137371131 134 | 135 | get_value() { 136 | local arr=() 137 | local n=0 138 | local line 139 | while IFS= read -r line 140 | do 141 | if [[ ${#line} -gt 1 ]];then # last line is empty but length=1 142 | arr+=("$line") 143 | let n=n+1 144 | fi 145 | done <<< $result_input 146 | 147 | if [[ $n -gt 1 ]]; then 148 | while IFS=, read -r empty result table val host_id 149 | do 150 | _value="$(echo "$val"|tr -d '\r\n')" #return value include a new line 151 | done <<< "${arr[1]}" 152 | else 153 | _value="na" 154 | fi 155 | } 156 | 157 | declare -A DATAPOINT # collect results 158 | # write data to benchmark-report-tmp bucket 159 | # $2:influxdb endpoint $data to write 160 | write_datapoint_v2() { 161 | curl -i --connect-timeout "${CURL_TIMEOUT}" -XPOST "${INFLUX_HOST}/api/v2/write?bucket=${REPORT_BUCKET}/autogen&precision=ns" \ 
162 | --header "${HEADER_AUTH}" \ 163 | --data-raw "$1" 164 | } 165 | result_detail="" 166 | # time for influx only 167 | DATAPOINT[start_time]="$start_time" 168 | DATAPOINT[stop_time]="$stop_time" 169 | printf -v time_range_str "\"time range: %s ~ %s\"" \ 170 | "$(date --rfc-3339=seconds -u -d @$start_time)" "$(date --rfc-3339=seconds -u -d @$stop_time)" 171 | DATAPOINT[time_range]="$time_range_str" 172 | # slot 173 | result_input=${FLUX_RESULT['start_slot']} 174 | get_value 175 | start_slot_txt="start_slot: $_value" 176 | DATAPOINT[start_slot]="$_value" 177 | result_input=${FLUX_RESULT['end_slot']} 178 | get_value 179 | end_slot_txt="end_slot: $_value" 180 | DATAPOINT[end_slot]="$_value" 181 | # TPS : the query result is tps*{$window_interval}, so we need to divide {$window_interval} to get the real tps 182 | result_input=${FLUX_RESULT['mean_tx_count']} 183 | get_value 184 | extract_time_in_sec "${window_interval}" 185 | [[ ${duration_in_seconds} -eq "0" ]]&& tps="0" || tps=$(echo "scale=0;$_value/${duration_in_seconds}"|bc) 186 | mean_tx_count_txt="mean_tps: $tps" 187 | DATAPOINT[mean_tps]="$tps" 188 | result_input=${FLUX_RESULT['max_tx_count']} 189 | get_value 190 | extract_time_in_sec "${window_interval}" 191 | [[ ${duration_in_seconds} -eq "0" ]]&& tps="0" || tps=$(echo "scale=0;$_value/${duration_in_seconds}"|bc) 192 | max_tx_count_txt="max_tps: $tps" 193 | DATAPOINT[max_tps]="$tps" 194 | result_input=${FLUX_RESULT['p90_tx_count']} 195 | get_value 196 | extract_time_in_sec "${window_interval}" 197 | [[ ${duration_in_seconds} -eq "0" ]]&& tps="0" || tps=$(echo "scale=0;$_value/${duration_in_seconds}"|bc) 198 | p90_tx_count_txt="90th_tx_count: $tps" 199 | DATAPOINT[90th_tx_count]="$tps" 200 | result_input="${FLUX_RESULT['p99_tx_count']}" 201 | get_value 202 | extract_time_in_sec "${window_interval}" 203 | tps=$(echo "scale=0;$_value/${duration_in_seconds}"|bc) 204 | p99_tx_count_txt="99th_tx_count: $tps" 205 | DATAPOINT[99th_tx_count]="$tps" 206 | # tower distance 207 | result_input="${FLUX_RESULT['mean_tower_vote_distance']}" 208 | echo "${FLUX_RESULT['mean_tower_vote_distance']}" 209 | get_value 210 | mean_tower_vote_distance_txt="mean_tower_vote_distance: $_value" 211 | DATAPOINT[mean_tower_vote_distance]="$_value" 212 | result_input="${FLUX_RESULT['max_tower_vote_distance']}" 213 | get_value 214 | max_tower_vote_distance_txt="max_tower_vote_distance: $_value" 215 | DATAPOINT[max_tower_vote_distance]="$_value" 216 | result_input="${FLUX_RESULT['min_tower_vote_distance']}" 217 | get_value 218 | result_input="${FLUX_RESULT['p90_tower_vote_distance']}" 219 | get_value 220 | p90_tower_vote_distance_txt="90th_tower_vote_distance: $_value" 221 | DATAPOINT[90th_tower_vote_distance]="$_value" 222 | result_input="${FLUX_RESULT['p99_tower_vote_distance']}" 223 | get_value 224 | p99_tower_vote_distance_txt="99th_tower_vote_distance: $_value" 225 | DATAPOINT[99th_tower_vote_distance]="$_value" 226 | # optimistic_slot_elapsed 227 | result_input="${FLUX_RESULT['mean_optimistic_slot_elapsed']}" 228 | get_value 229 | mean_optimistic_slot_elapsed_txt="mean_optimistic_slot_elapsed: $_value" 230 | DATAPOINT[mean_optimistic_slot_elapsed]="$_value" 231 | result_input="${FLUX_RESULT['max_optimistic_slot_elapsed']}" 232 | get_value 233 | max_optimistic_slot_elapsed_txt="max_optimistic_slot_elapsed: $_value" 234 | DATAPOINT[max_optimistic_slot_elapsed]="$_value" 235 | result_input="${FLUX_RESULT['p90_optimistic_slot_elapsed']}" 236 | get_value 237 | 
p90_optimistic_slot_elapsed_txt="90th_optimistic_slot_elapsed: $_value" 238 | result_input="${FLUX_RESULT['p99_optimistic_slot_elapsed']}" 239 | DATAPOINT[90th_optimistic_slot_elapsed]="$_value" 240 | get_value 241 | p99_optimistic_slot_elapsed_txt="99th_optimistic_slot_elapsed: $_value" 242 | DATAPOINT[99th_optimistic_slot_elapsed]="$_value" 243 | # ct_stats_block_cost 244 | result_input="${FLUX_RESULT['mean_ct_stats_block_cost']}" 245 | get_value 246 | mean_ct_stats_block_cost_txt="mean_cost_tracker_stats_block_cost: $_value" 247 | DATAPOINT[mean_cost_tracker_stats_block_cost]="$_value" 248 | result_input="${FLUX_RESULT['max_ct_stats_block_cost']}" 249 | get_value 250 | max_ct_stats_block_cost_txt="max_cost_tracker_stats_block_cost: $_value" 251 | DATAPOINT[max_cost_tracker_stats_block_cost]="$_value" 252 | result_input="${FLUX_RESULT['p90_ct_stats_block_cost']}" 253 | get_value 254 | p90_ct_stats_block_cost_txt="90th_cost_tracker_stats_block_cost: $_value" 255 | DATAPOINT[90th_cost_tracker_stats_block_cost]="$_value" 256 | result_input="${FLUX_RESULT['p99_ct_stats_block_cost']}" 257 | get_value 258 | p99_ct_stats_block_cost_txt="99th_cost_tracker_stats_block_cost: $_value" 259 | DATAPOINT[99th_cost_tracker_stats_block_cost]="$_value" 260 | 261 | # ct_stats_block_cost 262 | result_input="${FLUX_RESULT['mean_ct_stats_transaction_count']}" 263 | get_value 264 | mean_mean_ct_stats_tx_count_txt="mean_cost_tracker_stats_transaction_count: $_value" 265 | DATAPOINT[mean_cost_tracker_stats_transaction_count]="$_value" 266 | result_input="${FLUX_RESULT['max_ct_stats_transaction_count']}" 267 | get_value 268 | max_mean_ct_stats_tx_count_txt="max_cost_tracker_stats_transaction_count: $_value" 269 | DATAPOINT[max_cost_tracker_stats_transaction_count]="$_value" 270 | result_input="${FLUX_RESULT['p90_ct_stats_transaction_count']}" 271 | get_value 272 | p90_mean_ct_stats_tx_count_txt="90th_cost_tracker_stats_transaction_count: $_value" 273 | DATAPOINT[90th_cost_tracker_stats_transaction_count]="$_value" 274 | result_input="${FLUX_RESULT['p99_ct_stats_transaction_count']}" 275 | get_value 276 | p99_mean_ct_stats_tx_count_txt="99th_cost_tracker_stats_transaction_count: $_value" 277 | DATAPOINT[99th_cost_tracker_stats_transaction_count]="$_value" 278 | # ct_stats_number_of_accounts 279 | result_input="${FLUX_RESULT['mean_ct_stats_number_of_accounts']}" 280 | get_value 281 | mean_ct_stats_num_of_accts_txt="mean_cost_tracker_stats_number_of_accounts: $_value" 282 | DATAPOINT[mean_cost_tracker_stats_number_of_accounts]="$_value" 283 | result_input="${FLUX_RESULT['max_ct_stats_number_of_accounts']}" 284 | get_value 285 | max_ct_stats_num_of_accts_txt="max_cost_tracker_stats_number_of_accounts: $_value" 286 | DATAPOINT[max_cost_tracker_stats_number_of_accounts]="$_value" 287 | result_input="${FLUX_RESULT['p90_ct_stats_number_of_accounts']}" 288 | get_value 289 | p90_ct_stats_num_of_accts_txt="90th_cost_tracker_stats_number_of_accounts: $_value" 290 | DATAPOINT[90th_cost_tracker_stats_number_of_accounts]="$_value" 291 | result_input="${FLUX_RESULT['p99_ct_stats_number_of_accounts']}" 292 | get_value 293 | p99_ct_stats_num_of_accts_txt="99th_cost_tracker_stats_number_of_accounts: $_value" 294 | DATAPOINT[99th_cost_tracker_stats_number_of_accounts]="$_value" 295 | 296 | # # blocks fill 297 | result_input="${FLUX_RESULT['total_blocks']}" 298 | get_value 299 | if [[ "$_value" == "na" ]];then 300 | _value=0 301 | fi 302 | total_blocks_tmp=$_value 303 | total_blocks_txt="numb_total_blocks: $_value" 304 | 
DATAPOINT[numb_total_blocks]="$_value" 305 | result_input="${FLUX_RESULT['blocks_fill_50']}" 306 | get_value 307 | blocks_fill_50_txt="numb_blocks_50_full: $_value" 308 | DATAPOINT[numb_blocks_50_full]="$_value" 309 | if [[ "$_value" == "na" || $total_blocks_tmp -eq 0 ]];then 310 | percent_value="0%" 311 | percent_raw_value=0 312 | else 313 | percent_raw_value=$(echo "scale=2;($_value/$total_blocks_tmp)*100" | bc) 314 | printf -v percent_value "%.0f%s" $percent_raw_value "%" 315 | fi 316 | blocks_fill_50_percent_txt="blocks_50_full: $percent_value" 317 | DATAPOINT[blocks_50_full]="$percent_raw_value" 318 | result_input="${FLUX_RESULT['blocks_fill_90']}" 319 | get_value 320 | blocks_fill_90_txt="numb_blocks_90_full: $_value" 321 | DATAPOINT[numb_blocks_90_full]="$_value" 322 | if [[ "$_value" == "na" || $total_blocks_tmp -eq 0 ]];then 323 | percent_value="0%" 324 | percent_raw_value=0 325 | else 326 | percent_raw_value=$(echo "scale=2;($_value/$total_blocks_tmp)*100" | bc) 327 | printf -v percent_value "%.0f%s" $percent_raw_value "%" 328 | fi 329 | blocks_fill_90_percent_txt="blocks_90_full: $percent_value" 330 | DATAPOINT[blocks_90_full]="$percent_raw_value" 331 | # skip_rate 332 | result_input="${FLUX_RESULT['mean_skip_rate']}" 333 | get_value 334 | [[ $_value != "na" ]] && printf -v precision "%.2f" "$_value" || precision="na" 335 | mean_skip_rate_txt="mean_skip_rate: $precision%" 336 | DATAPOINT[mean_skip_rate]="$_value" 337 | 338 | result_input="${FLUX_RESULT['max_skip_rate']}" 339 | get_value 340 | [[ $_value != "na" ]] && printf -v precision "%.2f" "$_value" || precision="na" 341 | max_skip_rate_txt="max_skip_rate: $precision%" 342 | DATAPOINT[max_skip_rate]="$_value" 343 | 344 | result_input="${FLUX_RESULT['skip_rate_90']}" 345 | get_value 346 | [[ $_value != "na" ]] && printf -v precision "%.2f" "$_value" || precision="na" 347 | skip_rate_90_txt="skip_rate_90: $precision%" 348 | DATAPOINT[skip_rate_90]="$_value" 349 | 350 | result_input="${FLUX_RESULT['mean_skip_rate_b4_test']}" 351 | get_value 352 | [[ $_value != "na" ]] && printf -v precision "%.2f" "$_value" || precision="na" 353 | mean_skip_rate_b4_test_txt="mean_skip_rate_b4_test: $precision%" 354 | DATAPOINT[mean_skip_rate_b4_test]="$_value" 355 | 356 | #write data report to the influx 357 | 358 | build="$BUILDKITE_BUILD_NUMBER" 359 | [[ ! 
"$BUILDKITE_BUILD_NUMBER" ]] && build="0" 360 | utc_sec=$(date +%s) 361 | write_ts=$(echo "scale=2;${utc_sec}*1000000000" | bc) 362 | 363 | declare -A FIELD_MEASUREMENT 364 | FIELD_MEASUREMENT[mean_tps]=tps 365 | 366 | for r in "${!DATAPOINT[@]}" 367 | do 368 | measurement=${FIELD_MEASUREMENT[$r]} 369 | write_data="$measurement,build=$build,client=$client,branch=$SOLANA_BUILD_BRANCH,git_commit=$git_commit,cluster_version=$cluster_version,\ 370 | clients_num=$num_clients,duration=$duration,tx_count=$tx_count,thread_batch_sleep_ms=$thread_batch_sleep_ms,durable_nonce=$USE_DURABLE_NONCE $r=${DATAPOINT[$r]} $write_ts" 371 | write_datapoint_v2 "$write_data" "$API_V2_HOST" 372 | done 373 | 374 | ## create Grafana link 375 | gf_from=$(echo "scale=2;${start_time}*1000" | bc) 376 | gf_to=$(echo "scale=2;${stop_time}*1000" | bc) 377 | gf_prefix="https://metrics.solana.com:3000/d/monitor-edge/cluster-telemetry?orgId=1&from=" 378 | gf_postfix="&var-datasource=Influx-Enterprise&var-testnet=tds&var-hostid=All" 379 | printf -v gf_url "%s%s%s%s%s" $gf_prefix $gf_from "&to=" $gf_to $gf_postfix 380 | 381 | if [[ $SLACK_WEBHOOK ]];then 382 | source slack.sh 383 | slack_send 384 | fi 385 | 386 | if [[ $DISCORD_WEBHOOK ]];then 387 | source discord.sh 388 | discord_send 389 | fi 390 | 391 | -------------------------------------------------------------------------------- /influx_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # slot 3 | source utils.sh 4 | _start_slot='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${start_time2}') 5 | |> filter(fn: (r) => r._measurement == "optimistic_slot") 6 | |> group(columns: ["slot"])|> median() 7 | |>drop(columns: ["_measurement", "_field", "_start", "_stop","_time","host_id", "slot"])' 8 | 9 | _end_slot='from(bucket: "tds")|> range(start:'${stop_time2}' ,stop:'${stop_time}') 10 | |> filter(fn: (r) => r._measurement == "optimistic_slot") 11 | |> group(columns: ["slot"])|> median() 12 | |> drop(columns: ["_measurement", "_field", "_start", "_stop","_time","host_id", "slot"])' 13 | 14 | # TPS: Notetice that tthe result of TPS need to divide window_interval to get the correct result 15 | _mean_tx_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 16 | |> filter(fn: (r) => r._measurement == "replay-slot-stats" and r._field == "total_transactions") 17 | |> aggregateWindow(every:'${window_interval}', fn: sum) 18 | |> group() |> median()|>toInt()' 19 | _max_tx_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 20 | |> filter(fn: (r) => r._measurement == "replay-slot-stats" and r._field == "total_transactions") 21 | |> aggregateWindow(every:'${window_interval}', fn: sum) 22 | |> group() |> max() 23 | |>drop(columns: ["_measurement", "_start", "_stop","host_id","_field"])' 24 | _min_tx_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 25 | |> filter(fn: (r) => r._measurement == "replay-slot-stats" and r._field == "total_transactions") 26 | |> aggregateWindow(every:'${window_interval}', fn: sum) 27 | |> group() |> min()' 28 | _90_tx_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 29 | |> filter(fn: (r) => r._measurement == "replay-slot-stats" and r._field == "total_transactions") 30 | |> aggregateWindow(every: '${window_interval_long}', fn: sum) 31 | |> group()|> quantile(column: "_value", q:0.9)' 32 | 33 | _99_tx_count='from(bucket: "tds")|> range(start:'${start_time}' 
,stop:'${stop_time}') 34 | |> filter(fn: (r) => r._measurement == "replay-slot-stats" and r._field == "total_transactions") 35 | |> aggregateWindow(every: '${window_interval_long}', fn: sum) 36 | |> group()|> quantile(column: "_value", q:0.99)' 37 | 38 | # tower_vote_distance 39 | _mean_tower_vote_distance='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 40 | |> filter(fn: (r) => r._measurement == "tower-vote") 41 | |> aggregateWindow(every: '${window_interval}',fn: last) 42 | |> pivot(rowKey:["host_id"], columnKey: ["_field"], valueColumn: "_value") 43 | |> map(fn: (r) => ({ r with _value: r.latest - r.root})) 44 | |> group()|> mean()|>toInt()' 45 | _max_tower_vote_distance='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 46 | |> filter(fn: (r) => r._measurement == "tower-vote") 47 | |> aggregateWindow(every: '${window_interval}',fn: last) 48 | |> pivot(rowKey:["host_id"], columnKey: ["_field"], valueColumn: "_value") 49 | |> map(fn: (r) => ({ r with _value: r.latest - r.root})) 50 | |> group()|> max()|>toInt() 51 | |>drop(columns: ["_measurement", "_start", "_stop","count","host_id","latest","root"])' 52 | _min_tower_vote_distance='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 53 | |> filter(fn: (r) => r._measurement == "tower-vote") 54 | |> aggregateWindow(every: '${window_interval}',fn: last) 55 | |> pivot(rowKey:["host_id"], columnKey: ["_field"], valueColumn: "_value") 56 | |> map(fn: (r) => ({ r with _value: r.latest - r.root})) 57 | |> group()|> min()|>toInt() 58 | |>drop(columns: ["_measurement", "_start", "_stop","count","host_id","latest","root"])' 59 | _90_tower_vote_distance='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 60 | |> filter(fn: (r) => r._measurement == "tower-vote") 61 | |> aggregateWindow(every: '${window_interval}',fn: last) 62 | |> pivot(rowKey:["host_id"], columnKey: ["_field"], valueColumn: "_value") 63 | |> map(fn: (r) => ({ r with _value: r.latest - r.root})) 64 | |> group()|> quantile(column: "_value", q:0.9)|>toInt()' 65 | _99_tower_vote_distance='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 66 | |> filter(fn: (r) => r._measurement == "tower-vote") 67 | |> aggregateWindow(every: '${window_interval}',fn: last) 68 | |> pivot(rowKey:["host_id"], columnKey: ["_field"], valueColumn: "_value") 69 | |> map(fn: (r) => ({ r with _value: r.latest - r.root})) 70 | |> group()|> quantile(column: "_value", q:0.99)|>toInt()' 71 | # optimistic_slot_elapsed 72 | _mean_optimistic_slot_elapsed='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 73 | |> filter(fn: (r) => r._measurement == "optimistic_slot_elapsed") 74 | |> aggregateWindow(every: '${window_interval}', fn: mean) 75 | |> group()|> mean()|>toInt() 76 | |> drop(columns: ["_start", "_stop"])' 77 | 78 | _max_optimistic_slot_elapsed='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 79 | |> filter(fn: (r) => r._measurement == "optimistic_slot_elapsed") 80 | |> group()|> max()|>toInt() 81 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","_time"])' 82 | 83 | _min_optimistic_slot_elapsed='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 84 | |> filter(fn: (r) => r._measurement == "optimistic_slot_elapsed") 85 | |> aggregateWindow(every: '${window_interval}', fn: min) 86 | |> group()|>min()|>toInt() 87 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","latest","_time"])' 88 | 
_90_optimistic_slot_elapsed='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 89 | |> filter(fn: (r) => r._measurement == "optimistic_slot_elapsed") 90 | |> aggregateWindow(every: '${window_interval_long}', fn: mean) 91 | |> group()|>quantile(column: "_value", q:0.9)|>toInt() 92 | |> drop(columns: ["_start", "_stop"])' 93 | _99_optimistic_slot_elapsed='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 94 | |> filter(fn: (r) => r._measurement == "optimistic_slot_elapsed") 95 | |> aggregateWindow(every: '${window_interval_long}', fn: mean) 96 | |> group()|>quantile(column: "_value", q:0.99)|>toInt() 97 | |> drop(columns: ["_start", "_stop"])' 98 | # ct_stats_block_cost 99 | _mean_ct_stats_block_cost='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 100 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "block_cost") 101 | |> aggregateWindow(every: '${window_interval}', fn: mean) 102 | |> group()|> mean()|>toInt() 103 | |> drop(columns:["_start", "_stop"])' 104 | _max_ct_stats_block_cost='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 105 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "block_cost") 106 | |> aggregateWindow(every: '${window_interval}', fn: max) 107 | |> group()|> max()|>toInt() 108 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","_time"])' 109 | _min_ct_stats_block_cost='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 110 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "block_cost") 111 | |> aggregateWindow(every: '${window_interval}', fn: min) 112 | |> group()|> min()|>toInt() 113 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","_time"])' 114 | _90_ct_stats_block_cost='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 115 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "block_cost") 116 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> quantile(q: 0.9)) 117 | |> group()|>quantile(column: "_value", q:0.90) 118 | |> group()|> min()|>toInt() 119 | |> drop(columns: ["_start", "_stop"])' 120 | _99_ct_stats_block_cost='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 121 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "block_cost") 122 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> quantile(q: 0.99)) 123 | |> group()|>quantile(column: "_value", q:0.99) 124 | |> group()|> min()|>toInt() 125 | |> drop(columns: ["_start", "_stop"])' 126 | # ct_stats_transaction_count 127 | _mean_ct_stats_transaction_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 128 | |> filter(fn: (r) => r["_measurement"] == "cost_tracker_stats" and r["_field"] == "transaction_count") 129 | |> aggregateWindow(every: '${window_interval}', fn: mean) 130 | |> group()|> mean()|>toInt() 131 | |> drop(columns: ["_start", "_stop"])' 132 | _max_ct_stats_transaction_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 133 | |> filter(fn: (r) => r["_measurement"] == "cost_tracker_stats" and r["_field"] == "transaction_count") 134 | |> aggregateWindow(every: '${window_interval}', fn: max) 135 | |> group()|> max()|>toInt() 136 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","latest","_time"])' 137 | 
_min_ct_stats_transaction_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 138 | |> filter(fn: (r) => r["_measurement"] == "cost_tracker_stats" and r["_field"] == "transaction_count") 139 | |> aggregateWindow(every: '${window_interval}', fn: min) 140 | |> group()|> min()|>toInt() 141 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","latest","_time"])' 142 | _90_ct_stats_transaction_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 143 | |> filter(fn: (r) => r["_measurement"] == "cost_tracker_stats" and r["_field"] == "transaction_count") 144 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> quantile(q: 0.9)) 145 | |> group()|>quantile(column: "_value", q:0.90)|>toInt() 146 | |> drop(columns: ["_start", "_stop"])' 147 | _99_ct_stats_transaction_count='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 148 | |> filter(fn: (r) => r["_measurement"] == "cost_tracker_stats" and r["_field"] == "transaction_count") 149 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> quantile(q: 0.99)) 150 | |> filter(fn: (r) => r["_field"] == "transaction_count") 151 | |> group()|>quantile(column: "_value", q:0.99)|>toInt() 152 | |> drop(columns: ["_start", "_stop"])' 153 | # ct_stats_number_of_accounts 154 | _mean_ct_stats_number_of_accounts='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 155 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "number_of_accounts") 156 | |> aggregateWindow(every: '${window_interval}', fn: mean) 157 | |> group()|> mean()|>toInt() 158 | |> drop(columns: ["_start", "_stop"])' 159 | _max_ct_stats_number_of_accounts='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 160 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "number_of_accounts") 161 | |> aggregateWindow(every: '${window_interval}', fn: max) 162 | |> group()|> max()|>toInt() 163 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","_time"])' 164 | _min_ct_stats_number_of_accounts='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 165 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "number_of_accounts") 166 | |> aggregateWindow(every: '${window_interval}', fn: min) 167 | |> group()|> min()|>toInt() 168 | |> drop(columns: ["_measurement","_field", "_start", "_stop","host_id","_time"])' 169 | _90_ct_stats_number_of_accounts='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 170 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "number_of_accounts") 171 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> quantile(q: 0.90)) 172 | |> group()|>quantile(column: "_value", q:0.90)|>toInt() 173 | |> drop(columns: ["_start", "_stop"])' 174 | _99_ct_stats_number_of_accounts='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 175 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats" and r["_field"] == "number_of_accounts") 176 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> quantile(q: 0.90)) 177 | |> group()|>quantile(column: "_value", q:0.99)|>toInt() 178 | |> drop(columns: ["_start", "_stop"])' 179 | #blocks fill 180 | _total_blocks='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 181 | |> filter(fn: 
(r) => r._measurement == "cost_tracker_stats" and r["_field"] == "bank_slot") 182 | |> group() 183 | |> aggregateWindow(every: '${window_interval}', fn: count) 184 | |> sum() 185 | |> drop(columns: ["_start", "_stop"])' 186 | 187 | _blocks_fill_50='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 188 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats") 189 | |> filter(fn: (r) => r._field == "bank_slot" or r._field == "block_cost") 190 | |> pivot(rowKey:["_time", "host_id"], columnKey: ["_field"], valueColumn: "_value") 191 | |> group() 192 | |> filter(fn: (r) => r.block_cost > (48000000.0*0.5)) 193 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> count(column: "bank_slot")) 194 | |> sum(column: "bank_slot") 195 | |> drop(columns: ["_start", "_stop"])' 196 | _blocks_fill_90='from(bucket: "tds")|> range(start:'${start_time}' ,stop:'${stop_time}') 197 | |> filter(fn: (r) => r._measurement == "cost_tracker_stats") 198 | |> filter(fn: (r) => r._field == "bank_slot" or r._field == "block_cost") 199 | |> pivot(rowKey:["_time", "host_id"], columnKey: ["_field"], valueColumn: "_value") 200 | |> group() 201 | |> filter(fn: (r) => r.block_cost > (48000000.0*0.9)) 202 | |> aggregateWindow(every: '${window_interval}', fn: (column, tables=<-) => tables |> count(column: "bank_slot")) 203 | |> sum(column: "bank_slot") 204 | |> drop(columns: ["_start", "_stop"])' 205 | #skip_rate 206 | # $1:start_time 207 | # $2: stop_time 208 | # $3: oversize_window 209 | # $4: type of statistic (mean/max/percentile90) 210 | function skip_rate_query() { 211 | skip_rate_q_prefix='data_max=from(bucket: "tds")|> range(start:'$1' ,stop:'$2') 212 | |> filter(fn: (r) => r["_measurement"] == "bank-new_from_parent-heights") 213 | |> filter(fn: (r) => r["_field"] == "slot" or r["_field"] == "block_height") 214 | |> aggregateWindow(every:'$3', fn:max) 215 | |> max() 216 | |> group(columns: ["host_id"], mode:"by") 217 | data_min=from(bucket: "tds") 218 | |> range(start:'$1' ,stop:'$2') 219 | |> filter(fn: (r) => r["_measurement"] == "bank-new_from_parent-heights") 220 | |> filter(fn: (r) => r["_field"] == "slot" or r["_field"] == "block_height") 221 | |> aggregateWindow(every: '$3', fn:min) 222 | |> max() 223 | |> group(columns: ["host_id"], mode:"by") 224 | block_max=data_max|> filter(fn: (r) => r["_field"] == "block_height")|> set(key: "_field", value: "block_max") 225 | block_min=data_min|> filter(fn: (r) => r["_field"] == "block_height")|> set(key: "_field", value: "block_min") 226 | slot_max=data_max|> filter(fn: (r) => r["_field"] == "slot")|> set(key: "_field", value: "slot_max") 227 | slot_min=data_min|> filter(fn: (r) => r["_field"] == "slot")|> set(key: "_field", value: "slot_min") 228 | union(tables: [block_max, block_min, slot_max, slot_min]) 229 | |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value") 230 | |> map(fn: (r) => ({ r with block_diff: r.block_max - r.block_min })) 231 | |> map(fn: (r) => ({ r with slot_diff: r.slot_max - r.slot_min })) 232 | |> map(fn: (r) => ({ r with skip_slot: r.slot_diff - r.block_diff })) 233 | |> filter(fn: (r) => r.slot_diff > 0) 234 | |> map(fn: (r) => ({ r with skip_rate_percent: r.skip_slot*100/r.slot_diff })) 235 | |> keep(columns: ["skip_rate_percent"])|> group()' 236 | case "$4" in 237 | 'mean') 238 | skip_rate_query=$skip_rate_q_prefix'|> mean(column: "skip_rate_percent")' 239 | ;; 240 | 'max') 241 | skip_rate_query=$skip_rate_q_prefix'|> max(column: "skip_rate_percent")' 242 | ;; 243 | 
'percentile90') 244 | skip_rate_query=$skip_rate_q_prefix'|> quantile(q: 0.9, column: "skip_rate_percent")' 245 | ;; 246 | esac 247 | } 248 | skip_rate_query "$start_time" "$stop_time" "$oversize_window" "mean" 249 | _mean_skip_rate=$skip_rate_query 250 | skip_rate_query "$start_time" "$stop_time" "$oversize_window" "max" 251 | _max_skip_rate=$skip_rate_query 252 | skip_rate_query "$start_time" "$stop_time" "$oversize_window" "percentile90" 253 | _skip_rate_90=$skip_rate_query 254 | start_time_b4_test=$(get_time_before "$start_time" 3600) 255 | b4_stop_time_b4_test="$start_time" 256 | skip_rate_query "$start_time_b4_test" "$b4_stop_time_b4_test" "$oversize_window" "mean" 257 | _mean_skip_rate_b4_test=$skip_rate_query 258 | 259 | declare -A FLUX # FLUX command 260 | FLUX[start_slot]=$_start_slot 261 | FLUX[end_slot]=$_end_slot 262 | # TPS 263 | FLUX[mean_tx_count]=$_mean_tx_count 264 | FLUX[max_tx_count]=$_max_tx_count 265 | #FLUX[min_tx_count]=$_min_tx_count 266 | FLUX[p90_tx_count]=$_90_tx_count 267 | FLUX[p99_tx_count]=$_99_tx_count 268 | # # tower distance 269 | FLUX[mean_tower_vote_distance]=$_mean_tower_vote_distance 270 | FLUX[max_tower_vote_distance]=$_max_tower_vote_distance 271 | #FLUX[min_tower_vote_distance]=$_min_tower_vote_distance 272 | FLUX[p90_tower_vote_distance]=$_90_tower_vote_distance 273 | FLUX[p99_tower_vote_distance]=$_99_tower_vote_distance 274 | # # optimistic_slot_elapsed 275 | FLUX[mean_optimistic_slot_elapsed]=$_mean_optimistic_slot_elapsed 276 | FLUX[max_optimistic_slot_elapsed]=$_max_optimistic_slot_elapsed 277 | # FLUX[min_optimistic_slot_elapsed]=$_min_optimistic_slot_elapsed 278 | FLUX[p90_optimistic_slot_elapsed]=$_90_optimistic_slot_elapsed 279 | FLUX[p99_optimistic_slot_elapsed]=$_99_optimistic_slot_elapsed 280 | # # ct_stats_block_cost 281 | FLUX[mean_ct_stats_block_cost]=$_mean_ct_stats_block_cost 282 | FLUX[max_ct_stats_block_cost]=$_max_ct_stats_block_cost 283 | # FLUX[min_ct_stats_block_cost]=$_min_ct_stats_block_cost 284 | FLUX[p90_ct_stats_block_cost]=$_90_ct_stats_block_cost 285 | FLUX[p99_ct_stats_block_cost]=$_99_ct_stats_block_cost 286 | # ct_stats_transaction_count 287 | FLUX[mean_ct_stats_transaction_count]=$_mean_ct_stats_transaction_count 288 | FLUX[max_ct_stats_transaction_count]=$_max_ct_stats_transaction_count 289 | # FLUX[min_ct_stats_transaction_count]=$_min_ct_stats_transaction_count 290 | FLUX[p90_ct_stats_transaction_count]=$_90_ct_stats_transaction_count 291 | FLUX[p99_ct_stats_transaction_count]=$_99_ct_stats_transaction_count 292 | 293 | # ct_stats_number_of_accounts 294 | FLUX[mean_ct_stats_number_of_accounts]=$_mean_ct_stats_number_of_accounts 295 | FLUX[max_ct_stats_number_of_accounts]=$_max_ct_stats_number_of_accounts 296 | # FLUX[min_ct_stats_number_of_accounts]=$_min_ct_stats_number_of_accounts 297 | FLUX[p90_ct_stats_number_of_accounts]=$_90_ct_stats_number_of_accounts 298 | FLUX[p99_ct_stats_number_of_accounts]=$_99_ct_stats_number_of_accounts 299 | 300 | # blocks fill 301 | FLUX[total_blocks]=$_total_blocks 302 | FLUX[blocks_fill_50]=$_blocks_fill_50 303 | FLUX[blocks_fill_90]=$_blocks_fill_90 304 | 305 | # skip rate 306 | FLUX[mean_skip_rate]=$_mean_skip_rate 307 | FLUX[max_skip_rate]=$_max_skip_rate 308 | FLUX[skip_rate_90]=$_skip_rate_90 309 | FLUX[mean_skip_rate_b4_test]=$_mean_skip_rate_b4_test 310 | 311 | # Dos Report write to Influxdb 312 | 313 | declare -A FIELD_MEASUREMENT 314 | # measurement range 315 | FIELD_MEASUREMENT[start_time]=range 316 | FIELD_MEASUREMENT[stop_time]=range 317 | 
FIELD_MEASUREMENT[time_range]=range 318 | FIELD_MEASUREMENT[start_slot]=range 319 | FIELD_MEASUREMENT[end_slot]=range 320 | # tps 321 | FIELD_MEASUREMENT[mean_tps]=tps 322 | FIELD_MEASUREMENT[max_tps]=tps 323 | FIELD_MEASUREMENT[90th_tx_count]=tps 324 | FIELD_MEASUREMENT[99th_tx_count]=tps 325 | # tower_vote 326 | FIELD_MEASUREMENT[mean_tower_vote_distance]=tower_vote 327 | FIELD_MEASUREMENT[max_tower_vote_distance]=tower_vote 328 | FIELD_MEASUREMENT[90th_tower_vote_distance]=tower_vote 329 | FIELD_MEASUREMENT[99th_tower_vote_distance]=tower_vote 330 | # optimistic_slot_elapsed 331 | FIELD_MEASUREMENT[mean_optimistic_slot_elapsed]=optimistic_slot_elapsed 332 | FIELD_MEASUREMENT[max_optimistic_slot_elapsed]=optimistic_slot_elapsed 333 | FIELD_MEASUREMENT[90th_optimistic_slot_elapsed]=optimistic_slot_elapsed 334 | FIELD_MEASUREMENT[99th_optimistic_slot_elapsed]=optimistic_slot_elapsed 335 | # cost_tracker_stats 336 | FIELD_MEASUREMENT[mean_cost_tracker_stats_block_cost]=block_cost 337 | FIELD_MEASUREMENT[max_cost_tracker_stats_block_cost]=block_cost 338 | FIELD_MEASUREMENT[90th_cost_tracker_stats_block_cost]=block_cost 339 | FIELD_MEASUREMENT[99th_cost_tracker_stats_block_cost]=block_cost 340 | # transaction_count 341 | FIELD_MEASUREMENT[mean_cost_tracker_stats_transaction_count]=transaction_count 342 | FIELD_MEASUREMENT[max_cost_tracker_stats_transaction_count]=transaction_count 343 | FIELD_MEASUREMENT[90th_cost_tracker_stats_transaction_count]=transaction_count 344 | FIELD_MEASUREMENT[99th_cost_tracker_stats_transaction_count]=transaction_count 345 | # ct_stats_number_of_accounts 346 | FIELD_MEASUREMENT[mean_cost_tracker_stats_number_of_accounts]=number_of_accounts 347 | FIELD_MEASUREMENT[max_cost_tracker_stats_number_of_accounts]=number_of_accounts 348 | FIELD_MEASUREMENT[90th_cost_tracker_stats_number_of_accounts]=number_of_accounts 349 | FIELD_MEASUREMENT[99th_cost_tracker_stats_number_of_accounts]=number_of_accounts 350 | # blocks fill 351 | FIELD_MEASUREMENT[numb_total_blocks]=block_fill 352 | FIELD_MEASUREMENT[numb_blocks_50_full]=block_fill 353 | FIELD_MEASUREMENT[numb_blocks_90_full]=block_fill 354 | FIELD_MEASUREMENT[blocks_50_full]=block_fill 355 | FIELD_MEASUREMENT[blocks_90_full]=block_fill 356 | 357 | # skip rate 358 | FIELD_MEASUREMENT[mean_skip_rate]=skip_rate 359 | FIELD_MEASUREMENT[max_skip_rate]=skip_rate 360 | FIELD_MEASUREMENT[skip_rate_90]=skip_rate 361 | FIELD_MEASUREMENT[mean_skip_rate_b4_test]=skip_rate 362 | -------------------------------------------------------------------------------- /main.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | ## Directory settings 4 | dos_program_dir=$(pwd) 5 | source utils.sh 6 | # shellcheck source=/dev/null 7 | source "env-artifact.sh" 8 | 9 | echo ----- stage: machines and build and upload mango-simulation --- 10 | cd "$dos_program_dir" 11 | # shellcheck source=/dev/null 12 | source create-instance.sh 13 | create_machines "$NUM_CLIENT" 14 | echo ----- stage: build dependency mango_bencher configure_mango for machine------ 15 | client_num=1 16 | # ARTIFACT_BUCKET must in the step 17 | artifact_bucket="$ARTIFACT_BUCKET/$BUILDKITE_PIPELINE_ID/$BUILDKITE_BUILD_ID/$BUILDKITE_JOB_ID" 18 | artifact_file="$ENV_ARTIFACT_FILE" 19 | for sship in "${instance_ip[@]}" 20 | do 21 | [[ $client_num -eq 1 ]] && arg1="true" || arg1="false" 22 | # run start-build-dependency.sh which in agent machine 23 | ret_build_dependency=$(ssh -i id_ed25519_dos_test -o 
"UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" sol@"$sship" 'bash -s' < start-build-dependency.sh "$arg1" "$artifact_bucket" "$artifact_file") 24 | (( client_num++ )) || true 25 | done 26 | 27 | echo ----- stage: run bench-tps test --- 28 | client_num=1 29 | for sship in "${instance_ip[@]}" 30 | do 31 | # run start-dos-test.sh which in client machine 32 | ret_run_dos=$(ssh -i id_ed25519_dos_test -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" sol@$sship "nohup /home/sol/start-dos-test.sh 1> start-dos-test.nohup 2> start-dos-test.nohup &") 33 | (( client_num++ )) || true 34 | done 35 | # # Get Time Start 36 | start_time=$(date -u +%s) 37 | start_time_adjust=$(get_time_after $start_time 5) 38 | 39 | echo ----- stage: wait for bencher concurrently ------ 40 | sleep $DURATION 41 | echo ----- stage: check finish of process --- 42 | sleep 5 43 | for sship in "${instance_ip[@]}" 44 | do 45 | ret_pid=$(ssh -i id_ed25519_dos_test -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" sol@$sship 'pgrep --full "bash /home/sol/start-dos-test.sh*"' > pid.txt) || true 46 | pid=$(cat pid.txt) 47 | [[ $pid == "" ]] && echo "$sship has finished run bench-tps" || echo "pid=$pid" 48 | while [ "$pid" != "" ] 49 | do 50 | sleep $TERMINATION_CHECK_INTERVAL 51 | ret_pid=$(ssh -i id_ed25519_dos_test -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" sol@$sship 'pgrep --full "bash /home/sol/start-dos-test.sh*"' > pid.txt) || true 52 | pid=$(cat pid.txt) 53 | [[ $pid == "" ]] && echo "$sship has finished run bench-tps" || echo "pid=$pid" 54 | done 55 | done 56 | 57 | estimate_stop_time=$(get_time_after $start_time $DURATION) 58 | 59 | ### Get Time Stop 60 | stop_time=$(date -u +%s) 61 | stop_time_adjust=$(get_time_before $stop_time 5) 62 | echo ----- stage: DOS report ------ 63 | testnet_version=$(get_testnet_ver $ENDPOINT) 64 | echo "START_TIME=${start_time}" >> env-artifact.sh 65 | echo "START_TIME2=${start_time_adjust}" >> env-artifact.sh 66 | echo "STOP_TIME=${stop_time}" >> env-artifact.sh 67 | echo "STOP_TIME2=${stop_time_adjust}" >> env-artifact.sh 68 | echo "CLUSTER_VERSION=$testnet_version" >> env-artifact.sh 69 | echo "BUILDKITE_BUILD_URL=$BUILDKITE_BUILD_URL" >> env-artifact.sh 70 | for n in "${instance_name[@]}" 71 | do 72 | printf -v instances "%s %s " $instances $n 73 | done 74 | echo "INSTANCES=\"$instances\"" >>env-artifact.sh 75 | CLUSTER_VERSION=$(get_testnet_ver $ENDPOINT) 76 | echo "CLUSTER_VERSION=$CLUSTER_VERSION" >> env-artifact.sh 77 | ret_dos_report=$(exec ./dos-report.sh) 78 | 79 | echo ----- stage: upload logs ------ 80 | cnt=1 81 | for sship in "${instance_ip[@]}" 82 | do 83 | ret_pre_build=$(ssh -i id_ed25519_dos_test -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" sol@$sship /home/sol/start-upload-logs.sh $cnt) 84 | (( cnt++ )) || true 85 | done 86 | sleep 5 87 | echo ----- stage: delete instances ------ 88 | if [[ "$KEEP_INSTANCES" != "true" ]];then 89 | echo ----- stage: delete instances ------ 90 | delete_machines 91 | fi 92 | exit 0 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /prepare-envs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | echo ----- stage: checkout buildkite Steps Env ------ 4 | ## Buckets Memo 5 | ## There are 3 buckets in total 6 | ## 1.ARTIFACT_BUCKET : dos-agent bucket, share with other program use dos-agent as buildkite agent 7 | ## 
2.DOS_BENCH_TPS_PRIVATE_BUCKET : private bucket, stores private data
8 | ## 3.DOS_BENCH_TPS_LOG_BUCKET : log bucket
9 | 
10 | # Check ENVs
11 | [[ ! "$ENDPOINT" ]]&& echo ENDPOINT env not found && exit 1
12 | [[ ! "$TX_COUNT" ]] && echo TX_COUNT env not found && exit 1
13 | [[ ! "$DURATION" ]] && echo DURATION env not found && exit 1
14 | [[ ! "$USE_TPU_CLIENT" ]] && USE_TPU_CLIENT="true"
15 | [[ ! "$THREAD_BATCH_SLEEP_MS" ]]&& THREAD_BATCH_SLEEP_MS=10 && echo THREAD_BATCH_SLEEP_MS env not found, use $THREAD_BATCH_SLEEP_MS
16 | [[ ! "$SUSTAINED" ]] && SUSTAINED="false"
17 | [[ ! "$USE_DURABLE_NONCE" ]] && USE_DURABLE_NONCE="false"
18 | # CI ENVs
19 | [[ ! "$TEST_TYPE" ]] && TEST_TYPE="QUIC"
20 | [[ ! "$GIT_TOKEN" ]]&& echo GIT_TOKEN env not found && exit 1
21 | [[ ! "$NUM_CLIENT" ]]&& echo NUM_CLIENT env not found && exit 1
22 | [[ ! "$TERMINATION_CHECK_INTERVAL" ]]&& TERMINATION_CHECK_INTERVAL=10 && echo TERMINATION_CHECK_INTERVAL env not found, use $TERMINATION_CHECK_INTERVAL
23 | [[ ! "$GIT_REPO_DIR" ]]&& GIT_REPO_DIR="bench-tps-dos-test"
24 | [[ ! "$SOLANA_BUILD_BRANCH" ]]&& SOLANA_BUILD_BRANCH=master
25 | if [[ ! "$SOLANA_GIT_COMMIT" ]];then
26 | ret=$(git clone https://github.com/solana-labs/solana.git)
27 | if [[ -d solana ]];then
28 | cd ./solana
29 | [[ ! "$SOLANA_BUILD_BRANCH" ]]&& SOLANA_BUILD_BRANCH=master
30 | ret=$(git checkout $SOLANA_BUILD_BRANCH)
31 | SOLANA_GIT_COMMIT=$(git rev-parse HEAD)
32 | cd ../
33 | else
34 | echo "can not clone https://github.com/solana-labs/solana.git"
35 | exit 1
36 | fi
37 | fi
38 | [[ ! "$AVAILABLE_ZONE" ]]&& AVAILABLE_ZONE="us-central1-a us-west1-b asia-east1-b europe-west4-a" && echo no AVAILABLE_ZONE and use $AVAILABLE_ZONE
39 | [[ ! "$KEYPAIR_DIR" ]]&&KEYPAIR_DIR="keypair-configs"
40 | [[ ! "$KEYPAIR_FILE" ]]&&KEYPAIR_FILE="large-keypairs.yaml"
41 | [[ ! "$KEYPAIR_TAR_FILE" ]] && KEYPAIR_TAR_FILE=keypair-configs.tgz
42 | [[ ! "$ID_FILE" ]]&&ID_FILE="testnet-dos-funder.json"
43 | [[ ! "$BENCH_TPS_ARTIFACT_FILE" ]]&& BENCH_TPS_ARTIFACT_FILE="solana-bench-tps"
44 | [[ ! "$DOS_BENCH_TPS_PRIVATE_BUCKET" ]]&& DOS_BENCH_TPS_PRIVATE_BUCKET=bench-tps-dos-private
45 | [[ ! "$DOS_BENCH_TPS_LOG_BUCKET" ]]&& DOS_BENCH_TPS_LOG_BUCKET="bench-tps-dos-log"
46 | [[ ! "$SOLANA_REPO" ]]&& SOLANA_REPO=https://github.com/solana-labs/solana.git
47 | [[ ! "$KEEP_INSTANCES" ]]&& KEEP_INSTANCES="false" && echo KEEP_INSTANCES env not found, use $KEEP_INSTANCES
48 | [[ ! "$RUN_BENCH_AT_TS_UTC" ]]&& RUN_BENCH_AT_TS_UTC=0 && echo RUN_BENCH_AT_TS_UTC env not found, use $RUN_BENCH_AT_TS_UTC
49 | #[[ ! "$SLACK_WEBHOOK" ]]&&[[ ! "$DISCORD_WEBHOOK" ]]&& echo no WEBHOOK found&&exit 1
50 | # set large data set
51 | [[ ! "$LARGE_DATA_SET" ]] && LARGE_DATA_SET="false"
52 | # INFLUX_WINDOW_INTERVAL & INFLUX_WINDOW_INTERVAL_LONG are used only when LARGE_DATA_SET is true
53 | [[ ! "$INFLUX_WINDOW_INTERVAL" ]] && INFLUX_WINDOW_INTERVAL="10m"
54 | [[ ! "$INFLUX_WINDOW_INTERVAL_LONG" ]] && INFLUX_WINDOW_INTERVAL_LONG="30m"
55 | [[ ! "$ARTIFACT_BUCKET" ]] && echo no ARTIFACT_BUCKET && exit 1
56 | [[ ! "$ENV_ARTIFACT_FILE" ]] && ENV_ARTIFACT_FILE="env-artifact.sh"
57 | 
58 | source utils.sh
59 | echo ----- stage: prepare metrics env for both query and write ------
60 | [[ -f "dos-metrics-env.sh" ]]&& rm dos-metrics-env.sh
61 | download_file "gs://$DOS_BENCH_TPS_PRIVATE_BUCKET" dos-metrics-env.sh ./
62 | [[ ! -f "dos-metrics-env.sh" ]]&& echo "NO dos-metrics-env.sh found" && exit 1
63 | [[ ! 
$REPORT_BUCKET ]] && REPORT_BUCKET="bench-tps-dos" && echo no REPORT_BUCKET use "$REPORT_BUCKET" 64 | 65 | echo ----- stage: prepare ssh key to dynamic clients ------ 66 | download_file "gs://$DOS_BENCH_TPS_PRIVATE_BUCKET" id_ed25519_dos_test ./ 67 | [[ ! -f "id_ed25519_dos_test" ]]&& echo "no id_ed25519_dos_test found" && exit 1 68 | chmod 600 id_ed25519_dos_test 69 | 70 | echo ----- stage: prepare env-artifact for clients ------ 71 | ## bench-tps Envs 72 | echo "ENDPOINT=$ENDPOINT" >> env-artifact.sh 73 | echo "TX_COUNT=$TX_COUNT" >> env-artifact.sh 74 | echo "DURATION=$DURATION" >> env-artifact.sh 75 | echo "USE_TPU_CLIENT=$USE_TPU_CLIENT" >> env-artifact.sh 76 | echo "THREAD_BATCH_SLEEP_MS=$THREAD_BATCH_SLEEP_MS" >> env-artifact.sh 77 | echo "USE_DURABLE_NONCE=$USE_DURABLE_NONCE" >> env-artifact.sh 78 | echo "SUSTAINED=$SUSTAINED" >> env-artifact.sh 79 | echo "TPU_DISABLE_QUIC=$TPU_DISABLE_QUIC" >> env-artifact.sh 80 | ## CI Envs 81 | echo "NUM_CLIENT=$NUM_CLIENT" >> env-artifact.sh 82 | echo "GIT_TOKEN=$GIT_TOKEN" >> env-artifact.sh 83 | echo "GIT_REPO_DIR=$GIT_REPO_DIR" >> env-artifact.sh 84 | echo "SOLANA_REPO=$SOLANA_REPO" >> env-artifact.sh 85 | echo "SOLANA_BUILD_BRANCH=$SOLANA_BUILD_BRANCH" >> env-artifact.sh 86 | echo "SOLANA_GIT_COMMIT=$SOLANA_GIT_COMMIT" >> env-artifact.sh 87 | echo "KEEP_INSTANCES=$KEEP_INSTANCES" >> env-artifact.sh 88 | echo "RUN_BENCH_AT_TS_UTC=$RUN_BENCH_AT_TS_UTC" >> env-artifact.sh 89 | echo "SLACK_WEBHOOK=$SLACK_WEBHOOK" >> env-artifact.sh 90 | ## downloaded files 91 | echo "KEYPAIR_DIR=$KEYPAIR_DIR" >> env-artifact.sh 92 | echo "KEYPAIR_FILE=$KEYPAIR_FILE" >> env-artifact.sh 93 | echo "KEYPAIR_TAR_FILE=$KEYPAIR_TAR_FILE" >> env-artifact.sh 94 | echo "ID_FILE=$ID_FILE" >> env-artifact.sh 95 | # buildkite build envs 96 | echo "BUILDKITE_BRANCH=$BUILDKITE_BRANCH" >> env-artifact.sh 97 | echo "BUILDKITE_REPO=$BUILDKITE_REPO" >> env-artifact.sh 98 | echo "BUILDKITE_PIPELINE_ID=$BUILDKITE_PIPELINE_ID" >> env-artifact.sh 99 | echo "BUILDKITE_BUILD_ID=$BUILDKITE_BUILD_ID" >> env-artifact.sh 100 | echo "BUILDKITE_JOB_ID=$BUILDKITE_JOB_ID" >> env-artifact.sh 101 | echo "BUILDKITE_BUILD_NUMBER=$BUILDKITE_BUILD_NUMBER" >> env-artifact.sh 102 | echo "BUILDKITE_BUILD_URL=$BUILDKITE_BUILD_URL" >> env-artifact.sh 103 | ## artifact address 104 | echo "DOS_BENCH_TPS_PRIVATE_BUCKET=$DOS_BENCH_TPS_PRIVATE_BUCKET" >> env-artifact.sh 105 | echo "DOS_BENCH_TPS_LOG_BUCKET=$DOS_BENCH_TPS_LOG_BUCKET" >> env-artifact.sh 106 | echo "ARTIFACT_BUCKET=$ARTIFACT_BUCKET" >> env-artifact.sh 107 | echo "ENV_ARTIFACT_FILE=$ENV_ARTIFACT_FILE" >> env-artifact.sh 108 | echo "BENCH_TPS_ARTIFACT_FILE=solana-bench-tps" >> env-artifact.sh 109 | ## large data set 110 | echo "LARGE_DATA_SET=$LARGE_DATA_SET" >> env-artifact.sh 111 | echo "INFLUX_WINDOW_INTERVAL=$INFLUX_WINDOW_INTERVAL" >> env-artifact.sh 112 | echo "INFLUX_WINDOW_INTERVAL_LONG=$INFLUX_WINDOW_INTERVAL_LONG" >> env-artifact.sh 113 | echo "REPORT_BUCKET=$REPORT_BUCKET" >> env-artifact.sh 114 | cat dos-metrics-env.sh >> env-artifact.sh 115 | -------------------------------------------------------------------------------- /print-log.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # print-log.sh 3 | # shellcheck source=/dev/null 4 | set -ex 5 | source utils.sh 6 | echo ---- stage: print log ---- 7 | if [[ "$PRINT_LOG" == "true" ]];then 8 | # only download start-dos-test(1).nohup file 9 | ret_download_log=$(download_file 
"gs://$ARTIFACT_BUCKET/$BUILDKITE_PIPELINE_ID/$BUILDKITE_BUILD_ID" start-dos-test-1.nohup ./) || true 10 | fi 11 | sleep 3 12 | if [[ -f "start-dos-test-1.nohup" ]];then 13 | cat start-dos-test-1.nohup 14 | else 15 | echo "no start-dos-test-1.nohup found" 16 | fi 17 | exit 0 -------------------------------------------------------------------------------- /simple_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex -------------------------------------------------------------------------------- /slack.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | msg=$(jq -n --arg v "$header" '[$v]') 3 | 4 | [[ -z "$SLACK_WEBHOOK" ]]&&echo "ERROR : SLACK_WEBHOOK=$SLACK_WEBHOOK"&&exit 1 5 | 6 | slack_send(){ 7 | sdata=$(jq --null-input --arg val "$slack_text" '{"blocks":$val}') 8 | curl -X POST -H 'Content-type: application/json' --data "$sdata" $SLACK_WEBHOOK 9 | } 10 | 11 | 12 | ## Construct Test_Configuration 13 | printf -v test_config "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n" \ 14 | "test-type = $test_type" "client = $client" "branch = $SOLANA_BUILD_BRANCH" "commit = $git_commit" \ 15 | "cluster version = $cluster_version" "bench-tps-clients = $num_clients" "read-client-keys = $client_keypair_path" \ 16 | "duration = $duration" "tx_count = $tx_count" "thread_batch_sleep_ms = $thread_batch_sleep_ms" "durable_nonce = $USE_DURABLE_NONCE" 17 | 18 | # Construct Slack Result_Details Report 19 | printf -v s_time_start "%s%s" "time_start: $(date --rfc-3339=seconds -u -d @$start_time)" "\\n" 20 | printf -v s_time_end "%s%s" "time_end: $(date --rfc-3339=seconds -u -d @$stop_time)" "\\n" 21 | printf -v s_slot "%s%s%s%s" $start_slot_txt "\\n" $end_slot_txt "\\n" 22 | printf -v s_tx_count "%s%s%s%s%s%s%s%s%s%s" $mean_tx_count_txt "\\n" $max_tx_count_txt "\\n" $p90_tx_count_txt "\\n" $p99_tx_count_txt "\\n" 23 | printf -v s_tower_vote_distance "%s%s%s%s%s%s%s%s" $mean_tower_vote_distance_txt "\\n" $max_tower_vote_distance_txt "\\n" $p90_tower_vote_distance_txt "\\n" $p99_tower_vote_distance_txt "\\n" 24 | printf -v s_optimistic_slot_elapsed "%s%s%s%s%s%s%s%s" $mean_optimistic_slot_elapsed_txt "\\n" $max_optimistic_slot_elapsed_txt "\\n" $p90_optimistic_slot_elapsed_txt "\\n" $p99_optimistic_slot_elapsed_txt "\\n" 25 | printf -v s_ct_stats_block_cost "%s%s%s%s%s%s%s%s" $mean_ct_stats_block_cost_txt "\\n" $max_ct_stats_block_cost_txt "\\n" $p90_ct_stats_block_cost_txt "\\n" $p99_ct_stats_block_cost_txt "\\n" 26 | printf -v s_ct_stats_tx_count "%s%s%s%s%s%s%s%s" $mean_mean_ct_stats_tx_count_txt "\\n" $max_mean_ct_stats_tx_count_txt "\\n" $p90_mean_ct_stats_tx_count_txt "\\n" $p99_mean_ct_stats_tx_count_txt "\\n" 27 | printf -v s_ct_stats_number_of_accts "%s%s%s%s%s%s%s%s" $mean_ct_stats_num_of_accts_txt "\\n" $max_ct_stats_num_of_accts_txt "\\n" $p90_ct_stats_num_of_accts_txt "\\n" $p99_ct_stats_num_of_accts_txt "\\n" 28 | printf -v blocks_fill "%s%s%s%s%s%s%s%s%s%s" $total_blocks_txt "\\n" $blocks_fill_50_txt "\\n" $blocks_fill_90_txt "\\n" $blocks_fill_50_percent_txt "\\n" $blocks_fill_90_percent_txt "\\n" 29 | printf -v skip_rate "%s%s%s%s%s%s" $mean_skip_rate_txt "\\n" $max_skip_rate_txt "\\n" $skip_rate_90_txt "\\n" 30 | 31 | # combine all data 32 | printf -v s_detail_ret "%s%s%s%s%s%s%s%s%s%s%s" $s_time_start $s_time_end $s_slot $s_tx_count $s_tower_vote_distance $s_optimistic_slot_elapsed $s_ct_stats_block_cost $s_ct_stats_tx_count $s_ct_stats_number_of_accts $blocks_fill 
33 | ## Compose block content
34 | conf='"```'${test_config}'```"'
35 | detail='"```'${s_detail_ret}'```"'
36 | ## compose block
37 | b1="{
38 | \"type\": \"actions\",
39 | \"elements\": [
40 | {
41 | \"type\": \"button\",
42 | \"text\": {
43 | \"type\": \"plain_text\",
44 | \"text\": \"Grafana\"
45 | },
46 | \"url\": \"${gf_url}\"
47 | },
48 | {
49 | \"type\": \"button\",
50 | \"text\": {
51 | \"type\": \"plain_text\",
52 | \"text\": \"Buildkite Job\"
53 | },
54 | \"url\": \"${BUILDKITE_BUILD_URL}\"
55 | }
56 | ]
57 | }"
58 | b2='{
59 | "type": "header",
60 | "text": {
61 | "type": "plain_text",
62 | "text": "Test_Configuration",
63 | "emoji": true
64 | }
65 | }'
66 | b3="{\"type\": \"section\",\"text\": {\"type\":\"mrkdwn\",\"text\": ${conf}}}"
67 | b4='{
68 | "type": "header",
69 | "text": {
70 | "type": "plain_text",
71 | "text": "Result_Details",
72 | "emoji": true
73 | }
74 | }'
75 | b5="{\"type\": \"section\",\"text\": {\"type\":\"mrkdwn\",\"text\": ${detail}}}"
76 |
77 | # compose final message
78 | act_elem="[${b1},${b2},${b3},${b4},${b5}]"
79 | # echo $act_elem | jq .
80 | slack_text=$act_elem
81 |
--------------------------------------------------------------------------------
/start-build-dependency.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | ## arg 1: whether to build solana-bench-tps
4 | ## arg 2: ARTIFACT BUCKET
5 | ## arg 3: NAME OF ENV ARTIFACT FILE
6 | ## env
7 |
8 | ## functions
9 | # upload_file - s1: local file s2: bucket destination
10 | # download_file - s1: bucket name s2: file name s3: local directory
11 | upload_file() {
12 | gsutil cp "$1" "$2"
13 | }
14 |
15 | download_file() {
16 | for retry in 0 1 2 3
17 | do
18 | if [[ $retry -gt 2 ]];then
19 | break
20 | fi
21 | gsutil cp "$1/$2" "$3"
22 | if [[ ! -f "$2" ]];then
23 | echo NO "$2" found, retry
24 | else
25 | echo "$2" downloaded
26 | break
27 | fi
28 | sleep 5
29 | done
30 | }
31 |
32 | ## Download key files from gsutil
33 | echo "arg1"="$1"
34 | [[ "$1" != "true" && "$1" != "false" ]] && build_binary="false" || build_binary="$1"
35 | [[ ! "$2" ]]&& echo "No artifact bucket" && exit 1
36 | [[ ! "$3" ]]&& echo "No artifact filename" && exit 1
37 | # Download env-artifact.sh
38 | download_file "gs://$2" "$3" "$HOME"
39 | sleep 1
40 | [[ ! -f "$3" ]] && echo no "$3" downloaded && exit 2
-f "$3" ]] && echo no "$3" downloaded && exit 2 41 | # shellcheck source=/dev/null 42 | source $HOME/.profile 43 | # shellcheck source=env-artifact.sh 44 | source $HOME/env-artifact.sh 45 | 46 | ## preventing lock-file build fail, 47 | ## also need to disable software upgrade in image 48 | sudo fuser -vki -TERM /var/lib/dpkg/lock /var/lib/dpkg/lock-frontend || true 49 | sudo dpkg --configure -a 50 | sudo apt update 51 | ## pre-install and rust version 52 | sudo apt-get install -y libssl-dev libudev-dev pkg-config zlib1g-dev llvm clang cmake make libprotobuf-dev protobuf-compiler 53 | rustup default stable 54 | rustup update 55 | 56 | echo ------- stage: git clone bench-tps-dos ------ 57 | cd $HOME 58 | [[ -d "$GIT_REPO_DIR" ]]&& rm -rf $GIT_REPO_DIR 59 | git clone "$BUILDKITE_REPO" 60 | cd "$GIT_REPO_DIR" 61 | git checkout "$BUILDKITE_BRANCH" 62 | git branch 63 | echo ------- stage: download solana repos and build solana-bench-tps ------ 64 | cd "$HOME" 65 | 66 | if [[ "$build_binary" == "true" ]];then 67 | echo ------- build solana-bench-tps ------ 68 | [[ -d "$HOME/solana" ]]&& rm -rf "$HOME/solana" 69 | git clone "$SOLANA_REPO" 70 | 71 | [[ -d "$HOME/solana" ]] || exit 1 72 | cd "$HOME/solana" 73 | if [[ "$SOLANA_GIT_COMMIT" ]];then 74 | git checkout "$SOLANA_GIT_COMMIT" 75 | elif [[ "$SOLANA_BUILD_BRANCH" ]];then 76 | git checkout "$SOLANA_BUILD_BRANCH" 77 | fi 78 | cd "$HOME/solana/bench-tps" 79 | [[ -f "$HOME/solana/target/release/solana-bench-tps" ]]&& rm "$HOME/solana/target/release/solana-bench-tps" 80 | res=$(cargo build --release > bench-tps-build.output) 81 | echo "$res" 82 | if [[ -f "$HOME/solana/target/release/solana-bench-tps" ]];then 83 | cp "$HOME/solana/target/release/solana-bench-tps" "$HOME" 84 | upload_file "$HOME/solana-bench-tps" "gs://$ARTIFACT_BUCKET/$BUILDKITE_PIPELINE_ID/$BUILDKITE_BUILD_ID/$BUILDKITE_JOB_ID" 85 | else 86 | echo "build solana-bench-tps fail" 87 | exit 1 88 | fi 89 | else 90 | echo ------- download from bucket ------ 91 | download_file "gs://$ARTIFACT_BUCKET/$BUILDKITE_PIPELINE_ID/$BUILDKITE_BUILD_ID/$BUILDKITE_JOB_ID" "$BENCH_TPS_ARTIFACT_FILE" "$HOME" 92 | [[ ! -f "$HOME/solana-bench-tps" ]] && echo no solana-bench-tps && exit 1 93 | chmod +x "$HOME/solana-bench-tps" 94 | fi 95 | 96 | echo ---- stage: copy files to HOME and mkdir log folder ---- 97 | cp "$HOME/$GIT_REPO_DIR/start-dos-test.sh" $HOME/start-dos-test.sh 98 | cp "$HOME/$GIT_REPO_DIR/start-upload-logs.sh" $HOME/start-upload-logs.sh 99 | [[ -d "$HOME/$HOSTNAME" ]] && rm -rf "$HOME/$HOSTNAME" 100 | mkdir -p "$HOME/$HOSTNAME" 101 | 102 | echo ---- stage: download id, accounts and authority file in HOME ---- 103 | cd "$HOME" 104 | download_file "gs://$DOS_BENCH_TPS_PRIVATE_BUCKET" "$ID_FILE" "$HOME" 105 | [[ ! -f "$ID_FILE" ]]&&echo no "$ID_FILE" file && exit 1 106 | download_file "gs://$DOS_BENCH_TPS_PRIVATE_BUCKET" "$KEYPAIR_TAR_FILE" "$HOME" 107 | [[ ! -f "$KEYPAIR_TAR_FILE" ]]&&echo no "$KEYPAIR_TAR_FILE" file && exit 1 108 | tar -xzvf $KEYPAIR_TAR_FILE 109 | [[ ! 
-f "$HOME/keypair-configs/$KEYPAIR_FILE" ]]&&echo no "$KEYPAIR_FILE" file && exit 1 110 | cp "$HOME/keypair-configs/$KEYPAIR_FILE" "$HOME" 111 | exit 0 112 | -------------------------------------------------------------------------------- /start-dos-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # benchmark 3 | set -ex 4 | # shellcheck source=/dev/null 5 | source $HOME/.profile 6 | # shellcheck source=/dev/null 7 | source $HOME/env-artifact.sh 8 | 9 | [[ ! "$ENDPOINT" ]]&& echo "No ENDPOINT" && exit 1 10 | [[ ! "$SOLANA_METRICS_CONFIG" ]] && echo no SOLANA_METRICS_CONFIG ENV && exit 1 11 | [[ ! "$KEYPAIR_FILE" ]]&& KEYPAIR_FILE=large-keypairs.yaml && echo No KEYPAIR_FILE Env , use $KEYPAIR_FILE 12 | #### bench-tps ENV #### 13 | echo --- stage:setup bench-tps parameters --- 14 | args=( 15 | -u "$ENDPOINT" 16 | --identity "$HOME/$ID_FILE" 17 | --read-client-keys "$HOME/$KEYPAIR_FILE" 18 | --duration "$TX_COUNT" 19 | --tx_count "$DURATION" 20 | --thread-batch-sleep-ms "$THREAD_BATCH_SLEEP_MS" 21 | ) 22 | # b) use_tpu_client (boolean, if true --use-tpu-client, if false --use-rpc-client) 23 | # f) tx_count (--tx_count 10000 for the UDP test and --tx_count 2000 per client for the QUIC ) 24 | # f1.1) tx_count no longer bound to test type. 8/20/2022 25 | # g) thread_batch_sleep ( --thread-batch-sleep-ms 1 for UDP --thread-batch-sleep-ms 10 for QUIC) 26 | # g.1) no longer bound to test type 27 | [[ ! "$TX_COUNT" ]] && TX_COUNT=10000 && echo No TX_COUNT Env , use $TX_COUNT 28 | # e) duration (default --duration 1800) 29 | [[ ! "$DURATION" ]] && DURATION=1800 && echo No DURATION Env , use $DURATION 30 | [[ ! "$THREAD_BATCH_SLEEP_MS" ]]&& THREAD_BATCH_SLEEP_MS=1 && echo No THREAD_BATCH_SLEEP_MS Env , use $THREAD_BATCH_SLEEP_MS 31 | [[ "$USE_TPU_CLIENT" == "true" ]] && args+=(--use-tpu-client) || args+=(--use-rpc-client) 32 | [[ "$USE_DURABLE_NONCE" == "true" ]] && args+=(--use-durable-nonce) 33 | # d) sustained (boolean, if true --sustained, if false nothing) 34 | [[ "$SUSTAINED" == "true" ]]&& args+=(--sustained) 35 | # benchmark execution 36 | cd $HOME 37 | echo --- start of benchmark $(date) 38 | benchmark=$(./solana-bench-tps "${args[@]}" &) 39 | sleep 2 40 | cd $HOME 41 | ret_ps=$(ps aux | grep solana-bench-tps) 42 | echo $ret_ps > ps.out 43 | echo --- end of benchmark $(date) 44 | 45 | -------------------------------------------------------------------------------- /start-upload-logs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | # shellcheck source=/dev/null 4 | source $HOME/.profile 5 | # shellcheck source=/dev/null 6 | source $HOME/env-artifact.sh 7 | 8 | upload_log_folder() { 9 | gsutil cp -r $1 gs://mango_bencher-dos-log/$BUILDKITE_BUILD_NUMBER/ 10 | } 11 | 12 | echo ----- stage: upload logs: make folder and move logs ------ 13 | cd $HOME 14 | if [[ -f "$HOME/start-dos-test.nohup" ]];then 15 | # must upload to build level, otherwise when the printlog in different job, it cannot find the file 16 | ret_upload_nohup=$(gsutil cp "$HOME/start-dos-test.nohup" "gs://$ARTIFACT_BUCKET/$BUILDKITE_PIPELINE_ID/$BUILDKITE_BUILD_ID/start-dos-test-$1.nohup") || true 17 | else 18 | echo no start-dos-test.nohup found in $home 19 | fi 20 | # [[ -f "$HOME/start-dos-test.nohup" ]]&& cat start-dos-test.nohup || true 21 | 22 | echo "all logs are uploaded" 23 | exit 0 24 | 25 | -------------------------------------------------------------------------------- 
/utils.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | function read_machines() {
4 | ip_file=instance_ip.out
5 | name_file=instance_name.out
6 | zone_file=instance_zone.out
7 | }
8 |
9 | ## download a file from a bucket (provide the filename as it appears in the bucket)
10 | ## s1: bucket name s2: file name s3: local directory
11 | download_file() {
12 | for retry in 0 1 2
13 | do
14 | if [[ $retry -gt 2 ]];then
15 | break
16 | fi
17 | gsutil cp "$1/$2" "$3"
18 | if [[ ! -f "$2" ]];then
19 | echo NO "$2" found, retry
20 | else
21 | echo "$2" downloaded
22 | break
23 | fi
24 | sleep 5
25 | done
26 | }
27 | upload_file() {
28 | gsutil cp "$1" "$2"
29 | }
30 | # argv1: endpoint url
31 | function get_testnet_ver() {
32 | local ret
33 | for retry in 0 1 2
34 | do
35 | if [[ $retry -gt 1 ]];then
36 | break
37 | fi
38 | ret=$(curl "$1" -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"getVersion"}
39 | ' | jq '.result."solana-core"' | sed 's/\"//g') || true
40 | if [[ $ret =~ [0-9]+\.[0-9]+\.[0-9]+ ]];then
41 | break
42 | fi
43 | sleep 3
44 | done
45 | if [[ ! $ret =~ ^[0-9]+\.[0-9]+\.[0-9]+ ]];then
46 | echo master
47 | else
48 | #adding a v because the branch has a v
49 | echo "v$ret"
50 | fi
51 | }
52 |
53 | # given time $1 and get after $2 seconds
54 | get_time_after() {
55 | outcome_in_sec=$(echo "$1 + $2" | bc)
56 | echo "$outcome_in_sec"
57 | }
58 |
59 | # given time $1 and get before $2 seconds
60 | get_time_before() {
61 | outcome_in_sec=$(echo "$1 - $2" | bc)
62 | echo "$outcome_in_sec"
63 | }
64 |
65 |
66 | # extract_time: extract number and unit from string like 10s, 10m, 10h
67 | # argv1: string like 10s, 10m, 10h
68 | # output: echoes the duration in seconds (also sets $duration_in_seconds)
69 | function extract_time_in_sec {
70 | if [[ $1 =~ ^([0-9]+)([smh])$ ]]; then
71 | number="${BASH_REMATCH[1]}"
72 | unit="${BASH_REMATCH[2]}"
73 |
74 | case "$unit" in
75 | s)
76 | duration_in_seconds="$number"
77 | ;;
78 | m)
79 | duration_in_seconds=$((number * 60))
80 | ;;
81 | h)
82 | duration_in_seconds=$((number * 3600))
83 | ;;
84 | *)
85 | duration_in_seconds=0
86 | ;;
87 | esac
88 | echo "$duration_in_seconds"
89 | fi
90 | }
91 |
92 |
--------------------------------------------------------------------------------
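The time helpers in utils.sh can be combined to turn a human-friendly duration string and a start timestamp into an epoch window of the kind the report scripts consume as start_time/stop_time. A minimal usage sketch follows, assuming utils.sh is sourced and ENDPOINT is exported as in the scripts above; the "10m" input and the 60-second padding are made-up values for illustration, not repo defaults.

# usage sketch only - not part of the repo
# shellcheck source=/dev/null
source ./utils.sh

start_time=$(date +%s)
duration_in_seconds=$(extract_time_in_sec "10m")   # hypothetical input "10m" -> 600
stop_time=$(get_time_after "$start_time" "$duration_in_seconds")
query_start=$(get_time_before "$start_time" 60)    # arbitrary 60s of padding before the test window
echo "query window: $query_start ~ $stop_time"
cluster_version=$(get_testnet_ver "$ENDPOINT")     # e.g. v1.14.20, or "master" if the RPC call fails
echo "cluster version: $cluster_version"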