├── .asf.yaml ├── .github ├── PULL_REQUEST_TEMPLATE └── workflows │ ├── pre-commit.yml │ └── push-master.yml ├── .gitignore ├── .go_version ├── .golangci.yml ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── cmd ├── queueconfigchecker │ └── queueconfigchecker.go ├── schedulerclient │ └── client.go └── simplescheduler │ ├── main.go │ └── simplescheduler.go ├── config ├── limit.yaml └── queues.yaml ├── go.mod ├── go.sum ├── pkg ├── common │ ├── configs │ │ ├── config.go │ │ ├── config_test.go │ │ ├── configs.go │ │ ├── configs_test.go │ │ ├── configvalidator.go │ │ └── configvalidator_test.go │ ├── constants.go │ ├── errors.go │ ├── resources │ │ ├── quantity.go │ │ ├── quantity_test.go │ │ ├── resources.go │ │ ├── resources_test.go │ │ ├── tracked_resources.go │ │ └── tracked_resources_test.go │ ├── security │ │ ├── acl.go │ │ ├── acl_test.go │ │ ├── usergroup.go │ │ ├── usergroup_no_resolver.go │ │ ├── usergroup_os_resolver.go │ │ ├── usergroup_test.go │ │ └── usergroup_test_resolver.go │ ├── server.go │ ├── utils.go │ └── utils_test.go ├── entrypoint │ ├── entrypoint.go │ ├── entrypoint_test.go │ └── service_context.go ├── events │ ├── event_publisher.go │ ├── event_publisher_test.go │ ├── event_ringbuffer.go │ ├── event_ringbuffer_test.go │ ├── event_store.go │ ├── event_store_test.go │ ├── event_streaming.go │ ├── event_streaming_test.go │ ├── event_system.go │ ├── event_system_test.go │ ├── events.go │ ├── events_test.go │ └── mock │ │ └── event_system.go ├── examples │ └── simple_example.go ├── handler │ └── event_handlers.go ├── locking │ ├── locking.go │ ├── locking_race_test.go │ └── locking_test.go ├── log │ ├── filtered_core.go │ ├── logger.go │ ├── logger_test.go │ ├── rate_limited_logger.go │ └── rate_limited_logger_test.go ├── metrics │ ├── event.go │ ├── history │ │ ├── internal_metrics.go │ │ └── internal_metrics_test.go │ ├── init.go │ ├── metrics_collector.go │ ├── metrics_collector_test.go │ ├── metrics_test.go │ ├── queue.go │ ├── 
queue_test.go │ ├── runtime.go │ ├── runtime_test.go │ ├── scheduler.go │ └── scheduler_test.go ├── mock │ ├── container_state_updater.go │ ├── event_plugin.go │ ├── predicate_plugin.go │ ├── preemption_predicate_plugin.go │ └── rm_callback.go ├── plugins │ ├── plugins.go │ ├── plugins_test.go │ └── types.go ├── rmproxy │ ├── rmevent │ │ └── events.go │ ├── rmproxy.go │ └── rmproxy_mock.go ├── scheduler │ ├── context.go │ ├── context_test.go │ ├── health_checker.go │ ├── health_checker_test.go │ ├── nodes_usage_monitor.go │ ├── objects │ │ ├── allocation.go │ │ ├── allocation_result.go │ │ ├── allocation_test.go │ │ ├── application.go │ │ ├── application_graphviz_test.go │ │ ├── application_state.go │ │ ├── application_state_test.go │ │ ├── application_summary.go │ │ ├── application_test.go │ │ ├── events │ │ │ ├── application_events.go │ │ │ ├── application_events_test.go │ │ │ ├── ask_events.go │ │ │ ├── ask_events_test.go │ │ │ ├── node_events.go │ │ │ ├── node_events_test.go │ │ │ ├── queue_events.go │ │ │ └── queue_events_test.go │ │ ├── node.go │ │ ├── node_collection.go │ │ ├── node_collection_test.go │ │ ├── node_iterator.go │ │ ├── node_iterator_test.go │ │ ├── node_listener.go │ │ ├── node_test.go │ │ ├── nodesorting.go │ │ ├── nodesorting_test.go │ │ ├── object_state.go │ │ ├── object_state_test.go │ │ ├── predicates.go │ │ ├── predicates_test.go │ │ ├── preemption.go │ │ ├── preemption_queue_test.go │ │ ├── preemption_test.go │ │ ├── queue.go │ │ ├── queue_test.go │ │ ├── required_node_preemptor.go │ │ ├── required_node_preemptor_test.go │ │ ├── reservation.go │ │ ├── reservation_test.go │ │ ├── sorted_asks.go │ │ ├── sorted_asks_test.go │ │ ├── sorters.go │ │ ├── sorters_test.go │ │ ├── template │ │ │ ├── template.go │ │ │ └── template_test.go │ │ └── utilities_test.go │ ├── partition.go │ ├── partition_manager.go │ ├── partition_manager_test.go │ ├── partition_test.go │ ├── placement │ │ ├── filter.go │ │ ├── filter_test.go │ │ ├── fixed_rule.go │ │ 
├── fixed_rule_test.go │ │ ├── placement.go │ │ ├── placement_test.go │ │ ├── provided_rule.go │ │ ├── provided_rule_test.go │ │ ├── recovery_rule.go │ │ ├── recovery_rule_test.go │ │ ├── rule.go │ │ ├── rule_test.go │ │ ├── tag_rule.go │ │ ├── tag_rule_test.go │ │ ├── testrule.go │ │ ├── testrule_test.go │ │ ├── types │ │ │ └── types.go │ │ ├── user_rule.go │ │ └── user_rule_test.go │ ├── policies │ │ ├── nodesorting_policy.go │ │ ├── nodesorting_policy_test.go │ │ ├── preemption_policy.go │ │ ├── preemption_policy_test.go │ │ ├── priority_policy.go │ │ ├── priority_policy_test.go │ │ ├── sorting_policy.go │ │ └── sorting_policy_test.go │ ├── scheduler.go │ ├── scheduler_test.go │ ├── scheduling_metrics.go │ ├── tests │ │ ├── application_tracking_test.go │ │ ├── mock_rm_callback_test.go │ │ ├── mockscheduler_test.go │ │ ├── operation_test.go │ │ ├── performance_test.go │ │ ├── plugin_test.go │ │ ├── recovery_test.go │ │ ├── reservation_test.go │ │ ├── restclient_test.go │ │ ├── smoke_test.go │ │ └── utilities_test.go │ ├── ugm │ │ ├── group_tracker.go │ │ ├── group_tracker_test.go │ │ ├── manager.go │ │ ├── manager_test.go │ │ ├── queue_tracker.go │ │ ├── queue_tracker_test.go │ │ ├── tracker.go │ │ ├── ugm_events.go │ │ ├── ugm_events_test.go │ │ ├── user_tracker.go │ │ ├── user_tracker_test.go │ │ ├── utilities.go │ │ └── utilities_test.go │ └── utilities_test.go └── webservice │ ├── dao │ ├── allocation_ask_info.go │ ├── allocation_info.go │ ├── application_history.go │ ├── application_info.go │ ├── cluster_info.go │ ├── cluster_util.go │ ├── config_info.go │ ├── container_history.go │ ├── error_info.go │ ├── event_record.go │ ├── node_info.go │ ├── node_util.go │ ├── partition_info.go │ ├── queue_info.go │ ├── rule_info.go │ ├── scheduler_health.go │ ├── ugm_info.go │ └── yk_uuid.go │ ├── handler_mock_test.go │ ├── handlers.go │ ├── handlers_test.go │ ├── routes.go │ ├── state_dump.go │ ├── streaming_limit.go │ ├── streaming_limit_test.go │ ├── webservice.go │ 
└── webservice_test.go └── scripts └── generate-fsm-graph-images.sh /.asf.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | # https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features 21 | 22 | github: 23 | description: "Apache YuniKorn Core" 24 | homepage: https://yunikorn.apache.org/ 25 | labels: 26 | - yunikorn 27 | - go 28 | - universal-resource-scheduler 29 | - apache-yarn 30 | - kubernetes 31 | enabled_merge_buttons: 32 | squash: true 33 | merge: false 34 | rebase: false 35 | features: 36 | wiki: false 37 | issues: false 38 | projects: false 39 | 40 | notifications: 41 | commits: issues@yunikorn.apache.org 42 | issues: reviews@yunikorn.apache.org 43 | pullrequests: reviews@yunikorn.apache.org 44 | jira_options: link label 45 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE: -------------------------------------------------------------------------------- 1 | ### What is this PR for? 2 | A few sentences describing the overall goals of the pull request's commits. 3 | First time? 
Check out the contributing guide - http://yunikorn.apache.org/community/how_to_contribute 4 | 5 | 6 | ### What type of PR is it? 7 | * [ ] - Bug Fix 8 | * [ ] - Improvement 9 | * [ ] - Feature 10 | * [ ] - Documentation 11 | * [ ] - Hot Fix 12 | * [ ] - Refactoring 13 | 14 | ### Todos 15 | * [ ] - Task 16 | 17 | ### What is the Jira issue? 18 | * Open an issue on Jira https://issues.apache.org/jira/browse/YUNIKORN/ 19 | * Put link here, and add [YUNIKORN-*Jira number*] in PR title, e.g. `[YUNIKORN-2] Gang scheduling interface parameters` 20 | 21 | ### How should this be tested? 22 | 23 | ### Screenshots (if appropriate) 24 | 25 | ### Questions: 26 | * [ ] - The license files need an update. 27 | * [ ] - There are breaking changes for older versions. 28 | * [ ] - It needs documentation. 29 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: Pre-commit checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | workflow_dispatch: {} 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event.pull_request.number }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout source code 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 2 22 | - name: Set up Go 23 | uses: actions/setup-go@v5 24 | with: 25 | go-version-file: .go_version 26 | - name: Check license 27 | run: make license-check 28 | - name: Go lint 29 | run: make lint 30 | - name: Run Version Check 31 | run: make pseudo 32 | - name: Run ShellCheck 33 | run: make check_scripts 34 | - name: Unit tests 35 | run: make test 36 | - name: Code coverage 37 | uses: codecov/codecov-action@v4 38 | with: 39 | files: build/coverage.txt 40 | # After codecov/codecov-action@v4, tokenless uploading of coverage files to non-public repo is unsupported. 
41 | # To enable codecov analysis in your forked repo. Please configure CODECOV_TOKEN in your repository secrets. 42 | # Ref: https://docs.codecov.com/docs/adding-the-codecov-token 43 | token: ${{ secrets.CODECOV_TOKEN }} 44 | 45 | e2e-tests: 46 | needs: build 47 | runs-on: ubuntu-latest 48 | strategy: 49 | fail-fast: false 50 | matrix: 51 | k8s: [v1.32.2] 52 | plugin: [""] 53 | steps: 54 | - name: Checkout yunikorn-k8shim source code 55 | uses: actions/checkout@v4 56 | with: 57 | repository: apache/yunikorn-k8shim 58 | fetch-depth: 2 59 | path: k8shim 60 | - name: Checkout yunikorn-core source code 61 | uses: actions/checkout@v4 62 | with: 63 | fetch-depth: 2 64 | path: core 65 | - name: Set up Go 66 | uses: actions/setup-go@v5 67 | with: 68 | go-version-file: k8shim/.go_version 69 | cache-dependency-path: "**/go.sum" 70 | - name: Set hugepage 71 | run: | 72 | echo "vm.nr_hugepages = 1024" | sudo tee -a /etc/sysctl.conf 73 | sudo sysctl -p 74 | sudo sysctl -a | grep vm.nr_hugepages 75 | - name: Cache and Restore e2e required tools 76 | id: cache 77 | uses: actions/cache@v4 78 | with: 79 | path: | 80 | k8shim/tools 81 | key: ${{ runner.os }}-e2e-${{ hashFiles('k8shim/Makefile') }} 82 | restore-keys: | 83 | ${{ runner.os }}-e2e- 84 | - name: Replace yunikorn-core dependency 85 | run: cd k8shim && go mod edit -replace "github.com/apache/yunikorn-core=../core" && grep yunikorn-core go.mod && go mod tidy 86 | - name: Run e2e tests 87 | run: cd k8shim && ./scripts/run-e2e-tests.sh -a "test" -n "yk8s" -v "kindest/node:${KIND_NODE_IMAGE}" ${KIND_EXTRA_ARGS} 88 | env: 89 | KIND_NODE_IMAGE: ${{ matrix.k8s }} 90 | KIND_EXTRA_ARGS: ${{ matrix.plugin }} 91 | - name: Upload artifacts 92 | uses: actions/upload-artifact@v4 93 | if: ${{ failure() }} 94 | with: 95 | name: ${{ github.job }} stdout (${{ matrix.k8s }}${{ matrix.plugin == '--plugin' && format(', {0}', matrix.plugin) || matrix.plugin }}) 96 | path: k8shim/build/e2e 97 | 
-------------------------------------------------------------------------------- /.github/workflows/push-master.yml: -------------------------------------------------------------------------------- 1 | name: Push to master 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | unit-tests: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout source code 13 | uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 2 16 | - name: Set up Go 17 | uses: actions/setup-go@v5 18 | with: 19 | go-version-file: .go_version 20 | - name: Unit tests 21 | run: make test 22 | - name: Code coverage 23 | uses: codecov/codecov-action@v4 24 | with: 25 | files: build/coverage.txt 26 | # After codecov/codecov-action@v4, tokenless uploading of coverage files to non-public repo is unsupported. 27 | # To enable codecov analysis in your forked repo. Please configure CODECOV_TOKEN in your repository secrets. 28 | # Ref: https://docs.codecov.com/docs/adding-the-codecov-token 29 | token: ${{ secrets.CODECOV_TOKEN }} 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.tmp 2 | .DS_Store 3 | .build 4 | *.swp 5 | *.a 6 | /tools/ 7 | /build/ 8 | /_output/ 9 | .idea 10 | /coverage.txt 11 | yunikorn-state.txt 12 | /shellcheck 13 | -------------------------------------------------------------------------------- /.go_version: -------------------------------------------------------------------------------- 1 | 1.23 -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. 
The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | # options for analysis running 20 | run: 21 | issues-exit-code: 1 22 | modules-download-mode: readonly 23 | timeout: 5m 24 | 25 | # settings of specific linters 26 | linters-settings: 27 | errcheck: 28 | check-type-assertions: true 29 | check-blank: true 30 | revive: 31 | confidence: 0.8 32 | gofmt: 33 | simplify: true 34 | goimports: 35 | local-prefixes: github.com/apache/yunikorn 36 | govet: 37 | shadow: true 38 | funlen: 39 | lines: 120 40 | statements: 80 41 | depguard: 42 | rules: 43 | main: 44 | files: 45 | - $all 46 | deny: 47 | - pkg: "github.com/sirupsen/logrus" 48 | desc: "logging is standardised via yunikorn logger and zap" 49 | - pkg: "github.com/stretchr/testify" 50 | desc: "test assertions must use gotest.tools/v3/assert" 51 | 52 | # linters to use 53 | linters: 54 | disable-all: true 55 | fast: false 56 | enable: 57 | - errcheck 58 | - unused 59 | - staticcheck 60 | - gosimple 61 | - ineffassign 62 | - funlen 63 | - revive 64 | - gofmt 65 | - goimports 66 | - govet 67 | - goconst 68 | - depguard 69 | - nakedret 70 | - gocritic 71 | - godox 72 | - gosec 73 | - dogsled 74 | - whitespace 75 | 76 | issues: 77 | exclude-use-default: true 78 | 79 | # Maximum issues count per one linter. Set to 0 to disable. Default is 50. 80 | max-issues-per-linter: 0 81 | 82 | # Maximum count of issues with the same text. Set to 0 to disable. 
Default is 3. 83 | max-same-issues: 0 84 | 85 | # Show only new issues: if there are unstaged changes or untracked files, 86 | # only those changes are analyzed, else only changes in HEAD~ are analyzed. 87 | # It's a super-useful option for integration of golangci-lint into existing 88 | # large codebase. It's not practical to fix all existing issues at the moment 89 | # of integration: much better don't allow issues in new code. 90 | # Default is false. 91 | new: false 92 | 93 | # Show only new issues created after git revision `REV` 94 | # new-from-rev: REV 95 | 96 | # Show only new issues created in git patch with set file path. 97 | # new-from-patch: path/to/patch/file 98 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache YuniKorn 2 | Copyright 2019-2024 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | -------------------------------------------------------------------------------- /cmd/queueconfigchecker/queueconfigchecker.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package main 20 | 21 | import ( 22 | "log" 23 | "os" 24 | 25 | "github.com/apache/yunikorn-core/pkg/common/configs" 26 | ) 27 | 28 | /* 29 | A utility command that loads the queue configuration file given as its single argument and checks its validity. Exit codes: 1 wrong number of arguments, 2 file could not be read, 3 configuration failed validation. 30 | */ 31 | func main() { 32 | if len(os.Args) != 2 { 33 | log.Println("Usage: " + os.Args[0] + " <queue-config-file>") 34 | os.Exit(1) 35 | } 36 | queueFile := os.Args[1] 37 | conf, err := os.ReadFile(queueFile) 38 | if err != nil { 39 | log.Printf("Could not read file: %v", err) 40 | os.Exit(2) 41 | } 42 | _, err = configs.LoadSchedulerConfigFromByteArray(conf) 43 | if err != nil { 44 | log.Printf("Config validation failed: %v", err) 45 | os.Exit(3) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /cmd/schedulerclient/client.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package main 20 | 21 | import ( 22 | "context" 23 | "fmt" 24 | "io" 25 | "log" 26 | "time" 27 | 28 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 29 | 30 | "google.golang.org/grpc" 31 | "google.golang.org/grpc/credentials/insecure" 32 | ) 33 | 34 | const ( 35 | address = "localhost:3333" 36 | ) 37 | 38 | func main() { 39 | if err := runApp(); err != nil { 40 | log.Fatalf("error: %v", err) 41 | } 42 | } 43 | 44 | func runApp() error { 45 | // Set up a connection to the server. 46 | conn, err := grpc.NewClient(address, grpc.WithTransportCredentials(insecure.NewCredentials())) 47 | if err != nil { 48 | log.Fatalf("did not connect: %v", err) 49 | } 50 | defer conn.Close() 51 | c := si.NewSchedulerClient(conn) 52 | 53 | ctx, cancel := context.WithTimeout(context.Background(), time.Hour*100000) 54 | defer cancel() 55 | _, err = c.RegisterResourceManager(ctx, &si.RegisterResourceManagerRequest{}) 56 | if err != nil { 57 | return fmt.Errorf("could not greet: %v", err) 58 | } 59 | log.Printf("Responded") 60 | 61 | stream, err := c.UpdateAllocation(ctx) 62 | if err != nil { 63 | return fmt.Errorf("error on update: %v", err) 64 | } 65 | done := make(chan bool) 66 | 67 | // Connect to server and send streaming 68 | // first goroutine sends requests 69 | go func() { 70 | for i := 1; i <= 10; i++ { 71 | req := si.AllocationRequest{} 72 | if err := stream.Send(&req); err != nil { 73 | log.Fatalf("can not send %v", err) 74 | } 75 | 76 | log.Print("Send request") 77 | time.Sleep(time.Millisecond * 100) 78 | } 79 | }() 80 | 81 | // second goroutine receives data from stream 82 | // and saves result in max variable 83 | // 84 | // if stream is finished it closes done channel 85 | go func() { 86 | for { 87 | _, err := stream.Recv() 88 | if err == io.EOF { 89 | close(done) 90 | return 91 | } 92 | if err != nil { 93 | log.Fatalf("can not receive %v", err) 94 | } 95 | log.Printf("Responded by server") 96 | } 97 | }() 98 | 99 | // third goroutine closes 
done channel 100 | // if context is done 101 | go func() { 102 | <-ctx.Done() 103 | if err := ctx.Err(); err != nil { 104 | log.Println(err) 105 | } 106 | close(done) 107 | }() 108 | 109 | <-done 110 | log.Printf("Finished") 111 | return nil 112 | } 113 | -------------------------------------------------------------------------------- /cmd/simplescheduler/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package main 20 | 21 | import ( 22 | "flag" 23 | "os" 24 | ) 25 | 26 | var ( 27 | endpoint = flag.String("endpoint", "tcp://localhost:3333", "YuniKorn endpoint") 28 | ) 29 | 30 | func main() { 31 | flag.Parse() 32 | handle() 33 | os.Exit(0) 34 | } 35 | 36 | func handle() { 37 | scheduler := &SimpleScheduler{} 38 | scheduler.Run(*endpoint) 39 | } 40 | -------------------------------------------------------------------------------- /config/limit.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | partitions: 21 | - name: default 22 | queues: 23 | - name: root 24 | limits: 25 | - limit: 26 | users: 27 | - john 28 | - sue 29 | maxapplications: 10 30 | maxresources: {memory: 10G, vcore: 10} 31 | - limit: 32 | users: 33 | - bob 34 | maxapplications: 100 35 | queues: 36 | - name: level1 37 | limits: 38 | - limit: 39 | users: 40 | - user@domain 41 | - testuser 42 | maxapplications: 10 43 | - limit: 44 | groups: 45 | - "*" 46 | maxapplications: 10 47 | - limit: 48 | groups: 49 | - appdev 50 | maxapplications: 20 51 | queues: 52 | - name: level2-2 53 | limits: 54 | - limit: 55 | users: 56 | - user@domain 57 | maxapplications: 1 58 | - name: level2-3 59 | limits: 60 | - limit: 61 | users: 62 | - user@domain 63 | maxapplications: 50 64 | -------------------------------------------------------------------------------- /config/queues.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. 
The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | partitions: 21 | - name: default 22 | nodesortpolicy: 23 | type: binpacking 24 | placementrules: 25 | - name: User 26 | create: true 27 | parent: 28 | name: Provided 29 | create: false 30 | filter: 31 | type: allow 32 | groups: 33 | - sandbox 34 | - name: Provided 35 | create: true 36 | limits: 37 | - limit: "resource limit for user" 38 | users: 39 | - user1 40 | maxresources: {memory: 10G, vcore: 10} 41 | - limit: "application limit for group" 42 | groups: 43 | - sandbox 44 | maxapplications: 10 45 | queues: 46 | - name: root 47 | queues: 48 | - name: advertisement 49 | resources: 50 | guaranteed: 51 | {memory: 500G, vcore: 50} 52 | max: 53 | {memory: 800G, vcore: 80} 54 | - name: search 55 | resources: 56 | guaranteed: 57 | memory: 400G 58 | vcore: 40 59 | max: 60 | memory: 600G 61 | vcore: 60 62 | - name: sandbox 63 | parent: true 64 | submitacl: " sandbox" 65 | resources: 66 | guaranteed: 67 | memory: 100G 68 | vcore: 10 69 | max: 70 | memory: 100G 71 | vcore: 10 72 | - name: gpu 73 | queues: 74 | - name: production 75 | adminacl: "admin admin" 76 | maxapplications: 10 77 | - name: test 78 | properties: 79 | x: y 80 | limits: 81 | - limit: "resource limit for user" 82 | users: 83 | - gpuuser 84 | maxresources: { memory: 10G, vcore: 10, gpu: 50 } 85 | -------------------------------------------------------------------------------- 
/go.mod: -------------------------------------------------------------------------------- 1 | // 2 | // Licensed to the Apache Software Foundation (ASF) under one 3 | // or more contributor license agreements. See the NOTICE file 4 | // distributed with this work for additional information 5 | // regarding copyright ownership. The ASF licenses this file 6 | // to you under the Apache License, Version 2.0 (the 7 | // "License"); you may not use this file except in compliance 8 | // with the License. You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, 13 | // software distributed under the License is distributed on an 14 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, either express or implied. See the License for the 16 | // specific language governing permissions and limitations 17 | // under the License. 18 | // 19 | 20 | module github.com/apache/yunikorn-core 21 | 22 | go 1.23.0 23 | 24 | toolchain go1.23.7 25 | 26 | require ( 27 | github.com/apache/yunikorn-scheduler-interface v0.0.0-20250304214837-4513ff3a692d 28 | github.com/google/btree v1.1.3 29 | github.com/google/go-cmp v0.7.0 30 | github.com/google/uuid v1.6.0 31 | github.com/julienschmidt/httprouter v1.3.0 32 | github.com/looplab/fsm v1.0.2 33 | github.com/prometheus/client_golang v1.18.0 34 | github.com/prometheus/client_model v0.5.0 35 | github.com/prometheus/common v0.45.0 36 | github.com/sasha-s/go-deadlock v0.3.5 37 | go.uber.org/zap v1.27.0 38 | golang.org/x/exp v0.0.0-20250228200357-dead58393ab7 39 | golang.org/x/net v0.36.0 40 | golang.org/x/time v0.10.0 41 | google.golang.org/grpc v1.71.0 42 | gopkg.in/yaml.v3 v3.0.1 43 | gotest.tools/v3 v3.5.2 44 | ) 45 | 46 | require ( 47 | github.com/beorn7/perks v1.0.1 // indirect 48 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 49 | github.com/davecgh/go-spew v1.1.1 // indirect 50 | 
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect 51 | github.com/petermattis/goid v0.0.0-20250303134427-723919f7f203 // indirect 52 | github.com/prometheus/procfs v0.12.0 // indirect 53 | go.uber.org/multierr v1.10.0 // indirect 54 | golang.org/x/sys v0.30.0 // indirect 55 | golang.org/x/text v0.22.0 // indirect 56 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect 57 | google.golang.org/protobuf v1.36.5 // indirect 58 | ) 59 | 60 | replace ( 61 | github.com/petermattis/goid => github.com/petermattis/goid v0.0.0-20250303134427-723919f7f203 62 | golang.org/x/crypto => golang.org/x/crypto v0.35.0 63 | golang.org/x/net => golang.org/x/net v0.36.0 64 | golang.org/x/sys => golang.org/x/sys v0.30.0 65 | golang.org/x/text => golang.org/x/text v0.22.0 66 | golang.org/x/tools => golang.org/x/tools v0.30.0 67 | ) 68 | -------------------------------------------------------------------------------- /pkg/common/configs/configs_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package configs 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | ) 26 | 27 | func TestConfigMap(t *testing.T) { 28 | defer SetConfigMap(nil) 29 | 30 | configmap := make(map[string]string) 31 | configmap["key"] = "value" 32 | SetConfigMap(configmap) 33 | value, ok := GetConfigMap()["key"] 34 | assert.Assert(t, ok, "entry not found") 35 | assert.Equal(t, "value", value, "test value not found") 36 | 37 | SetConfigMap(nil) 38 | _, ok = GetConfigMap()["key"] 39 | assert.Assert(t, !ok, "test value still found") 40 | } 41 | 42 | func TestCallback(t *testing.T) { 43 | defer RemoveConfigMapCallback("test-callback") 44 | 45 | var callbackReceived bool = false 46 | AddConfigMapCallback("test-callback", func() { 47 | callbackReceived = true 48 | }) 49 | 50 | SetConfigMap(nil) 51 | assert.Assert(t, callbackReceived, "callback not received") 52 | 53 | callbackReceived = false 54 | RemoveConfigMapCallback("test-callback") 55 | SetConfigMap(nil) 56 | assert.Assert(t, !callbackReceived, "callback still received") 57 | } 58 | -------------------------------------------------------------------------------- /pkg/common/constants.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// String constants shared by the packages that import common.
const (
	// Empty is the empty string.
	Empty = ""

	// Wildcard is the "*" token.
	Wildcard = "*"
	// Separator is the "," token.
	Separator = ","
	// Space is a single space character.
	Space = " "
	// AnonymousUser is the user name "nobody".
	// NOTE(review): presumably the fallback for requests without a user - confirm with callers.
	AnonymousUser = "nobody"
	// AnonymousGroup is the group name "nogroup".
	AnonymousGroup = "nogroup"
	// RecoveryQueue is the leaf name of the recovery queue.
	RecoveryQueue = "@recovery@"
	// RecoveryQueueFull is RecoveryQueue qualified with the "root." prefix.
	RecoveryQueueFull = "root." + RecoveryQueue
	// DefaultPlacementQueue is the fully qualified queue name "root.default".
	DefaultPlacementQueue = "root.default"
)

// Sentinel errors; compare against them with errors.Is.
var (
	// InvalidQueueName returned when queue name is invalid
	InvalidQueueName = errors.New("invalid queue name, max 64 characters consisting of alphanumeric characters and '-', '_', '#', '@', '/', ':' allowed")
	// ErrorReservingAlloc returned when an ask that is allocated tries to reserve a node.
	ErrorReservingAlloc = errors.New("ask already allocated, no reservation allowed")
	// ErrorDuplicateReserve returned when the same reservation already exists on the application
	ErrorDuplicateReserve = errors.New("reservation already exists")
	// ErrorNodeAlreadyReserved returned when the node is already reserved, failing the reservation
	ErrorNodeAlreadyReserved = errors.New("node is already reserved")
	// ErrorNodeNotFitReserve returned when the allocation does not fit on an empty node, failing the reservation
	ErrorNodeNotFitReserve = errors.New("reservation does not fit on node")
)

// Constant messages for AllocationLog entries
const (
	PreemptionPreconditionsFailed = "Preemption preconditions failed"
	PreemptionDoesNotGuarantee    = "Preemption queue guarantees check failed"
	PreemptionShortfall           = "Preemption helped but short of resources"
	PreemptionDoesNotHelp         = "Preemption does not help"
	NoVictimForRequiredNode       = "No fit on required node, preemption does not help"
)
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package resources 20 | 21 | import ( 22 | "errors" 23 | "math/big" 24 | "regexp" 25 | "strconv" 26 | "strings" 27 | ) 28 | 29 | // This code handles parsing of SI units in quantities: 30 | // ::= 31 | // ::= 0 | 1 | ... | 9 32 | // ::= | 33 | // ::= | 34 | // ::= Ki | Mi | Gi | Ti | Pi | Ei 35 | // ::= "" | k | M | G | T | P | E 36 | // Additionally, ParseVCore supports decimalSI of 'm' to indicate millicores. 37 | 38 | var legal = regexp.MustCompile(`^(?P[0-9]+)\s*(?P([mkKMGTPE]i?)?)$`) 39 | 40 | var multipliers = map[string]int64{ 41 | "": 1, 42 | "m": 1, // special handling if milli is in use 43 | "k": 1e3, 44 | "M": 1e6, 45 | "G": 1e9, 46 | "T": 1e12, 47 | "P": 1e15, 48 | "E": 1e18, 49 | "Ki": 1 << 10, 50 | "Mi": 1 << 20, 51 | "Gi": 1 << 30, 52 | "Ti": 1 << 40, 53 | "Pi": 1 << 50, 54 | "Ei": 1 << 60, 55 | } 56 | 57 | // ParseQuantity is used to parse user-provided values into int64 quantities. 58 | func ParseQuantity(value string) (Quantity, error) { 59 | return parse(value, false) 60 | } 61 | 62 | // ParseVCore is similar to ParseQuantity but allows the 'm' suffix. Additionally, the base unit returned is a 63 | // millicore, so values without units will be converted to milliCPUs (i.e. '10' will result in 10000, and '500m' will 64 | // result in 500). 
65 | func ParseVCore(value string) (Quantity, error) { 66 | return parse(value, true) 67 | } 68 | 69 | func parse(value string, milli bool) (Quantity, error) { 70 | value = strings.TrimSpace(value) 71 | 72 | parts := legal.FindStringSubmatch(value) 73 | if len(parts) == 0 { 74 | return 0, errors.New("invalid quantity") 75 | } 76 | number := parts[1] 77 | suffix := parts[2] 78 | 79 | result, err := strconv.ParseInt(number, 10, 64) 80 | if err != nil { 81 | return 0, errors.New("invalid quantity: overflow") 82 | } 83 | 84 | scale, ok := multipliers[suffix] 85 | if !ok || (suffix == "m" && !milli) { 86 | return 0, errors.New("invalid suffix") 87 | } 88 | 89 | bigResult := big.NewInt(result) 90 | bigScale := big.NewInt(scale) 91 | bigResult = bigResult.Mul(bigResult, bigScale) 92 | if milli && suffix != "m" { 93 | bigResult.Mul(bigResult, big.NewInt(1000)) 94 | } 95 | if !bigResult.IsInt64() { 96 | return 0, errors.New("invalid quantity: overflow") 97 | } 98 | result = bigResult.Int64() 99 | 100 | return Quantity(result), nil 101 | } 102 | -------------------------------------------------------------------------------- /pkg/common/security/usergroup_no_resolver.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package security 20 | 21 | import ( 22 | "os/user" 23 | "time" 24 | ) 25 | 26 | // Get the cache without a resolver. 27 | // In k8shim we currently have internal users to K8s which might not resolve against anything. 28 | // Just echo the object in the correct format based on the user passed in. 29 | func GetUserGroupNoResolve() *UserGroupCache { 30 | return &UserGroupCache{ 31 | ugs: map[string]*UserGroup{}, 32 | interval: cleanerInterval * time.Second, 33 | lookup: noLookupUser, 34 | lookupGroupID: noLookupGroupID, 35 | groupIds: noLookupGroupIds, 36 | stop: make(chan struct{}), 37 | } 38 | } 39 | 40 | // Default linux behaviour: a user is member of the primary group with the same name 41 | func noLookupUser(userName string) (*user.User, error) { 42 | return &user.User{ 43 | Uid: "-1", 44 | Gid: userName, 45 | Username: userName, 46 | }, nil 47 | } 48 | 49 | // Echo the group as it comes in 50 | func noLookupGroupID(gid string) (*user.Group, error) { 51 | group := user.Group{Gid: gid} 52 | group.Name = gid 53 | return &group, nil 54 | } 55 | 56 | // No further groups returned just the primary group 57 | func noLookupGroupIds(osUser *user.User) ([]string, error) { 58 | return []string{}, nil 59 | } 60 | -------------------------------------------------------------------------------- /pkg/common/security/usergroup_os_resolver.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package security 20 | 21 | import ( 22 | "os/user" 23 | "time" 24 | ) 25 | 26 | // Get the cache and use that to resolve all user requests 27 | func GetUserGroupCacheOS() *UserGroupCache { 28 | return &UserGroupCache{ 29 | ugs: map[string]*UserGroup{}, 30 | interval: cleanerInterval * time.Second, 31 | lookup: user.Lookup, 32 | lookupGroupID: user.LookupGroupId, 33 | groupIds: wrappedGroupIds, 34 | stop: make(chan struct{}), 35 | } 36 | } 37 | 38 | // wrapper function to allow easy testing of the cache 39 | func wrappedGroupIds(osUser *user.User) ([]string, error) { 40 | return osUser.GroupIds() 41 | } 42 | -------------------------------------------------------------------------------- /pkg/common/security/usergroup_test_resolver.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package security 20 | 21 | import ( 22 | "fmt" 23 | "os/user" 24 | "strconv" 25 | "time" 26 | ) 27 | 28 | const ( 29 | Testuser1 = "testuser1" 30 | Testuser2 = "testuser2" 31 | Testuser3 = "testuser3" 32 | ) 33 | 34 | // Get the cache with a test resolver 35 | // cleaner runs every second 36 | func GetUserGroupCacheTest() *UserGroupCache { 37 | return &UserGroupCache{ 38 | ugs: map[string]*UserGroup{}, 39 | interval: time.Second, 40 | lookup: lookup, 41 | lookupGroupID: lookupGroupID, 42 | groupIds: groupIds, 43 | stop: make(chan struct{}), 44 | } 45 | } 46 | 47 | // test function only 48 | func lookup(userName string) (*user.User, error) { 49 | // 1st test user: all OK 50 | if userName == Testuser1 { 51 | return &user.User{ 52 | Uid: "1000", 53 | Gid: "1000", 54 | Username: "testuser1", 55 | }, nil 56 | } 57 | // 2nd test user: primary group does not resolve 58 | if userName == Testuser2 { 59 | return &user.User{ 60 | Uid: "100", 61 | Gid: "100", 62 | Username: "testuser2", 63 | }, nil 64 | } 65 | if userName == Testuser3 { 66 | return &user.User{ 67 | Uid: "1001", 68 | Gid: "1001", 69 | Username: "testuser3", 70 | }, nil 71 | } 72 | if userName == "testuser4" { 73 | return &user.User{ 74 | Uid: "901", 75 | Gid: "901", 76 | Username: "testuser4", 77 | }, nil 78 | } 79 | if userName == "testuser5" { 80 | return &user.User{ 81 | Uid: "1001", 82 | Gid: "1001", 83 | Username: "testuser5", 84 | }, nil 85 | } 86 | if userName == "invalid-gid-user" { 87 | return &user.User{ 88 | Uid: "1001", 89 | Gid: "1_001", 90 | Username: "invalid-gid-user", 91 | }, nil 92 | } 93 | // all other users fail 94 | return nil, fmt.Errorf("lookup failed for user: %s", userName) 95 | } 96 | 97 | // test function only 98 | func lookupGroupID(gid string) (*user.Group, error) { 99 | gID, err := strconv.Atoi(gid) 100 | if err != nil { 101 | return nil, err 102 | } 103 | 
// fail all groups under 1000 104 | if gID < 1000 { 105 | return nil, fmt.Errorf("lookup failed for group: %s", gid) 106 | } 107 | // fixed return: group + id as the name 108 | group := user.Group{Gid: gid} 109 | group.Name = "group" + gid 110 | return &group, nil 111 | } 112 | 113 | // test function only 114 | func groupIds(osUser *user.User) ([]string, error) { 115 | if osUser.Username == Testuser1 { 116 | return []string{"1001"}, nil 117 | } 118 | if osUser.Username == Testuser2 { 119 | return []string{"1001", "1002"}, nil 120 | } 121 | // group list might return primary group ID also 122 | if osUser.Username == Testuser3 { 123 | return []string{"1002", "1001", "1003", "1004"}, nil 124 | } 125 | 126 | if osUser.Username == "testuser4" { 127 | return []string{"901", "902"}, nil 128 | } 129 | return nil, fmt.Errorf("lookup failed for user: %s", osUser.Username) 130 | } 131 | -------------------------------------------------------------------------------- /pkg/entrypoint/service_context.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package entrypoint 20 | 21 | import ( 22 | "go.uber.org/zap" 23 | 24 | "github.com/apache/yunikorn-core/pkg/events" 25 | "github.com/apache/yunikorn-core/pkg/log" 26 | "github.com/apache/yunikorn-core/pkg/metrics" 27 | "github.com/apache/yunikorn-core/pkg/scheduler" 28 | "github.com/apache/yunikorn-core/pkg/webservice" 29 | "github.com/apache/yunikorn-scheduler-interface/lib/go/api" 30 | ) 31 | 32 | type ServiceContext struct { 33 | RMProxy api.SchedulerAPI 34 | Scheduler *scheduler.Scheduler 35 | WebApp *webservice.WebService 36 | MetricsCollector metrics.InternalMetricsCollector 37 | } 38 | 39 | func (s *ServiceContext) StopAll() { 40 | log.Log(log.Entrypoint).Info("ServiceContext stop all services") 41 | if s.WebApp != nil { 42 | if err := s.WebApp.StopWebApp(); err != nil { 43 | log.Log(log.Entrypoint).Error("failed to stop web-app", 44 | zap.Error(err)) 45 | } 46 | } 47 | if s.MetricsCollector != nil { 48 | s.MetricsCollector.Stop() 49 | } 50 | s.Scheduler.Stop() 51 | s.RMProxy.Stop() 52 | events.GetEventSystem().Stop() 53 | } 54 | -------------------------------------------------------------------------------- /pkg/events/event_publisher.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package events 20 | 21 | import ( 22 | "time" 23 | 24 | "go.uber.org/zap" 25 | 26 | "github.com/apache/yunikorn-core/pkg/log" 27 | "github.com/apache/yunikorn-core/pkg/plugins" 28 | ) 29 | 30 | // stores the push event internal 31 | var defaultPushEventInterval = 2 * time.Second 32 | 33 | type EventPublisher struct { 34 | store *EventStore 35 | pushEventInterval time.Duration 36 | stop chan struct{} 37 | } 38 | 39 | func CreateShimPublisher(store *EventStore) *EventPublisher { 40 | publisher := &EventPublisher{ 41 | store: store, 42 | pushEventInterval: defaultPushEventInterval, 43 | stop: make(chan struct{}), 44 | } 45 | return publisher 46 | } 47 | 48 | func (sp *EventPublisher) StartService() { 49 | log.Log(log.Events).Info("Starting shim event publisher") 50 | go func() { 51 | for { 52 | select { 53 | case <-sp.stop: 54 | return 55 | case <-time.After(sp.pushEventInterval): 56 | messages := sp.store.CollectEvents() 57 | if len(messages) > 0 { 58 | if eventPlugin := plugins.GetResourceManagerCallbackPlugin(); eventPlugin != nil { 59 | log.Log(log.Events).Debug("Sending eventChannel", zap.Int("number of messages", len(messages))) 60 | eventPlugin.SendEvent(messages) 61 | } 62 | } 63 | } 64 | } 65 | }() 66 | } 67 | 68 | func (sp *EventPublisher) Stop() { 69 | log.Log(log.Events).Info("Stopping shim event publisher") 70 | close(sp.stop) 71 | } 72 | 73 | func (sp *EventPublisher) getEventStore() *EventStore { 74 | return sp.store 75 | } 76 | -------------------------------------------------------------------------------- /pkg/events/event_publisher_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. 
See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package events 20 | 21 | import ( 22 | "testing" 23 | "time" 24 | 25 | "gotest.tools/v3/assert" 26 | 27 | "github.com/apache/yunikorn-core/pkg/common" 28 | "github.com/apache/yunikorn-core/pkg/mock" 29 | "github.com/apache/yunikorn-core/pkg/plugins" 30 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 31 | ) 32 | 33 | // creating a Publisher with nil store should still provide a non-nil object 34 | func TestCreateShimPublisher(t *testing.T) { 35 | publisher := CreateShimPublisher(nil) 36 | assert.Assert(t, publisher != nil, "publisher should not be nil") 37 | } 38 | 39 | // StartService() and Stop() functions should not cause panic 40 | func TestServiceStartStopInternal(t *testing.T) { 41 | store := newEventStore(1000) 42 | publisher := CreateShimPublisher(store) 43 | publisher.StartService() 44 | defer publisher.Stop() 45 | assert.Equal(t, publisher.getEventStore(), store) 46 | } 47 | 48 | func TestNoFillWithoutEventPluginRegistered(t *testing.T) { 49 | store := newEventStore(1000) 50 | publisher := CreateShimPublisher(store) 51 | publisher.pushEventInterval = time.Millisecond 52 | publisher.StartService() 53 | defer publisher.Stop() 54 | 55 | event := &si.EventRecord{ 56 | Type: si.EventRecord_REQUEST, 57 | ObjectID: "ask", 58 | ReferenceID: 
"app", 59 | Message: "message", 60 | TimestampNano: 123456, 61 | } 62 | store.Store(event) 63 | 64 | err := common.WaitForCondition(time.Millisecond, 65 | time.Second, 66 | func() bool { 67 | return store.CountStoredEvents() == 0 68 | }, 69 | ) 70 | assert.NilError(t, err, "the Publisher should erase the store even if no EventPlugin registered") 71 | } 72 | 73 | // we push an event to the publisher, and check that the same event 74 | // is published by observing the mocked EventPlugin 75 | func TestPublisherSendsEvent(t *testing.T) { 76 | eventPlugin := mock.NewEventPlugin() 77 | plugins.RegisterSchedulerPlugin(eventPlugin) 78 | if plugins.GetResourceManagerCallbackPlugin() == nil { 79 | t.Fatal("could not register event plugin for test") 80 | } 81 | 82 | store := newEventStore(1000) 83 | publisher := CreateShimPublisher(store) 84 | publisher.pushEventInterval = time.Millisecond 85 | publisher.StartService() 86 | defer publisher.Stop() 87 | 88 | event := &si.EventRecord{ 89 | Type: si.EventRecord_REQUEST, 90 | ObjectID: "ask", 91 | ReferenceID: "app", 92 | Message: "message", 93 | TimestampNano: 123456, 94 | } 95 | store.Store(event) 96 | 97 | var eventFromPlugin *si.EventRecord 98 | err := common.WaitForCondition(time.Millisecond, 99 | time.Second, 100 | func() bool { 101 | eventFromPlugin = eventPlugin.GetNextEventRecord() 102 | return eventFromPlugin != nil 103 | }, 104 | ) 105 | assert.NilError(t, err, "event was not received in time: %v", err) 106 | assert.Equal(t, eventFromPlugin.ObjectID, "ask") 107 | assert.Equal(t, eventFromPlugin.ReferenceID, "app") 108 | assert.Equal(t, eventFromPlugin.Message, "message") 109 | assert.Equal(t, eventFromPlugin.TimestampNano, int64(123456)) 110 | } 111 | -------------------------------------------------------------------------------- /pkg/events/event_store.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more 
contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package events 20 | 21 | import ( 22 | "go.uber.org/zap" 23 | 24 | "github.com/apache/yunikorn-core/pkg/locking" 25 | "github.com/apache/yunikorn-core/pkg/log" 26 | "github.com/apache/yunikorn-core/pkg/metrics" 27 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 28 | ) 29 | 30 | // The EventStore operates under the following assumptions: 31 | // - there is a cap for the number of events stored 32 | // - the CollectEvents() function clears the currently stored events in the EventStore 33 | // 34 | // Assuming the rate of events generated by the scheduler component in a given time period 35 | // is high, calling CollectEvents() periodically should be fine. 
36 | type EventStore struct { 37 | events []*si.EventRecord 38 | idx uint64 // points where to store the next event 39 | size uint64 40 | lastSize uint64 41 | locking.RWMutex 42 | } 43 | 44 | func newEventStore(size uint64) *EventStore { 45 | return &EventStore{ 46 | events: make([]*si.EventRecord, size), 47 | size: size, 48 | lastSize: size, 49 | } 50 | } 51 | 52 | func (es *EventStore) Store(event *si.EventRecord) { 53 | es.Lock() 54 | defer es.Unlock() 55 | 56 | if es.idx == uint64(len(es.events)) { 57 | metrics.GetEventMetrics().IncEventsNotStored() 58 | return 59 | } 60 | es.events[es.idx] = event 61 | es.idx++ 62 | 63 | metrics.GetEventMetrics().IncEventsStored() 64 | } 65 | 66 | func (es *EventStore) CollectEvents() []*si.EventRecord { 67 | es.Lock() 68 | defer es.Unlock() 69 | 70 | messages := make([]*si.EventRecord, len(es.events[:es.idx])) 71 | copy(messages, es.events[:es.idx]) 72 | 73 | if es.size != es.lastSize { 74 | log.Log(log.Events).Info("Resizing event store", zap.Uint64("last", es.lastSize), zap.Uint64("new", es.size)) 75 | es.events = make([]*si.EventRecord, es.size) 76 | } 77 | es.idx = 0 78 | es.lastSize = es.size 79 | 80 | metrics.GetEventMetrics().AddEventsCollected(len(messages)) 81 | return messages 82 | } 83 | 84 | func (es *EventStore) CountStoredEvents() uint64 { 85 | es.RLock() 86 | defer es.RUnlock() 87 | 88 | return es.idx 89 | } 90 | 91 | func (es *EventStore) SetStoreSize(size uint64) { 92 | es.Lock() 93 | defer es.Unlock() 94 | es.size = size 95 | } 96 | -------------------------------------------------------------------------------- /pkg/events/events.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package events 20 | 21 | import ( 22 | "time" 23 | 24 | "github.com/apache/yunikorn-core/pkg/common/resources" 25 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 26 | ) 27 | 28 | func createEventRecord(recordType si.EventRecord_Type, objectID, referenceID, message string, 29 | changeType si.EventRecord_ChangeType, changeDetail si.EventRecord_ChangeDetail, resource *resources.Resource) *si.EventRecord { 30 | return &si.EventRecord{ 31 | Type: recordType, 32 | ObjectID: objectID, 33 | ReferenceID: referenceID, 34 | Message: message, 35 | TimestampNano: time.Now().UnixNano(), 36 | Resource: resource.ToProto(), 37 | EventChangeDetail: changeDetail, 38 | EventChangeType: changeType, 39 | } 40 | } 41 | 42 | func CreateRequestEventRecord(objectID, referenceID, message string, resource *resources.Resource) *si.EventRecord { 43 | return createEventRecord(si.EventRecord_REQUEST, objectID, referenceID, message, si.EventRecord_NONE, si.EventRecord_DETAILS_NONE, resource) 44 | } 45 | 46 | func CreateAppEventRecord(objectID, message, referenceID string, changeType si.EventRecord_ChangeType, changeDetail si.EventRecord_ChangeDetail, resource *resources.Resource) *si.EventRecord { 47 | return createEventRecord(si.EventRecord_APP, objectID, referenceID, message, changeType, changeDetail, resource) 48 | } 49 | 50 | func CreateNodeEventRecord(objectID, message, 
referenceID string, changeType si.EventRecord_ChangeType, changeDetail si.EventRecord_ChangeDetail, resource *resources.Resource) *si.EventRecord { 51 | return createEventRecord(si.EventRecord_NODE, objectID, referenceID, message, changeType, changeDetail, resource) 52 | } 53 | 54 | func CreateQueueEventRecord(objectID, message, referenceID string, changeType si.EventRecord_ChangeType, changeDetail si.EventRecord_ChangeDetail, resource *resources.Resource) *si.EventRecord { 55 | return createEventRecord(si.EventRecord_QUEUE, objectID, referenceID, message, changeType, changeDetail, resource) 56 | } 57 | 58 | func CreateUserGroupEventRecord(objectID, message, referenceID string, changeType si.EventRecord_ChangeType, changeDetail si.EventRecord_ChangeDetail, resource *resources.Resource) *si.EventRecord { 59 | return createEventRecord(si.EventRecord_USERGROUP, objectID, referenceID, message, changeType, changeDetail, resource) 60 | } 61 | -------------------------------------------------------------------------------- /pkg/events/events_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package events 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | 26 | "github.com/apache/yunikorn-core/pkg/common/resources" 27 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 28 | ) 29 | 30 | func TestCreateEventRecord(t *testing.T) { 31 | record := createEventRecord(si.EventRecord_NODE, "ask", "app", "message", 32 | si.EventRecord_NONE, si.EventRecord_DETAILS_NONE, resources.NewResourceFromMap( 33 | map[string]resources.Quantity{ 34 | "cpu": 1, 35 | }, 36 | )) 37 | assert.Equal(t, record.Type, si.EventRecord_NODE) 38 | assert.Equal(t, record.ObjectID, "ask") 39 | assert.Equal(t, record.ReferenceID, "app") 40 | assert.Equal(t, record.Message, "message") 41 | assert.Equal(t, int64(1), record.Resource.Resources["cpu"].Value) 42 | assert.Equal(t, si.EventRecord_NONE, record.EventChangeType) 43 | assert.Equal(t, si.EventRecord_DETAILS_NONE, record.EventChangeDetail) 44 | if record.TimestampNano == 0 { 45 | t.Fatal("the timestamp should have been created") 46 | } 47 | } 48 | 49 | func TestCreateEventRecordTypes(t *testing.T) { 50 | record := CreateRequestEventRecord("ask", "app", "message", nil) 51 | assert.Equal(t, record.Type, si.EventRecord_REQUEST) 52 | 53 | record = CreateAppEventRecord("app", "message", "ask", si.EventRecord_NONE, si.EventRecord_DETAILS_NONE, nil) 54 | assert.Equal(t, record.Type, si.EventRecord_APP) 55 | 56 | record = CreateNodeEventRecord("node", "message", "ask", si.EventRecord_NONE, si.EventRecord_DETAILS_NONE, nil) 57 | assert.Equal(t, record.Type, si.EventRecord_NODE) 58 | 59 | record = CreateQueueEventRecord("queue", "message", "app", si.EventRecord_NONE, si.EventRecord_DETAILS_NONE, nil) 60 | assert.Equal(t, record.Type, si.EventRecord_QUEUE) 61 | 62 | record = CreateUserGroupEventRecord("user", "message", "queue", si.EventRecord_NONE, si.EventRecord_DETAILS_NONE, nil) 63 | assert.Equal(t, record.Type, si.EventRecord_USERGROUP) 64 | } 65 | 
-------------------------------------------------------------------------------- /pkg/events/mock/event_system.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package mock 20 | 21 | import ( 22 | "github.com/apache/yunikorn-core/pkg/events" 23 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 24 | ) 25 | 26 | type EventSystem struct { 27 | Events []*si.EventRecord 28 | enabled bool 29 | } 30 | 31 | func (m *EventSystem) CreateEventStream(_ string, _ uint64) *events.EventStream { 32 | return nil 33 | } 34 | 35 | func (m *EventSystem) RemoveStream(_ *events.EventStream) { 36 | } 37 | 38 | func (m *EventSystem) AddEvent(event *si.EventRecord) { 39 | m.Events = append(m.Events, event) 40 | } 41 | 42 | func (m *EventSystem) StartService() {} 43 | 44 | func (m *EventSystem) Stop() {} 45 | 46 | func (m *EventSystem) Reset() { 47 | m.Events = make([]*si.EventRecord, 0) 48 | } 49 | 50 | func (m *EventSystem) GetEventsFromID(uint64, uint64) ([]*si.EventRecord, uint64, uint64) { 51 | return nil, 0, 0 52 | } 53 | 54 | func (m *EventSystem) IsEventTrackingEnabled() bool { 55 | return m.enabled 56 | } 57 | 58 | func (m *EventSystem) GetEventStreams() []events.EventStreamData { 59 | return nil 60 | } 61 | 62 | func NewEventSystem() *EventSystem { 63 | return &EventSystem{Events: make([]*si.EventRecord, 0), enabled: true} 64 | } 65 | func NewEventSystemDisabled() *EventSystem { 66 | return &EventSystem{Events: make([]*si.EventRecord, 0), enabled: false} 67 | } 68 | -------------------------------------------------------------------------------- /pkg/handler/event_handlers.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
type (
	// EventHandler is implemented by anything that can consume an event.
	// The event is passed untyped; the implementation decides how to
	// interpret it.
	EventHandler interface {
		HandleEvent(ev interface{})
	}

	// EventHandlers groups the two handlers declared here: one named for the
	// RM proxy and one named for the scheduler.
	EventHandlers struct {
		RMProxyEventHandler   EventHandler
		SchedulerEventHandler EventHandler
	}
)
19 | */ 20 | 21 | //nolint:staticcheck 22 | package locking 23 | 24 | import ( 25 | "testing" 26 | "time" 27 | 28 | "gotest.tools/v3/assert" 29 | ) 30 | 31 | func TestDeadlockDetection(t *testing.T) { 32 | enableTracking() 33 | deadlockDetected.Store(false) 34 | defer disableTracking() 35 | 36 | var mutex Mutex 37 | go func() { 38 | mutex.Lock() 39 | mutex.Lock() // will deadlock 40 | mutex.Unlock() // will unwind second lock 41 | }() 42 | time.Sleep(2 * time.Second) 43 | mutex.Unlock() // will unwind first lock 44 | assert.Assert(t, IsDeadlockDetected(), "Deadlock should have been detected") 45 | } 46 | 47 | // TestLockOrderDetection 48 | // lock order detection looks at the ordering of the same mutexes in different go routines 49 | // if the order changes (for two different go routines) then that could be a potential deadlock 50 | // this case happens in preemption when looking for victims when queues hover around guaranteed 51 | func TestLockOrderDetection(t *testing.T) { 52 | var tests = []struct { 53 | name string 54 | disable bool 55 | }{ 56 | {"ordered", false}, 57 | {"no order", true}, 58 | } 59 | for _, tt := range tests { 60 | t.Run(tt.name, func(t *testing.T) { 61 | enableTrackingWithOrder(tt.disable) 62 | deadlockDetected.Store(false) 63 | defer disableTracking() 64 | 65 | var a, b RWMutex 66 | // lock ordering: a, b, b, a 67 | a.Lock() 68 | b.RLock() 69 | b.RUnlock() 70 | a.Unlock() 71 | 72 | // lock ordering: b, a, a, b 73 | b.Lock() 74 | a.RLock() 75 | a.RUnlock() 76 | b.Unlock() 77 | 78 | // detection is based on the tracking order enabled or not 79 | assert.Assert(t, IsDeadlockDetected() == tt.disable, "Deadlock detected not as expected") 80 | }) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /pkg/log/filtered_core.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor 
license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package log 20 | 21 | import "go.uber.org/zap/zapcore" 22 | 23 | type filteredCore struct { 24 | level zapcore.Level 25 | inner zapcore.Core 26 | } 27 | 28 | var _ zapcore.Core = filteredCore{} 29 | 30 | func (f filteredCore) Enabled(level zapcore.Level) bool { 31 | if level < f.level { 32 | return false 33 | } 34 | return f.inner.Enabled(level) 35 | } 36 | 37 | func (f filteredCore) With(fields []zapcore.Field) zapcore.Core { 38 | return f.inner.With(fields) 39 | } 40 | 41 | func (f filteredCore) Check(entry zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry { 42 | if entry.Level < f.level { 43 | return ce 44 | } 45 | return f.inner.Check(entry, ce) 46 | } 47 | 48 | func (f filteredCore) Write(entry zapcore.Entry, fields []zapcore.Field) error { 49 | return f.inner.Write(entry, fields) 50 | } 51 | 52 | func (f filteredCore) Sync() error { 53 | return f.inner.Sync() 54 | } 55 | -------------------------------------------------------------------------------- /pkg/log/rate_limited_logger.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. 
See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package log 20 | 21 | import ( 22 | "time" 23 | 24 | "go.uber.org/zap" 25 | "golang.org/x/time/rate" 26 | ) 27 | 28 | type RateLimitedLogger struct { 29 | logger *zap.Logger 30 | limiter *rate.Limiter 31 | } 32 | 33 | // NewRateLimitedLogger provides a logger that only logs once within a specified duration. 34 | func NewRateLimitedLogger(handle *LoggerHandle, every time.Duration) *RateLimitedLogger { 35 | return &RateLimitedLogger{ 36 | logger: Log(handle), 37 | limiter: rate.NewLimiter(rate.Every(every), 1), 38 | } 39 | } 40 | 41 | func (rl *RateLimitedLogger) Debug(msg string, fields ...zap.Field) { 42 | if rl.limiter.Allow() { 43 | rl.logger.Debug(msg, fields...) 44 | } 45 | } 46 | 47 | func (rl *RateLimitedLogger) Info(msg string, fields ...zap.Field) { 48 | if rl.limiter.Allow() { 49 | rl.logger.Info(msg, fields...) 50 | } 51 | } 52 | 53 | func (rl *RateLimitedLogger) Warn(msg string, fields ...zap.Field) { 54 | if rl.limiter.Allow() { 55 | rl.logger.Warn(msg, fields...) 56 | } 57 | } 58 | 59 | func (rl *RateLimitedLogger) Error(msg string, fields ...zap.Field) { 60 | if rl.limiter.Allow() { 61 | rl.logger.Error(msg, fields...) 
62 | } 63 | } 64 | 65 | func (rl *RateLimitedLogger) DPanic(msg string, fields ...zap.Field) { 66 | if rl.limiter.Allow() { 67 | rl.logger.DPanic(msg, fields...) 68 | } 69 | } 70 | 71 | func (rl *RateLimitedLogger) Panic(msg string, fields ...zap.Field) { 72 | if rl.limiter.Allow() { 73 | rl.logger.Panic(msg, fields...) 74 | } 75 | } 76 | 77 | func (rl *RateLimitedLogger) Fatal(msg string, fields ...zap.Field) { 78 | if rl.limiter.Allow() { 79 | rl.logger.Fatal(msg, fields...) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /pkg/log/rate_limited_logger_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package log 20 | 21 | import ( 22 | "bufio" 23 | "bytes" 24 | "encoding/json" 25 | "sync" 26 | "testing" 27 | "time" 28 | 29 | "go.uber.org/zap" 30 | "go.uber.org/zap/zapcore" 31 | "gotest.tools/v3/assert" 32 | ) 33 | 34 | type logMessage struct { 35 | Level string `json:"L"` 36 | Message string `json:"M"` 37 | } 38 | 39 | func TestRateLimitedLog(t *testing.T) { 40 | defer resetTestLogger() 41 | once = sync.Once{} 42 | config := zap.NewDevelopmentConfig() 43 | encoderConfig := zap.NewDevelopmentEncoderConfig() 44 | buf := bytes.Buffer{} 45 | writer := bufio.NewWriter(&buf) 46 | zapLogger := zap.New( 47 | zapcore.NewCore( 48 | zapcore.NewJSONEncoder(encoderConfig), 49 | zapcore.AddSync(writer), 50 | zap.NewAtomicLevelAt(zap.InfoLevel), 51 | ), 52 | ) 53 | InitializeLogger(zapLogger, &config) 54 | // log once within one minute 55 | logger := NewRateLimitedLogger(Core, 1*time.Minute) 56 | startTime := time.Now() 57 | for { 58 | elapsed := time.Since(startTime) 59 | if elapsed > 500*time.Millisecond { 60 | break 61 | } 62 | logger.Info("YuniKorn") 63 | time.Sleep(10 * time.Millisecond) 64 | } 65 | err := writer.Flush() 66 | assert.NilError(t, err, "failed to flush writer") 67 | var lm logMessage 68 | err = json.Unmarshal(buf.Bytes(), &lm) 69 | assert.NilError(t, err, "failed to unmarshal logMessage from buffer: %s", buf.Bytes()) 70 | assert.Equal(t, "INFO", lm.Level) 71 | assert.Equal(t, "YuniKorn", lm.Message) 72 | } 73 | -------------------------------------------------------------------------------- /pkg/metrics/history/internal_metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package history 20 | 21 | import ( 22 | "time" 23 | 24 | "github.com/apache/yunikorn-core/pkg/locking" 25 | ) 26 | 27 | // This class collects basic information about the cluster 28 | // for the web UI's front page. 29 | // For more detailed metrics collection use Prometheus. 30 | type InternalMetricsHistory struct { 31 | records []*MetricsRecord 32 | limit int 33 | 34 | // internal implementation of limited array 35 | pointer int 36 | 37 | locking.RWMutex 38 | } 39 | 40 | type MetricsRecord struct { 41 | Timestamp time.Time 42 | TotalApplications int 43 | TotalContainers int 44 | } 45 | 46 | func NewInternalMetricsHistory(limit int) *InternalMetricsHistory { 47 | return &InternalMetricsHistory{ 48 | records: make([]*MetricsRecord, limit), 49 | limit: limit, 50 | } 51 | } 52 | 53 | func (h *InternalMetricsHistory) Store(totalApplications, totalContainers int) { 54 | h.Lock() 55 | defer h.Unlock() 56 | 57 | h.records[h.pointer] = &MetricsRecord{ 58 | time.Now(), 59 | totalApplications, 60 | totalContainers, 61 | } 62 | h.pointer++ 63 | if h.pointer == h.limit { 64 | h.pointer = 0 65 | } 66 | } 67 | 68 | // contract: the non-nil values are ordered by the time of addition 69 | // may contains nil values, those should be handled (filtered) on the caller's side 70 | func (h *InternalMetricsHistory) GetRecords() []*MetricsRecord { 71 | h.RLock() 72 | defer h.RUnlock() 73 
| 74 | returnRecords := make([]*MetricsRecord, h.limit-h.pointer) 75 | copy(returnRecords, h.records[h.pointer:]) 76 | returnRecords = append(returnRecords, h.records[:h.pointer]...) 77 | return returnRecords 78 | } 79 | 80 | func (h *InternalMetricsHistory) GetLimit() int { 81 | h.RLock() 82 | defer h.RUnlock() 83 | return h.limit 84 | } 85 | -------------------------------------------------------------------------------- /pkg/metrics/history/internal_metrics_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package history 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | ) 26 | 27 | func countNils(records []*MetricsRecord) int { 28 | count := 0 29 | for _, record := range records { 30 | if record == nil { 31 | count++ 32 | } 33 | } 34 | return count 35 | } 36 | 37 | func TestHistoricalClusterInfo(t *testing.T) { 38 | limit := 2 39 | hpInfo := NewInternalMetricsHistory(limit) 40 | 41 | assert.Equal(t, limit, hpInfo.GetLimit(), "Limit should have been set to 2!") 42 | 43 | hpInfo.Store(2, 3) 44 | records := hpInfo.GetRecords() 45 | assert.Equal(t, 2, len(records), "Expected to have 1 non nil record.") 46 | assert.Equal(t, 1, countNils(records), "Expected to have 1 non nil record.") 47 | 48 | hpInfo.Store(3, 4) 49 | records = hpInfo.GetRecords() 50 | assert.Equal(t, 2, len(records), "Expected to have 2 records") 51 | assert.Equal(t, 0, countNils(records), "Expected to have 0 non nil record.") 52 | 53 | hpInfo.Store(5, 6) 54 | records = hpInfo.GetRecords() 55 | assert.Equal(t, 2, len(records), "Expected to have 2 records") 56 | assert.Equal(t, 0, countNils(records), "Expected to have 0 non nil record.") 57 | 58 | for i, record := range hpInfo.GetRecords() { 59 | switch i { 60 | case 0: 61 | assert.Equal(t, 3, record.TotalApplications) 62 | assert.Equal(t, 4, record.TotalContainers) 63 | case 1: 64 | assert.Equal(t, 5, record.TotalApplications) 65 | assert.Equal(t, 6, record.TotalContainers) 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /pkg/metrics/init.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package metrics 20 | 21 | import ( 22 | "sync" 23 | 24 | "github.com/apache/yunikorn-core/pkg/locking" 25 | ) 26 | 27 | const ( 28 | // Namespace for all metrics inside the scheduler 29 | Namespace = "yunikorn" 30 | // SchedulerSubsystem - subsystem name used by scheduler 31 | SchedulerSubsystem = "scheduler" 32 | // EventSubsystem - subsystem name used by event cache 33 | EventSubsystem = "event" 34 | // MetricNameInvalidByteReplacement byte used to replace invalid bytes in prometheus metric names 35 | MetricNameInvalidByteReplacement = '_' 36 | ) 37 | 38 | var once sync.Once 39 | var m *Metrics 40 | 41 | type Metrics struct { 42 | scheduler *SchedulerMetrics 43 | queues map[string]*QueueMetrics 44 | event *EventMetrics 45 | runtime *RuntimeMetrics 46 | lock locking.RWMutex 47 | } 48 | 49 | func init() { 50 | once.Do(func() { 51 | m = &Metrics{ 52 | scheduler: InitSchedulerMetrics(), 53 | queues: make(map[string]*QueueMetrics), 54 | event: initEventMetrics(), 55 | lock: locking.RWMutex{}, 56 | runtime: initRuntimeMetrics(), 57 | } 58 | }) 59 | } 60 | 61 | func Reset() { 62 | m.lock.Lock() 63 | defer m.lock.Unlock() 64 | m.scheduler.Reset() 65 | m.event.Reset() 66 | for _, qm := range m.queues { 67 | qm.Reset() 68 | } 69 | m.runtime.Reset() 70 | } 71 | 72 | func GetSchedulerMetrics() *SchedulerMetrics { 73 | return m.scheduler 74 | } 75 | 76 | func 
GetQueueMetrics(name string) *QueueMetrics { 77 | m.lock.Lock() 78 | defer m.lock.Unlock() 79 | if qm, ok := m.queues[name]; ok { 80 | return qm 81 | } 82 | queueMetrics := InitQueueMetrics(name) 83 | m.queues[name] = queueMetrics 84 | return queueMetrics 85 | } 86 | 87 | func RemoveQueueMetrics(name string) { 88 | m.lock.Lock() 89 | defer m.lock.Unlock() 90 | if metrics, ok := m.queues[name]; ok { 91 | metrics.UnregisterMetrics() 92 | delete(m.queues, name) 93 | } 94 | } 95 | 96 | func GetEventMetrics() *EventMetrics { 97 | return m.event 98 | } 99 | 100 | func GetRuntimeMetrics() *RuntimeMetrics { 101 | return m.runtime 102 | } 103 | 104 | // Format metric name based on the definition of metric name in prometheus, as per 105 | // https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels 106 | func formatMetricName(metricName string) string { 107 | if len(metricName) == 0 { 108 | return metricName 109 | } 110 | newBytes := make([]byte, len(metricName)) 111 | for i := 0; i < len(metricName); i++ { 112 | b := metricName[i] 113 | if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == ':' || (b >= '0' && b <= '9')) { 114 | newBytes[i] = MetricNameInvalidByteReplacement 115 | } else { 116 | newBytes[i] = b 117 | } 118 | } 119 | if '0' <= metricName[0] && metricName[0] <= '9' { 120 | return string(MetricNameInvalidByteReplacement) + string(newBytes) 121 | } 122 | return string(newBytes) 123 | } 124 | -------------------------------------------------------------------------------- /pkg/metrics/metrics_collector.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package metrics 20 | 21 | import ( 22 | "time" 23 | 24 | "go.uber.org/zap" 25 | 26 | "github.com/apache/yunikorn-core/pkg/log" 27 | "github.com/apache/yunikorn-core/pkg/metrics/history" 28 | ) 29 | 30 | // collecting metrics for YuniKorn-internal usage 31 | // will fill missing values with -1, in case of failures 32 | type internalMetricsCollector struct { 33 | ticker *time.Ticker 34 | stopped chan struct{} 35 | metricsHistory *history.InternalMetricsHistory 36 | } 37 | 38 | type InternalMetricsCollector interface { 39 | Stop() 40 | } 41 | 42 | func NewInternalMetricsCollector(hcInfo *history.InternalMetricsHistory) *internalMetricsCollector { 43 | return newInternalMetricsCollector(hcInfo, 1*time.Minute) 44 | } 45 | 46 | // create a internalMetricsCollector with specify tick duration. 
47 | func newInternalMetricsCollector(hcInfo *history.InternalMetricsHistory, tickerDefault time.Duration) *internalMetricsCollector { 48 | finished := make(chan struct{}) 49 | ticker := time.NewTicker(tickerDefault) 50 | 51 | return &internalMetricsCollector{ 52 | ticker, 53 | finished, 54 | hcInfo, 55 | } 56 | } 57 | 58 | func (u *internalMetricsCollector) StartService() { 59 | go func() { 60 | log.Log(log.Metrics).Info("Starting internal metrics collector") 61 | for { 62 | select { 63 | case <-u.stopped: 64 | return 65 | case <-u.ticker.C: 66 | u.store() 67 | } 68 | } 69 | }() 70 | } 71 | 72 | func (u *internalMetricsCollector) store() { 73 | log.Log(log.Metrics).Debug("Adding current status to historical partition data") 74 | 75 | totalAppsRunning, err := m.scheduler.GetTotalApplicationsRunning() 76 | if err != nil { 77 | log.Log(log.Metrics).Warn("Could not encode totalApplications metric.", zap.Error(err)) 78 | totalAppsRunning = -1 79 | } 80 | allocatedContainers, err := m.scheduler.getAllocatedContainers() 81 | if err != nil { 82 | log.Log(log.Metrics).Warn("Could not encode allocatedContainers metric.", zap.Error(err)) 83 | } 84 | releasedContainers, err := m.scheduler.getReleasedContainers() 85 | if err != nil { 86 | log.Log(log.Metrics).Warn("Could not encode releasedContainers metric.", zap.Error(err)) 87 | } 88 | totalContainersRunning := allocatedContainers - releasedContainers 89 | if totalContainersRunning < 0 { 90 | log.Log(log.Metrics).Warn("Could not calculate the totalContainersRunning.", 91 | zap.Int("allocatedContainers", allocatedContainers), 92 | zap.Int("releasedContainers", releasedContainers)) 93 | } 94 | u.metricsHistory.Store(totalAppsRunning, totalContainersRunning) 95 | } 96 | 97 | func (u *internalMetricsCollector) Stop() { 98 | log.Log(log.Metrics).Info("Stopping internal metrics collector") 99 | close(u.stopped) 100 | } 101 | -------------------------------------------------------------------------------- 
/pkg/metrics/metrics_collector_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package metrics 20 | 21 | import ( 22 | "fmt" 23 | "testing" 24 | "time" 25 | 26 | "gotest.tools/v3/assert" 27 | 28 | "github.com/apache/yunikorn-core/pkg/metrics/history" 29 | ) 30 | 31 | func TestStop(t *testing.T) { 32 | metricsHistory := history.NewInternalMetricsHistory(3) 33 | metricsCollector := newInternalMetricsCollector(metricsHistory, 1*time.Second) 34 | metricsCollector.StartService() 35 | 36 | metricsCollector.Stop() 37 | // wait for the thread to store record. 
it should not happen 38 | time.Sleep(1500 * time.Millisecond) 39 | 40 | records := metricsHistory.GetRecords() 41 | assert.Equal(t, 3, len(records), "Expected exactly 3 history records") 42 | for _, record := range records { 43 | assert.Assert(t, record == nil, "The 1st item should be nil!") 44 | } 45 | } 46 | 47 | func TestStartService(t *testing.T) { 48 | metricsHistory := history.NewInternalMetricsHistory(3) 49 | metricsCollector := newInternalMetricsCollector(metricsHistory, 1*time.Second) 50 | metricsCollector.StartService() 51 | 52 | // wait for the thread to store record 53 | time.Sleep(1500 * time.Millisecond) 54 | metricsCollector.Stop() 55 | 56 | records := metricsHistory.GetRecords() 57 | assert.Equal(t, 3, len(records), "Expected exactly 3 history records") 58 | for i, record := range records { 59 | if i == 2 { 60 | assert.Assert(t, record != nil, fmt.Sprintf("record should not be nil, index: %d", i)) 61 | } else { 62 | assert.Assert(t, record == nil, fmt.Sprintf("record should be nil, index: %d", i)) 63 | } 64 | } 65 | } 66 | 67 | func TestHistoricalPartitionInfoUpdater(t *testing.T) { 68 | metricsHistory := history.NewInternalMetricsHistory(3) 69 | metricsCollector := NewInternalMetricsCollector(metricsHistory) 70 | 71 | metrics := GetSchedulerMetrics() 72 | 73 | // skip to store record for first application 74 | metrics.IncTotalApplicationsRunning() 75 | metrics.AddAllocatedContainers(2) 76 | 77 | metrics.IncTotalApplicationsRunning() 78 | metrics.AddAllocatedContainers(2) 79 | metricsCollector.store() 80 | 81 | metrics.IncTotalApplicationsRunning() 82 | metrics.AddAllocatedContainers(2) 83 | metricsCollector.store() 84 | 85 | records := metricsHistory.GetRecords() 86 | assert.Equal(t, 3, len(records), "Expected exactly 3 history records") 87 | for i, record := range records { 88 | switch i { 89 | case 0: 90 | assert.Assert(t, record == nil, fmt.Sprintf("record should be nil, index: %d", i)) 91 | case 1: 92 | assert.Equal(t, 2, 
record.TotalApplications, "Expected exactly 2 applications") 93 | assert.Equal(t, 4, record.TotalContainers, "Expected exactly 4 allocations") 94 | case 2: 95 | assert.Equal(t, 3, record.TotalApplications, "Expected exactly 3 applications") 96 | assert.Equal(t, 6, record.TotalContainers, "Expected exactly 6 allocations") 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /pkg/metrics/metrics_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package metrics 20 | 21 | import ( 22 | "crypto/rand" 23 | "testing" 24 | 25 | "github.com/prometheus/common/model" 26 | "go.uber.org/zap" 27 | "gotest.tools/v3/assert" 28 | 29 | "github.com/apache/yunikorn-core/pkg/log" 30 | ) 31 | 32 | func TestFormatMetricName(t *testing.T) { 33 | testStrings := []string{"0", "ad_vs:ad", "~23", "test/a", "-dfs", "012~`s@dd#$b%23^&5^3*(45){78}|00[]\\1ssd"} 34 | for _, testString := range testStrings { 35 | replaceStr := formatMetricName(testString) 36 | assert.Equal(t, true, model.IsValidMetricName(model.LabelValue(replaceStr))) 37 | } 38 | numRandomTestStrings := 1000 39 | randomTestStrings := make([]string, numRandomTestStrings) 40 | for i := 0; i < numRandomTestStrings; i++ { 41 | randomTestStrings[i] = generateRandomString(100) 42 | } 43 | for _, testString := range randomTestStrings { 44 | replaceStr := formatMetricName(testString) 45 | assert.Equal(t, true, model.IsValidMetricName(model.LabelValue(replaceStr))) 46 | } 47 | } 48 | 49 | func generateRandomString(len int) string { 50 | randomBytes := make([]byte, len) 51 | n, err := rand.Read(randomBytes) 52 | if err != nil { 53 | log.Log(log.Metrics).Warn("Random running low on entropy", 54 | zap.Int("bytesRequested", len), 55 | zap.Int("bytesRead", n)) 56 | } 57 | return string(randomBytes) 58 | } 59 | -------------------------------------------------------------------------------- /pkg/mock/container_state_updater.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package mock 20 | 21 | import ( 22 | "github.com/apache/yunikorn-core/pkg/locking" 23 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 24 | ) 25 | 26 | type ContainerStateUpdater struct { 27 | ResourceManagerCallback 28 | sentUpdate *si.UpdateContainerSchedulingStateRequest 29 | locking.RWMutex 30 | } 31 | 32 | func (m *ContainerStateUpdater) UpdateContainerSchedulingState(request *si.UpdateContainerSchedulingStateRequest) { 33 | m.Lock() 34 | defer m.Unlock() 35 | m.sentUpdate = request 36 | } 37 | 38 | func (m *ContainerStateUpdater) GetContainerUpdateRequest() *si.UpdateContainerSchedulingStateRequest { 39 | m.RLock() 40 | defer m.RUnlock() 41 | return m.sentUpdate 42 | } 43 | 44 | // NewContainerStateUpdater returns a mock that allows retrieval of the last update that was sent. 45 | func NewContainerStateUpdater() *ContainerStateUpdater { 46 | return &ContainerStateUpdater{} 47 | } 48 | -------------------------------------------------------------------------------- /pkg/mock/event_plugin.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License.
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package mock 20 | 21 | import ( 22 | "github.com/apache/yunikorn-core/pkg/locking" 23 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 24 | ) 25 | 26 | type EventPlugin struct { 27 | ResourceManagerCallback 28 | records chan *si.EventRecord 29 | 30 | locking.Mutex 31 | } 32 | 33 | func (m *EventPlugin) SendEvent(events []*si.EventRecord) { 34 | m.Lock() 35 | defer m.Unlock() 36 | 37 | for _, event := range events { 38 | m.records <- event 39 | } 40 | } 41 | 42 | func (m *EventPlugin) GetNextEventRecord() *si.EventRecord { 43 | m.Lock() 44 | defer m.Unlock() 45 | 46 | select { 47 | case record := <-m.records: 48 | return record 49 | default: 50 | return nil 51 | } 52 | } 53 | 54 | // NewEventPlugin creates a mocked event plugin 55 | func NewEventPlugin() *EventPlugin { 56 | return &EventPlugin{ 57 | records: make(chan *si.EventRecord, 3), 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /pkg/mock/predicate_plugin.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package mock 20 | 21 | import ( 22 | "fmt" 23 | 24 | "go.uber.org/zap" 25 | 26 | "github.com/apache/yunikorn-core/pkg/log" 27 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 28 | ) 29 | 30 | type PredicatePlugin struct { 31 | ResourceManagerCallback 32 | mustFail bool 33 | nodes map[string]int 34 | } 35 | 36 | func (f *PredicatePlugin) Predicates(args *si.PredicatesArgs) error { 37 | if f.mustFail { 38 | log.Log(log.Test).Info("fake predicate plugin fail: must fail set") 39 | return fmt.Errorf("fake predicate plugin failed") 40 | } 41 | if fail, ok := f.nodes[args.NodeID]; ok { 42 | if args.Allocate && fail >= 0 { 43 | log.Log(log.Test).Info("fake predicate plugin node allocate fail", 44 | zap.String("node", args.NodeID), 45 | zap.Int("fail mode", fail)) 46 | return fmt.Errorf("fake predicate plugin failed") 47 | } 48 | if !args.Allocate && fail <= 0 { 49 | log.Log(log.Test).Info("fake predicate plugin node reserve fail", 50 | zap.String("node", args.NodeID), 51 | zap.Int("fail mode", fail)) 52 | return fmt.Errorf("fake predicate plugin failed") 53 | } 54 | } 55 | log.Log(log.Test).Info("fake predicate plugin pass", 56 | zap.String("node", args.NodeID)) 57 | return nil 58 | } 59 | 60 | // NewPredicatePlugin returns a mock that can either always fail or fail based on the node that is checked. 
61 | // mustFail will cause the predicate check to always fail 62 | // nodes allows specifying which node to fail for which check using the nodeID: 63 | // possible values: -1 fail reserve, 0 fail always, 1 fail alloc (defaults to always) 64 | func NewPredicatePlugin(mustFail bool, nodes map[string]int) *PredicatePlugin { 65 | return &PredicatePlugin{ 66 | mustFail: mustFail, 67 | nodes: nodes, 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /pkg/mock/rm_callback.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package mock 20 | 21 | import ( 22 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 23 | ) 24 | 25 | type ResourceManagerCallback struct{} 26 | 27 | func (f *ResourceManagerCallback) UpdateApplication(_ *si.ApplicationResponse) error { 28 | return nil 29 | } 30 | 31 | func (f *ResourceManagerCallback) UpdateAllocation(_ *si.AllocationResponse) error { 32 | return nil 33 | } 34 | 35 | func (f *ResourceManagerCallback) UpdateNode(_ *si.NodeResponse) error { 36 | return nil 37 | } 38 | 39 | func (f *ResourceManagerCallback) Predicates(_ *si.PredicatesArgs) error { 40 | // do nothing 41 | return nil 42 | } 43 | 44 | func (f *ResourceManagerCallback) PreemptionPredicates(args *si.PreemptionPredicatesArgs) *si.PreemptionPredicatesResponse { 45 | // simulate "ideal" preemption check 46 | return &si.PreemptionPredicatesResponse{ 47 | Success: true, 48 | Index: args.StartIndex, 49 | } 50 | } 51 | 52 | func (f *ResourceManagerCallback) SendEvent(_ []*si.EventRecord) { 53 | // do nothing 54 | } 55 | 56 | func (f *ResourceManagerCallback) GetStateDump() (string, error) { 57 | return "{}", nil 58 | } 59 | 60 | func (f *ResourceManagerCallback) UpdateContainerSchedulingState(_ *si.UpdateContainerSchedulingStateRequest) { 61 | // do nothing 62 | } 63 | -------------------------------------------------------------------------------- /pkg/plugins/plugins.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package plugins 20 | 21 | import ( 22 | "github.com/apache/yunikorn-core/pkg/log" 23 | "github.com/apache/yunikorn-scheduler-interface/lib/go/api" 24 | ) 25 | 26 | var plugins SchedulerPlugins 27 | 28 | func init() { 29 | plugins = SchedulerPlugins{} 30 | } 31 | 32 | // RegisterSchedulerPlugin registers the plugin based on the interface(s) it implements. 33 | // The known interfaces are defined in yunikorn-scheduler-interface/lib/go/api; a plugin that implements none of them is ignored. 34 | func RegisterSchedulerPlugin(plugin interface{}) { 35 | plugins.Lock() 36 | defer plugins.Unlock() 37 | if rmc, ok := plugin.(api.ResourceManagerCallback); ok { 38 | log.Log(log.RMProxy).Info("register scheduler plugin: ResourceManagerCallback") 39 | plugins.ResourceManagerCallbackPlugin = rmc 40 | } 41 | if sdp, ok := plugin.(api.StateDumpPlugin); ok { 42 | log.Log(log.RMProxy).Info("register scheduler plugin: StateDumpPlugin") 43 | plugins.StateDumpPlugin = sdp 44 | } 45 | } 46 | 47 | // UnregisterSchedulerPlugins removes all previously registered plugins. 48 | // visible for testing only 49 | func UnregisterSchedulerPlugins() { 50 | plugins.Lock() 51 | defer plugins.Unlock() 52 | plugins.ResourceManagerCallbackPlugin = nil 53 | plugins.StateDumpPlugin = nil 54 | } 55 | 56 | // GetResourceManagerCallbackPlugin returns the registered callback plugin or nil if none was registered.
57 | func GetResourceManagerCallbackPlugin() api.ResourceManagerCallback { 58 | plugins.RLock() 59 | defer plugins.RUnlock() 60 | return plugins.ResourceManagerCallbackPlugin 61 | } 62 | 63 | // GetStateDumpPlugin returns the registered state dump plugin or nil if none was registered. 64 | func GetStateDumpPlugin() api.StateDumpPlugin { 65 | plugins.RLock() 66 | defer plugins.RUnlock() 67 | return plugins.StateDumpPlugin 68 | } 69 | -------------------------------------------------------------------------------- /pkg/plugins/plugins_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package plugins 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | 26 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 27 | ) 28 | 29 | type NoPluginImplemented struct{} 30 | 31 | type RMPluginImplemented struct{} 32 | 33 | type StateDumpImplemented struct{} 34 | 35 | type AllPluginsImplemented struct { 36 | RMPluginImplemented 37 | StateDumpImplemented 38 | } 39 | 40 | func (f *RMPluginImplemented) UpdateApplication(_ *si.ApplicationResponse) error { 41 | return nil 42 | } 43 | 44 | func (f *RMPluginImplemented) UpdateAllocation(_ *si.AllocationResponse) error { 45 | return nil 46 | } 47 | 48 | func (f *RMPluginImplemented) UpdateNode(_ *si.NodeResponse) error { 49 | return nil 50 | } 51 | 52 | func (f *RMPluginImplemented) Predicates(_ *si.PredicatesArgs) error { 53 | return nil 54 | } 55 | 56 | func (f *RMPluginImplemented) PreemptionPredicates(_ *si.PreemptionPredicatesArgs) *si.PreemptionPredicatesResponse { 57 | return nil 58 | } 59 | 60 | func (f *RMPluginImplemented) SendEvent(_ []*si.EventRecord) { 61 | // do nothing 62 | } 63 | 64 | func (f *RMPluginImplemented) UpdateContainerSchedulingState(_ *si.UpdateContainerSchedulingStateRequest) { 65 | } 66 | 67 | func (f *StateDumpImplemented) GetStateDump() (string, error) { 68 | return "", nil 69 | } 70 | 71 | func TestRegisterPlugins(t *testing.T) { 72 | plugins = SchedulerPlugins{} 73 | RegisterSchedulerPlugin(&NoPluginImplemented{}) 74 | assert.Assert(t, GetResourceManagerCallbackPlugin() == nil, "ResourceManagerCallback plugin should not have been registered") 75 | assert.Assert(t, GetStateDumpPlugin() == nil, "StateDumpCallback plugin should not have been registered") 76 | 77 | RegisterSchedulerPlugin(&RMPluginImplemented{}) 78 | assert.Assert(t, GetResourceManagerCallbackPlugin() != nil, "ResourceManagerCallback plugin should have been registered") 79 | assert.Assert(t, GetStateDumpPlugin() == nil, "StateDumpCallback plugin should not have been 
registered") 80 | UnregisterSchedulerPlugins() 81 | 82 | RegisterSchedulerPlugin(&StateDumpImplemented{}) 83 | assert.Assert(t, GetResourceManagerCallbackPlugin() == nil, "ResourceManagerCallback plugin should not have been registered") 84 | assert.Assert(t, GetStateDumpPlugin() != nil, "StateDumpCallback plugin should have been registered") 85 | UnregisterSchedulerPlugins() 86 | 87 | RegisterSchedulerPlugin(&AllPluginsImplemented{}) 88 | assert.Assert(t, GetResourceManagerCallbackPlugin() != nil, "ResourceManagerCallback plugin should have been registered") 89 | assert.Assert(t, GetStateDumpPlugin() != nil, "StateDumpCallback plugin should have been registered") 90 | UnregisterSchedulerPlugins() 91 | 92 | // registration are additive 93 | RegisterSchedulerPlugin(&RMPluginImplemented{}) 94 | RegisterSchedulerPlugin(&StateDumpImplemented{}) 95 | assert.Assert(t, GetResourceManagerCallbackPlugin() != nil, "ResourceManagerCallback plugin should have been registered") 96 | assert.Assert(t, GetStateDumpPlugin() != nil, "StateDumpCallback plugin should have been registered") 97 | UnregisterSchedulerPlugins() 98 | } 99 | -------------------------------------------------------------------------------- /pkg/plugins/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package plugins 20 | 21 | import ( 22 | "github.com/apache/yunikorn-core/pkg/locking" 23 | "github.com/apache/yunikorn-scheduler-interface/lib/go/api" 24 | ) 25 | 26 | type SchedulerPlugins struct { 27 | ResourceManagerCallbackPlugin api.ResourceManagerCallback 28 | StateDumpPlugin api.StateDumpPlugin 29 | 30 | locking.RWMutex 31 | } 32 | -------------------------------------------------------------------------------- /pkg/rmproxy/rmevent/events.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package rmevent 20 | 21 | import ( 22 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 23 | ) 24 | 25 | // Incoming UpdateAllocation events from the RM to the scheduler (async) 26 | type RMUpdateAllocationEvent struct { 27 | // The generic UpdateAllocation does not wait for a result, 28 | // results are communicated back via the outgoing events. 29 | Request *si.AllocationRequest 30 | } 31 | 32 | // Incoming UpdateApplication events from the RM to the scheduler (async) 33 | type RMUpdateApplicationEvent struct { 34 | // The generic UpdateApplication does not wait for a result, 35 | // results are communicated back via the outgoing events. 36 | Request *si.ApplicationRequest 37 | } 38 | 39 | // Incoming UpdateNode events from the RM to the scheduler (async) 40 | type RMUpdateNodeEvent struct { 41 | // The generic UpdateNode does not wait for a result, 42 | // results are communicated back via the outgoing events. 43 | Request *si.NodeRequest 44 | } 45 | 46 | // Incoming events from the RM to the scheduler (sync) 47 | type RMRegistrationEvent struct { 48 | Registration *si.RegisterResourceManagerRequest 49 | Channel chan *Result `json:"-"` 50 | } 51 | 52 | type RMConfigUpdateEvent struct { 53 | RmID string 54 | PolicyGroup string 55 | Config string 56 | ExtraConfig map[string]string 57 | Channel chan *Result `json:"-"` 58 | } 59 | 60 | type RMPartitionsRemoveEvent struct { 61 | RmID string 62 | Channel chan *Result `json:"-"` 63 | } 64 | 65 | type Result struct { 66 | Succeeded bool 67 | Reason string 68 | } 69 | 70 | // Outgoing events from the scheduler to the RM 71 | type RMNewAllocationsEvent struct { 72 | RmID string 73 | Allocations []*si.Allocation 74 | Channel chan *Result `json:"-"` 75 | } 76 | 77 | type RMApplicationUpdateEvent struct { 78 | RmID string 79 | AcceptedApplications []*si.AcceptedApplication 80 | RejectedApplications []*si.RejectedApplication 81 | UpdatedApplications []*si.UpdatedApplication 82 | } 83 | 84 | type 
RMRejectedAllocationEvent struct { 85 | RmID string 86 | RejectedAllocations []*si.RejectedAllocation 87 | } 88 | 89 | type RMReleaseAllocationEvent struct { 90 | RmID string 91 | ReleasedAllocations []*si.AllocationRelease 92 | Channel chan *Result `json:"-"` 93 | } 94 | 95 | type RMNodeUpdateEvent struct { 96 | RmID string 97 | AcceptedNodes []*si.AcceptedNode 98 | RejectedNodes []*si.RejectedNode 99 | } 100 | -------------------------------------------------------------------------------- /pkg/rmproxy/rmproxy_mock.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package rmproxy 20 | 21 | import ( 22 | "github.com/apache/yunikorn-core/pkg/locking" 23 | "github.com/apache/yunikorn-core/pkg/rmproxy/rmevent" 24 | ) 25 | 26 | // MockedRMProxy implements a mocked RMProxy event handler for testing 27 | type MockedRMProxy struct { 28 | handled bool 29 | events []interface{} 30 | locking.RWMutex 31 | } 32 | 33 | func NewMockedRMProxy() *MockedRMProxy { 34 | return &MockedRMProxy{} 35 | } 36 | 37 | // HandleEvent records every event and, for the limited set of events used in testing, flags it as handled or replies with a success result on its channel 38 | func (rmp *MockedRMProxy) HandleEvent(ev interface{}) { 39 | rmp.Lock() 40 | defer rmp.Unlock() 41 | if rmp.events == nil { 42 | rmp.events = make([]interface{}, 0) 43 | } 44 | rmp.events = append(rmp.events, ev) 45 | var c chan *rmevent.Result 46 | switch v := ev.(type) { 47 | case *rmevent.RMApplicationUpdateEvent: 48 | rmp.handled = true 49 | case *rmevent.RMNewAllocationsEvent: 50 | c = v.Channel 51 | case *rmevent.RMReleaseAllocationEvent: 52 | c = v.Channel 53 | } 54 | if c != nil { 55 | go func(rc chan *rmevent.Result) { 56 | rc <- &rmevent.Result{Succeeded: true, Reason: "test"} 57 | }(c) 58 | } 59 | } 60 | 61 | // IsHandled returns whether an application update event was handled since the previous call, and resets the flag 62 | func (rmp *MockedRMProxy) IsHandled() bool { 63 | rmp.Lock() 64 | defer rmp.Unlock() 65 | keep := rmp.handled 66 | rmp.handled = false 67 | return keep 68 | } 69 | 70 | // GetEvents returns the list of events processed by the handler; the list is not reset 71 | func (rmp *MockedRMProxy) GetEvents() []interface{} { 72 | rmp.RLock() 73 | defer rmp.RUnlock() 74 | return rmp.events 75 | } 76 | -------------------------------------------------------------------------------- /pkg/scheduler/nodes_usage_monitor.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements.
See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package scheduler 20 | 21 | import ( 22 | "time" 23 | 24 | "github.com/apache/yunikorn-core/pkg/log" 25 | "github.com/apache/yunikorn-core/pkg/metrics" 26 | ) 27 | 28 | type nodesResourceUsageMonitor struct { 29 | done chan struct{} 30 | ticker *time.Ticker 31 | cc *ClusterContext 32 | } 33 | 34 | func newNodesResourceUsageMonitor(scheduler *ClusterContext) *nodesResourceUsageMonitor { 35 | return &nodesResourceUsageMonitor{ 36 | done: make(chan struct{}), 37 | ticker: time.NewTicker(1 * time.Second), 38 | cc: scheduler, 39 | } 40 | } 41 | 42 | func (m *nodesResourceUsageMonitor) start() { 43 | log.Log(log.SchedNodesUsage).Info("Starting node resource monitor") 44 | go func() { 45 | for { 46 | select { 47 | case <-m.done: 48 | m.ticker.Stop() 49 | return 50 | case <-m.ticker.C: 51 | m.runOnce() 52 | } 53 | } 54 | }() 55 | } 56 | 57 | func (m *nodesResourceUsageMonitor) runOnce() { 58 | for _, p := range m.cc.GetPartitionMapClone() { 59 | usageMap := p.calculateNodesResourceUsage() 60 | if len(usageMap) > 0 { 61 | for resourceName, usageBuckets := range usageMap { 62 | for idx, bucketValue := range usageBuckets { 63 | metrics.GetSchedulerMetrics().SetNodeResourceUsage(resourceName, idx, float64(bucketValue)) 64 | } 65 | } 66 | } 67 | } 68 | } 69 | 70 | // 
Stop the node usage monitor. 71 | func (m *nodesResourceUsageMonitor) stop() { 72 | log.Log(log.SchedNodesUsage).Info("Stopping node resource usage monitor") 73 | close(m.done) 74 | } 75 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/allocation_result.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package objects 20 | 21 | import "fmt" 22 | 23 | type AllocationResultType int 24 | 25 | const ( 26 | None AllocationResultType = iota 27 | Allocated 28 | AllocatedReserved 29 | Reserved 30 | Unreserved 31 | Replaced 32 | ) 33 | 34 | // String returns the name of the result type, or "Unknown" for a value outside the declared constants. 35 | func (art AllocationResultType) String() string { 36 | names := [...]string{"None", "Allocated", "AllocatedReserved", "Reserved", "Unreserved", "Replaced"} 37 | // guard the lookup: indexing the array with an out-of-range value panics 38 | if art < 0 || int(art) >= len(names) { 39 | return "Unknown" 40 | } 41 | return names[art] 42 | } 43 | 44 | type AllocationResult struct { 45 | ResultType AllocationResultType 46 | Request *Allocation 47 | NodeID string 48 | ReservedNodeID string 49 | CancelledReservations int 50 | } 51 | 52 | func (ar *AllocationResult) String() string { 53 | if ar == nil { 54 | return "nil allocation result" 55 | } 56 | allocationKey := "" 57 | if ar.Request != nil { 58 | allocationKey = ar.Request.GetAllocationKey() 59 | } 60 | return fmt.Sprintf("resultType=%s, nodeID=%s, reservedNodeID=%s, allocationKey=%s", ar.ResultType.String(), ar.NodeID, ar.ReservedNodeID, allocationKey) 61 | } 62 | 63 | // newAllocatedAllocationResult creates a new allocation result for a new allocation. 64 | func newAllocatedAllocationResult(nodeID string, request *Allocation) *AllocationResult { 65 | return newAllocationResultInternal(Allocated, nodeID, request) 66 | } 67 | 68 | // newReservedAllocationResult creates a new allocation result for reserving a node. 69 | func newReservedAllocationResult(nodeID string, request *Allocation) *AllocationResult { 70 | return newAllocationResultInternal(Reserved, nodeID, request) 71 | } 72 | 73 | // newUnreservedAllocationResult creates a new allocation result for unreserving a node. 74 | func newUnreservedAllocationResult(nodeID string, request *Allocation) *AllocationResult { 75 | return newAllocationResultInternal(Unreserved, nodeID, request) 76 | } 77 | 78 | // newReplacedAllocationResult create a new allocation result for replaced allocations.
79 | func newReplacedAllocationResult(nodeID string, request *Allocation) *AllocationResult { 80 | return newAllocationResultInternal(Replaced, nodeID, request) 81 | } 82 | 83 | // newAllocationResultInternal creates a new allocation result. It should not be called directly. 84 | func newAllocationResultInternal(resultType AllocationResultType, nodeID string, request *Allocation) *AllocationResult { 85 | return &AllocationResult{ 86 | ResultType: resultType, 87 | Request: request, 88 | NodeID: nodeID, 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/application_graphviz_test.go: -------------------------------------------------------------------------------- 1 | //go:build graphviz 2 | // +build graphviz 3 | 4 | /* 5 | Licensed to the Apache Software Foundation (ASF) under one 6 | or more contributor license agreements. See the NOTICE file 7 | distributed with this work for additional information 8 | regarding copyright ownership. The ASF licenses this file 9 | to you under the Apache License, Version 2.0 (the 10 | "License"); you may not use this file except in compliance 11 | with the License. You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License.
20 | */ 21 | 22 | package objects 23 | 24 | import ( 25 | "os" 26 | "testing" 27 | 28 | "github.com/looplab/fsm" 29 | "gotest.tools/v3/assert" 30 | ) 31 | 32 | func TestApplicationFsmGraph(t *testing.T) { 33 | graph := fsm.Visualize(NewAppState()) 34 | 35 | err := os.MkdirAll("../../../build/fsm", 0755) 36 | assert.NilError(t, err, "Creating output dir failed") 37 | os.WriteFile("../../../build/fsm/application-state.dot", []byte(graph), 0644) 38 | assert.NilError(t, err, "Writing graph failed") 39 | } 40 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/application_summary.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package objects 20 | 21 | import ( 22 | "fmt" 23 | "time" 24 | 25 | "github.com/apache/yunikorn-core/pkg/common/resources" 26 | "github.com/apache/yunikorn-core/pkg/log" 27 | ) 28 | 29 | type ApplicationSummary struct { 30 | ApplicationID string 31 | SubmissionTime time.Time 32 | StartTime time.Time 33 | FinishTime time.Time 34 | User string 35 | Queue string 36 | State string 37 | RmID string 38 | ResourceUsage *resources.TrackedResource 39 | PreemptedResource *resources.TrackedResource 40 | PlaceholderResource *resources.TrackedResource 41 | } 42 | 43 | func (as *ApplicationSummary) String() string { 44 | return fmt.Sprintf("ApplicationID: %s, SubmissionTime: %d, StartTime: %d, FinishTime: %d, User: %s, "+ 45 | "Queue: %s, State: %s, RmID: %s, ResourceUsage: %s, PreemptedResource: %s, PlaceholderResource: %s", 46 | as.ApplicationID, 47 | as.SubmissionTime.UnixMilli(), 48 | as.StartTime.UnixMilli(), 49 | as.FinishTime.UnixMilli(), 50 | as.User, 51 | as.Queue, 52 | as.State, 53 | as.RmID, 54 | as.ResourceUsage, 55 | as.PreemptedResource, 56 | as.PlaceholderResource) 57 | } 58 | 59 | func (as *ApplicationSummary) DoLogging() { 60 | log.Log(log.SchedAppUsage).Info(fmt.Sprintf("YK_APP_SUMMARY: {%s}", as)) 61 | } 62 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/node_iterator.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package objects 20 | 21 | import ( 22 | "github.com/google/btree" 23 | ) 24 | 25 | // NodeIterator iterates over a list of nodes based on the defined policy 26 | type NodeIterator interface { 27 | // ForEachNode Calls the provided function on the sorted Node object until it returns false 28 | ForEachNode(func(*Node) bool) 29 | } 30 | 31 | type treeIterator struct { 32 | accept func(*Node) bool 33 | getTree func() *btree.BTree 34 | } 35 | 36 | // ForEachNode Calls the provided "f" function on the sorted Node object until it returns false. 37 | // The accept() function checks if the node should be a candidate or not. 38 | func (ti *treeIterator) ForEachNode(f func(*Node) bool) { 39 | ti.getTree().Ascend(func(item btree.Item) bool { 40 | if ref, ok := item.(nodeRef); ok { 41 | node := ref.node 42 | if ti.accept(node) { 43 | return f(node) 44 | } 45 | } 46 | 47 | return true 48 | }) 49 | } 50 | 51 | func NewTreeIterator(accept func(*Node) bool, getTree func() *btree.BTree) *treeIterator { 52 | ti := &treeIterator{ 53 | getTree: getTree, 54 | accept: accept, 55 | } 56 | return ti 57 | } 58 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/node_iterator_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package objects 20 | 21 | import ( 22 | "strconv" 23 | "testing" 24 | 25 | "github.com/google/btree" 26 | "gotest.tools/v3/assert" 27 | 28 | "github.com/apache/yunikorn-core/pkg/common/resources" 29 | ) 30 | 31 | func TestTreeIterator_AcceptAll(t *testing.T) { 32 | tree := getTree() 33 | treeItr := NewTreeIterator(acceptAll, func() *btree.BTree { 34 | return tree 35 | }) 36 | 37 | checked := make([]*Node, 0) 38 | treeItr.ForEachNode(func(node *Node) bool { 39 | checked = append(checked, node) 40 | return true 41 | }) 42 | 43 | assert.Equal(t, 10, len(checked)) 44 | } 45 | 46 | func TestTreeIterator_AcceptUnreserved(t *testing.T) { 47 | tree := getTree() 48 | treeItr := NewTreeIterator(acceptUnreserved, func() *btree.BTree { 49 | return tree 50 | }) 51 | 52 | checked := make([]*Node, 0) 53 | unreservedIds := make(map[int]bool) 54 | treeItr.ForEachNode(func(node *Node) bool { 55 | checked = append(checked, node) 56 | i, err := strconv.Atoi(node.Hostname) 57 | assert.NilError(t, err, "conversion failure") 58 | unreservedIds[i] = true 59 | return true 60 | }) 61 | 62 | assert.Equal(t, 5, len(checked)) 63 | for i := 5; i < 10; i++ { // node 5-10 are unreserved 64 | assert.Assert(t, unreservedIds[i]) 65 | } 66 | } 67 | 68 | func getTree() *btree.BTree { 69 | nodesReserved := newSchedNodeList(0, 5, true) 70 | nodes := newSchedNodeList(5, 10, false) 71 | nodes = 
append(nodes, nodesReserved...) 72 | 73 | tree := btree.New(7) 74 | for _, n := range nodes { 75 | tree.ReplaceOrInsert(nodeRef{ 76 | node: n, 77 | nodeScore: 1, 78 | }) 79 | } 80 | 81 | return tree 82 | } 83 | 84 | // A list of nodes that can be iterated over. 85 | func newSchedNodeList(start, end int, reserved bool) []*Node { 86 | list := make([]*Node, 0) 87 | for i := start; i < end; i++ { 88 | num := strconv.Itoa(i) 89 | node := newNode("node-"+num, make(map[string]resources.Quantity)) 90 | node.Hostname = num 91 | if reserved { 92 | node.reservations = map[string]*reservation{ 93 | "dummy": {}, 94 | } 95 | } 96 | list = append(list, node) 97 | } 98 | return list 99 | } 100 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/node_listener.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
// NodeListener is implemented by objects that want to receive a callback
// carrying a Node.
type NodeListener interface {
	// NodeUpdated is called with the node concerned.
	// NOTE(review): presumably invoked after the node has changed - confirm against the callers in node.go
	NodeUpdated(sn *Node)
}
17 | */ 18 | 19 | package objects 20 | 21 | import ( 22 | "context" 23 | 24 | "github.com/looplab/fsm" 25 | "go.uber.org/zap" 26 | 27 | "github.com/apache/yunikorn-core/pkg/log" 28 | ) 29 | 30 | // ---------------------------------- 31 | // object events 32 | // these events are used for: partitions and managed queues 33 | // ---------------------------------- 34 | type ObjectEvent int 35 | 36 | const ( 37 | Remove ObjectEvent = iota 38 | Start 39 | Stop 40 | ) 41 | 42 | func (oe ObjectEvent) String() string { 43 | return [...]string{"Remove", "Start", "Stop"}[oe] 44 | } 45 | 46 | // ---------------------------------- 47 | // object states 48 | // these states are used by: partitions and managed queues 49 | // ---------------------------------- 50 | type ObjectState int 51 | 52 | const ( 53 | Active ObjectState = iota 54 | Draining 55 | Stopped 56 | ) 57 | 58 | func (os ObjectState) String() string { 59 | return [...]string{"Active", "Draining", "Stopped"}[os] 60 | } 61 | 62 | func NewObjectState() *fsm.FSM { 63 | return fsm.NewFSM( 64 | Active.String(), fsm.Events{ 65 | { 66 | Name: Remove.String(), 67 | Src: []string{Active.String(), Draining.String()}, 68 | Dst: Draining.String(), 69 | }, { 70 | Name: Start.String(), 71 | Src: []string{Active.String(), Stopped.String(), Draining.String()}, 72 | Dst: Active.String(), 73 | }, { 74 | Name: Stop.String(), 75 | Src: []string{Active.String(), Stopped.String()}, 76 | Dst: Stopped.String(), 77 | }, 78 | }, 79 | fsm.Callbacks{ 80 | "enter_state": func(_ context.Context, event *fsm.Event) { 81 | log.Log(log.SchedFSM).Info("object transition", 82 | zap.Any("object", event.Args[0]), 83 | zap.String("source", event.Src), 84 | zap.String("destination", event.Dst), 85 | zap.String("event", event.Event)) 86 | }, 87 | }, 88 | ) 89 | } 90 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/object_state_test.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package objects 20 | 21 | import ( 22 | "context" 23 | "testing" 24 | 25 | "gotest.tools/v3/assert" 26 | ) 27 | 28 | func TestStateTransition(t *testing.T) { 29 | // base is active 30 | stateMachine := NewObjectState() 31 | assert.Equal(t, stateMachine.Current(), Active.String()) 32 | 33 | // active to stopped 34 | err := stateMachine.Event(context.Background(), Stop.String(), "testobject") 35 | assert.Assert(t, err == nil) 36 | assert.Equal(t, stateMachine.Current(), Stopped.String()) 37 | 38 | // remove on stopped not allowed 39 | err = stateMachine.Event(context.Background(), Remove.String(), "testobject") 40 | assert.Assert(t, err != nil) 41 | assert.Equal(t, stateMachine.Current(), Stopped.String()) 42 | 43 | // stopped to active 44 | err = stateMachine.Event(context.Background(), Start.String(), "testobject") 45 | assert.Assert(t, err == nil) 46 | assert.Equal(t, stateMachine.Current(), Active.String()) 47 | 48 | // active to draining 49 | err = stateMachine.Event(context.Background(), Remove.String(), "testobject") 50 | assert.Assert(t, err == nil) 51 | 
assert.Equal(t, stateMachine.Current(), Draining.String()) 52 | 53 | // stop on draining not allowed 54 | err = stateMachine.Event(context.Background(), Stop.String(), "test_object") 55 | assert.Assert(t, err != nil) 56 | assert.Equal(t, stateMachine.Current(), Draining.String()) 57 | 58 | // draining to active 59 | err = stateMachine.Event(context.Background(), Start.String(), "test_object") 60 | assert.Assert(t, err == nil) 61 | assert.Equal(t, stateMachine.Current(), Active.String()) 62 | } 63 | 64 | func TestTransitionToSelf(t *testing.T) { 65 | // base is active 66 | stateMachine := NewObjectState() 67 | 68 | // start on active 69 | err := stateMachine.Event(context.Background(), Start.String(), "testobject") 70 | assert.Assert(t, err != nil) 71 | if err != nil && err.Error() != noTransition { 72 | t.Errorf("state change failed with error: %v", err) 73 | } 74 | assert.Equal(t, stateMachine.Current(), Active.String()) 75 | 76 | // remove on draining 77 | stateMachine.SetState(Draining.String()) 78 | err = stateMachine.Event(context.Background(), Remove.String(), "testobject") 79 | assert.Assert(t, err != nil) 80 | if err != nil && err.Error() != noTransition { 81 | t.Errorf("state change failed with error: %v", err) 82 | } 83 | assert.Equal(t, stateMachine.Current(), Draining.String()) 84 | 85 | // stop on stopped 86 | stateMachine.SetState(Stopped.String()) 87 | err = stateMachine.Event(context.Background(), Stop.String(), "testobject") 88 | assert.Assert(t, err != nil) 89 | if err != nil && err.Error() != noTransition { 90 | t.Errorf("state change failed with error: %v", err) 91 | } 92 | assert.Equal(t, stateMachine.Current(), Stopped.String()) 93 | } 94 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/reservation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license 
agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package objects 20 | 21 | import ( 22 | "go.uber.org/zap" 23 | 24 | "github.com/apache/yunikorn-core/pkg/log" 25 | ) 26 | 27 | type reservation struct { 28 | appID string 29 | nodeID string 30 | allocKey string 31 | // these references must ONLY be used for alloc, node and application removal otherwise 32 | // the reservations cannot be removed and scheduling might be impacted. 33 | app *Application 34 | node *Node 35 | alloc *Allocation 36 | } 37 | 38 | // The reservation inside the scheduler. A reservation object is never mutated and does not use locking. 39 | // The key depends on where the reservation was made (node or app). 
40 | // appBased must be true for a reservation for an app and false for a reservation on a node 41 | func newReservation(node *Node, app *Application, alloc *Allocation, appBased bool) *reservation { 42 | if alloc == nil || app == nil || node == nil { 43 | log.Log(log.SchedReservation).Warn("Illegal reservation requested: one input is nil", 44 | zap.Stringer("node", node), 45 | zap.Stringer("app", app), 46 | zap.Stringer("alloc", alloc)) 47 | return nil 48 | } 49 | res := &reservation{ 50 | allocKey: alloc.GetAllocationKey(), 51 | alloc: alloc, 52 | app: app, 53 | node: node, 54 | } 55 | if appBased { 56 | res.nodeID = node.NodeID 57 | } else { 58 | res.appID = app.ApplicationID 59 | } 60 | return res 61 | } 62 | 63 | func (r *reservation) String() string { 64 | if r == nil { 65 | return "nil reservation" 66 | } 67 | if r.nodeID == "" { 68 | return r.node.NodeID + " -> " + r.appID + "|" + r.allocKey 69 | } 70 | return r.app.ApplicationID + " -> " + r.nodeID + "|" + r.allocKey 71 | } 72 | 73 | // GetObjects returns the objects that created the reservation. 74 | // None of the returned values will be nil unless the reservation itself is nil 75 | func (r *reservation) GetObjects() (*Node, *Application, *Allocation) { 76 | if r != nil { 77 | return r.node, r.app, r.alloc 78 | } 79 | return nil, nil, nil 80 | } 81 | -------------------------------------------------------------------------------- /pkg/scheduler/objects/sorted_asks.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package objects 20 | 21 | import "sort" 22 | 23 | // Storing allocation asks in a sorted slice. 24 | // 25 | // In the overwhelming majority of the cases, new asks are added to the end of the list. Therefore we don't need to 26 | // use advanced data structures to maintain the sorted invariant of the slice. 27 | type sortedRequests []*Allocation 28 | 29 | func (s *sortedRequests) insert(ask *Allocation) { 30 | size := len(*s) 31 | 32 | if size > 0 && ask.LessThan((*s)[size-1]) { 33 | // fast path, insert at the end (most likely) 34 | s.insertAt(size, ask) 35 | return 36 | } 37 | 38 | idx := sort.Search(size, func(i int) bool { 39 | return (*s)[i].LessThan(ask) 40 | }) 41 | s.insertAt(idx, ask) 42 | } 43 | 44 | func (s *sortedRequests) insertAt(index int, ask *Allocation) { 45 | *s = append(*s, nil) 46 | if index < len(*s) { 47 | copy((*s)[index+1:], (*s)[index:]) 48 | } 49 | (*s)[index] = ask 50 | } 51 | 52 | func (s *sortedRequests) remove(ask *Allocation) { 53 | idx := sort.Search(len(*s), func(i int) bool { 54 | return (*s)[i].LessThan(ask) 55 | }) 56 | if idx == len(*s) || (*s)[idx].allocationKey != ask.allocationKey { 57 | return 58 | } 59 | s.removeAt(idx) 60 | } 61 | 62 | func (s *sortedRequests) removeAt(index int) { 63 | copy((*s)[index:], (*s)[index+1:]) 64 | (*s)[len(*s)-1] = nil 65 | *s = (*s)[:len(*s)-1] 66 | } 67 | -------------------------------------------------------------------------------- /pkg/scheduler/partition_manager_test.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package scheduler 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | 26 | "github.com/apache/yunikorn-core/pkg/common/configs" 27 | "github.com/apache/yunikorn-core/pkg/common/security" 28 | "github.com/apache/yunikorn-core/pkg/scheduler/objects" 29 | ) 30 | 31 | func createPartitionContext(t *testing.T) *PartitionContext { 32 | conf := configs.PartitionConfig{ 33 | Name: "test", 34 | Queues: []configs.QueueConfig{ 35 | { 36 | Name: "root", 37 | Parent: true, 38 | SubmitACL: "*", 39 | Queues: nil, 40 | }, 41 | }, 42 | } 43 | cc := &ClusterContext{} 44 | partition, err := newPartitionContext(conf, "test", cc, false) 45 | assert.NilError(t, err) 46 | return partition 47 | } 48 | 49 | func TestStopPartitionManager(t *testing.T) { 50 | p := createPartitionContext(t) 51 | 52 | p.partitionManager.Stop() 53 | 54 | // this call should not be blocked forever 55 | p.partitionManager.cleanExpiredApps() 56 | 57 | // this call should not be blocked forever 58 | p.partitionManager.cleanRoot() 59 | } 60 | 61 | func TestCleanQueues(t *testing.T) { 62 | p 
:= createPartitionContext(t) 63 | 64 | root := p.GetQueue("root") 65 | assert.Assert(t, root != nil) 66 | 67 | // add new queue to partition 68 | queue, err := p.createQueue("root.test", security.UserGroup{}) 69 | assert.NilError(t, err) 70 | assert.Equal(t, false, queue.IsManaged()) 71 | assert.Equal(t, 1, len(p.root.GetCopyOfChildren())) 72 | 73 | // make sure all queues are removed 74 | p.partitionManager.cleanQueues(p.root) 75 | assert.Equal(t, 0, len(p.root.GetCopyOfChildren())) 76 | } 77 | 78 | func TestRemoveAll(t *testing.T) { 79 | p := createPartitionContext(t) 80 | 81 | _, err := p.createQueue("root.test", security.UserGroup{}) 82 | assert.NilError(t, err) 83 | 84 | // add new node to partition 85 | err = p.addNodeToList(&objects.Node{}) 86 | assert.NilError(t, err) 87 | assert.Equal(t, 1, p.nodes.GetNodeCount()) 88 | 89 | // add new application to partition 90 | err = p.AddApplication(newApplication("app", p.Name, "root.test")) 91 | assert.NilError(t, err) 92 | assert.Equal(t, 1, len(p.applications)) 93 | 94 | // make sure all nodes and applications are removed 95 | p.partitionManager.remove() 96 | assert.Equal(t, 0, len(p.applications)) 97 | assert.Equal(t, 0, p.nodes.GetNodeCount()) 98 | } 99 | -------------------------------------------------------------------------------- /pkg/scheduler/placement/recovery_rule.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package placement 20 | 21 | import ( 22 | "go.uber.org/zap" 23 | 24 | "github.com/apache/yunikorn-core/pkg/common" 25 | "github.com/apache/yunikorn-core/pkg/common/configs" 26 | "github.com/apache/yunikorn-core/pkg/log" 27 | "github.com/apache/yunikorn-core/pkg/scheduler/objects" 28 | "github.com/apache/yunikorn-core/pkg/scheduler/placement/types" 29 | "github.com/apache/yunikorn-core/pkg/webservice/dao" 30 | ) 31 | 32 | // A rule to place an application into the recovery queue if no other rules matched and application submission is forced. 33 | // This rule will be run implicitly after all other placement rules are evaluated to ensure that an application 34 | // corresponding to an already-executing workload can be accepted successfully. 
35 | type recoveryRule struct { 36 | basicRule 37 | } 38 | 39 | func (rr *recoveryRule) getName() string { 40 | return types.Recovery 41 | } 42 | 43 | func (rr *recoveryRule) ruleDAO() *dao.RuleDAO { 44 | return &dao.RuleDAO{ 45 | Name: rr.getName(), 46 | Parameters: map[string]string{ 47 | "queue": common.RecoveryQueueFull, 48 | }, 49 | } 50 | } 51 | 52 | func (rr *recoveryRule) initialise(_ configs.PlacementRule) error { 53 | // no configuration needed for the recovery rule 54 | return nil 55 | } 56 | 57 | func (rr *recoveryRule) placeApplication(app *objects.Application, _ func(string) *objects.Queue) (string, error) { 58 | // only forced applications should resolve to the recovery queue 59 | if !app.IsCreateForced() { 60 | return "", nil 61 | } 62 | 63 | queueName := common.RecoveryQueueFull 64 | log.Log(log.SchedApplication).Info("Recovery rule application placed", 65 | zap.String("application", app.ApplicationID), 66 | zap.String("queue", queueName)) 67 | return queueName, nil 68 | } 69 | -------------------------------------------------------------------------------- /pkg/scheduler/placement/recovery_rule_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package placement 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | 26 | "github.com/apache/yunikorn-core/pkg/common" 27 | "github.com/apache/yunikorn-core/pkg/common/configs" 28 | "github.com/apache/yunikorn-core/pkg/common/security" 29 | "github.com/apache/yunikorn-core/pkg/webservice/dao" 30 | siCommon "github.com/apache/yunikorn-scheduler-interface/lib/go/common" 31 | ) 32 | 33 | func TestRecoveryRuleInitialise(t *testing.T) { 34 | conf := configs.PlacementRule{ 35 | Name: "recovery", 36 | } 37 | rr := &recoveryRule{} 38 | err := rr.initialise(conf) 39 | assert.NilError(t, err, "unexpected error in initialize") 40 | } 41 | 42 | func TestRecoveryRulePlace(t *testing.T) { 43 | rr := &recoveryRule{} 44 | err := initQueueStructure([]byte(confTestQueue)) 45 | assert.NilError(t, err, "setting up the queue config failed") 46 | 47 | // verify that non-forced app is not recovered 48 | user := security.UserGroup{ 49 | User: "testuser", 50 | Groups: []string{}, 51 | } 52 | tags := make(map[string]string) 53 | app := newApplication("app1", "default", "ignored", user, tags, nil, "") 54 | 55 | var queue string 56 | queue, err = rr.placeApplication(app, queueFunc) 57 | if queue != "" || err != nil { 58 | t.Errorf("recovery rule did not bypass non-forced application, resolved queue '%s', err %v ", queue, err) 59 | } 60 | 61 | tags[siCommon.AppTagCreateForce] = "true" 62 | app = newApplication("app1", "default", "ignored", user, tags, nil, "") 63 | queue, err = rr.placeApplication(app, queueFunc) 64 | if queue != common.RecoveryQueueFull || err != nil { 65 | t.Errorf("recovery rule did not place forced application into recovery queue, resolved queue '%s', err %v ", queue, err) 66 | } 67 | } 68 | 69 | func Test_recoveryRule_ruleDAO(t *testing.T) { 70 | // nothing should be set as everything is ignored by the rule 71 | rrDAO := 
&dao.RuleDAO{Name: "recovery", Parameters: map[string]string{"queue": common.RecoveryQueueFull}} 72 | rr := &recoveryRule{} 73 | ruleDAO := rr.ruleDAO() 74 | assert.DeepEqual(t, rrDAO, ruleDAO) 75 | } 76 | -------------------------------------------------------------------------------- /pkg/scheduler/placement/testrule.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package placement 20 | 21 | import ( 22 | "fmt" 23 | "strconv" 24 | "strings" 25 | 26 | "github.com/apache/yunikorn-core/pkg/common/configs" 27 | "github.com/apache/yunikorn-core/pkg/scheduler/objects" 28 | "github.com/apache/yunikorn-core/pkg/scheduler/placement/types" 29 | "github.com/apache/yunikorn-core/pkg/webservice/dao" 30 | ) 31 | 32 | // A simple test rule to place an application based on a nil application. 33 | // Testing only. 
34 | type testRule struct { 35 | basicRule 36 | } 37 | 38 | func (tr *testRule) getName() string { 39 | return types.Test 40 | } 41 | 42 | func (tr *testRule) ruleDAO() *dao.RuleDAO { 43 | var pDAO *dao.RuleDAO 44 | if tr.parent != nil { 45 | pDAO = tr.parent.ruleDAO() 46 | } 47 | return &dao.RuleDAO{ 48 | Name: types.Test, 49 | Parameters: map[string]string{ 50 | "create": strconv.FormatBool(tr.create), 51 | }, 52 | ParentRule: pDAO, 53 | Filter: tr.filter.filterDAO(), 54 | } 55 | } 56 | 57 | // Simple init for the test rule: allow everything as per a normal rule. 58 | func (tr *testRule) initialise(conf configs.PlacementRule) error { 59 | tr.create = conf.Create 60 | tr.filter = newFilter(conf.Filter) 61 | var err = error(nil) 62 | if conf.Parent != nil { 63 | tr.parent, err = newRule(*conf.Parent) 64 | } 65 | return err 66 | } 67 | 68 | // Simple test rule that just checks the app passed in and returns fixed queue names. 69 | func (tr *testRule) placeApplication(app *objects.Application, queueFn func(string) *objects.Queue) (string, error) { 70 | if app == nil { 71 | return "", fmt.Errorf("nil app passed in") 72 | } 73 | if queuePath := app.GetQueuePath(); queuePath != "" { 74 | parts := strings.Split(queuePath, configs.DOT) 75 | for _, part := range parts { 76 | if err := configs.IsQueueNameValid(part); err != nil { 77 | return "", err 78 | } 79 | } 80 | return replaceDot(queuePath), nil 81 | } 82 | return types.Test, nil 83 | } 84 | -------------------------------------------------------------------------------- /pkg/scheduler/placement/types/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
// Names of the placement rules.
// Recovery and Test are the names returned by getName() of the recovery and test rules.
// NOTE(review): presumably the other names map to the remaining rule implementations in the
// placement package - confirm there.
const (
	Fixed    = "fixed"
	User     = "user"
	Provided = "provided"
	Tag      = "tag"
	Test     = "test"
	Recovery = "recovery"
)
// SortingPolicy enumerates the node sorting policies known to this package.
type SortingPolicy int

const (
	BinPackingPolicy SortingPolicy = iota
	FairnessPolicy
)

// String returns the configuration name of the policy. A value outside the
// declared constants is rendered as "SortingPolicy(N)" instead of panicking
// with an index out of range.
func (nsp SortingPolicy) String() string {
	names := [...]string{"binpacking", "fair"}
	if nsp < 0 || int(nsp) >= len(names) {
		return fmt.Sprintf("SortingPolicy(%d)", int(nsp))
	}
	return names[nsp]
}

// SortingPolicyFromString converts a configuration string into a
// SortingPolicy. The match is exact (case sensitive). An empty string selects
// the default FairnessPolicy. Any other unknown value returns FairnessPolicy
// together with a non-nil error.
func SortingPolicyFromString(str string) (SortingPolicy, error) {
	switch str {
	// fair is the default policy when not set
	case FairnessPolicy.String(), "":
		return FairnessPolicy, nil
	case BinPackingPolicy.String():
		return BinPackingPolicy, nil
	default:
		return FairnessPolicy, fmt.Errorf("undefined policy: %s", str)
	}
}
17 | */ 18 | 19 | package policies 20 | 21 | import ( 22 | "testing" 23 | ) 24 | 25 | func TestSortingPolicyFromString(t *testing.T) { 26 | tests := []struct { 27 | name string 28 | arg string 29 | want SortingPolicy 30 | wantErr bool 31 | }{ 32 | {"EmptyString", "", FairnessPolicy, false}, 33 | {"FairString", "fair", FairnessPolicy, false}, 34 | {"BinString", "binpacking", BinPackingPolicy, false}, 35 | {"UnknownString", "unknown", FairnessPolicy, true}, 36 | } 37 | for _, tt := range tests { 38 | got, err := SortingPolicyFromString(tt.arg) 39 | if (err != nil) != tt.wantErr { 40 | t.Errorf("%s unexpected error returned, expected error: %t, got error '%v'", tt.name, tt.wantErr, err) 41 | return 42 | } 43 | if got != tt.want { 44 | t.Errorf("%s unexpected string returned, expected string: '%s', got string '%v'", tt.name, tt.want, got) 45 | } 46 | } 47 | } 48 | 49 | func TestSortingPolicyToString(t *testing.T) { 50 | var someSP SortingPolicy // since SortingPolicy is an iota it defaults to first in the list 51 | tests := []struct { 52 | name string 53 | sp SortingPolicy 54 | want string 55 | }{ 56 | {"FairString", FairnessPolicy, "fair"}, 57 | {"BinString", BinPackingPolicy, "binpacking"}, 58 | {"NoneString", someSP, "binpacking"}, 59 | } 60 | for _, tt := range tests { 61 | if got := tt.sp.String(); got != tt.want { 62 | t.Errorf("%s unexpected string returned, expected = '%s', got '%v'", tt.name, tt.want, got) 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /pkg/scheduler/policies/preemption_policy.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
// PreemptionPolicy enumerates the preemption policies known to this package.
type PreemptionPolicy int

const (
	DefaultPreemptionPolicy  PreemptionPolicy = iota // preemption is allowed globally
	FencePreemptionPolicy                            // preemption is allowed only within queue subtree
	DisabledPreemptionPolicy                         // preemption is disabled
)

// String returns the configuration name of the policy. A value outside the
// declared constants is rendered as "PreemptionPolicy(N)" instead of
// panicking with an index out of range.
func (p PreemptionPolicy) String() string {
	names := [...]string{"default", "fence", "disabled"}
	if p < 0 || int(p) >= len(names) {
		return fmt.Sprintf("PreemptionPolicy(%d)", int(p))
	}
	return names[p]
}

// PreemptionPolicyFromString converts a configuration string into a
// PreemptionPolicy. The comparison is case insensitive. An empty string
// selects DefaultPreemptionPolicy. Any other unknown value returns
// DefaultPreemptionPolicy together with a non-nil error.
func PreemptionPolicyFromString(str string) (PreemptionPolicy, error) {
	switch strings.ToLower(str) {
	case DefaultPreemptionPolicy.String(), "":
		return DefaultPreemptionPolicy, nil
	// compare against this type's own constant, not the priority policy one
	case FencePreemptionPolicy.String():
		return FencePreemptionPolicy, nil
	case DisabledPreemptionPolicy.String():
		return DisabledPreemptionPolicy, nil
	default:
		return DefaultPreemptionPolicy, fmt.Errorf("undefined preemption.policy: %s", str)
	}
}
See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package policies 20 | 21 | import ( 22 | "testing" 23 | ) 24 | 25 | func TestPreemptionPolicyFromString(t *testing.T) { 26 | tests := []struct { 27 | name string 28 | arg string 29 | want PreemptionPolicy 30 | wantErr bool 31 | }{ 32 | {"EmptyString", "", DefaultPreemptionPolicy, false}, 33 | {"DefaultString", "default", DefaultPreemptionPolicy, false}, 34 | {"FenceString", "fence", FencePreemptionPolicy, false}, 35 | {"DisabledString", "disabled", DisabledPreemptionPolicy, false}, 36 | {"InvalidString", "invalid", DefaultPreemptionPolicy, true}, 37 | } 38 | for _, tt := range tests { 39 | got, err := PreemptionPolicyFromString(tt.arg) 40 | if (err != nil) != tt.wantErr { 41 | t.Errorf("%s unexpected error returned, expected error: %t, got error '%v'", tt.name, tt.wantErr, err) 42 | return 43 | } 44 | if got != tt.want { 45 | t.Errorf("%s unexpected string returned, expected string: '%s', got string '%v'", tt.name, tt.want, got) 46 | } 47 | } 48 | } 49 | 50 | func TestPreemptionPolicyToString(t *testing.T) { 51 | tests := []struct { 52 | name string 53 | policy PreemptionPolicy 54 | want string 55 | }{ 56 | {"DefaultString", DefaultPreemptionPolicy, "default"}, 57 | {"FenceString", FencePreemptionPolicy, "fence"}, 58 | {"DisabledString", 
DisabledPreemptionPolicy, "disabled"}, 59 | } 60 | for _, tt := range tests { 61 | if got := tt.policy.String(); got != tt.want { 62 | t.Errorf("%s unexpected string returned, expected = '%s', got '%v'", tt.name, tt.want, got) 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /pkg/scheduler/policies/priority_policy.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
// PriorityPolicy enumerates the priority policies known to this package.
type PriorityPolicy int

const (
	DefaultPriorityPolicy PriorityPolicy = iota // priority propagates upward
	FencePriorityPolicy                         // priority is not considered outside queue subtree
)

// String returns the configuration name of the policy. A value outside the
// declared constants is rendered as "PriorityPolicy(N)" instead of panicking
// with an index out of range.
func (p PriorityPolicy) String() string {
	names := [...]string{"default", "fence"}
	if p < 0 || int(p) >= len(names) {
		return fmt.Sprintf("PriorityPolicy(%d)", int(p))
	}
	return names[p]
}

// PriorityPolicyFromString converts a configuration string into a
// PriorityPolicy. The comparison is case insensitive. An empty string selects
// DefaultPriorityPolicy. Any other unknown value returns
// DefaultPriorityPolicy together with a non-nil error.
func PriorityPolicyFromString(str string) (PriorityPolicy, error) {
	switch strings.ToLower(str) {
	case DefaultPriorityPolicy.String(), "":
		return DefaultPriorityPolicy, nil
	case FencePriorityPolicy.String():
		return FencePriorityPolicy, nil
	default:
		return DefaultPriorityPolicy, fmt.Errorf("undefined priority.policy: %s", str)
	}
}
17 | */ 18 | 19 | package policies 20 | 21 | import ( 22 | "testing" 23 | ) 24 | 25 | func TestPriorityPolicyFromString(t *testing.T) { 26 | tests := []struct { 27 | name string 28 | arg string 29 | want PriorityPolicy 30 | wantErr bool 31 | }{ 32 | {"EmptyString", "", DefaultPriorityPolicy, false}, 33 | {"DefaultString", "default", DefaultPriorityPolicy, false}, 34 | {"FenceString", "fence", FencePriorityPolicy, false}, 35 | {"InvalidString", "invalid", DefaultPriorityPolicy, true}, 36 | } 37 | for _, tt := range tests { 38 | got, err := PriorityPolicyFromString(tt.arg) 39 | if (err != nil) != tt.wantErr { 40 | t.Errorf("%s unexpected error returned, expected error: %t, got error '%v'", tt.name, tt.wantErr, err) 41 | return 42 | } 43 | if got != tt.want { 44 | t.Errorf("%s unexpected string returned, expected string: '%s', got string '%v'", tt.name, tt.want, got) 45 | } 46 | } 47 | } 48 | 49 | func TestPriorityPolicyToString(t *testing.T) { 50 | tests := []struct { 51 | name string 52 | policy PriorityPolicy 53 | want string 54 | }{ 55 | {"DefaultString", DefaultPriorityPolicy, "default"}, 56 | {"FenceString", FencePriorityPolicy, "fence"}, 57 | } 58 | for _, tt := range tests { 59 | if got := tt.policy.String(); got != tt.want { 60 | t.Errorf("%s unexpected string returned, expected = '%s', got '%v'", tt.name, tt.want, got) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pkg/scheduler/policies/sorting_policy.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package policies 20 | 21 | import ( 22 | "fmt" 23 | 24 | "github.com/apache/yunikorn-core/pkg/log" 25 | ) 26 | 27 | // Sort type for queues & apps. 28 | type SortPolicy int 29 | 30 | const ( 31 | FifoSortPolicy SortPolicy = iota // first in first out, submit time 32 | FairSortPolicy // fair based on usage 33 | deprecatedStateAwarePolicy // deprecated: now alias for FIFO 34 | Undefined // not initialised or parsing failed 35 | ) 36 | 37 | func (s SortPolicy) String() string { 38 | return [...]string{"fifo", "fair", "stateaware", "undefined"}[s] 39 | } 40 | 41 | func SortPolicyFromString(str string) (SortPolicy, error) { 42 | switch str { 43 | // fifo is the default policy when not set 44 | case FifoSortPolicy.String(), "": 45 | return FifoSortPolicy, nil 46 | case FairSortPolicy.String(): 47 | return FairSortPolicy, nil 48 | case deprecatedStateAwarePolicy.String(): 49 | log.Log(log.Deprecation).Warn("Sort policy 'stateaware' is deprecated; using 'fifo' instead") 50 | return FifoSortPolicy, nil 51 | default: 52 | return Undefined, fmt.Errorf("undefined policy: %s", str) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /pkg/scheduler/policies/sorting_policy_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. 
The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package policies 20 | 21 | import ( 22 | "testing" 23 | ) 24 | 25 | func TestAppFromString(t *testing.T) { 26 | tests := []struct { 27 | name string 28 | arg string 29 | want SortPolicy 30 | wantErr bool 31 | }{ 32 | {"EmptyString", "", FifoSortPolicy, false}, 33 | {"FifoString", "fifo", FifoSortPolicy, false}, 34 | {"FairString", "fair", FairSortPolicy, false}, 35 | {"StatusString", "stateaware", FifoSortPolicy, false}, 36 | {"UnknownString", "unknown", Undefined, true}, 37 | } 38 | for _, tt := range tests { 39 | got, err := SortPolicyFromString(tt.arg) 40 | if (err != nil) != tt.wantErr { 41 | t.Errorf("%s unexpected error returned, expected error: %t, got error '%v'", tt.name, tt.wantErr, err) 42 | return 43 | } 44 | if got != tt.want { 45 | t.Errorf("%s unexpected string returned, expected string: '%s', got string '%v'", tt.name, tt.want, got) 46 | } 47 | } 48 | } 49 | 50 | func TestAppToString(t *testing.T) { 51 | var someSP SortPolicy // since SortingPolicy is an iota it defaults to first in the list 52 | tests := []struct { 53 | name string 54 | sp SortPolicy 55 | want string 56 | }{ 57 | {"FifoString", FifoSortPolicy, "fifo"}, 58 | {"FairString", FairSortPolicy, "fair"}, 59 | {"StatusString", deprecatedStateAwarePolicy, "stateaware"}, 60 | {"DefaultString", Undefined, "undefined"}, 61 | {"NoneString", someSP, "fifo"}, 62 | } 63 | for _, tt := range tests { 
64 | if got := tt.sp.String(); got != tt.want { 65 | t.Errorf("%s unexpected string returned, expected = '%s', got '%v'", tt.name, tt.want, got) 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /pkg/scheduler/scheduler_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | package scheduler 19 | 20 | import ( 21 | "testing" 22 | 23 | "gotest.tools/v3/assert" 24 | 25 | "github.com/apache/yunikorn-core/pkg/common/resources" 26 | "github.com/apache/yunikorn-core/pkg/scheduler/objects" 27 | "github.com/apache/yunikorn-scheduler-interface/lib/go/si" 28 | ) 29 | 30 | func TestInspectOutstandingRequests(t *testing.T) { 31 | scheduler := NewScheduler() 32 | partition, err := newBasePartition() 33 | assert.NilError(t, err, "unable to create partition: %v", err) 34 | scheduler.clusterContext.partitions["test"] = partition 35 | 36 | // two applications with no asks 37 | app1 := newApplication(appID1, "test", "root.default") 38 | app2 := newApplication(appID2, "test", "root.default") 39 | err = partition.AddApplication(app1) 40 | assert.NilError(t, err) 41 | err = partition.AddApplication(app2) 42 | assert.NilError(t, err) 43 | 44 | // add asks 45 | askResource := resources.NewResourceFromMap(map[string]resources.Quantity{ 46 | "vcores": 1, 47 | "memory": 1, 48 | }) 49 | siAsk1 := &si.Allocation{ 50 | AllocationKey: "ask-uuid-1", 51 | ApplicationID: appID1, 52 | ResourcePerAlloc: askResource.ToProto(), 53 | } 54 | siAsk2 := &si.Allocation{ 55 | AllocationKey: "ask-uuid-2", 56 | ApplicationID: appID1, 57 | ResourcePerAlloc: askResource.ToProto(), 58 | } 59 | siAsk3 := &si.Allocation{ 60 | AllocationKey: "ask-uuid-3", 61 | ApplicationID: appID2, 62 | ResourcePerAlloc: askResource.ToProto(), 63 | } 64 | askCreated, _, err := partition.UpdateAllocation(objects.NewAllocationFromSI(siAsk1)) 65 | assert.NilError(t, err) 66 | assert.Check(t, askCreated) 67 | askCreated, _, err = partition.UpdateAllocation(objects.NewAllocationFromSI(siAsk2)) 68 | assert.NilError(t, err) 69 | assert.Check(t, askCreated) 70 | askCreated, _, err = partition.UpdateAllocation(objects.NewAllocationFromSI(siAsk3)) 71 | assert.NilError(t, err) 72 | assert.Check(t, askCreated) 73 | 74 | // mark asks as attempted 75 | expectedTotal := 
resources.NewResourceFromMap(map[string]resources.Quantity{ 76 | "memory": 3, 77 | "vcores": 3, 78 | }) 79 | app1.GetAllocationAsk("ask-uuid-1").SetSchedulingAttempted(true) 80 | app1.GetAllocationAsk("ask-uuid-2").SetSchedulingAttempted(true) 81 | app2.GetAllocationAsk("ask-uuid-3").SetSchedulingAttempted(true) 82 | 83 | // Check #1: collected 3 requests 84 | noRequests, totalResources := scheduler.inspectOutstandingRequests() 85 | assert.Equal(t, 3, noRequests) 86 | assert.Assert(t, resources.Equals(totalResources, expectedTotal), 87 | "total resource expected: %v, got: %v", expectedTotal, totalResources) 88 | 89 | // Check #2: try again, pending asks are not collected 90 | noRequests, totalResources = scheduler.inspectOutstandingRequests() 91 | assert.Equal(t, 0, noRequests) 92 | assert.Assert(t, resources.IsZero(totalResources), "total resource is not zero: %v", totalResources) 93 | } 94 | -------------------------------------------------------------------------------- /pkg/scheduler/scheduling_metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package scheduler 20 | -------------------------------------------------------------------------------- /pkg/scheduler/tests/restclient_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package tests 20 | 21 | import ( 22 | "encoding/json" 23 | "io" 24 | "net/http" 25 | "net/url" 26 | "strconv" 27 | 28 | "github.com/apache/yunikorn-core/pkg/webservice/dao" 29 | ) 30 | 31 | type RClient struct { 32 | BaseURL *url.URL 33 | } 34 | 35 | // GetBatchEvents returns history events from the batch interface 36 | func (c *RClient) GetBatchEvents() (*dao.EventRecordDAO, error) { 37 | req, err := c.newRequest("GET", "ws/v1/events/batch") 38 | if err != nil { 39 | return nil, err 40 | } 41 | var events *dao.EventRecordDAO 42 | _, err = c.do(req, &events) 43 | return events, err 44 | } 45 | 46 | // GetEventsStream returns a persistent connection with a stream of events 47 | func (c *RClient) GetEventsStream(count uint64) (io.ReadCloser, error) { 48 | req, err := c.newRequest("GET", "ws/v1/events/stream") 49 | if err != nil { 50 | return nil, err 51 | } 52 | req.URL.RawQuery = "count=" + strconv.FormatUint(count, 10) 53 | resp, err := http.DefaultClient.Do(req) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | return resp.Body, nil 59 | } 60 | 61 | func (c *RClient) newRequest(method, path string) (*http.Request, error) { 62 | rel := &url.URL{Path: path} 63 | wsUrl := &url.URL{ 64 | Host: "localhost:9080", 65 | Scheme: "http", 66 | } 67 | 68 | u := wsUrl.ResolveReference(rel) 69 | var buf io.ReadWriter 70 | req, err := http.NewRequest(method, u.String(), buf) 71 | if err != nil { 72 | return nil, err 73 | } 74 | req.Header.Set("Accept", "application/json") 75 | req.Header.Set("User-Agent", "Golang_Spider_Bot/3.0") 76 | return req, nil 77 | } 78 | 79 | func (c *RClient) do(req *http.Request, v interface{}) (*http.Response, error) { 80 | resp, err := http.DefaultClient.Do(req) 81 | if err != nil { 82 | return nil, err 83 | } 84 | defer resp.Body.Close() 85 | err = json.NewDecoder(resp.Body).Decode(v) 86 | return resp, err 87 | } 88 | -------------------------------------------------------------------------------- 
// Tracker defines the set of methods used to track and retrieve the user and
// group resource usage.
type Tracker interface {
	// GetUserResources returns the resource recorded for the given user.
	// NOTE(review): presumably the current tracked usage; confirm against the implementation.
	GetUserResources(user security.UserGroup) *resources.Resource
	// GetGroupResources returns the resource recorded for the named group.
	GetGroupResources(group string) *resources.Resource

	// GetUsersResources returns the trackers of all users.
	GetUsersResources() []*UserTracker
	// GetGroupsResources returns the trackers of all groups.
	GetGroupsResources() []*GroupTracker

	// IncreaseTrackedResource adds usage for the application in the queue path on behalf of the user.
	// NOTE(review): the bool result presumably reports whether the update was applied; confirm.
	IncreaseTrackedResource(queuePath, applicationID string, usage *resources.Resource, user security.UserGroup) bool
	// DecreaseTrackedResource removes usage for the application in the queue path on behalf of the user.
	// NOTE(review): removeApp presumably also drops the application from tracking; confirm.
	DecreaseTrackedResource(queuePath, applicationID string, usage *resources.Resource, user security.UserGroup, removeApp bool) bool
}
contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package ugm 20 | 21 | import ( 22 | "strings" 23 | 24 | "github.com/apache/yunikorn-core/pkg/common" 25 | "github.com/apache/yunikorn-core/pkg/common/configs" 26 | ) 27 | 28 | // getParentPath return the path of the parent queue and an empty string if this queue is 29 | // the root queue. 30 | func getParentPath(queuePath string) string { 31 | idx := strings.LastIndex(queuePath, configs.DOT) 32 | if idx == -1 { 33 | return common.Empty 34 | } 35 | return queuePath[:idx] 36 | } 37 | -------------------------------------------------------------------------------- /pkg/scheduler/ugm/utilities_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package ugm 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | ) 26 | 27 | func TestGetParentQueuePath(t *testing.T) { 28 | assert.Equal(t, getParentPath(""), "") 29 | assert.Equal(t, getParentPath("root"), "") 30 | assert.Equal(t, getParentPath("root.parent.leaf"), "root.parent") 31 | assert.Equal(t, getParentPath("parent.leaf"), "parent") 32 | } 33 | -------------------------------------------------------------------------------- /pkg/webservice/dao/allocation_ask_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
// AllocationAskLogDAOInfo is the JSON representation of one allocation log
// entry attached to an ask (see AllocationAskDAOInfo.AllocationLog).
// All fields are left out of the JSON output when they hold the zero value.
type AllocationAskLogDAOInfo struct {
	Message        string `json:"message,omitempty"`
	LastOccurrence int64  `json:"lastOccurrence,omitempty"` // NOTE(review): presumably a timestamp; unit not visible here, confirm
	Count          int32  `json:"count,omitempty"`
}

// AllocationAskDAOInfo is the JSON representation of an allocation ask.
// Every field except AllocationKey is left out of the JSON output when it
// holds the zero value.
type AllocationAskDAOInfo struct {
	AllocationKey       string                     `json:"allocationKey"` // no omitempty, allocation key should not be empty
	AllocationTags      map[string]string          `json:"allocationTags,omitempty"`
	RequestTime         int64                      `json:"requestTime,omitempty"`
	ResourcePerAlloc    map[string]int64           `json:"resource,omitempty"` // serialised under the "resource" key
	Priority            string                     `json:"priority,omitempty"`
	RequiredNodeID      string                     `json:"requiredNodeId,omitempty"`
	ApplicationID       string                     `json:"applicationId,omitempty"`
	Placeholder         bool                       `json:"placeholder,omitempty"`
	TaskGroupName       string                     `json:"taskGroupName,omitempty"`
	AllocationLog       []*AllocationAskLogDAOInfo `json:"allocationLog,omitempty"`
	TriggeredPreemption bool                       `json:"triggeredPreemption,omitempty"`
	Originator          bool                       `json:"originator,omitempty"`
	SchedulingAttempted bool                       `json:"schedulingAttempted,omitempty"`
	TriggeredScaleUp    bool                       `json:"triggeredScaleUp,omitempty"`
}
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// AllocationDAOInfo is the JSON representation of an allocation.
// Only AllocationKey is always emitted; every other field uses omitempty, so
// zero values (including false booleans) are absent from the output.
type AllocationDAOInfo struct {
	AllocationKey    string            `json:"allocationKey"` // no omitempty, allocation key should not be empty
	AllocationTags   map[string]string `json:"allocationTags,omitempty"`
	RequestTime      int64             `json:"requestTime,omitempty"`     // Allocation ask's createTime if PlaceholderUsed is false, otherwise equivalent to placeholder allocation's createTime
	AllocationTime   int64             `json:"allocationTime,omitempty"`  // Allocation's createTime
	AllocationDelay  int64             `json:"allocationDelay,omitempty"` // Difference between AllocationTime and RequestTime
	ResourcePerAlloc map[string]int64  `json:"resource,omitempty"`        // serialized under the key "resource"
	Priority         string            `json:"priority,omitempty"`
	NodeID           string            `json:"nodeId,omitempty"`
	ApplicationID    string            `json:"applicationId,omitempty"`
	Placeholder      bool              `json:"placeholder,omitempty"`
	PlaceholderUsed  bool              `json:"placeholderUsed,omitempty"`
	TaskGroupName    string            `json:"taskGroupName,omitempty"`
	Preempted        bool              `json:"preempted,omitempty"`
	Originator       bool              `json:"originator,omitempty"`
}

// ForeignAllocationDAOInfo is the JSON representation of a foreign
// allocation. It carries a subset of the AllocationDAOInfo fields plus
// Preemptable; only AllocationKey is always emitted.
type ForeignAllocationDAOInfo struct {
	AllocationKey    string            `json:"allocationKey"` // no omitempty, allocation key should not be empty
	AllocationTags   map[string]string `json:"allocationTags,omitempty"`
	AllocationTime   int64             `json:"allocationTime,omitempty"`
	ResourcePerAlloc map[string]int64  `json:"resource,omitempty"` // serialized under the key "resource"
	Priority         string            `json:"priority,omitempty"`
	NodeID           string            `json:"nodeId,omitempty"`
	Preemptable      bool              `json:"preemptable,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/application_history.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// ApplicationHistoryDAOInfo pairs a timestamp with a total application count
// for JSON output. The count is carried as a string, not a number.
type ApplicationHistoryDAOInfo struct {
	Timestamp         int64  `json:"timestamp,omitempty"`
	TotalApplications string `json:"totalApplications,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/application_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// ApplicationsDAOInfo wraps a list of applications under the JSON key
// "applications"; the key is omitted when the list is empty.
type ApplicationsDAOInfo struct {
	Applications []ApplicationDAOInfo `json:"applications,omitempty"`
}

// ApplicationDAOInfo is the JSON representation of an application.
// ApplicationID, Partition and QueueName are always emitted; the remaining
// fields use omitempty.
type ApplicationDAOInfo struct {
	ApplicationID      string                  `json:"applicationID"` // no omitempty, application id should not be empty
	UsedResource       map[string]int64        `json:"usedResource,omitempty"`
	MaxUsedResource    map[string]int64        `json:"maxUsedResource,omitempty"`
	PendingResource    map[string]int64        `json:"pendingResource,omitempty"`
	Partition          string                  `json:"partition"` // no omitempty, partition should not be empty
	QueueName          string                  `json:"queueName"` // no omitempty, queue name should not be empty
	SubmissionTime     int64                   `json:"submissionTime,omitempty"`
	FinishedTime       *int64                  `json:"finishedTime,omitempty"` // pointer: a nil value is omitted from the output
	Requests           []*AllocationAskDAOInfo `json:"requests,omitempty"`
	Allocations        []*AllocationDAOInfo    `json:"allocations,omitempty"`
	State              string                  `json:"applicationState,omitempty"` // serialized under the key "applicationState"
	User               string                  `json:"user,omitempty"`
	Groups             []string                `json:"groups,omitempty"`
	RejectedMessage    string                  `json:"rejectedMessage,omitempty"`
	StateLog           []*StateDAOInfo         `json:"stateLog,omitempty"`
	PlaceholderData    []*PlaceholderDAOInfo   `json:"placeholderData,omitempty"`
	HasReserved        bool                    `json:"hasReserved,omitempty"`
	Reservations       []string                `json:"reservations,omitempty"`
	MaxRequestPriority int32                   `json:"maxRequestPriority,omitempty"`
	StartTime          int64                   `json:"startTime,omitempty"`
	// NOTE(review): omitempty has no effect on a struct value in encoding/json,
	// so "resourceHistory" is always emitted (as {} when all its maps are empty).
	ResourceHistory ResourceHistory `json:"resourceHistory,omitempty"`
}

// StateDAOInfo is the element type of ApplicationDAOInfo.StateLog: an
// application state paired with a time value.
type StateDAOInfo struct {
	Time             int64  `json:"time,omitempty"`
	ApplicationState string `json:"applicationState,omitempty"`
}

// PlaceholderDAOInfo is the element type of ApplicationDAOInfo.PlaceholderData
// and holds per task group counters.
type PlaceholderDAOInfo struct {
	TaskGroupName string           `json:"taskGroupName,omitempty"`
	Count         int64            `json:"count,omitempty"`
	MinResource   map[string]int64 `json:"minResource,omitempty"`
	Replaced      int64            `json:"replaced,omitempty"`
	TimedOut      int64            `json:"timedout,omitempty"` // key is all lower case, unlike the other keys
}

// ResourceHistory groups three two-level maps of int64 quantities.
// NOTE(review): the meaning of the outer and inner map keys is not visible
// in this file; confirm against the code that fills them.
type ResourceHistory struct {
	ResourceUsage       map[string]map[string]int64 `json:"resourceUsage,omitempty"`
	PreemptedResource   map[string]map[string]int64 `json:"preemptedResource,omitempty"`
	PlaceholderResource map[string]map[string]int64 `json:"placeholderResource,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/cluster_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// ClusterDAOInfo is the JSON representation of cluster level information.
// PartitionName (key "partition") and ClusterName are always emitted.
type ClusterDAOInfo struct {
	StartTime          int64               `json:"startTime,omitempty"`
	RMBuildInformation []map[string]string `json:"rmBuildInformation,omitempty"`
	PartitionName      string              `json:"partition"`   // no omitempty, partition name should not be empty
	ClusterName        string              `json:"clusterName"` // no omitempty, cluster name should not be empty
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/cluster_util.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// ClusterUtilDAOInfo is the JSON representation of the utilization of one
// resource type: total and used quantities as integers, and a usage value
// carried as a string.
type ClusterUtilDAOInfo struct {
	ResourceType string `json:"type,omitempty"` // serialized under the key "type"
	Total        int64  `json:"total,omitempty"`
	Used         int64  `json:"used,omitempty"`
	Usage        string `json:"usage,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/config_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.
See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

import "github.com/apache/yunikorn-core/pkg/common/configs"

// ValidateConfResponse is the JSON result of a configuration validation:
// an always-present allowed flag and an optional reason.
type ValidateConfResponse struct {
	Allowed bool   `json:"allowed"` // no omitempty, a false value gives a quick way to understand the result.
	Reason  string `json:"reason,omitempty"`
}

// ConfigDAOInfo extends the embedded scheduler configuration with extra
// key/value settings and two deadlock detection values.
type ConfigDAOInfo struct {
	*configs.SchedulerConfig `yaml:",inline"`  // embedded; inlined into the parent document by the YAML encoder
	Extra                    map[string]string `yaml:",omitempty" json:",omitempty"`
	// The two fields below carry no tags, so encoders fall back to their
	// default key naming for them.
	DeadlockDetectionEnabled bool
	DeadlockTimeoutSeconds   int
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/container_history.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// ContainerHistoryDAOInfo pairs a timestamp with a total container count for
// JSON output. The count is carried as a string, not a number.
type ContainerHistoryDAOInfo struct {
	Timestamp       int64  `json:"timestamp,omitempty"`
	TotalContainers string `json:"totalContainers,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/error_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
17 | */ 18 | 19 | package dao 20 | 21 | type YAPIError struct { 22 | StatusCode int `json:"status_code"` // no omitempty, status code should not be 0 23 | Message string `json:"message,omitempty"` 24 | Description string `json:"description,omitempty"` 25 | } 26 | 27 | func NewYAPIError(err error, statusCode int, message string) *YAPIError { 28 | description := message 29 | if err != nil { 30 | description = message + ". Original Cause :" + err.Error() 31 | } 32 | 33 | return &YAPIError{ 34 | StatusCode: statusCode, 35 | Message: message, 36 | Description: description, 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pkg/webservice/dao/event_record.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
*/

package dao

import (
	"github.com/apache/yunikorn-scheduler-interface/lib/go/si"
)

// EventRecordDAO bundles a list of event records with an instance UUID and
// the lowest and highest event ID. The fields carry no JSON tags, so
// encoding/json uses the Go field names as keys and always emits them.
// NOTE(review): whether LowestID/HighestID describe the returned records or
// the whole event store is not visible here; confirm against the caller.
type EventRecordDAO struct {
	InstanceUUID string
	LowestID     uint64
	HighestID    uint64
	EventRecords []*si.EventRecord
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/node_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// NodesDAOInfo is the JSON representation of the nodes of one partition.
// The node list is serialized under the key "nodesInfo".
type NodesDAOInfo struct {
	PartitionName string         `json:"partitionName"` // no omitempty, partition name should not be empty
	Nodes         []*NodeDAOInfo `json:"nodesInfo,omitempty"`
}

// NodeDAOInfo is the JSON representation of a single node. NodeID,
// Schedulable and IsReserved are always emitted; the remaining fields use
// omitempty.
type NodeDAOInfo struct {
	NodeID             string                      `json:"nodeID"` // no omitempty, node id should not be empty
	HostName           string                      `json:"hostName,omitempty"`
	RackName           string                      `json:"rackName,omitempty"`
	Attributes         map[string]string           `json:"attributes,omitempty"`
	Capacity           map[string]int64            `json:"capacity,omitempty"`
	Allocated          map[string]int64            `json:"allocated,omitempty"`
	Occupied           map[string]int64            `json:"occupied,omitempty"`
	Available          map[string]int64            `json:"available,omitempty"`
	Utilized           map[string]int64            `json:"utilized,omitempty"`
	Allocations        []*AllocationDAOInfo        `json:"allocations,omitempty"`
	ForeignAllocations []*ForeignAllocationDAOInfo `json:"foreignAllocations,omitempty"`
	Schedulable        bool                        `json:"schedulable"` // no omitempty, a false value gives a quick way to understand whether a node is schedulable.
	IsReserved         bool                        `json:"isReserved"`  // no omitempty, a false value gives a quick way to understand whether a node is reserved.
	Reservations       []string                    `json:"reservations,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/node_util.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// PartitionNodesUtilDAOInfo is the JSON representation of the node
// utilization of one partition. The per resource type list is serialized
// under the key "utilizations".
type PartitionNodesUtilDAOInfo struct {
	ClusterID     string              `json:"clusterId"` // no omitempty, cluster id should not be empty
	Partition     string              `json:"partition"` // no omitempty, partition should not be empty
	NodesUtilList []*NodesUtilDAOInfo `json:"utilizations,omitempty"`
}

// NodesUtilDAOInfo holds the utilization buckets for one resource type.
// The type is serialized under the key "type" and the buckets under
// "utilization".
type NodesUtilDAOInfo struct {
	ResourceType string             `json:"type,omitempty"`
	NodesUtil    []*NodeUtilDAOInfo `json:"utilization,omitempty"`
}

// NodeUtilDAOInfo describes one bucket: its name, a node count and a list of
// node names.
type NodeUtilDAOInfo struct {
	BucketName string   `json:"bucketName,omitempty"`
	NumOfNodes int64    `json:"numOfNodes,omitempty"`
	NodeNames  []string `json:"nodeNames,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/partition_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// PartitionInfo is the JSON representation of a partition. ClusterID, Name,
// Capacity, NodeSortingPolicy and PreemptionEnabled are always emitted; the
// remaining fields use omitempty.
type PartitionInfo struct {
	ClusterID               string            `json:"clusterId"`         // no omitempty, cluster id should not be empty
	Name                    string            `json:"name"`              // no omitempty, name should not be empty
	Capacity                PartitionCapacity `json:"capacity"`          // no omitempty, omitempty doesn't work on a structure value
	NodeSortingPolicy       NodeSortingPolicy `json:"nodeSortingPolicy"` // no omitempty, omitempty doesn't work on a structure value
	PreemptionEnabled       bool              `json:"preemptionEnabled"` // no omitempty, false shows preemption status better
	TotalNodes              int               `json:"totalNodes,omitempty"`
	Applications            map[string]int    `json:"applications,omitempty"`
	TotalContainers         int               `json:"totalContainers,omitempty"`
	State                   string            `json:"state,omitempty"`
	LastStateTransitionTime int64             `json:"lastStateTransitionTime,omitempty"`
}

// PartitionCapacity is the value type of PartitionInfo.Capacity: three maps
// of int64 quantities keyed by string.
type PartitionCapacity struct {
	Capacity     map[string]int64 `json:"capacity,omitempty"`
	UsedCapacity map[string]int64 `json:"usedCapacity,omitempty"`
	Utilization  map[string]int64 `json:"utilization,omitempty"`
}

// NodeSortingPolicy is the value type of PartitionInfo.NodeSortingPolicy: a
// policy type name and a map of float weights keyed by string.
type NodeSortingPolicy struct {
	Type            string             `json:"type,omitempty"`
	ResourceWeights map[string]float64 `json:"resourceWeights,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/queue_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more
contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// TemplateInfo is the JSON representation of a queue template, referenced
// from PartitionQueueDAOInfo.TemplateInfo. Every field uses omitempty.
type TemplateInfo struct {
	MaxApplications    uint64            `json:"maxApplications,omitempty"`
	MaxResource        map[string]int64  `json:"maxResource,omitempty"`
	GuaranteedResource map[string]int64  `json:"guaranteedResource,omitempty"`
	Properties         map[string]string `json:"properties,omitempty"`
}

// PartitionQueueDAOInfo is the JSON representation of a queue. The type is
// recursive: Children holds nested queue values, while ChildNames holds
// plain strings. Fields without omitempty are always emitted, as explained
// by the comment on each of them.
type PartitionQueueDAOInfo struct {
	QueueName              string                  `json:"queuename"` // no omitempty, queue name should not be empty
	Status                 string                  `json:"status,omitempty"`
	Partition              string                  `json:"partition"` // no omitempty, partition name should not be empty
	PendingResource        map[string]int64        `json:"pendingResource,omitempty"`
	MaxResource            map[string]int64        `json:"maxResource,omitempty"`
	GuaranteedResource     map[string]int64        `json:"guaranteedResource,omitempty"`
	AllocatedResource      map[string]int64        `json:"allocatedResource,omitempty"`
	PreemptingResource     map[string]int64        `json:"preemptingResource,omitempty"`
	HeadRoom               map[string]int64        `json:"headroom,omitempty"`
	IsLeaf                 bool                    `json:"isLeaf"`    // no omitempty, a false value gives a quick way to understand whether it's leaf.
	IsManaged              bool                    `json:"isManaged"` // no omitempty, a false value gives a quick way to understand whether it's managed.
	Properties             map[string]string       `json:"properties,omitempty"`
	Parent                 string                  `json:"parent,omitempty"`
	TemplateInfo           *TemplateInfo           `json:"template,omitempty"` // serialized under the key "template"
	Children               []PartitionQueueDAOInfo `json:"children,omitempty"`
	ChildNames             []string                `json:"childNames,omitempty"`
	AbsUsedCapacity        map[string]int64        `json:"absUsedCapacity,omitempty"`
	MaxRunningApps         uint64                  `json:"maxRunningApps,omitempty"`
	RunningApps            uint64                  `json:"runningApps,omitempty"`
	CurrentPriority        int32                   `json:"currentPriority"` // no omitempty, as the current priority value may be 0, which is a valid priority level
	AllocatingAcceptedApps []string                `json:"allocatingAcceptedApps,omitempty"`
	SortingPolicy          string                  `json:"sortingPolicy,omitempty"`
	PrioritySorting        bool                    `json:"prioritySorting"`   // no omitempty, false shows priority sorting status better
	PreemptionEnabled      bool                    `json:"preemptionEnabled"` // no omitempty, false shows preemption status better
	IsPreemptionFence      bool                    `json:"isPreemptionFence"` // no omitempty, a false value gives a quick way to understand whether it's fenced.
	PreemptionDelay        string                  `json:"preemptionDelay,omitempty"`
	IsPriorityFence        bool                    `json:"isPriorityFence"` // no omitempty, a false value gives a quick way to understand whether it's fenced.
	PriorityOffset         int32                   `json:"priorityOffset,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/rule_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership.
The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// RuleDAOInfo is the JSON representation of the rules of one partition.
type RuleDAOInfo struct {
	Partition string     `json:"partition"` // no omitempty, partition name should not be empty
	Rules     []*RuleDAO `json:"rules,omitempty"`
}

// FilterDAO is the JSON representation of a rule filter, referenced from
// RuleDAO.Filter. Users and groups can each be given as a list and as an
// expression string; only Type is always emitted.
type FilterDAO struct {
	Type      string   `json:"type"` // no omitempty, type must exist
	UserList  []string `json:"userList,omitempty"`
	GroupList []string `json:"groupList,omitempty"`
	UserExp   string   `json:"userExp,omitempty"`
	GroupExp  string   `json:"groupExp,omitempty"`
}

// RuleDAO is the JSON representation of a single rule. The type is
// recursive through ParentRule; a nil Filter or ParentRule is omitted from
// the output.
type RuleDAO struct {
	Name       string            `json:"name"` // no omitempty, name must exist
	Parameters map[string]string `json:"parameters,omitempty"`
	Filter     *FilterDAO        `json:"filter,omitempty"`
	ParentRule *RuleDAO          `json:"parentRule,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/scheduler_health.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// SchedulerHealthDAOInfo is the JSON representation of a health report: an
// overall flag plus the individual checks. No field renames its JSON key,
// so the keys are the Go field names ("Healthy", "HealthChecks").
type SchedulerHealthDAOInfo struct {
	Healthy      bool              // no omitempty, a false value gives a quick way to understand the result.
	HealthChecks []HealthCheckInfo `json:",omitempty"`
}

// HealthCheckInfo is the element type of SchedulerHealthDAOInfo.HealthChecks
// and describes the outcome of one check. As on the parent type, the JSON
// keys are the Go field names.
type HealthCheckInfo struct {
	Name             string `json:",omitempty"`
	Succeeded        bool   // no omitempty, a false value gives a quick way to understand the result.
	Description      string `json:",omitempty"`
	DiagnosisMessage string `json:",omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/ugm_info.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

package dao

// UserResourceUsageDAOInfo is the JSON representation of the resource usage
// of one user, with the queue usage tree hanging off Queues.
// NOTE(review): the meaning of the keys and values of Groups is not visible
// in this file; confirm against the code that fills the map.
type UserResourceUsageDAOInfo struct {
	UserName string                `json:"userName"` // no omitempty, user name should not be empty
	Groups   map[string]string     `json:"groups,omitempty"`
	Queues   *ResourceUsageDAOInfo `json:"queues,omitempty"`
}

// GroupResourceUsageDAOInfo is the JSON representation of the resource usage
// of one group, with the queue usage tree hanging off Queues.
type GroupResourceUsageDAOInfo struct {
	GroupName    string                `json:"groupName"` // no omitempty, group name should not be empty
	Applications []string              `json:"applications,omitempty"`
	Queues       *ResourceUsageDAOInfo `json:"queues,omitempty"`
}

// ResourceUsageDAOInfo is one node of a queue usage tree: usage and limits
// for a queue path, plus the child nodes. The type is recursive through
// Children.
type ResourceUsageDAOInfo struct {
	QueuePath           string                  `json:"queuePath"` // no omitempty, queue path should not be empty
	ResourceUsage       map[string]int64        `json:"resourceUsage,omitempty"`
	RunningApplications []string                `json:"runningApplications,omitempty"`
	MaxResources        map[string]int64        `json:"maxResources,omitempty"`
	MaxApplications     uint64                  `json:"maxApplications,omitempty"`
	Children            []*ResourceUsageDAOInfo `json:"children,omitempty"`
}

--------------------------------------------------------------------------------
/pkg/webservice/dao/yk_uuid.go:
--------------------------------------------------------------------------------
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements. See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership. The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package dao 20 | 21 | type YunikornID struct { 22 | InstanceUUID string 23 | } 24 | -------------------------------------------------------------------------------- /pkg/webservice/handler_mock_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | package webservice 19 | 20 | import ( 21 | "net/http" 22 | "time" 23 | ) 24 | 25 | // InternalMetricHistory needs resetting between tests 26 | // using defer to make sure it is cleaned up 27 | func ResetIMHistory() { 28 | imHistory = nil 29 | } 30 | 31 | // Mock response writer that is used for testing the handlers 32 | type MockResponseWriter struct { 33 | statusCode int 34 | outputBytes []byte 35 | header http.Header 36 | } 37 | 38 | func (trw *MockResponseWriter) Header() http.Header { 39 | if trw.header == nil { 40 | trw.header = make(http.Header) 41 | } 42 | return trw.header 43 | } 44 | 45 | func (trw *MockResponseWriter) Write(bytes []byte) (int, error) { 46 | trw.outputBytes = append(trw.outputBytes, bytes...) 
47 | return len(bytes), nil 48 | } 49 | 50 | func (trw *MockResponseWriter) WriteHeader(statusCode int) { 51 | trw.statusCode = statusCode 52 | } 53 | 54 | func (trw *MockResponseWriter) SetWriteDeadline(deadline time.Time) error { 55 | return nil 56 | } 57 | -------------------------------------------------------------------------------- /pkg/webservice/streaming_limit_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package webservice 20 | 21 | import ( 22 | "testing" 23 | 24 | "gotest.tools/v3/assert" 25 | 26 | "github.com/apache/yunikorn-core/pkg/common/configs" 27 | ) 28 | 29 | func TestAddRemoveHost(t *testing.T) { 30 | sl := NewStreamingLimiter() 31 | defer sl.Stop() 32 | assert.Assert(t, sl.AddHost("host-1")) 33 | assert.Assert(t, sl.AddHost("host-1")) 34 | assert.Assert(t, sl.AddHost("host-2")) 35 | assert.Equal(t, 2, len(sl.perHostStreams)) 36 | assert.Equal(t, uint64(3), sl.streams) 37 | 38 | sl.RemoveHost("host-3") // remove non-existing 39 | assert.Equal(t, 2, len(sl.perHostStreams)) 40 | assert.Equal(t, uint64(3), sl.streams) 41 | 42 | sl.RemoveHost("host-1") 43 | assert.Equal(t, 2, len(sl.perHostStreams)) 44 | assert.Equal(t, uint64(2), sl.streams) 45 | 46 | sl.RemoveHost("host-2") 47 | assert.Equal(t, 1, len(sl.perHostStreams)) 48 | assert.Equal(t, uint64(1), sl.streams) 49 | 50 | sl.RemoveHost("host-1") 51 | assert.Equal(t, 0, len(sl.perHostStreams)) 52 | assert.Equal(t, uint64(0), sl.streams) 53 | } 54 | 55 | func TestAddHost_TotalLimitHit(t *testing.T) { 56 | current := configs.GetConfigMap() 57 | defer func() { 58 | configs.SetConfigMap(current) 59 | }() 60 | configs.SetConfigMap(map[string]string{ 61 | configs.CMMaxEventStreams: "2", 62 | }) 63 | sl := NewStreamingLimiter() 64 | sl.Stop() 65 | 66 | assert.Assert(t, sl.AddHost("host-1")) 67 | assert.Assert(t, sl.AddHost("host-2")) 68 | assert.Assert(t, !sl.AddHost("host-3")) 69 | } 70 | 71 | func TestAddHost_PerHostLimitHit(t *testing.T) { 72 | current := configs.GetConfigMap() 73 | defer func() { 74 | configs.SetConfigMap(current) 75 | }() 76 | configs.SetConfigMap(map[string]string{ 77 | configs.CMMaxEventStreamsPerHost: "2", 78 | }) 79 | sl := NewStreamingLimiter() 80 | defer sl.Stop() 81 | 82 | assert.Assert(t, sl.AddHost("host-1")) 83 | assert.Assert(t, sl.AddHost("host-1")) 84 | assert.Assert(t, !sl.AddHost("host-1")) 85 | } 86 | 87 | func TestGetLimits(t *testing.T) { 88 | current := 
configs.GetConfigMap() 89 | defer func() { 90 | configs.SetConfigMap(current) 91 | }() 92 | sl := NewStreamingLimiter() 93 | defer sl.Stop() 94 | 95 | sl.setLimits() 96 | assert.Equal(t, uint64(100), sl.maxStreams) 97 | assert.Equal(t, uint64(15), sl.maxPerHostStreams) 98 | 99 | configs.SetConfigMap(map[string]string{ 100 | configs.CMMaxEventStreams: "123", 101 | }) 102 | sl.setLimits() 103 | assert.Equal(t, uint64(123), sl.maxStreams) 104 | assert.Equal(t, uint64(15), sl.maxPerHostStreams) 105 | 106 | configs.SetConfigMap(map[string]string{ 107 | configs.CMMaxEventStreamsPerHost: "321", 108 | }) 109 | sl.setLimits() 110 | assert.Equal(t, uint64(100), sl.maxStreams) 111 | assert.Equal(t, uint64(321), sl.maxPerHostStreams) 112 | 113 | configs.SetConfigMap(map[string]string{ 114 | configs.CMMaxEventStreams: "xxx", 115 | configs.CMMaxEventStreamsPerHost: "yyy", 116 | }) 117 | sl.setLimits() 118 | assert.Equal(t, uint64(100), sl.maxStreams) 119 | assert.Equal(t, uint64(15), sl.maxPerHostStreams) 120 | } 121 | 122 | func (sl *StreamingLimiter) Stop() { 123 | configs.RemoveConfigMapCallback(sl.id) 124 | } 125 | -------------------------------------------------------------------------------- /pkg/webservice/webservice.go: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. 
You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package webservice 20 | 21 | import ( 22 | "context" 23 | "errors" 24 | "net/http" 25 | "sync/atomic" 26 | "time" 27 | 28 | "github.com/julienschmidt/httprouter" 29 | 30 | "go.uber.org/zap" 31 | 32 | "github.com/apache/yunikorn-core/pkg/log" 33 | "github.com/apache/yunikorn-core/pkg/metrics/history" 34 | "github.com/apache/yunikorn-core/pkg/scheduler" 35 | ) 36 | 37 | var imHistory *history.InternalMetricsHistory 38 | var schedulerContext atomic.Pointer[scheduler.ClusterContext] 39 | 40 | type WebService struct { 41 | httpServer *http.Server 42 | } 43 | 44 | func newRouter() *httprouter.Router { 45 | router := httprouter.New() 46 | for _, webRoute := range webRoutes { 47 | handler := loggingHandler(webRoute.HandlerFunc, webRoute.Name) 48 | router.Handler(webRoute.Method, webRoute.Pattern, handler) 49 | } 50 | return router 51 | } 52 | 53 | func loggingHandler(inner http.Handler, name string) http.HandlerFunc { 54 | return func(w http.ResponseWriter, r *http.Request) { 55 | start := time.Now() 56 | inner.ServeHTTP(w, r) 57 | log.Log(log.REST).Debug("Web router call details", 58 | zap.String("name", name), 59 | zap.String("method", r.Method), 60 | zap.String("uri", r.RequestURI), 61 | zap.Duration("duration", time.Since(start))) 62 | } 63 | } 64 | 65 | // StartWebApp starts the web app on the default port. 
66 | func (m *WebService) StartWebApp() { 67 | router := newRouter() 68 | m.httpServer = &http.Server{ 69 | Addr: ":9080", 70 | Handler: router, 71 | ReadHeaderTimeout: 10 * time.Second, 72 | } 73 | 74 | log.Log(log.REST).Info("web-app started", zap.Int("port", 9080)) 75 | go func() { 76 | httpError := m.httpServer.ListenAndServe() 77 | if httpError != nil && !errors.Is(httpError, http.ErrServerClosed) { 78 | log.Log(log.REST).Error("HTTP serving error", 79 | zap.Error(httpError)) 80 | } 81 | }() 82 | } 83 | 84 | func NewWebApp(context *scheduler.ClusterContext, internalMetrics *history.InternalMetricsHistory) *WebService { 85 | m := &WebService{} 86 | schedulerContext.Store(context) 87 | imHistory = internalMetrics 88 | return m 89 | } 90 | 91 | func (m *WebService) StopWebApp() error { 92 | if m.httpServer != nil { 93 | // graceful shutdown in 5 seconds 94 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 95 | defer cancel() 96 | return m.httpServer.Shutdown(ctx) 97 | } 98 | 99 | return nil 100 | } 101 | -------------------------------------------------------------------------------- /scripts/generate-fsm-graph-images.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one 5 | # or more contributor license agreements. See the NOTICE file 6 | # distributed with this work for additional information 7 | # regarding copyright ownership. The ASF licenses this file 8 | # to you under the Apache License, Version 2.0 (the 9 | # "License"); you may not use this file except in compliance 10 | # with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

WORKDIR=$(pwd)/build/fsm

# output_fsm NAME
# Writes a cleaned up copy of NAME.dot to stdout: the first line of the file,
# an added "concentrate=true" option, then the remaining lines after filtering.
output_fsm() {
  # print digraph header
  head -n 1 "$1.dot"
  # add options
  echo "concentrate=true"
  # print rest of file, eliminating transitions from same state to same state
  # and cleaning up some verbose labels
  tail -n +2 "$1.dot" | \
    grep -E -v '"(\w+)" -> "\1"' | \
    grep -v AppAllocationAsk | \
    sed "s/Application/ App/g"
}

cd "${WORKDIR}"

# render every graph in the work directory to a PNG with the same base name
for dot_file in *.dot; do
  # without a match the pattern is left as the literal "*.dot": fail with a
  # clear message instead of feeding a non-existing file to dot
  if [ ! -e "${dot_file}" ]; then
    echo "no .dot files found in ${WORKDIR}" >&2
    exit 1
  fi
  base=${dot_file%.dot}

  output_fsm "${base}" | dot -Tpng > "${base}.png"
done