├── proxlb ├── models │ ├── __init__.py │ ├── pools.py │ ├── features.py │ ├── ha_rules.py │ ├── groups.py │ ├── tags.py │ ├── balancing.py │ ├── nodes.py │ └── guests.py ├── utils │ ├── __init__.py │ ├── version.py │ ├── cli_parser.py │ ├── config_parser.py │ ├── logger.py │ └── helper.py └── main.py ├── debian ├── source │ └── format ├── rules ├── install ├── prerm ├── postinst ├── control └── changelog ├── setup.cfg ├── .changelogs ├── 0.9.0 │ ├── release_meta.yml │ └── 1_initial_release.yaml ├── 0.9.9 │ ├── release_meta.yml │ └── 1_initial_release.yaml ├── 1.0.0 │ ├── release_meta.yml │ ├── 27_add_container_lxc_support.yml │ ├── 10_add_container_support.yml │ ├── 6_add_dry_run_support.yml │ ├── 3_create_grouping_include_feature.yml │ ├── 7_ignore_vms_by_wildcard.yml │ ├── 16_add_rebalancing_by_assigned_resources.yml │ ├── 4_create_grouping_exclude_feature.yml │ ├── 7_ignore_vm_by_tag_proxlb_ignore_vm.yml │ ├── 29_add_option_rebalance_by_node_percent.yml │ └── 17-add-configurable-log-verbosity.yml ├── 1.0.2 │ ├── release_meta.yml │ ├── 45_fix_daemon_timer.yml │ ├── 49_fix_cmake_debian_packaging.yml │ ├── 41_add_option_run_migration_parallel_or_sequential.yml │ └── 40_add_option_to_run_only_on_cluster_master_node.yml ├── 1.0.3 │ ├── release_meta.yml │ ├── 51_add_storage_balancing.yml │ ├── bug_fix_cluster_master_only.yml │ ├── 67_fix_anti_affinity_rules.yml │ ├── 64_improve_error_handling_and_validations.yml │ ├── 8_add_best_next_node_for_placement.yml │ ├── 74_fix_documentation_master_only.yml │ ├── 68_adjust_logging_ha_services.yml │ ├── 71_fix_ignore_vm_node_handling_if_unset.yml │ ├── 60_allow_api_hosts_to_be_given_as_an_optional_list.yml │ ├── docs_fix_default_config_add_dedicated_user_howto.yml │ └── 53_code_improvements.yml ├── 1.0.4 │ ├── release_meta.yml │ ├── 89_add_version_output.yml │ ├── 91_make_api_timeout_configureable.yml │ ├── 81_documentation_infrastrucutre.yml │ ├── 75_fix_cpu_balancing.yml │ ├── 58_add_maintenance_mode.yml │ └── 
79_storage_balancing_whitelist.yml ├── 1.0.5 │ ├── release_meta.yml │ ├── 113_fix_migration_from_local_disk.yml │ ├── 104_docs_bool_adjustment.yml │ ├── 98_fix_log_verbosity_opts.yml │ ├── 107_fix_offline_node_eval.yml │ └── 106_fix_maintenance_mode_compare_str_list.yml ├── 1.0.6 │ ├── release_meta.yml │ ├── 115_fix_daemon_scheduler_bool_time_fix.yml │ └── 119_fix_maintenance_mode_cli_and_config.yml ├── 1.1.0 │ ├── release_meta.yml │ ├── 125-add-proxmox-api-authentication-support.yml │ ├── 137_fix_systemd_unit_file.yml │ └── 114_refactor_code_base.yml ├── 1.1.1 │ ├── release_meta.yml │ ├── 163_fix_ignore_vm_tag.yml │ ├── 165_improve_logging_servity.yml │ ├── 171_set_correct_python_path_docker_image.yml │ ├── 176_change_turn_daemon_mode_on_default.yml │ ├── 200_requery_zero_guest_cpu_used.yml │ ├── 174_honor_balancing_activation_value.yml │ ├── 168_add_more_flexible_schedules.yml │ ├── 180_change_default_balancing_to_used_instead_assigned.yml │ ├── 184_validate_for_sufficient_user_permissions.yml │ ├── 204_fix_migration_log_relationship_of_guest_type.yml │ ├── 205_add_api_upstream_error_message_on_migration_failure.yml │ ├── 187_allow_use_of_minutes_instead_of_hours.yml │ ├── 197_remove_hard_coded_memory_usage_from_lowest_usage_node.yml │ ├── 185-logging-handler-for-no-systemd-integration.yml │ └── 195_set_cpu_used_to_cpu_usage_times_core_count.yml ├── 1.1.10 │ ├── release_meta.yml │ ├── 335-prevalidate-affinity-matrix.yml │ ├── 359-add-pve8-user-protections-for-conntrack-aware-migrations.yml │ ├── 368_fix_crash_enumerating_pools_with_storage_members.yml │ └── 361_fix_proxmox_api_connection_validation.yml ├── 1.1.2 │ ├── release_meta.yml │ ├── 137_fix_systemd_unit_file.yml │ ├── 157_add_proxmox_api_retry_mechanism.yml │ ├── 218_add_1_to_one_relationship_guest_node_pinning.yml │ └── 222_fix_force_type_cast_cpu_metrics_to_int.yml ├── 1.1.3 │ ├── release_meta.yml │ ├── 189_add_reload_function.yml │ ├── 232_align_maintenance_mode_proxmox_ha.yml │ ├── 
241_make_amount_of_parallel_migrations_configurable.yml │ ├── 239_add_optional_delay_time_until_service_starts.yml │ └── 94_cpu_balancing_use_avg_instead_current_consumption.yml ├── 1.1.4 │ ├── release_meta.yml │ ├── 255_fix_loglevels.yml │ ├── 245_add_guest_pinning_to_groups_of_nodes.yml │ └── 248_fix_dry_run_combination_balancing_disabled.yml ├── 1.1.5 │ ├── release_meta.yml │ └── 260_allow_custom_api_ports.yml ├── 1.1.6 │ ├── release_meta.yml │ ├── 268_fix_balancing_type_eval.yml │ ├── 290_validate_user_token_syntax.yml │ ├── 296_fix_validate_node_presence_when_pinning_guests.yml │ ├── 291_fix_stack_trace_no_user_permissions_testing.yml │ └── 295_fix_overprovisioning_first_node_if_anti_affinity_group_has_only_one-member.yml ├── 1.1.7 │ ├── release_meta.yml │ ├── 305_add_conntrack_support_for_migrations.yml │ ├── 308_fix_only_validate_valid_jobids.yml │ ├── 304_add_graceful_shutdown_sigint.yml │ └── 310_guest-object-names-not-being-evaluated-in-log.yml ├── 1.1.8 │ ├── release_meta.yml │ ├── 329_add_log_prefix.yml │ ├── 318_fix_conntrack_aware_migrations_api_pve8.yml │ └── 317_container_image_non_root.yml ├── 1.1.9 │ ├── release_meta.yml │ ├── 342_add_memory_balancing_threshold.yml │ ├── 343_add_affinity_rules_support_by_pools.yml │ └── 337_add_pressure_based_balancing.yml └── 1.1.11 │ ├── 395_fix_pool_based_node_pinning.yml │ ├── 335_fix_affinity_matrix_prevalidation.yml │ ├── 275_add_overprovisioning_safety_guard.yml │ ├── 387_select_balancing_workloads_by_size.yml │ └── 391_add_native_proxmox_ha_rules_support.yml ├── requirements.txt ├── .gitignore ├── helm └── proxlb │ ├── Chart.yaml │ ├── templates │ ├── configmap.yaml │ ├── _helpers.yaml │ └── deployment.yaml │ └── values.yaml ├── misc ├── 02-create-changelog.sh └── 01-replace-version.sh ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── 10-code-liniting.yml │ ├── 30-pipeline-build-container-multi-arch.yml │ ├── 30-pipeline-build-container-amd64.yml │ ├── 
30-pipeline-build-container-arm64.yml │ ├── 20-pipeline-build-rpm-package.yml │ └── 20-pipeline-build-deb-package.yml ├── service └── proxlb.service ├── setup.py ├── Dockerfile ├── docs ├── 99-faq.md ├── 01_requirements.md └── 02_installation.md ├── config └── proxlb_example.yaml ├── CONTRIBUTING.md └── CHANGELOG.md /proxlb/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxlb/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | ignore = E501, W503 3 | -------------------------------------------------------------------------------- /.changelogs/0.9.0/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-02-01 2 | -------------------------------------------------------------------------------- /.changelogs/0.9.9/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-07-06 2 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-08-01 2 | -------------------------------------------------------------------------------- /.changelogs/1.0.2/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-08-13 2 | 
-------------------------------------------------------------------------------- /.changelogs/1.0.3/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-09-12 2 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-10-11 2 | -------------------------------------------------------------------------------- /.changelogs/1.0.5/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-10-30 2 | -------------------------------------------------------------------------------- /.changelogs/1.0.6/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2024-12-24 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.0/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-04-01 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-04-20 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.10/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-11-25 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.2/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-05-13 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.3/release_meta.yml: -------------------------------------------------------------------------------- 1 | 
date: 2025-06-19 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.4/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-06-27 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.5/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-07-14 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.6/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-09-04 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.7/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-09-19 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.8/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-10-09 2 | -------------------------------------------------------------------------------- /.changelogs/1.1.9/release_meta.yml: -------------------------------------------------------------------------------- 1 | date: 2025-10-30 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | proxmoxer 3 | requests 4 | urllib3 5 | PyYAML 6 | -------------------------------------------------------------------------------- /.changelogs/0.9.0/1_initial_release.yaml: -------------------------------------------------------------------------------- 1 | added: 2 | - Development release of ProxLB. 
3 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/89_add_version_output.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add version output cli arg. [#89] 3 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | %: 3 | dh $@ --with python3 --buildsystem=pybuild 4 | 5 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/51_add_storage_balancing.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add storage balancing function. [#51] 3 | -------------------------------------------------------------------------------- /.changelogs/0.9.9/1_initial_release.yaml: -------------------------------------------------------------------------------- 1 | added: 2 | - Initial public development release of ProxLB. 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/27_add_container_lxc_support.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add LXC/Container integration. 
[#27] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | .DS_Store 4 | build/ 5 | dist/ 6 | *.egg-info/ 7 | proxlb_dev.yaml 8 | -------------------------------------------------------------------------------- /debian/install: -------------------------------------------------------------------------------- 1 | proxlb /usr/lib/python3/dist-packages/ 2 | service/proxlb.service /lib/systemd/system/ 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/10_add_container_support.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add Docker/Podman support. [#10 by @daanbosch] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.2/45_fix_daemon_timer.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Fix daemon timer to use hours instead of minutes. [#45] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.11/395_fix_pool_based_node_pinning.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed pool based node pinning (@gyptazy). [#395] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/bug_fix_cluster_master_only.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed `master_only` function by inverting the condition. 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/91_make_api_timeout_configureable.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add feature to make API timeout configurable. 
[#91] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/81_documentation_infrastrucutre.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix documentation for the underlying infrastructure. [#81] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.5/113_fix_migration_from_local_disk.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix migration from local disks (by @greenlogles). [#113] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.0/125-add-proxmox-api-authentication-support.yml: -------------------------------------------------------------------------------- 1 | feature: 2 | - Add Proxmox API authentication support. [#125] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/6_add_dry_run_support.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add dry-run support to see what kind of rebalancing would be done. [#6] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/67_fix_anti_affinity_rules.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix anti-affinity rules not evaluating a new and different node. 
[#67] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/163_fix_ignore_vm_tag.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix tag evaluation for VMs for being ignored for further balancing [#163] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/165_improve_logging_servity.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Improve logging verbosity of messages that had a wrong severity [#165] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.4/255_fix_loglevels.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Modified log levels to make output lighter at INFO level (@pmarasse) [#255] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.5/260_allow_custom_api_ports.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Allow custom API ports instead of fixed tcp/8006 (@gyptazy). [#260] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.9/342_add_memory_balancing_threshold.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add an optional memory balancing threshold (@gyptazy). [#342] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/64_improve_error_handling_and_validations.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Improved the overall validation and error handling. 
[#64] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.5/104_docs_bool_adjustment.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Change docs to make bool usage in configs more clear (by @gyptazy). [#104] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/171_set_correct_python_path_docker_image.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix Python path for Docker entrypoint (by @crandler) [#170] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.9/343_add_affinity_rules_support_by_pools.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add affinity/anti-affinity support by pools (@gyptazy). [#343] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/3_create_grouping_include_feature.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add include grouping feature to rebalance VMs bundled to new nodes. [#3] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.5/98_fix_log_verbosity_opts.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix allowed values (add DEBUG, WARNING) for log verbosity (by @gyptazy). 
[#98] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/176_change_turn_daemon_mode_on_default.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Change the default behaviour of the daemon mode to active [#176] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.4/245_add_guest_pinning_to_groups_of_nodes.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Allow pinning of guests to a group of nodes (@gyptazy). [#245] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.6/268_fix_balancing_type_eval.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix balancing evaluation of guest types (e.g., VM or CT) (@gyptazy). [#268] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.7/305_add_conntrack_support_for_migrations.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add conntrack state aware migrations of VMs (@gyptazy). [#305] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.7/308_fix_only_validate_valid_jobids.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix crash when validating absent migration job ids. (@gyptazy). [#308] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/7_ignore_vms_by_wildcard.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add feature to prevent VMs from being relocated by defining a wildcard pattern. 
[#7] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.2/49_fix_cmake_debian_packaging.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix CMake packaging for Debian package to avoid overwriting the config file. [#49] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/8_add_best_next_node_for_placement.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add cli arg `-b` to return the next best node for next VM/CT placement. [#8] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.0/137_fix_systemd_unit_file.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix the systemd unit file to start ProxLB after pveproxy (by @robertdahlem). [#137] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/200_requery_zero_guest_cpu_used.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Requery a guest if that running guest reports 0 cpu usage (by @glitchvern) [#200] -------------------------------------------------------------------------------- /.changelogs/1.1.2/137_fix_systemd_unit_file.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix systemd unit file to run after network on non PVE nodes (by @robertdahlem) [#137] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.3/189_add_reload_function.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add reload (SIGHUP) function to ProxLB to reload the configuration (by @gyptazy). 
[#189] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.7/304_add_graceful_shutdown_sigint.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add graceful shutdown for SIGINT (e.g., CTRL + C abort). (@gyptazy). [#304] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.8/329_add_log_prefix.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Add a static ProxLB prefix to the log output when used by journal handler (@gyptazy). [#329] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.2/41_add_option_run_migration_parallel_or_sequential.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add option to run migrations in parallel or sequentially. [#41] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/75_fix_cpu_balancing.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix CPU balancing where calculations are done in float instead of int. (by @glitchvern) [#75] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/174_honor_balancing_activation_value.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Honor the value when balancing should not be performed and stop balancing [#174] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.3/232_align_maintenance_mode_proxmox_ha.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Align maintenance mode with Proxmox HA maintenance mode (by @gyptazy). 
[#232] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.6/290_validate_user_token_syntax.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add validation for provided API user token id to avoid confusions (@gyptazy). [#291] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/16_add_rebalancing_by_assigned_resources.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add option to rebalance by assigned VM resources to avoid overprovisioning. [#16] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/74_fix_documentation_master_only.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix documentation for the master_only parameter placed in the wrong config section. [#74] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/58_add_maintenance_mode.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add maintenance mode to evacuate a node and move workloads for other nodes in the cluster. [#58] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/168_add_more_flexible_schedules.yml: -------------------------------------------------------------------------------- 1 | feature: 2 | - Add a more flexible way to define schedules in minutes or hours (by @gyptazy) [#168] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.11/335_fix_affinity_matrix_prevalidation.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed affinity matrix pre-validation by inverting validations (@Thalagyrt). 
[#335] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/4_create_grouping_exclude_feature.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add exclude grouping feature to rebalance VMs from being located together to new nodes. [#4] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/7_ignore_vm_by_tag_proxlb_ignore_vm.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add feature to prevent VMs from being relocated by defining the 'plb_ignore_vm' tag. [#7] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/68_adjust_logging_ha_services.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Provide a more reasonable output when HA services are not active in a Proxmox cluster. [#68] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.5/107_fix_offline_node_eval.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix node (and its objects) evaluation when not reachable, e.g., maintenance (by @gyptazy). 
[#107] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/180_change_default_balancing_to_used_instead_assigned.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Change the default balancing mode to used instead of assigned [#180] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/184_validate_for_sufficient_user_permissions.yml: -------------------------------------------------------------------------------- 1 | feature: 2 | - Add validation for the minimum required permissions of a user in Proxmox [#184] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.2/157_add_proxmox_api_retry_mechanism.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add a configurable retry mechanism when connecting to the Proxmox API (by @gyptazy) [#157] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.3/241_make_amount_of_parallel_migrations_configurable.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Make the amount of parallel migrations configurable (by @gyptazy). [#241] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/29_add_option_rebalance_by_node_percent.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add option_mode to rebalance by node's free resources in percent (instead of bytes). 
[#29] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/71_fix_ignore_vm_node_handling_if_unset.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix handling of unset `ignore_nodes` and `ignore_vms` resulted in an attribute error. [#71] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.11/275_add_overprovisioning_safety_guard.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed missing overprovisioning safety guard to avoid node overprovisioning (@gyptazy). [#275] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.7/310_guest-object-names-not-being-evaluated-in-log.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix guest object names are not being evaluated in debug log. (@gyptazy). [#310] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.3/60_allow_api_hosts_to_be_given_as_an_optional_list.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add feature to allow the API hosts being provided as a comma separated list. [#60] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.6/296_fix_validate_node_presence_when_pinning_guests.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Validate for node presence when pinning guests to avoid crashing (@gyptazy). 
[#296] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/204_fix_migration_log_relationship_of_guest_type.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix the guest type relationship in the logs when a migration job failed (by @gyptazy) [#204] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.10/335-prevalidate-affinity-matrix.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Prevent redundant rebalancing by validating existing affinity enforcement before taking actions (@gyptazy). [#335] -------------------------------------------------------------------------------- /.changelogs/1.1.8/318_fix_conntrack_aware_migrations_api_pve8.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix API errors when using conntrack aware migration with older PVE versions (@gyptazy). [#318] 3 | -------------------------------------------------------------------------------- /helm/proxlb/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: proxlb 3 | description: A Helm chart for self-hosted ProxLB 4 | type: application 5 | version: "1.1.10" 6 | appVersion: "v1.1.10" 7 | -------------------------------------------------------------------------------- /.changelogs/1.0.2/40_add_option_to_run_only_on_cluster_master_node.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add option to run ProxLB only on the Proxmox's master node in the cluster (reg. HA feature). 
[#40] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.6/115_fix_daemon_scheduler_bool_time_fix.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix that a scheduler time definition of 1 (int) gets wrongly interpreted as a bool (by @gyptazy). [#115] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.6/119_fix_maintenance_mode_cli_and_config.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix maintenance mode when using cli arg and config mode by using the merged list (by @CartCaved). [#119] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.11/387_select_balancing_workloads_by_size.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add possibility to sort and select balancing workloads by smaller/larger guest objects (@gyptazy). [#387] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.5/106_fix_maintenance_mode_compare_str_list.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix evaluation of maintenance mode where comparing list & string resulted in a crash (by @glitchvern). 
[#106] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/205_add_api_upstream_error_message_on_migration_failure.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Providing the API upstream error message when migration fails in debug mode (by @gyptazy) [#205] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.3/239_add_optional_delay_time_until_service_starts.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add optional wait time parameter to delay execution until the service takes action (by @gyptazy). #239 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.6/291_fix_stack_trace_no_user_permissions_testing.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix stacktrace output when validating permissions on non existing users in Proxmox (@gyptazy). [#291] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.0/17-add-configurable-log-verbosity.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add feature to make log verbosity configurable [#17]. 3 | changed: 4 | - Adjusted general logging and log more details. 
5 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/187_allow_use_of_minutes_instead_of_hours.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - allow the use of minutes instead of hours and only accept hours or minutes in the format (by @glitchvern) [#187] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.2/218_add_1_to_one_relationship_guest_node_pinning.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add 1-to-1 relationships between guest and hypervisor node to pin a guest to a node (by @gyptazy) [#218] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.2/222_fix_force_type_cast_cpu_metrics_to_int.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Force type cast cpu count of guests to int for some corner cases where a str got returned (by @gyptazy). [#222] 3 | -------------------------------------------------------------------------------- /.changelogs/1.0.4/79_storage_balancing_whitelist.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Run storage balancing only on supported shared storages. [#79] 3 | - Run storage balancing only when needed to save time. [#79] 4 | -------------------------------------------------------------------------------- /.changelogs/1.1.10/359-add-pve8-user-protections-for-conntrack-aware-migrations.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add safety-guard for PVE 8 users when activating conntrack-aware migrations mistakenly (@gyptazy).
[#359] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.4/248_fix_dry_run_combination_balancing_disabled.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed an issue where balancing was performed in combination of deactivated balancing and dry-run mode (@gyptazy). [#248] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.10/368_fix_crash_enumerating_pools_with_storage_members.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed a crash during PVE resource pool enumeration by skipping members not having a 'name' property (@stefanoettl). [#368] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.6/295_fix_overprovisioning_first_node_if_anti_affinity_group_has_only_one-member.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix Overprovisioning first node if anti_affinity_group has only one member (@MiBUl-eu). [#295] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.8/317_container_image_non_root.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Container image does not run as root anymore (@mikaelkrantz945). [#317] 3 | - Container image uses venv for running ProxLB (@mikaelkrantz945). 
[#317] 4 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/197_remove_hard_coded_memory_usage_from_lowest_usage_node.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Remove hard coded memory usage from lowest usage node and use method and mode specified in configuration instead (by @glitchvern) [#197] -------------------------------------------------------------------------------- /.changelogs/1.0.3/docs_fix_default_config_add_dedicated_user_howto.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fix bug in the `proxlb.conf` in the vm_balancing section. 3 | added: 4 | - Add doc how to add dedicated user for authentication. (by @Dulux-Oz) 5 | -------------------------------------------------------------------------------- /.changelogs/1.1.3/94_cpu_balancing_use_avg_instead_current_consumption.yml: -------------------------------------------------------------------------------- 1 | changed: 2 | - Use the average CPU consumption of a guest within the last 60 minutes instead of the current CPU usage (by @philslab-ninja & @gyptazy). 
[#94] 3 | -------------------------------------------------------------------------------- /misc/02-create-changelog.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | git clone https://github.com/gyptazy/changelog-fragments-creator.git 3 | ./changelog-fragments-creator/changelog-creator -f .changelogs/ -o CHANGELOG.md 4 | echo "Created changelog file" -------------------------------------------------------------------------------- /.changelogs/1.1.1/185-logging-handler-for-no-systemd-integration.yml: -------------------------------------------------------------------------------- 1 | fix: 2 | - add handler to log messages with severity less than info to the screen when there is no systemd integration, for instance, inside a docker container (by @glitchvern) [#185] 3 | -------------------------------------------------------------------------------- /.changelogs/1.1.10/361_fix_proxmox_api_connection_validation.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Fixed the Proxmox API connection validation which returned a false-positive logging message of timeouts (@gyptazy). 
[#361] 3 | - Refactored Proxmox API connection functions 4 | -------------------------------------------------------------------------------- /.changelogs/1.1.1/195_set_cpu_used_to_cpu_usage_times_core_count.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Set cpu_used to the cpu usage, which is a percent, times the total number of cores to get a number where guest cpu_used can be added to nodes cpu_used and be meaningful (by @glitchvern) [#195] -------------------------------------------------------------------------------- /.changelogs/1.1.11/391_add_native_proxmox_ha_rules_support.yml: -------------------------------------------------------------------------------- 1 | feature: 2 | - Add support for Proxmox's native HA (affinity/anti-affinity) rules (@gyptazy). [#391] 3 | - Add support for Proxmox's native HA (node-affinity) rules for pinning guests to nodes (@gyptazy). [#391] 4 | -------------------------------------------------------------------------------- /.changelogs/1.1.9/337_add_pressure_based_balancing.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add pressure (PSI) based balancing for memory, cpu, disk (req. PVE9 or greater) (@gyptazy). [#337] 3 | - Pressure (PSI) based balancing for nodes 4 | - Pressure (PSI) based balancing for guests 5 | - Add PVE version evaluation 6 | -------------------------------------------------------------------------------- /helm/proxlb/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.configmap.create }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: proxlb-config 6 | labels: 7 | {{- include "proxlb.labels" . 
| nindent 4 }} 8 | data: 9 | proxlb.yaml: | 10 | {{ toYaml .Values.configmap.config | indent 4 }} 11 | {{ end }} -------------------------------------------------------------------------------- /.changelogs/1.0.3/53_code_improvements.yml: -------------------------------------------------------------------------------- 1 | added: 2 | - Add a convert function to cast all bool alike options from configparser to bools. [#53] 3 | - Add config parser options for future features. [#53] 4 | - Add a config version schema that must be supported by ProxLB. [#53] 5 | changed: 6 | - Improve the underlying code base for future implementations. [#53] 7 | -------------------------------------------------------------------------------- /proxlb/utils/version.py: -------------------------------------------------------------------------------- 1 | __app_name__ = "ProxLB" 2 | __app_desc__ = "An advanced resource scheduler and load balancer for Proxmox clusters." 3 | __author__ = "Florian Paul Azim Hoberg " 4 | __copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)" 5 | __license__ = "GPL-3.0" 6 | __version__ = "1.1.10" 7 | __url__ = "https://github.com/gyptazy/ProxLB" 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Create a new request for a missing feature 4 | title: "`Feature`: " 5 | labels: feature, needs-analysis 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## General 11 | <-- Describe the feature idea from a high level perspective. --> 12 | 13 | ## Details 14 | <-- Provide some more details about the new feature request and provide examples.
--> 15 | -------------------------------------------------------------------------------- /service/proxlb.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=ProxLB - A loadbalancer for Proxmox clusters 3 | After=network-online.target pveproxy.service 4 | Wants=network-online.target pveproxy.service 5 | 6 | [Service] 7 | ExecStart=python3 /usr/lib/python3/dist-packages/proxlb/main.py -c /etc/proxlb/proxlb.yaml 8 | User=plb 9 | ExecReload=/bin/kill -HUP $MAINPID 10 | KillMode=process 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | -------------------------------------------------------------------------------- /debian/prerm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | #DEBHELPER# 5 | if [ "$1" = "remove" ]; then 6 | systemctl stop proxlb.service || true 7 | systemctl disable proxlb.service || true 8 | 9 | # Remove the 'plb' user if it exists 10 | if id "plb" &>/dev/null; then 11 | userdel --remove plb 12 | echo "User 'plb' removed." 13 | else 14 | echo "User 'plb' does not exist, skipping removal." 
15 | fi 16 | fi 17 | -------------------------------------------------------------------------------- /helm/proxlb/templates/_helpers.yaml: -------------------------------------------------------------------------------- 1 | {{- define "proxlb.fullname" -}} 2 | {{- printf "%s-%s" .Release.Name .Chart.Name | trunc 63 | trimSuffix "-" -}} 3 | {{- end -}} 4 | 5 | {{ define "proxlb.labels" }} 6 | app.kubernetes.io/name: {{ .Release.Name }} 7 | app.kubernetes.io/managed-by: Helm 8 | app.kubernetes.io/version: {{ .Chart.AppVersion }} 9 | app.kubernetes.io/component: proxlb 10 | {{- if .Values.labels }} 11 | {{ toYaml .Values.labels }} 12 | {{- end }} 13 | {{ end }} -------------------------------------------------------------------------------- /debian/postinst: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | #DEBHELPER# 5 | if [ "$1" = "configure" ]; then 6 | systemctl enable proxlb.service 7 | systemctl restart proxlb.service || true 8 | 9 | # Create the 'plb' user if it does not exist 10 | if ! id "plb" &>/dev/null; then 11 | useradd --system --home /var/lib/proxlb --create-home --shell /usr/sbin/nologin --group nogroup plb 12 | echo "User 'plb' created." 13 | else 14 | echo "User 'plb' already exists, skipping creation." 
15 | fi 16 | fi 17 | -------------------------------------------------------------------------------- /misc/01-replace-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | VERSION="1.1.9.1" 3 | 4 | # ProxLB 5 | sed -i "s/^__version__ = .*/__version__ = \"$VERSION\"/" "proxlb/utils/version.py" 6 | sed -i "s/version=\"[0-9]*\.[0-9]*\.[0-9]*\"/version=\"$VERSION\"/" setup.py 7 | 8 | # Helm Chart 9 | sed -i "s/^version: .*/version: \"$VERSION\"/" helm/proxlb/Chart.yaml 10 | sed -i "s/^appVersion: .*/appVersion: \"v$VERSION\"/" helm/proxlb/Chart.yaml 11 | sed -i "s/^tag: .*/tag: \"v$VERSION\"/" helm/proxlb/values.yaml 12 | 13 | echo "OK: Versions have been sucessfully set to $VERSION" 14 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: proxlb 2 | Maintainer: Florian Paul Azim Hoberg 3 | Section: admin 4 | Priority: optional 5 | Standards-Version: 4.5.0 6 | Build-Depends: debhelper-compat (= 13), dh-python, python3-all, python3-setuptools 7 | 8 | Package: proxlb 9 | Architecture: all 10 | Depends: ${python3:Depends}, ${misc:Depends}, python3-requests, python3-urllib3, python3-packaging, python3-proxmoxer, python3-yaml 11 | Description: An advanced resource scheduler and load balancer for Proxmox clusters 12 | An advanced resource scheduler and load balancer for Proxmox clusters that also supports maintenance mode and affinity/anti-affinity rules. 
13 | -------------------------------------------------------------------------------- /.github/workflows/10-code-liniting.yml: -------------------------------------------------------------------------------- 1 | name: Code linting 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.8"] 9 | steps: 10 | - uses: actions/checkout@v3 11 | - name: Setup dependencies for code linting 12 | uses: actions/setup-python@v4 13 | with: 14 | python-version: ${{ matrix.python-version }} 15 | - name: Install additional dependencies for code linting 16 | run: | 17 | sudo apt-get update 18 | sudo apt-get -y install python3-pycodestyle pycodestyle 19 | - name: Run code linting on ProxLB Python code 20 | run: | 21 | pycodestyle proxlb/* 22 | -------------------------------------------------------------------------------- /.github/workflows/30-pipeline-build-container-multi-arch.yml: -------------------------------------------------------------------------------- 1 | name: "Build Container Image: Multiarch" 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout repository 8 | uses: actions/checkout@v4 9 | - name: Set up QEMU 10 | uses: docker/setup-qemu-action@v3 11 | - name: Set up Docker Buildx 12 | uses: docker/setup-buildx-action@v3 13 | - name: Build multi-arch image and save as tar 14 | run: | 15 | docker buildx build \ 16 | --platform linux/amd64,linux/arm64 \ 17 | --output type=tar,dest=proxlb_image_multiarch.tar \ 18 | . 
19 | - name: Upload Docker image artifact 20 | uses: actions/upload-artifact@v4 21 | with: 22 | name: proxlb-image-multiarch 23 | path: proxlb_image_multiarch.tar 24 | -------------------------------------------------------------------------------- /.changelogs/1.1.0/114_refactor_code_base.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | - Refactored code base for ProxLB [#114] 3 | - Switched to `pycodestyle` for linting [#114] 4 | - Package building will be done within GitHub actions pipeline [#114] 5 | - ProxLB now only returns a warning when no guests for further balancing are present (instead of quitting) [#132] 6 | - All nodes (according to the free resources) will be used now [#130] 7 | - Fixed logging outputs where highest/lowest were mixed-up [#129] 8 | - Stop balancing when movement would get worse (new force param to enforce affinity rules) [#128] 9 | - Added requested documentation regarding Proxmox HA groups [#127] 10 | - Rewrite of the whole affinity/anti-affinity rules evaluation and placement [#123] 11 | - Fixed the `ignore` parameter for nodes where the node and guests on the node will be untouched [#102] 12 | -------------------------------------------------------------------------------- /.github/workflows/30-pipeline-build-container-amd64.yml: -------------------------------------------------------------------------------- 1 | name: "Build Container Image: AMD64" 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout repository 8 | uses: actions/checkout@v4 9 | - name: Set up QEMU 10 | uses: docker/setup-qemu-action@v3 11 | - name: Set up Docker Buildx 12 | uses: docker/setup-buildx-action@v3 13 | - name: Build amd64 image and save as tar 14 | run: | 15 | docker buildx build \ 16 | --platform linux/amd64 \ 17 | --load \ 18 | -t proxlb-image:amd64 \ 19 | .
20 | 21 | docker save proxlb-image:amd64 -o proxlb_image_amd64.tar 22 | - name: Upload Docker image artifact 23 | uses: actions/upload-artifact@v4 24 | with: 25 | name: proxlb-image-amd64 26 | path: proxlb_image_amd64.tar 27 | -------------------------------------------------------------------------------- /.github/workflows/30-pipeline-build-container-arm64.yml: -------------------------------------------------------------------------------- 1 | name: "Build Container Image: ARM64" 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout repository 8 | uses: actions/checkout@v4 9 | - name: Set up QEMU 10 | uses: docker/setup-qemu-action@v3 11 | - name: Set up Docker Buildx 12 | uses: docker/setup-buildx-action@v3 13 | - name: Build arm64 image and save as tar 14 | run: | 15 | docker buildx build \ 16 | --platform linux/arm64 \ 17 | --load \ 18 | -t proxlb-image:arm64 \ 19 | . 20 | 21 | docker save proxlb-image:arm64 -o proxlb_image_arm64.tar 22 | - name: Upload Docker image artifact 23 | uses: actions/upload-artifact@v4 24 | with: 25 | name: proxlb-image-arm64 26 | path: proxlb_image_arm64.tar 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="proxlb", 5 | version="1.1.10", 6 | description="An advanced resource scheduler and load balancer for Proxmox clusters.", 7 | long_description="An advanced resource scheduler and load balancer for Proxmox clusters that also supports maintenance modes and affinity/anti-affinity rules.", 8 | author="Florian Paul Azim Hoberg", 9 | author_email="gyptazy@gyptazy.com", 10 | maintainer="Florian Paul Azim Hoberg", 11 | maintainer_email="gyptazy@gyptazy.com", 12 | url="https://github.com/gyptazy/ProxLB", 13 | packages=["proxlb", "proxlb.utils", "proxlb.models"], 14 | install_requires=[ 15 | "requests", 16 | 
"urllib3", 17 | "proxmoxer", 18 | "pyyaml", 19 | ], 20 | data_files=[('/etc/systemd/system', ['service/proxlb.service']), ('/etc/proxlb/', ['config/proxlb_example.yaml'])], 21 | ) 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report 4 | title: "`Bug`:" 5 | labels: bug, needs-analysis 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## General 11 | <-- Describe the bug from a high level perspective. --> 12 | 13 | ## Weighting 14 | Score: <-- Define a scoring from 0-10 (10 highest, most urgent) --> 15 | 16 | ## Config 17 | <-- Attach the ProxLB configuration for further analysis. Please take care to NOT publish your API credentials! --> 18 | 19 | ## Log 20 | <-- Attach the ProxLB debug log for further analysis. Please take care to NOT publish your API credentials! --> 21 | 22 | ## Meta 23 | Please provide some more information about your setup. This includes where you obtained ProxLB (e.g., as a `.deb` file, from the repository or container image) and also which version you're running in which mode. You can obtain the used version from your image version, your local repository information or by running `proxlb -v`. 24 | 25 | Version: <-- DEFINE_VERSION --> 26 | Installed from: <-- DEFINE_INSTALL_SOURCE --> 27 | Running as: <-- Container, local on Proxmox, local on all Proxmox, dedicated --> 28 | -------------------------------------------------------------------------------- /helm/proxlb/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ .Release.Name }} 5 | labels: 6 | {{- include "proxlb.labels" .
| nindent 4 }} 7 | spec: 8 | replicas: 1 # Number of replicas cannot be more than 1 9 | selector: 10 | matchLabels: 11 | {{- include "proxlb.labels" . | nindent 6 }} 12 | template: 13 | metadata: 14 | labels: 15 | {{- include "proxlb.labels" . | nindent 8 }} 16 | spec: 17 | {{- with .Values.image.imagePullSecrets }} 18 | imagePullSecrets: 19 | {{- toYaml . | nindent 8 }} 20 | {{- end }} 21 | # not interacting with the k8s cluster 22 | automountServiceAccountToken: False 23 | containers: 24 | - name: proxlb 25 | image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" 26 | imagePullPolicy: {{ .Values.image.pullPolicy }} 27 | args: 28 | {{- if .Values.extraArgs.dryRun }} 29 | - --dry-run 30 | {{- end }} 31 | volumeMounts: 32 | - name: config 33 | mountPath: /etc/proxlb/proxlb.yaml 34 | subPath: proxlb.yaml 35 | {{ if .Values.resources }} 36 | resources: 37 | {{ with .Values.resources }} 38 | {{ toYaml . | nindent 10 }} 39 | {{ end }} 40 | {{ end }} 41 | volumes: 42 | - name: config 43 | configMap: 44 | name: proxlb-config -------------------------------------------------------------------------------- /helm/proxlb/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | registry: cr.gyptazy.com 3 | repository: proxlb/proxlb 4 | tag: v1.1.10 5 | pullPolicy: IfNotPresent 6 | imagePullSecrets: [ ] 7 | 8 | resources: 9 | limits: 10 | cpu: "1000m" 11 | memory: "2Gi" 12 | requests: 13 | cpu: "100m" 14 | memory: "100Mi" 15 | 16 | labels: {} 17 | 18 | extraArgs: 19 | dryRun: false 20 | 21 | configmap: 22 | create: true 23 | config: 24 | proxmox_api: 25 | hosts: [] 26 | #Can be either a user or a token 27 | # user: "" 28 | # pass: "" 29 | # token_id: "" 30 | # token_secret: "" 31 | ssl_verification: True 32 | timeout: 10 33 | proxmox_cluster: 34 | maintenance_nodes: [ ] 35 | ignore_nodes: [ ] 36 | overprovisioning: True 37 | balancing: 38 | enable: True 39 | enforce_affinity: False 40 | 
parallel: False 41 | # If running parallel jobs, you can define 42 | # the amount of parallel jobs (default: 5) 43 | parallel_jobs: 1 44 | live: True 45 | with_local_disks: True 46 | with_conntrack_state: True 47 | balance_types: [ 'vm', 'ct' ] 48 | max_job_validation: 1800 49 | balanciness: 5 50 | method: memory 51 | mode: used 52 | service: 53 | daemon: True 54 | schedule: 55 | interval: 12 56 | format: "hours" 57 | delay: 58 | enable: False 59 | time: 1 60 | format: "hours" 61 | log_level: INFO 62 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the latest Alpine image 2 | FROM alpine:latest 3 | 4 | # Labels 5 | LABEL maintainer="gyptazy@gyptazy.com" 6 | LABEL org.label-schema.name="ProxLB" 7 | LABEL org.label-schema.description="ProxLB - An advanced load balancer for Proxmox clusters." 8 | LABEL org.label-schema.vendor="gyptazy" 9 | LABEL org.label-schema.url="https://proxlb.de" 10 | LABEL org.label-schema.vcs-url="https://github.com/gyptazy/ProxLB" 11 | 12 | # --- Step 1 (root): system deps, user, dirs --- 13 | RUN apk add --no-cache python3 py3-pip \ 14 | && addgroup -S plb \ 15 | && adduser -S -G plb -h /home/plb plb \ 16 | && mkdir -p /app/conf /opt/venv \ 17 | && chown -R plb:plb /app /home/plb /opt/venv 18 | 19 | WORKDIR /app 20 | 21 | # Copy only requirements first for better layer caching 22 | COPY --chown=plb:plb requirements.txt /app/requirements.txt 23 | 24 | # --- Step 2 (appuser): venv + deps + code --- 25 | USER plb 26 | 27 | # Create venv owned by appuser and put it on PATH 28 | RUN python3 -m venv /opt/venv 29 | ENV PATH="/opt/venv/bin:${PATH}" 30 | 31 | # Install Python dependencies into the venv (no PEP 668 issues) 32 | RUN pip install --no-cache-dir -r /app/requirements.txt 33 | 34 | # Copy application code (owned by appuser) 35 | COPY --chown=plb:plb proxlb /app/proxlb 36 | 37 | # Optional: placeholder
config so a bind-mount can override cleanly 38 | RUN touch /app/conf/proxlb.yaml 39 | 40 | # Run as non-root using venv Python 41 | ENTRYPOINT ["/opt/venv/bin/python", "/app/proxlb/main.py"] 42 | -------------------------------------------------------------------------------- /docs/99-faq.md: -------------------------------------------------------------------------------- 1 | ## Table of Contents 2 | 3 | 1. [GUI Integration](#gui-integration) 4 | - [How to install pve-proxmoxlb-service-ui package](https://github.com/gyptazy/ProxLB/issues/44) 5 | 2. [Proxmox HA Integration](#proxmox-ha-integration) 6 | - [Host groups: Honour HA groups](https://github.com/gyptazy/ProxLB/issues/65) 7 | 8 | ### GUI Integration 9 | ProxLB can also be accessed through the Proxmox Web UI by installing the optional `pve-proxmoxlb-service-ui` package, which depends on the proxlb package. For full Web UI integration, this package must be installed on all nodes within the cluster. Once installed, a new menu item - `Rebalancing`, appears in the cluster level under the HA section. Once installed, it offers two key functionalities: 10 | * Rebalancing VM workloads 11 | * Migrate VM workloads away from a defined node (e.g. maintenance preparation) 12 | 13 | **Note:** This package is currently discontinued and will be readded at a later time. See also: [#44: How to install pve-proxmoxlb-service-ui package](https://github.com/gyptazy/ProxLB/issues/44). 14 | 15 | ### Proxmox HA Integration 16 | Proxmox HA (High Availability) groups are designed to ensure that virtual machines (VMs) remain running within a Proxmox cluster. HA groups define specific rules for where VMs should be started or migrated in case of node failures, ensuring minimal downtime and automatic recovery. 17 | 18 | However, when used in conjunction with ProxLB, the built-in load balancer for Proxmox, conflicts can arise. ProxLB operates with its own logic for workload distribution, taking into account affinity and anti-affinity rules. 
While it effectively balances guest workloads, it may re-shift and redistribute VMs in a way that does not align with HA group constraints, potentially leading to unsuitable placements. 19 | 20 | Due to these conflicts, it is currently not recommended to use both HA groups and ProxLB simultaneously. The interaction between the two mechanisms can lead to unexpected behavior, where VMs might not adhere to HA group rules after being moved by ProxLB. 21 | 22 | A solution to improve compatibility between HA groups and ProxLB is under evaluation, aiming to ensure that both features can work together without disrupting VM placement strategies. 23 | 24 | See also: [#65: Host groups: Honour HA groups](https://github.com/gyptazy/ProxLB/issues/65). -------------------------------------------------------------------------------- /config/proxlb_example.yaml: -------------------------------------------------------------------------------- 1 | proxmox_api: 2 | hosts: ['virt01.example.com', '10.10.10.10', 'fe01:bad:code::cafe'] 3 | user: root@pam 4 | pass: crazyPassw0rd! 
5 | # API Token method 6 | # token_id: proxlb 7 | # token_secret: 430e308f-1337-1337-beef-1337beefcafe 8 | ssl_verification: True 9 | timeout: 10 10 | # API Connection retries 11 | # retries: 1 12 | # wait_time: 1 13 | 14 | proxmox_cluster: 15 | maintenance_nodes: ['virt66.example.com'] 16 | ignore_nodes: [] 17 | overprovisioning: True 18 | 19 | balancing: 20 | enable: True 21 | enforce_affinity: False 22 | parallel: False 23 | # If running parallel jobs, you can define 24 | # the number of parallel jobs (default: 5) 25 | parallel_jobs: 1 26 | live: True 27 | with_local_disks: True 28 | with_conntrack_state: True 29 | balance_types: ['vm', 'ct'] # 'vm' | 'ct' 30 | max_job_validation: 1800 # Maximum time (in seconds) a job validation may take 31 | memory_threshold: 75 # Optional: Maximum threshold (in percent) to trigger balancing actions 32 | balanciness: 5 # Maximum delta of resource usage between highest and lowest usage node 33 | method: memory # 'memory' | 'cpu' | 'disk' 34 | mode: used # 'assigned' | 'used' | 'psi' 35 | balance_larger_guests_first: False # Option to prioritize balancing of larger or smaller guests first 36 | # # PSI thresholds only apply when using mode 'psi' 37 | # psi: 38 | # nodes: 39 | # memory: 40 | # pressure_full: 0.20 41 | # pressure_some: 0.20 42 | # pressure_spikes: 1.00 43 | # cpu: 44 | # pressure_full: 0.20 45 | # pressure_some: 0.20 46 | # pressure_spikes: 1.00 47 | # disk: 48 | # pressure_full: 0.20 49 | # pressure_some: 0.20 50 | # pressure_spikes: 1.00 51 | # guests: 52 | # memory: 53 | # pressure_full: 0.20 54 | # pressure_some: 0.20 55 | # pressure_spikes: 1.00 56 | # cpu: 57 | # pressure_full: 0.20 58 | # pressure_some: 0.20 59 | # pressure_spikes: 1.00 60 | # disk: 61 | # pressure_full: 0.20 62 | # pressure_some: 0.20 63 | # pressure_spikes: 1.00 64 | pools: # Optional: Define affinity/anti-affinity rules per pool 65 | dev: # Pool name: dev 66 | type: affinity # Type: affinity (keeping VMs together) 67 | de-nbg01-db: # Pool
class CliParser:
    """
    The CliParser class handles the parsing of command-line interface (CLI) arguments.

    Methods:
        __init__()
            Builds the argparse parser and registers all supported options.

        parse_args() -> argparse.Namespace
            Parses the command line once and returns the resulting namespace.
    """
    def __init__(self):
        """
        Initializes the CliParser class.

        This method sets up an argument parser for the command-line interface (CLI) with various options:
        - `-c` or `--config`: Specifies the path to the configuration file.
        - `-d` or `--dry-run`: Performs a dry-run without executing any actions.
        - `-j` or `--json`: Returns a JSON of the VM movement.
        - `-b` or `--best-node`: Returns the best next node.
        - `-v` or `--version`: Returns the current ProxLB version.

        Logs the start and end of the initialization process.
        """
        logger.debug("Starting: CliParser.")

        # The help header is composed from the application metadata in utils.version.
        self.parser = argparse.ArgumentParser(
            description=(
                f"{utils.version.__app_name__} ({utils.version.__version__}): "
                f"{utils.version.__app_desc__}"
            )
        )

        self.parser.add_argument(
            "-c", "--config",
            help="Path to the configuration file",
            type=str,
            required=False
        )
        self.parser.add_argument(
            "-d", "--dry-run",
            help="Perform a dry-run without executing any actions",
            action="store_true",
            required=False
        )
        self.parser.add_argument(
            "-j", "--json",
            help="Return a JSON of the VM movement",
            action="store_true",
            required=False
        )
        self.parser.add_argument(
            "-b", "--best-node",
            help="Returns the best next node",
            action="store_true",
            required=False
        )
        self.parser.add_argument(
            "-v", "--version",
            help="Returns the current ProxLB version",
            action="store_true",
            required=False
        )
        logger.debug("Finished: CliParser.")

    def parse_args(self) -> argparse.Namespace:
        """
        Parses and returns the parsed command-line interface (CLI) arguments.

        The command line is parsed exactly once; the resulting namespace is
        logged on debug level and then returned to the caller.

        Returns:
            argparse.Namespace: An object containing the parsed CLI arguments.
        """
        logger.debug("Starting: parse_args.")

        # Parse once and reuse the result for both logging and the return
        # value instead of running argparse over sys.argv twice.
        parsed_args = self.parser.parse_args()
        logger.debug(parsed_args)

        logger.debug("Finished: parse_args.")
        return parsed_args
def get_config(self) -> Dict[str, Any]:
    """
    Parses the YAML configuration file and returns its content.

    The file located at `self.config_path` is opened as UTF-8 and parsed
    with PyYAML. Any failure to read or parse the file, as well as a file
    that does not contain a YAML mapping at its top level (e.g. an empty
    file), is reported on stdout and via the logger before the process is
    terminated with exit code 1.

    Returns:
        Dict[str, Any]: The parsed configuration.
    """
    logger.debug("Starting: get_config.")
    logger.info(f"Using config path: {self.config_path}")

    try:
        with open(self.config_path, "r", encoding="utf-8") as config_file:
            # NOTE(review): FullLoader is kept for backward compatibility with
            # existing configs; yaml.safe_load would be the stricter choice.
            config_data = yaml.load(config_file, Loader=yaml.FullLoader)
    except yaml.YAMLError as exception_error:
        print(f"Error loading YAML file: {exception_error}")
        logger.critical(f"Error loading YAML file: {exception_error}")
        sys.exit(1)
    except OSError as exception_error:
        # The file may have vanished or become unreadable since it was
        # validated (e.g. on a configuration reload).
        print(f"Error reading config file: {exception_error}")
        logger.critical(f"Error reading config file: {exception_error}")
        sys.exit(1)

    # An empty file yields None and a scalar/list document yields a
    # non-mapping; callers rely on dict access, so fail early and clearly.
    if not isinstance(config_data, dict):
        print(f"The config file {self.config_path} does not contain a valid YAML mapping.")
        logger.critical(f"The config file {self.config_path} does not contain a valid YAML mapping.")
        sys.exit(1)

    logger.debug("Finished: get_config.")
    return config_data
actions/setup-python@v4 13 | with: 14 | python-version: ${{ matrix.python-version }} 15 | - name: Install additional dependencies for code linting 16 | run: | 17 | sudo apt-get update 18 | sudo apt-get -y install python3-pycodestyle pycodestyle 19 | - name: Run code linting on ProxLB Python code 20 | run: | 21 | pycodestyle proxlb/* && \ 22 | echo "OK: Code linting successfully performed on ProxLB code." 23 | 24 | build-package-rpm: 25 | needs: lint-code-proxlb 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Check out repository 29 | uses: actions/checkout@v3 30 | with: 31 | ref: 'development' 32 | 33 | - name: Set up Docker with Debian image 34 | run: | 35 | docker pull debian:latest 36 | 37 | - name: Build DEB package in Docker container 38 | run: | 39 | docker run --rm -v $(pwd):/workspace -w /workspace debian:latest bash -c " 40 | # Install dependencies 41 | apt-get update && \ 42 | apt-get install -y python3 python3-setuptools rpm debhelper dh-python python3-pip python3-stdeb python3-proxmoxer python3-requests python3-urllib3 && \ 43 | # Build package 44 | python3 setup.py --command-packages=stdeb.command bdist_rpm && \ 45 | echo 'OK: RPM package successfully created.' 
46 | " 47 | 48 | - name: Upload RPM package python3-proxlb as artifact 49 | uses: actions/upload-artifact@v4 50 | with: 51 | name: rpm-package 52 | path: dist/*.rpm 53 | 54 | # integration-test-rpm-rockylinux-9: 55 | # needs: build-package-rpm 56 | # runs-on: ubuntu-latest 57 | # steps: 58 | # - name: Download RPM package artifact 59 | # uses: actions/download-artifact@v4 60 | # with: 61 | # name: rpm-package 62 | # path: dist/ 63 | 64 | # - name: Set up Docker with RockyLinux 9 image 65 | # run: docker pull rockylinux:9 66 | 67 | # - name: Install and test RPM package in Rocky Linux Docker container 68 | # run: | 69 | # docker run --rm -v $(pwd)/dist:/dist -w /dist rockylinux:9 bash -c " 70 | # # DNF does not handle wildcards well 71 | # rpm_file=\$(ls proxlb*.noarch.rpm) && \ 72 | # dnf install -y \$rpm_file && \ 73 | # python3 -c 'import proxlb; print(\"OK: RPM package successfully installed.\")' 74 | # " 75 | 76 | # integration-test-rpm-rockylinux-8: 77 | # needs: build-package-rpm 78 | # runs-on: ubuntu-latest 79 | # steps: 80 | # - name: Download RPM package artifact 81 | # uses: actions/download-artifact@v4 82 | # with: 83 | # name: rpm-package 84 | # path: dist/ 85 | 86 | # - name: Set up Docker with RockyLinux 8 image 87 | # run: docker pull rockylinux:8 88 | 89 | # - name: Install and test RPM package in Rocky Linux Docker container 90 | # run: | 91 | # docker run --rm -v $(pwd)/dist:/dist -w /dist rockylinux:8 bash -c " 92 | # # DNF does not handle wildcards well 93 | # rpm_file=\$(ls proxlb*.noarch.rpm) && \ 94 | # dnf install -y \$rpm_file && \ 95 | # python3 -c 'import proxlb; print(\"OK: RPM package successfully installed.\")' 96 | # " 97 | -------------------------------------------------------------------------------- /docs/01_requirements.md: -------------------------------------------------------------------------------- 1 | # Table of Contents 2 | 3 | - [Requirements](#requirements) 4 | - [Where To Run?](#where-to-run) 5 | 6 | ## Requirements 7 
| ProxLB is a sophisticated load balancer designed to enhance the management and distribution of workloads within a Proxmox cluster. By fully utilizing the Proxmox API, ProxLB eliminates the need for additional SSH access, streamlining cluster management while maintaining robust security. This chapter outlines the general requirements necessary to deploy and operate ProxLB effectively. 8 | 9 | ### Proxmox Cluster Requirements 10 | To use ProxLB, you must have an existing Proxmox cluster consisting of at least two nodes. While traditional load balancers often struggle to manage minimal node configurations, ProxLB is optimized to provide efficient load distribution even in a two-node environment. The more nodes present in the cluster, the better ProxLB can optimize resource usage and manage workloads. 11 | 12 | ### ProxLB Package Requirements 13 | Next to the previously mentioned requirements, ProxLB also requires you to fit the following ones: 14 | * Python3.x 15 | * proxmoxer 16 | * requests 17 | * urllib3 18 | * pyyaml 19 | 20 | ### Seamless API Integration 21 | ProxLB relies exclusively on the Proxmox API for all management tasks. This eliminates the need for direct SSH access, ensuring a cleaner and more secure interaction with the cluster. The API integration allows ProxLB to: 22 | 23 | - Monitor cluster health and node resource utilization 24 | - Migrate virtual machines (VMs) and containers as needed 25 | - Manage storage utilization and distribution 26 | - Implement load balancing policies 27 | 28 | ### Authentication and Security Standards 29 | ProxLB fully supports Proxmox’s integrated user management system, providing robust authentication and access control. Key features include: 30 | 31 | - **Multi-Factor Authentication (MFA):** Enhances security by requiring multiple verification methods. 32 | - **API Key Support:** ProxLB can utilize API keys for authentication instead of traditional username/password combinations, minimizing exposure to credentials. 
33 | - **Role-Based Access Control (RBAC):** Ensures administrators have fine-grained control over user permissions. 34 | 35 | ### Flexible Storage Support 36 | ProxLB offers versatile storage management options, supporting both local and shared storage types. It efficiently balances storage workloads across the cluster using the following storage systems: 37 | 38 | - **Local Storage:** Direct-attached storage on each node. 39 | - **Shared Storage:** Includes options like iSCSI, NVMeOF, and NFS for centralized storage solutions. 40 | - **Ceph:** Integrated support for Ceph distributed storage, providing high availability and fault tolerance. 41 | 42 | ### Network Infrastructure Requirements 43 | For optimal performance, ProxLB requires a reliable and high-speed network connection between the nodes in the cluster. Ensure that the network infrastructure meets the following criteria: 44 | 45 | - **Low Latency:** Essential for real-time load balancing and VM migration. 46 | - **Sufficient Bandwidth:** Adequate to handle storage access, data replication, and migration traffic. 47 | - **Redundant Network Paths:** Recommended for increased fault tolerance and uptime. 48 | 49 | ### System Resource Allocation 50 | ProxLB itself requires minimal system resources to operate. However, for managing larger clusters or high workloads, ensure the node running ProxLB has adequate resources available: 51 | 52 | - **CPU:** A modern multi-core processor. 53 | - **Memory:** At least 2 GB of RAM. 54 | - **Storage:** Minimal disk space for configuration files and logs. 55 | 56 | 57 | ## Where To Run? 58 | ProxLB is lightweight and flexible where it runs on nearly any environment and only needs access to your Proxmox host’s API endpoint (commonly TCP port 8006). 
59 | 60 | Therefore, you can simply run ProxLB on: 61 | * Bare-metal Systems 62 | * VMs (even inside the Proxmox cluster) 63 | * Docker/Podman Container 64 | * LXC Container 65 | * On a Proxmox node -------------------------------------------------------------------------------- /.github/workflows/20-pipeline-build-deb-package.yml: -------------------------------------------------------------------------------- 1 | name: "Build package: .deb" 2 | on: [push] 3 | jobs: 4 | lint-code-proxlb: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.8"] 9 | steps: 10 | - uses: actions/checkout@v3 11 | - name: Setup dependencies for code linting 12 | uses: actions/setup-python@v4 13 | with: 14 | python-version: ${{ matrix.python-version }} 15 | - name: Install additional dependencies for code linting 16 | run: | 17 | sudo apt-get update 18 | sudo apt-get -y install python3-pycodestyle pycodestyle 19 | - name: Run code linting on ProxLB Python code 20 | run: | 21 | pycodestyle proxlb/* && \ 22 | echo "OK: Code linting successfully performed on ProxLB code." 
23 | 24 | build-package-debian: 25 | needs: lint-code-proxlb 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Check out repository 29 | uses: actions/checkout@v3 30 | with: 31 | ref: ${{ github.ref }} 32 | 33 | - name: Set up Docker with Debian image 34 | run: | 35 | docker pull debian:latest 36 | 37 | - name: Build DEB package in Docker container 38 | run: | 39 | docker run --rm -v $(pwd):/workspace -w /workspace debian:latest bash -c " 40 | # Install dependencies 41 | apt-get update && \ 42 | apt-get install -y python3 python3-setuptools debhelper dh-python python3-pip python3-stdeb python3-proxmoxer python3-requests python3-urllib3 devscripts python3-all && \ 43 | 44 | # Get base version from source code 45 | BASE_VERSION=\$(grep __version__ proxlb/utils/version.py | awk '{print \$3}' | tr -d '\"') 46 | echo \"Base version: \$BASE_VERSION\" 47 | 48 | # Build full version with timestamp 49 | FULL_VERSION=\"\${BASE_VERSION}+$(date +%Y%m%d%H%M)\" 50 | echo \"Full version: \$FULL_VERSION\" 51 | 52 | # Update debian/changelog with new version 53 | dch --force-bad-version -v \"\$FULL_VERSION\" \ 54 | \"Automated GitHub Actions build on $(date -u +'%Y-%m-%d %H:%M UTC').\" && \ 55 | 56 | # Build package using stdeb / setuptools 57 | # python3 setup.py --command-packages=stdeb.command bdist_deb && \ 58 | # Build native package 59 | dpkg-buildpackage -us -uc && \ 60 | mkdir package && \ 61 | mv ../*.deb package/ && \ 62 | echo 'OK: Debian package successfully created.' 
63 | " 64 | 65 | - name: Upload Debian package python3-proxlb as artifact 66 | uses: actions/upload-artifact@v4 67 | with: 68 | name: debian-package 69 | path: package/*.deb 70 | 71 | integration-test-debian: 72 | needs: build-package-debian 73 | runs-on: ubuntu-latest 74 | strategy: 75 | matrix: 76 | debian_version: [bookworm, trixie] 77 | name: Integration Test on Debian ${{ matrix.debian_version }} 78 | steps: 79 | - name: Download Debian package artifact 80 | uses: actions/download-artifact@v4 81 | with: 82 | name: debian-package 83 | path: package/ 84 | 85 | - name: Set up Docker with Debian image 86 | run: docker pull debian:${{ matrix.debian_version }} 87 | 88 | - name: Install and test Debian package in Docker container 89 | run: | 90 | docker run --rm \ 91 | -v "$(pwd)/package:/package" \ 92 | -w /package \ 93 | debian:${{ matrix.debian_version }} \ 94 | bash -c " 95 | set -e 96 | apt-get update 97 | apt-get install -y python3 systemd 98 | apt-get install -y ./proxlb*.deb 99 | python3 -c 'import proxlb; print(\"OK: Debian package successfully installed on ${{ matrix.debian_version }}.\")' 100 | " 101 | -------------------------------------------------------------------------------- /proxlb/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | ProxLB is a load balancing tool for Proxmox Virtual Environment (PVE) clusters. 3 | It connects to the Proxmox API, retrieves information about nodes, guests, and groups, 4 | and performs calculations to determine the optimal distribution of resources across the 5 | cluster. The tool supports daemon mode for continuous operation and can log metrics and 6 | perform balancing actions based on the configuration provided. It also includes a CLI 7 | parser for handling command-line arguments and a custom logger for systemd integration. 
def main():
    """
    ProxLB main function.

    Sets up logging and signal handlers, parses the CLI arguments and the
    configuration file, connects to the Proxmox API and then enters the
    balancing loop. Each loop iteration collects nodes, pools, HA rules,
    guests and groups from the cluster, runs the placement calculations and,
    if balancing is enabled and no dry-run was requested, performs the
    balancing via the Proxmox API.

    The loop itself has no exit condition; leaving it is presumably handled
    by Helper.get_daemon_mode or the registered signal handlers (not visible
    from this function).
    """
    # Initialize logging handler
    logger = SystemdLogger(level=logging.INFO)

    # Initialize handlers
    signal.signal(signal.SIGHUP, Helper.handler_sighup)
    signal.signal(signal.SIGINT, Helper.handler_sigint)

    # Parses arguments passed from the CLI
    cli_parser = CliParser()
    cli_args = cli_parser.parse_args()
    Helper.get_version(cli_args.version)

    # Parse ProxLB config file
    config_parser = ConfigParser(cli_args.config)
    proxlb_config = config_parser.get_config()

    # Update log level from config and fallback to INFO if not defined
    logger.set_log_level(proxlb_config.get('service', {}).get('log_level', 'INFO'))

    # Validate of an optional service delay
    Helper.get_service_delay(proxlb_config)

    # Connect to Proxmox API & create API object
    proxmox_api = ProxmoxApi(proxlb_config)

    # Overwrite password after creating the API object
    proxlb_config["proxmox_api"]["pass"] = "********"

    while True:

        # Validate if reload signal was sent during runtime
        # and reload the ProxLB configuration and adjust log level
        if Helper.proxlb_reload:
            logger.info("Reloading ProxLB configuration.")
            proxlb_config = config_parser.get_config()

            # The freshly parsed config contains the clear-text password
            # again. The API object is not re-created on reload, so mask it
            # right away to keep it out of the merged ProxLB data below.
            if isinstance(proxlb_config.get("proxmox_api"), dict):
                proxlb_config["proxmox_api"]["pass"] = "********"

            logger.set_log_level(proxlb_config.get('service', {}).get('log_level', 'INFO'))
            Helper.proxlb_reload = False

        # Get all required objects from the Proxmox cluster
        meta = {"meta": proxlb_config}
        nodes = Nodes.get_nodes(proxmox_api, proxlb_config)
        meta = Features.validate_any_non_pve9_node(meta, nodes)
        pools = Pools.get_pools(proxmox_api)
        ha_rules = HaRules.get_ha_rules(proxmox_api, meta)
        guests = Guests.get_guests(proxmox_api, pools, ha_rules, nodes, meta, proxlb_config)
        groups = Groups.get_groups(guests, nodes)

        # Merge obtained objects from the Proxmox cluster for further usage
        proxlb_data = {**meta, **nodes, **guests, **pools, **ha_rules, **groups}
        Helper.log_node_metrics(proxlb_data)

        # Validate usable features by PVE versions
        Features.validate_available_features(proxlb_data)

        # Update the initial node resource assignments
        # by the previously created groups.
        Calculations.set_node_assignments(proxlb_data)
        Calculations.set_node_hot(proxlb_data)
        Calculations.set_guest_hot(proxlb_data)
        Calculations.get_most_free_node(proxlb_data, cli_args.best_node)
        Calculations.validate_affinity_map(proxlb_data)
        Calculations.relocate_guests_on_maintenance_nodes(proxlb_data)
        Calculations.get_balanciness(proxlb_data)
        Calculations.relocate_guests(proxlb_data)
        Helper.log_node_metrics(proxlb_data, init=False)

        # Perform balancing actions via Proxmox API
        if proxlb_data["meta"]["balancing"].get("enable", False):
            if not cli_args.dry_run:
                Balancing(proxmox_api, proxlb_data)

        # Validate if the JSON output should be
        # printed to stdout
        Helper.print_json(proxlb_data, cli_args.json)
        # Validate daemon mode
        Helper.get_daemon_mode(proxlb_config)

    # No statement follows the loop: it has no break, so anything placed
    # here would never be executed.
30 | Returns a dict with a top-level "pools" mapping each poolid to 31 | {"name": , "members": [...]}. 32 | This method does not collect per-member metrics or perform node filtering. 33 | """ 34 | def __init__(self): 35 | """ 36 | Initializes the Pools class with the provided ProxLB data. 37 | """ 38 | 39 | @staticmethod 40 | def get_pools(proxmox_api: any) -> Dict[str, Any]: 41 | """ 42 | Retrieve all pools and their members from a Proxmox cluster. 43 | 44 | Queries the Proxmox API for pool definitions and returns a dictionary 45 | containing each pool's id/name and a list of its member VM/CT names. 46 | This function does not perform per-member metric collection or node 47 | filtering — it only gathers pool membership information. 48 | 49 | Args: 50 | proxmox_api (any): Proxmox API client instance. 51 | 52 | Returns: 53 | Dict[str, Any]: Dictionary with a top-level "pools" key mapping poolid 54 | to {"name": , "members": [...]}. 55 | """ 56 | logger.debug("Starting: get_pools.") 57 | pools = {"pools": {}} 58 | 59 | # Pool objects: iterate over all pools in the cluster. 60 | # We keep pool members even if their nodes are ignored so resource accounting 61 | # for rebalancing remains correct and we avoid overprovisioning nodes. 62 | for pool in proxmox_api.pools.get(): 63 | logger.debug(f"Got pool: {pool['poolid']}") 64 | pools['pools'][pool['poolid']] = {} 65 | pools['pools'][pool['poolid']]['name'] = pool['poolid'] 66 | pools['pools'][pool['poolid']]['members'] = [] 67 | 68 | # Fetch pool details and collect member names 69 | pool_details = proxmox_api.pools(pool['poolid']).get() 70 | for member in pool_details.get("members", []): 71 | 72 | # We might also have objects without the key "name", e.g. 
@staticmethod
def get_pools_for_guest(guest_name: str, pools: Dict[str, Any]) -> list:
    """
    Return the list of pool names that include the given guest.

    Args:
        guest_name (str): Name of the VM or CT to look up.
        pools (Dict[str, Any]): Pools structure as returned by get_pools(),
            expected to contain a top-level "pools" mapping each poolid to
            {"name": <poolid>, "members": [...]}.

    Returns:
        list: Names (str) of the pools the guest is a member of; an empty
        list if the guest is not part of any pool.
    """
    logger.debug("Starting: get_pools_for_guests.")
    guest_pools = []

    # Every top-level value is itself a mapping of poolid -> pool data;
    # the top-level key is not needed for the lookup.
    for pool_group in pools.values():
        for pool_id, pool_data in pool_group.items():

            # Guard against malformed entries before accessing them as dicts.
            if not isinstance(pool_data, dict):
                logger.debug(f"Pool data for pool_id {pool_id} is not a dict: {pool_data}")
                continue

            pool_name = pool_data.get("name", "")
            pool_name_members = pool_data.get("members", [])

            if guest_name in pool_name_members:
                logger.debug(f"Guest: {guest_name} is member of Pool: {pool_name}.")
                guest_pools.append(pool_name)
            else:
                logger.debug(f"Guest: {guest_name} is NOT member of Pool: {pool_name}.")

    logger.debug("Finished: get_pools_for_guests.")
    return guest_pools
Below are guidelines for contributing to the project. 4 | 5 | ## Table of Contents 6 | 7 | - [Contributing to ProxLB (PLB)](#contributing-to-proxlb-plb) 8 | - [Table of Contents](#table-of-contents) 9 | - [Creating an Issue](#creating-an-issue) 10 | - [Running Linting](#running-linting) 11 | - [Running Tests](#running-tests) 12 | - [Add Changelogs](#add-changelogs) 13 | - [Submitting a Pull Request](#submitting-a-pull-request) 14 | - [Code of Conduct](#code-of-conduct) 15 | - [Getting Help](#getting-help) 16 | 17 | ## Creating an Issue 18 | 19 | If you encounter a bug, have a feature request, or have any suggestions, please create an issue in our GitHub repository. To create an issue: 20 | 21 | 1. **Go to the [Issues](https://github.com/gyptazy/proxlb/issues) section of the repository.** 22 | 2. **Click on the "New issue" button.** 23 | 3. **Select the appropriate issue template (Bug Report, Feature Request, or Custom Issue).** 24 | 4. **Provide a clear and descriptive title.** 25 | 5. **Fill out the necessary details in the issue template.** Provide as much detail as possible to help us understand and reproduce the issue or evaluate the feature request. 26 | 27 | ## Running Linting 28 | Before submitting a pull request, ensure that your changes successfully pass the linting. ProxLB uses [flake8](https://flake8.pycqa.org/) for linting. Follow these steps to run the linter locally: 29 | 30 | 1. **Install flake8 if you haven't already:** 31 | ```sh 32 | pip install flake8 33 | ``` 34 | 35 | 2. **Run the linting:** 36 | ```sh 37 | python3 -m flake8 proxlb 38 | ``` 39 | 40 | Linting will also be performed for each PR. Therefore, it makes sense to run it locally before pushing. 41 | 42 | ## Running Tests 43 | 44 | Before submitting a pull request, ensure that your changes do not break existing functionality. ProxLB uses [pytest](https://docs.pytest.org/en/stable/) for running tests. Follow these steps to run tests locally: 45 | 46 | 1.
**Install pytest if you haven't already:** 47 | ```sh 48 | pip install pytest 49 | ``` 50 | 51 | 2. **Run the tests:** 52 | ```sh 53 | pytest 54 | ``` 55 | 56 | Ensure all tests pass before submitting your changes. 57 | 58 | ## Add Changelogs 59 | ProxLB uses the [Changelog Fragments Creator](https://github.com/gyptazy/changelog-fragments-creator) for creating the overall `CHANGELOG.md` file. This changelog file is being generated from the files placed in the https://github.com/gyptazy/ProxLB/tree/main/.changelogs/ directory. Each release is represented by its version number where additional yaml files are being placed and parsed by the CFC tool. Such files look like: 60 | 61 | ``` 62 | added: 63 | - Add option to rebalance by assigned VM resources to avoid overprovisioning. [#16] 64 | ``` 65 | 66 | Every PR should contain such a file describing the change to ensure this is also stated in the changelog file. 67 | 68 | ## Submitting a Pull Request 69 | 70 | We welcome your contributions! Follow these steps to submit a pull request: 71 | 72 | 1. **Fork the repository to your GitHub account.** 73 | 2. **Clone your forked repository to your local machine:** 74 | ```sh 75 | git clone https://github.com/gyptazy/proxlb.git 76 | cd proxlb 77 | ``` 78 | 79 | Please prefix your PR regarding its type. It might be: 80 | * doc 81 | * feature 82 | * fix 83 | 84 | It should also provide the issue id to which it is related. 85 | 86 | 1. **Create a new branch for your changes:** 87 | ```sh 88 | git checkout -b feature/10-add-new-cool-stuff 89 | ``` 90 | 91 | 2. **Make your changes and commit them with a descriptive commit message:** 92 | ```sh 93 | git add . 94 | git commit -m "feature: Adding new cool stuff" 95 | ``` 96 | 97 | 3. **Push your changes to your forked repository:** 98 | ```sh 99 | git push origin feature/10-add-new-cool-stuff 100 | ``` 101 | 102 | 4. **Create a pull request from your forked repository:** 103 | - Go to the original repository on GitHub. 
104 | - Click on the "New pull request" button. 105 | - Select the branch you pushed your changes to and create the pull request. 106 | 107 | Please ensure that your pull request: 108 | 109 | - Follows the project's coding style and guidelines. 110 | - Includes tests for any new functionality. 111 | - Updates the documentation as necessary. 112 | 113 | ## Code of Conduct 114 | 115 | By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md). Please read it to understand the expected behavior and responsibilities when interacting with the community. 116 | 117 | ## Getting Help 118 | 119 | If you need help or have any questions, feel free to reach out by creating an issue or by joining our [discussion forum](https://github.com/gyptazy/proxlb/discussions). You can also refer to our [documentation](https://github.com/gyptazy/ProxLB/tree/main/docs) for more information about the project or join our [chat room](https://matrix.to/#/#proxlb:gyptazy.com) in Matrix. 120 | 121 | Thank you for contributing to ProxLB! Together, we can enhance the efficiency and performance of Proxmox clusters. -------------------------------------------------------------------------------- /proxlb/utils/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | The SystemdLogger class provides a singleton logger that integrates with systemd's journal if available. 3 | It dynamically evaluates the environment and adjusts the logger accordingly. 
__author__ = "Florian Paul Azim Hoberg "
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
__license__ = "GPL-3.0"


import logging
import sys
from typing import Union
try:
    from systemd.journal import JournalHandler
    SYSTEMD_PRESENT = True
except ImportError:
    SYSTEMD_PRESENT = False


class SystemdLogger:
    """
    Singleton logger that writes to systemd's journal when the systemd Python
    bindings can be imported and falls back to stdout otherwise.

    Attributes:
        instance (SystemdLogger): The shared singleton instance (None until the
            class is instantiated for the first time).

    Methods:
        __new__(cls, name: str = "ProxLB", level = logging.INFO) -> 'SystemdLogger':
            Creates the singleton on first use and returns the existing instance afterwards.

        initialize_logger(self, name, level) -> None:
            Configures the wrapped logging.Logger and attaches exactly one handler
            (JournalHandler if systemd is present, otherwise a stdout StreamHandler).

        set_log_level(self, level) -> None:
            Sets the log level for the logger and all its handlers.

        debug / info / warning / error / critical (self, msg: str) -> None:
            Forward a message to the wrapped logger at the respective level.
    """
    # Create a singleton instance variable
    instance = None

    def __new__(cls, name: str = "ProxLB", level: Union[int, str] = logging.INFO) -> 'SystemdLogger':
        """
        Return the shared logger instance, creating and initializing it on first use.

        The arguments are only honored by the very first instantiation; later
        calls return the already initialized instance unchanged.

        Args:
            name (str): The application name that is being used for the logger.
            level (int | str): The log level, either a logging constant
                (e.g.: logging.INFO) or a level name (e.g.: "INFO").

        Returns:
            SystemdLogger: The systemd logger object.
        """
        # Check if instance already exists, otherwise create a new one
        if cls.instance is None:
            cls.instance = super(SystemdLogger, cls).__new__(cls)
            cls.instance.initialize_logger(name, level)
        return cls.instance

    def initialize_logger(self, name: str, level: Union[int, str]) -> None:
        """
        Configure the underlying logger for the given name and verbosity.

        Args:
            name (str): The application name that is being used for the logger.
            level (int | str): The log level as logging constant or level name.
        """
        self.logger = logging.getLogger(name)
        self.logger.setLevel(level)

        # Systemd journal logging is preferred when the bindings are
        # available; stdout is the fallback for all other environments.
        if SYSTEMD_PRESENT:
            handler = JournalHandler(SYSLOG_IDENTIFIER="ProxLB")
        else:
            handler = logging.StreamHandler(sys.stdout)

        handler.setLevel(level)
        # Include timestamp, logger name and level in every record
        handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        self.logger.addHandler(handler)

    def set_log_level(self, level: Union[int, str]) -> None:
        """
        Apply a new log level to the logger and to every attached handler.

        Args:
            level (int | str): The log level as logging constant or level name.
        """
        self.logger.setLevel(level)

        for handler in self.logger.handlers:
            handler.setLevel(level)

        # Only visible when the new level is DEBUG (or lower)
        self.logger.debug("Set to debug level")

    # Thin wrappers around the logging levels; all of them return None.
    def debug(self, msg: str) -> None:
        """
        Logger out for messages of type: DEBUG
        """
        self.logger.debug(msg)

    def info(self, msg: str) -> None:
        """
        Logger out for messages of type: INFO
        """
        self.logger.info(msg)

    def warning(self, msg: str) -> None:
        """
        Logger out for messages of type: WARNING
        """
        self.logger.warning(msg)

    def error(self, msg: str) -> None:
        """
        Logger out for messages of type: ERROR
        """
        self.logger.error(msg)

    def critical(self, msg: str) -> None:
        """
        Logger out for messages of type: CRITICAL
        """
        self.logger.critical(msg)
"""
ProxLB Features module for validating and adjusting feature flags
based on Proxmox VE node versions and cluster compatibility.
"""

__author__ = "Florian Paul Azim Hoberg "
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
__license__ = "GPL-3.0"


from typing import List
from typing import Dict, Any
from utils.logger import SystemdLogger
from packaging import version

logger = SystemdLogger()


class Features:
    """
    Validates and adjusts ProxLB feature flags based on the Proxmox VE
    versions reported by the cluster nodes.

    Methods:
        __init__():
            No-op initializer.

        validate_available_features(proxlb_data: dict) -> None:
            Disables balancing features that need Proxmox VE >= 9.0.0 when any
            node in proxlb_data["nodes"] reports an older version. Mutates
            proxlb_data in place.

        validate_any_non_pve9_node(meta: dict, nodes: dict) -> dict:
            Records in meta["meta"]["cluster_non_pve9"] whether any node runs a
            Proxmox VE version older than 9.0.0 and returns meta.

    Notes:
        - validate_available_features expects proxlb_data to be a dict with
          "nodes" and "meta" keys.
    """
    def __init__(self):
        """
        Initializes the Features class.
        """

    @staticmethod
    def validate_available_features(proxlb_data: Dict[str, Any]) -> None:
        """
        Disable features that are incompatible with Proxmox VE versions older than 9.0.0.

        If any node reports a 'pve_version' lower than "9.0.0":
            - meta.balancing.with_conntrack_state is set to False when it is truthy.
            - meta.balancing.enable is set to False when meta.balancing.mode equals "psi".
        A warning is logged for every feature that gets deactivated.

        Args:
            proxlb_data (dict): Cluster data structure that must contain:
                - "nodes": a mapping whose values are mappings containing a 'pve_version' string.
                - "meta": a mapping that may contain a "balancing" mapping with keys:
                    - "with_conntrack_state" (bool, optional)
                    - "mode" (str, optional)
                    - "enable" (bool, optional)

        Returns:
            None: proxlb_data["meta"]["balancing"] is mutated in place.

        Notes:
            - Missing "nodes"/"meta" keys or a node without 'pve_version' raise KeyError.
            - Versions are compared with packaging's version parser; callers should
              provide versions as strings.
        """
        logger.debug("Starting: validate_available_features.")

        minimum_version = version.parse("9.0.0")
        any_non_pve9_node = any(
            version.parse(node['pve_version']) < minimum_version
            for node in proxlb_data["nodes"].values()
        )

        if any_non_pve9_node:
            # An absent "balancing" section means there is nothing to deactivate
            balancing = proxlb_data["meta"].get("balancing", {})

            if balancing.get("with_conntrack_state", False):
                logger.warning("Non Proxmox VE 9 systems detected: Deactivating migration option 'with-conntrack-state'!")
                proxlb_data["meta"]["balancing"]["with_conntrack_state"] = False

            if balancing.get("mode", None) == "psi":
                logger.warning("Non Proxmox VE 9 systems detected: Deactivating balancing!")
                proxlb_data["meta"]["balancing"]["enable"] = False

        logger.debug("Finished: validate_available_features.")

    @staticmethod
    def validate_any_non_pve9_node(meta: Dict[str, Any], nodes: Dict[str, Any]) -> Dict[str, Any]:
        """
        Flag in meta whether any cluster node is running Proxmox VE < 9.0.0.

        Args:
            meta (dict): Metadata structure; meta["meta"] must already exist and
                receives the 'cluster_non_pve9' flag.
            nodes (dict): Structure with a "nodes" mapping whose values may contain
                a 'pve_version' string. A missing "nodes" key is treated as no nodes.

        Returns:
            dict: The same meta dictionary with meta["meta"]["cluster_non_pve9"]
            set to True or False.

        Notes:
            - A node without 'pve_version' is treated as version "0.0.0", i.e. as
              older than Proxmox VE 9.
        """
        logger.debug("Starting: validate_any_non_pve9_node.")

        minimum_version = version.parse("9.0.0")
        any_non_pve9_node = any(
            version.parse(node.get("pve_version", "0.0.0")) < minimum_version
            for node in nodes.get("nodes", {}).values()
        )

        meta["meta"]["cluster_non_pve9"] = any_non_pve9_node
        logger.debug(f"Finished: validate_any_non_pve9_node. Result: {any_non_pve9_node}")

        return meta
"""
The HaRules class retrieves all HA rules defined on a Proxmox cluster
including their affinity settings and member resources.
"""

__author__ = "Florian Paul Azim Hoberg "
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
__license__ = "GPL-3.0"


from typing import Dict, Any
from utils.logger import SystemdLogger

logger = SystemdLogger()


class HaRules:
    """
    The HaRules class retrieves all HA rules defined on a Proxmox cluster
    including their (anti-)affinity settings and member resources and translates
    them into a ProxLB usable format.

    Methods:
        __init__:
            Initializes the HaRules class.

        get_ha_rules(proxmox_api, meta: dict) -> Dict[str, Any]:
            Retrieve HA rule definitions from the Proxmox cluster.
            Returns a dict with a top-level "ha_rules" mapping each rule id to
            {"rule": ..., "type": ..., "nodes": [...], "members": [...]}.

        get_ha_rules_for_guest(guest_name: str, ha_rules: dict, vm_id: int) -> list:
            Returns the HA rule entries that list the given VM/CT id as member.
    """
    def __init__(self):
        """
        Initializes the HA Rules class.
        """

    @staticmethod
    def get_ha_rules(proxmox_api: Any, meta: dict) -> Dict[str, Any]:
        """
        Retrieve all enabled HA rules from a Proxmox cluster.

        Queries the Proxmox API for HA rule definitions and converts each enabled
        rule into a ProxLB entry. The rule's affinity setting is translated to
        "anti-affinity" when it equals "negative" and to "affinity" otherwise.
        Nothing is queried when meta flags the cluster as containing nodes older
        than Proxmox VE 9.

        Args:
            proxmox_api (any): Proxmox API client instance.
            meta (dict): The metadata dictionary; meta["meta"]["cluster_non_pve9"]
                must be present.

        Returns:
            Dict[str, Any]: Dictionary with a top-level "ha_rules" key mapping rule id
            to {"rule": <id>, "type": <affinity type>, "nodes": [<node names>],
            "members": [<VM/CT ids as int>]}.
        """
        logger.debug("Starting: get_ha_rules.")
        ha_rules = {"ha_rules": {}}

        # If any node is non PVE 9, skip fetching HA rules as they are unsupported
        if meta["meta"]["cluster_non_pve9"]:
            logger.debug("Skipping HA rule retrieval as non Proxmox VE 9 systems detected.")
            return ha_rules

        logger.debug("Cluster running Proxmox VE 9 or newer, proceeding with HA rule retrieval.")

        for rule in proxmox_api.cluster.ha.rules.get():

            # Skip disabled rules (disable key exists AND is truthy)
            if rule.get("disable", 0):
                logger.debug(f"Skipping ha-rule: {rule['rule']} of type {rule['type']} affecting guests: {rule['resources']}. Rule is disabled.")
                continue

            # Resources are a comma separated list of "<type>:<id>" entries; keep
            # only the numeric VM/CT id of every non-empty entry
            resources_list_guests = [int(r.split(":")[1]) for r in rule["resources"].split(",") if r.strip()]

            # Convert the affinity field to a more descriptive type
            affinity_type = "anti-affinity" if rule.get("affinity", None) == "negative" else "affinity"

            # Create affected nodes list (empty when the rule defines no nodes)
            resources_list_nodes = []
            if rule.get("nodes", None):
                resources_list_nodes = [n for n in rule["nodes"].split(",") if n]

            # Create the ha_rule element
            ha_rules['ha_rules'][rule['rule']] = {
                'rule': rule['rule'],
                'type': affinity_type,
                'nodes': resources_list_nodes,
                'members': resources_list_guests,
            }

            logger.debug(f"Got ha-rule: {rule['rule']} as type {affinity_type} affecting guests: {rule['resources']}")

        logger.debug("Finished: ha_rules.")
        return ha_rules

    @staticmethod
    def get_ha_rules_for_guest(guest_name: str, ha_rules: Dict[str, Any], vm_id: int) -> list:
        """
        Return the HA rules that include the given guest.

        Args:
            guest_name (str): Name of the VM or CT (only used for log output).
            ha_rules (Dict[str, Any]): HA rules structure as returned by get_ha_rules(),
                expected to contain a top-level "ha_rules" mapping.
            vm_id (int): VM or CT ID of the guest.

        Returns:
            list: The rule entries (dicts as stored under "ha_rules", not just
            their ids) whose "members" contain vm_id; empty list if none.
        """
        logger.debug("Starting: get_ha_rules_for_guest.")
        guest_ha_rules = []

        for rule in ha_rules["ha_rules"].values():
            if vm_id in rule.get("members", []):
                logger.debug(f"Guest: {guest_name} (VMID: {vm_id}) is member of HA Rule: {rule['rule']}.")
                guest_ha_rules.append(rule)
            else:
                logger.debug(f"Guest: {guest_name} (VMID: {vm_id}) is NOT member of HA Rule: {rule['rule']}.")

        logger.debug("Finished: get_ha_rules_for_guest.")
        return guest_ha_rules
114 | """ 115 | logger.debug("Starting: get_ha_rules_for_guest.") 116 | guest_ha_rules = [] 117 | 118 | for rule in ha_rules["ha_rules"].values(): 119 | if vm_id in rule.get("members", []): 120 | logger.debug(f"Guest: {guest_name} (VMID: {vm_id}) is member of HA Rule: {rule['rule']}.") 121 | guest_ha_rules.append(rule) 122 | else: 123 | logger.debug(f"Guest: {guest_name} (VMID: {vm_id}) is NOT member of HA Rule: {rule['rule']}.") 124 | 125 | logger.debug("Finished: get_ha_rules_for_guest.") 126 | return guest_ha_rules 127 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | proxlb (1.1.10) stable; urgency=medium 2 | 3 | * Prevent redundant rebalancing by validating existing affinity enforcement before taking actions. (Closes: #335) 4 | * Add safety-guard for PVE 8 users when activating conntrack-aware migrations mistakenly. (Closes: #359) 5 | * Fix the Proxmox API connection validation which returned a false-positive logging message of timeouts. (Closes: #361) 6 | * Refactored the whole Proxmox API connection function. (Closes: #361) 7 | * Fix a crash during PVE resource pool enumeration by skipping members not having a 'name' property. (Closes: #368) 8 | 9 | -- Florian Paul Azim Hoberg Tue, 25 Nov 2025 09:12:04 +0001 10 | 11 | proxlb (1.1.9.1) stable; urgency=medium 12 | 13 | * Fix quoting in f-strings which may cause issues on PVE 8 / Debian Bookworm systems. (Closes: #352) 14 | 15 | -- Florian Paul Azim Hoberg Thu, 30 Oct 2025 17:41:02 +0001 16 | 17 | proxlb (1.1.9) stable; urgency=medium 18 | 19 | * Add pressure (PSI) based balancing for memory, cpu, disk (req. PVE9 or greater). (Closes: #339) 20 | * Add (memory) threshold for nodes before running balancing. (Closes: #342) 21 | * Add affinity/anti-affinity support by pools. 
(Closes: #343) 22 | 23 | -- Florian Paul Azim Hoberg Thu, 30 Oct 2025 06:58:43 +0001 24 | 25 | proxlb (1.1.8) stable; urgency=medium 26 | 27 | * Fix API errors when using conntrack aware migration with older PVE version. (Closes: #318) 28 | * Add a static ProxLB prefix to the log output when used by journal handler. (Closes: #329) 29 | 30 | -- Florian Paul Azim Hoberg Thu, 09 Oct 2025 09:04:13 +0002 31 | 32 | proxlb (1.1.7) stable; urgency=medium 33 | 34 | * Add conntrack state aware migrations of VMs. (Closes: #305) 35 | * Add graceful shutdown for SIGINT command. (Closes: #304) 36 | * Fix crash when validating absent migration job ids. (Closes: #308) 37 | * Fix guest object names are not being evaluated in debug log. (Closes: #310) 38 | * Note: Have a great Dutch Proxmox Day 2025! 39 | 40 | -- Florian Paul Azim Hoberg Thu, 04 Sep 2025 19:23:51 +0000 41 | 42 | proxlb (1.1.6.1) stable; urgency=medium 43 | 44 | * Validate for node presence when pinning VMs to avoid crashing. (Closes: #296) 45 | 46 | -- Florian Paul Azim Hoberg Thu, 04 Sep 2025 19:23:51 +0000 47 | 48 | proxlb (1.1.6) stable; urgency=medium 49 | 50 | * Add validation for provided API user token id to avoid confusions. (Closes: #291) 51 | * Fix stacktrace output when validating permissions on non existing users in Proxmox. (Closes: #291) 52 | * Fix Overprovisioning first node if anti_affinity_group has only one member. (Closes: #295) 53 | * Validate for node presence when pinning guests to avoid crashing. (Closes: #296) 54 | * Fix balancing evaluation of guest types (e.g., VM or CT). (Closes: #268) 55 | 56 | -- Florian Paul Azim Hoberg Thu, 04 Sep 2025 05:12:19 +0000 57 | 58 | proxlb (1.1.5) stable; urgency=medium 59 | 60 | * Allow custom API ports instead of fixed tcp/8006. (Closes: #260) 61 | 62 | -- Florian Paul Azim Hoberg Mon, 14 Jul 2025 11:07:34 +0000 63 | 64 | proxlb (1.1.4) stable; urgency=medium 65 | 66 | * Allow pinning of guests to a group of nodes. 
(Closes: #245) 67 | * Modified log levels to make output lighter at INFO level. (Closes: #255) 68 | * Fixed an issue where balancing was performed in combination of deactivated balancing and dry-run mode. (Closes: #248) 69 | 70 | -- Florian Paul Azim Hoberg Fri, 27 Jun 2025 16:22:58 +0000 71 | 72 | proxlb (1.1.3) stable; urgency=medium 73 | 74 | * Add reload (SIGHUP) function to ProxLB to reload the configuration. (Closes: #189) 75 | * Add optional wait time parameter to delay execution until the service takes action. (Closes: #239) 76 | * Make the amount of parallel migrations configurable. (Closes: #241) 77 | * Use the average CPU consumption of a guest within the last 60 minutes instead of the current CPU usage. (Closes: #94) 78 | * Align maintenance mode with Proxmox HA maintenance mode. (Closes: #232) 79 | 80 | -- Florian Paul Azim Hoberg Thu, 19 Jun 2025 09:10:43 +0000 81 | 82 | proxlb (1.1.2) stable; urgency=medium 83 | 84 | * Add a configurable retry mechanism when connecting to the Proxmox API. (Closes: #157) 85 | * Add 1-to-1 relationships between guest and hypervisor node to pin a guest on a node. (Closes: #218) 86 | * Force type cast cpu count of guests to int for some corner cases where a str got returned. (Closes: #222) 87 | * Fix systemd unit file to run after network on non PVE nodes. (Closes: #137) 88 | 89 | -- Florian Paul Azim Hoberg Mon, 13 May 2025 18:12:04 +0000 90 | 91 | proxlb (1.1.1) stable; urgency=medium 92 | 93 | * Fix tag evaluation for VMs for being ignored for further balancing. (Closes: #163) 94 | * Improve logging verbosity of messages that had a wrong severity. (Closes: #165) 95 | * Providing the API upstream error message when migration fails in debug mode (Closes: #205) 96 | * Change the default behaviour of the daemon mode to active. (Closes: #176) 97 | * Change the default balancing mode to used instead of assigned.
(Closes: #180) 98 | * Set cpu_used to the cpu usage, which is a percent, times the total number of cores to get a number where guest cpu_used can be added to nodes cpu_used and be meaningful. (Closes: #195) 99 | * Honor the value when balancing should not be performed and stop balancing. (Closes: #174) 100 | * Allow the use of minutes instead of hours and only accept hours or minutes in the format. (Closes: #187) 101 | * Remove hard coded memory usage from lowest usage node and use method and mode specified in configuration instead. (Closes: #197) 102 | * Fix the guest type relationship in the logs when a migration job failed. (Closes: #204) 103 | * Requery a guest if that running guest reports 0 cpu usage. (Closes: #200) 104 | 105 | -- Florian Paul Azim Hoberg Sat, 20 Apr 2025 20:55:02 +0000 106 | 107 | proxlb (1.1.0) stable; urgency=medium 108 | 109 | * Refactored code base of ProxLB. (Closes: #114) 110 | 111 | -- Florian Paul Azim Hoberg Mon, 17 Mar 2025 18:55:02 +0000 112 | -------------------------------------------------------------------------------- /proxlb/models/groups.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Groups class is responsible for handling the correlations between the guests 3 | and their groups, such as affinity and anti-affinity groups. It ensures proper balancing 4 | by grouping guests and evaluating them for further balancing. The class provides methods 5 | to initialize with ProxLB data and to generate groups based on guest and node data. 
__author__ = "Florian Paul Azim Hoberg "
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
__license__ = "GPL-3.0"


from typing import Dict, Any
from utils.logger import SystemdLogger
from utils.helper import Helper

logger = SystemdLogger()


class Groups:
    """
    The groups class is responsible for handling the correlations between the guests
    and their groups like affinity and anti-affinity groups. To ensure a proper balancing
    guests will be grouped and then evaluated for further balancing.

    Methods:
        __init__(proxlb_data: Dict[str, Any]):
            Initializes the Groups class.

        get_groups(guests: Dict[str, Any], nodes: Dict[str, Any]) -> Dict[str, Any]:
            Generates and returns a dictionary of affinity and anti-affinity groups
            based on the provided data.
    """

    def __init__(self, proxlb_data: Dict[str, Any]):
        """
        Initializes the Groups class with the provided ProxLB data.

        Args:
            proxlb_data (Dict[str, Any]): The data required for balancing VMs and CTs.
        """

    @staticmethod
    def get_groups(guests: Dict[str, Any], nodes: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generates and returns a dictionary of affinity and anti-affinity groups based on the provided data.

        Every guest ends up in exactly one affinity group: the last entry of its
        "affinity_groups" list or, if that list is empty, a group with a random
        unique name. The affinity group accumulates the cpu/memory/disk totals
        and usage of its members.

        Args:
            guests (Dict[str, Any]): A dictionary with a "guests" mapping of guest name
                to guest metadata ("affinity_groups", "anti_affinity_groups",
                "node_current" and the cpu/memory/disk "*_total"/"*_used" values).
            nodes (Dict[str, Any]): A dictionary with a "nodes" mapping of node name to
                node metadata containing a "maintenance" flag.

        Returns:
            Dict[str, Any]: A dictionary containing the created groups that includes:
                * Affinity groups (or a random and unique group)
                * Anti-affinity groups
                * A list of guests that are currently placed on a node which
                  is defined to be in maintenance.
        """
        logger.debug("Starting: get_groups.")
        groups = {'groups': {'affinity': {}, 'anti_affinity': {}, 'maintenance': []}}
        affinity = groups["groups"]["affinity"]
        anti_affinity = groups["groups"]["anti_affinity"]

        # Guest resources that are summed up per affinity group. Each group value is
        # taken from the guest value of the same name; memory_used and disk_used were
        # previously filled from cpu_used by mistake.
        # NOTE(review): assumes guest metadata provides "memory_used" and "disk_used"
        # alongside the "*_total" values - confirm against the guests model.
        resource_keys = ("cpu_total", "cpu_used", "memory_total", "memory_used", "disk_total", "disk_used")

        for guest_name, guest_meta in guests["guests"].items():
            # Create affinity grouping
            # Use an affinity group if available for the guest; with several
            # groups defined the last one wins.
            if len(guest_meta["affinity_groups"]) > 0:
                for affinity_group in guest_meta["affinity_groups"]:
                    group_name = affinity_group
                    logger.debug(f'Affinity group {affinity_group} for {guest_name} will be used.')
            else:
                # Generate a random uniq group name for the guest if
                # the guest does not belong to any affinity group
                random_group = Helper.get_uuid_string()
                group_name = random_group
                logger.debug(f'Random uniq group {random_group} for {guest_name} will be used.')

            if group_name not in affinity:
                # Create group template with initial guest meta information
                affinity[group_name] = {"guests": [guest_name], "counter": 1}
                # Create groups resource template by the guests resources
                for key in resource_keys:
                    affinity[group_name][key] = guest_meta[key]
            else:
                # Update group templates by guest meta information
                affinity[group_name]["guests"].append(guest_name)
                affinity[group_name]["counter"] += 1
                # Update group resources by guest resources
                for key in resource_keys:
                    affinity[group_name][key] += guest_meta[key]

            # Create anti-affinity grouping
            # NOTE(review): assumes the guest is registered in every anti-affinity
            # group it lists - confirm against the original layout.
            if len(guest_meta["anti_affinity_groups"]) > 0:
                for anti_affinity_group in guest_meta["anti_affinity_groups"]:
                    anti_affinity_group_name = anti_affinity_group
                    logger.debug(f'Anti-affinity group {anti_affinity_group_name} for {guest_name} will be used.')

                    if anti_affinity_group_name not in anti_affinity:
                        anti_affinity[anti_affinity_group_name] = {
                            "guests": [guest_name],
                            "counter": 1,
                            "used_nodes": [],
                        }
                    else:
                        anti_affinity[anti_affinity_group_name]["guests"].append(guest_name)
                        anti_affinity[anti_affinity_group_name]["counter"] += 1

            # Create grouping of guests that are currently located on nodes that are
            # marked as in maintenance and must be migrated
            if nodes["nodes"][guest_meta["node_current"]]["maintenance"]:
                logger.debug(f'{guest_name} will be migrated to another node because the underlying node {guest_meta["node_current"]} is defined to be in maintenance.')
                groups["groups"]["maintenance"].append(guest_name)

        logger.debug("Finished: get_groups.")
        return groups
logger.debug("Finished: get_groups.") 124 | return groups 125 | -------------------------------------------------------------------------------- /docs/02_installation.md: -------------------------------------------------------------------------------- 1 | # Table of Contents 2 | 3 | - [Installation](#installation) 4 | - [Requirements / Dependencies](#requirements--dependencies) 5 | - [Debian Package](#debian-package) 6 | - [Quick-Start](#quick-start) 7 | - [Details](#details) 8 | - [Debian Packages (.deb files)](#debian-packages-deb-files) 9 | - [Repo Mirror and Proxmox Offline Mirror Support](#repo-mirror-and-proxmox-offline-mirror-support) 10 | - [RedHat Package](#redhat-package) 11 | - [Container Images / Docker](#container-images--docker) 12 | - [Overview of Images](#overview-of-images) 13 | - [Source](#source) 14 | - [Traditional System](#traditional-system) 15 | - [Container Image](#container-image) 16 | - [Upgrading](#upgrading) 17 | - [Upgrading from < 1.1.0](#upgrading-from--110) 18 | - [Upgrading from >= 1.1.0](#upgrading-from--110) 19 | 20 | 21 | ## Installation 22 | ### Requirements / Dependencies 23 | * Python3.x 24 | * proxmoxer 25 | * requests 26 | * urllib3 27 | * pyyaml 28 | 29 | The dependencies can simply be installed with `pip` by running the following command: 30 | ``` 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | *Note: Distribution packages, such like the provided `.deb` package will automatically resolve and install all required dependencies by using already packaged version from the distribution's repository. By using the Docker (container) image or Debian packages, you do not need to take any care of the requirements listed here.* 35 | 36 | ### Debian Package 37 | ProxLB is a powerful and flexible load balancer designed to work across various architectures, including `amd64`, `arm64`, `rv64` and many other ones that support Python. 
It runs independently of the underlying hardware, making it a versatile choice for different environments. This chapter covers the step-by-step process to install ProxLB on Debian-based systems, including Debian clones like Ubuntu. 38 | 39 | #### Quick-Start 40 | You can simply use this snippet to install the repository and to install ProxLB on your system. 41 | 42 | ```bash 43 | echo "deb https://repo.gyptazy.com/stable /" > /etc/apt/sources.list.d/proxlb.list 44 | wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gpg 45 | apt-get update && apt-get -y install proxlb 46 | cp /etc/proxlb/proxlb_example.yaml /etc/proxlb/proxlb.yaml 47 | # Adjust the config to your needs 48 | vi /etc/proxlb/proxlb.yaml 49 | systemctl start proxlb 50 | ``` 51 | 52 | Afterwards, ProxLB is running in the background and balances your cluster by your defined balancing method (default: memory). 53 | 54 | #### Details 55 | ProxLB provides two different repositories: 56 | * https://repo.gyptazy.com/stable (only stable release) 57 | * https://repo.gyptazy.com/testing (bleeding edge - not recommended) 58 | 59 | The repository is signed and the GPG key can be found at: 60 | * https://repo.gyptazy.com/repository.gpg 61 | 62 | You can also simply import it by running: 63 | 64 | ``` 65 | # KeyID: 17169F23F9F71A14AD49EDADDB51D3EB01824F4C 66 | # UID: gyptazy Solutions Repository 67 | # SHA256: 52c267e6f4ec799d40cdbdb29fa518533ac7942dab557fa4c217a76f90d6b0f3 repository.gpg 68 | 69 | wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gpg 70 | ``` 71 | 72 | *Note: The defined repositories `repo.gyptazy.com` and `repo.proxlb.de` are the same!* 73 | 74 | #### Debian Packages (.deb files) 75 | If you do not want to use the repository you can also find the debian packages as a .deb file on gyptazy's CDN at: 76 | * https://cdn.gyptazy.com/files/os/debian/proxlb/ 77 | 78 | Afterwards, you can simply install the package by running: 79 | ```bash 80 | dpkg 
-i proxlb_*.deb 81 | cp /etc/proxlb/proxlb_example.yaml /etc/proxlb/proxlb.yaml 82 | # Adjust the config to your needs 83 | vi /etc/proxlb/proxlb.yaml 84 | systemctl start proxlb 85 | ``` 86 | 87 | #### Repo Mirror and Proxmox Offline Mirror Support 88 | ProxLB uses the supported flat mirror style for the Debian repository. Unfortunately, not all offline-mirror applications support it. One of the known ones is the official *proxmox-offline-mirror* which is unable to handle flat repositories (see also: [#385](https://github.com/gyptazy/ProxLB/issues/385)). 89 | 90 | Therefore, we currently operate and support both ways to avoid everyone force switching to the new repository. As a result, you can simply use this repository: 91 | ``` 92 | deb https://repo.gyptazy.com/proxlb stable main 93 | ``` 94 | 95 | **Example Config for proxmox-offline-mirror:** 96 | 97 | An example config for the proxmox-offline-mirror would look like: 98 | ``` 99 | mirror: proxlb 100 | architectures amd64 101 | base-dir /var/lib/proxmox-offline-mirror/mirrors/ 102 | key-path /etc/apt/trusted.gpg.d/proxlb.asc 103 | repository deb https://repo.gyptazy.com/proxlb stable main 104 | sync true 105 | verify true 106 | ``` 107 | 108 | ### RedHat Package 109 | There's currently no official support for RedHat based systems. However, there's a dummy .rpm package for such systems in the pipeline which can be found here: 110 | * https://github.com/gyptazy/ProxLB/actions/workflows/20-pipeline-build-rpm-package.yml 111 | 112 | 113 | ### Container Images / Docker 114 | Using the ProxLB container images is straight forward and only requires you to mount the config file. 
115 | 116 | ```bash 117 | # Pull the image 118 | docker pull cr.gyptazy.com/proxlb/proxlb:latest 119 | # Download the config 120 | wget -O proxlb.yaml https://raw.githubusercontent.com/gyptazy/ProxLB/refs/heads/main/config/proxlb_example.yaml 121 | # Adjust the config to your needs 122 | vi proxlb.yaml 123 | # Start the ProxLB container image with the ProxLB config 124 | docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml cr.gyptazy.com/proxlb/proxlb:latest 125 | ``` 126 | 127 | *Note: ProxLB container images are officially only available at cr.proxlb.de and cr.gyptazy.com.* 128 | 129 | #### Overview of Images 130 | | Version | Image | 131 | |------|:------:| 132 | | latest | cr.gyptazy.com/proxlb/proxlb:latest | 133 | | v1.1.0 | cr.gyptazy.com/proxlb/proxlb:v1.1.0 | 134 | | v1.0.6 | cr.gyptazy.com/proxlb/proxlb:v1.0.6 | 135 | | v1.0.5 | cr.gyptazy.com/proxlb/proxlb:v1.0.5 | 136 | | v1.0.4 | cr.gyptazy.com/proxlb/proxlb:v1.0.4 | 137 | | v1.0.3 | cr.gyptazy.com/proxlb/proxlb:v1.0.3 | 138 | | v1.0.2 | cr.gyptazy.com/proxlb/proxlb:v1.0.2 | 139 | | v1.0.0 | cr.gyptazy.com/proxlb/proxlb:v1.0.0 | 140 | | v0.9.9 | cr.gyptazy.com/proxlb/proxlb:v0.9.9 | 141 | 142 | ### Source 143 | ProxLB can also easily be used from the provided sources - for traditional systems but also as a Docker/Podman container image. 144 | 145 | #### Traditional System 146 | Setting up and running ProxLB from the sources is simple and requires just a few commands.
Ensure Python 3 and the Python dependencies are installed on your system, then run ProxLB using the following command: 147 | ```bash 148 | git clone https://github.com/gyptazy/ProxLB.git 149 | cd ProxLB 150 | ``` 151 | 152 | Afterwards simply adjust the config file to your needs: 153 | ```bash 154 | vi config/proxlb.yaml 155 | ``` 156 | 157 | Start ProxLB by Python3 on the system: 158 | ```bash 159 | python3 proxlb/main.py -c config/proxlb.yaml 160 | ``` 161 | 162 | #### Container Image 163 | Creating a container image of ProxLB is straightforward using the provided Dockerfile. The Dockerfile simplifies the process by automating the setup and configuration required to get ProxLB running in an Alpine container. Simply follow the steps in the Dockerfile to build the image, ensuring all dependencies and configurations are correctly applied. For those looking for an even quicker setup, a ready-to-use ProxLB container image is also available, eliminating the need for manual building and allowing for immediate deployment. 164 | 165 | ```bash 166 | git clone https://github.com/gyptazy/ProxLB.git 167 | cd ProxLB 168 | docker build -t proxlb . 169 | ``` 170 | 171 | Afterwards simply adjust the config file to your needs: 172 | ```bash 173 | vi config/proxlb.yaml 174 | ``` 175 | 176 | Finally, start the created container. 177 | ```bash 178 | docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml proxlb 179 | ``` 180 | 181 | ## Upgrading 182 | ### Upgrading from < 1.1.0 183 | Upgrading ProxLB is not supported due to a fundamental redesign introduced in version 1.1.x. With this update, ProxLB transitioned from a monolithic application to a pure Python-style project, embracing a more modular and flexible architecture. This shift aimed to improve maintainability and extensibility while keeping up with modern development practices. Additionally, ProxLB moved away from traditional ini-style configuration files and adopted YAML for configuration management. 
This change simplifies configuration handling, reduces the need for extensive validation, and ensures better type casting, ultimately providing a more streamlined and user-friendly experience. 184 | 185 | ### Upgrading from >= 1.1.0 186 | Upgrading within the current stable versions, starting from 1.1.0, will be possible in all supported ways. -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [1.1.10] - 2025-11-25 9 | 10 | ### Added 11 | 12 | - Prevent redundant rebalancing by validating existing affinity enforcement before taking actions (@gyptazy). [#335] 13 | - Add safety-guard for PVE 8 users when activating conntrack-aware migrations mistakenly (@gyptazy). [#359] 14 | 15 | ### Fixed 16 | 17 | - Fix the Proxmox API connection validation which returned a false-positive logging message of timeouts (@gyptazy). [#361] 18 | - Refactored Proxmox API connection functions (@gyptazy). [#361] 19 | - Fix a crash during PVE resource pool enumeration by skipping members not having a 'name' property (@stefanoettl). [#368] 20 | 21 | ## [1.1.9.1] - 2025-10-30 22 | 23 | ### Fixed 24 | 25 | - Fix quoting in f-strings which may cause issues on PVE 8 / Debian Bookworm systems (@gyptazy). [#352] 26 | 27 | ## [1.1.9] - 2025-10-30 28 | 29 | ### Added 30 | 31 | - Add an optional memory balancing threshold (@gyptazy). [#342] 32 | - Add affinity/anti-affinity support by pools (@gyptazy). [#343] 33 | - Add pressure (PSI) based balancing for memory, cpu, disk (req. PVE9 or greater) (@gyptazy).
[#337] 34 | - Pressure (PSI) based balancing for nodes 35 | - Pressure (PSI) based balancing for guests 36 | - Add PVE version evaluation 37 | 38 | ## [1.1.8] - 2025-10-09 39 | 40 | ### Fixed 41 | 42 | - Fix API errors when using conntrack aware migration with older PVE versions (@gyptazy). [#318] 43 | - Add a static ProxLB prefix to the log output when used by journal handler (@gyptazy). [#329] 44 | 45 | ### Changed 46 | - Container image does not run as root anymore (@mikaelkrantz945). [#317] 47 | - Container image uses venv for running ProxLB (@mikaelkrantz945). [#317] 48 | 49 | ## [1.1.7] - 2025-09-19 50 | 51 | ### Added 52 | 53 | - Add conntrack state aware migrations of VMs (@gyptazy). [#305] 54 | - Add graceful shutdown for SIGINT (e.g., CTRL + C abort). (@gyptazy). [#304] 55 | 56 | ### Fixed 57 | 58 | - Fix crash when validating absent migration job ids. (@gyptazy). [#308] 59 | - Fix guest object names are not being evaluated in debug log. (@gyptazy). [#310] 60 | 61 | ## [1.1.6.1] - 2025-09-04 62 | 63 | ### Fixed 64 | 65 | - Validate for node presence when pinning VMs to avoid crashing (@gyptazy). [#296] 66 | 67 | ## [1.1.6] - 2025-09-04 68 | 69 | ### Added 70 | 71 | - Add validation for provided API user token id to avoid confusions (@gyptazy). [#291] 72 | 73 | ### Fixed 74 | 75 | - Fix stacktrace output when validating permissions on non existing users in Proxmox (@gyptazy). [#291] 76 | - Fix Overprovisioning first node if anti_affinity_group has only one member (@MiBUl-eu). [#295] 77 | - Validate for node presence when pinning guests to avoid crashing (@gyptazy). [#296] 78 | - Fix balancing evaluation of guest types (e.g., VM or CT) (@gyptazy). [#268] 79 | 80 | ## [1.1.5] - 2025-07-14 81 | 82 | ### Added 83 | 84 | - Allow custom API ports instead of fixed tcp/8006 (@gyptazy). [#260] 85 | 86 | 87 | ## [1.1.4] - 2025-06-27 88 | 89 | ### Added 90 | 91 | - Allow pinning of guests to a group of nodes (@gyptazy). 
[#245] 92 | 93 | ### Fixed 94 | 95 | - Modified log levels to make output lighter at INFO level (@pmarasse) [#255] 96 | - Fixed an issue where balancing was performed in combination of deactivated balancing and dry-run mode (@gyptazy). [#248] 97 | 98 | 99 | ## [1.1.3] - 2025-06-19 100 | 101 | ### Added 102 | 103 | - Add reload (SIGHUP) function to ProxLB to reload the configuration (by @gyptazy). [#189] 104 | - Add optional wait time parameter to delay execution until the service takes action (by @gyptazy). [#239] 105 | - Make the amount of parallel migrations configurable (by @gyptazy). [#241] 106 | 107 | ### Changed 108 | 109 | - Use the average CPU consumption of a guest within the last 60 minutes instead of the current CPU usage (by @philslab-ninja & @gyptazy). [#94] 110 | 111 | ### Fixed 112 | 113 | - Align maintenance mode with Proxmox HA maintenance mode (by @gyptazy). [#232] 114 | 115 | 116 | ## [1.1.2] - 2025-05-13 117 | 118 | ### Added 119 | 120 | - Add a configurable retry mechanism when connecting to the Proxmox API (by @gyptazy) [#157] 121 | - Add 1-to-1 relationships between guest and hypervisor node to pin a guest to a node (by @gyptazy) [#218] 122 | 123 | ### Fixed 124 | 125 | - Force type cast cpu count of guests to int for some corner cases where a str got returned (by @gyptazy).
[#222] 126 | - Fix systemd unit file to run after network on non PVE nodes (by @robertdahlem) [#137] 127 | 128 | 129 | ## [1.1.1] - 2025-04-20 130 | 131 | ### Added 132 | 133 | - Providing the API upstream error message when migration fails in debug mode (by @gyptazy) [#205] 134 | 135 | ### Changed 136 | 137 | - Change the default behaviour of the daemon mode to active [#176] 138 | - Change the default balancing mode to used instead of assigned [#180] 139 | 140 | ### Fixed 141 | 142 | - Set cpu_used to the cpu usage, which is a percent, times the total number of cores to get a number where guest cpu_used can be added to nodes cpu_used and be meaningful (by @glitchvern) [#195] 143 | - Fix tag evaluation for VMs for being ignored for further balancing [#163] 144 | - Honor the value when balancing should not be performed and stop balancing [#174] 145 | - Allow the use of minutes instead of hours and only accept hours or minutes in the format (by @glitchvern) [#187] 146 | - Remove hard coded memory usage from lowest usage node and use method and mode specified in configuration instead (by @glitchvern) [#197] 147 | - Fix the guest type relationship in the logs when a migration job failed (by @gyptazy) [#204] 148 | - Requery a guest if that running guest reports 0 cpu usage (by @glitchvern) [#200] 149 | - Fix Python path for Docker entrypoint (by @crandler) [#170] 150 | - Improve logging verbosity of messages that had a wrong severity [#165] 151 | 152 | 153 | ## [1.1.0] - 2025-04-01 154 | 155 | ### Fixed 156 | 157 | - Refactored code base for ProxLB [#114] 158 | - Switched to `pycodestyle` for linting [#114] 159 | - Package building will be done within GitHub actions pipeline [#114] 160 | - ProxLB now only returns a warning when no guests for further balancing are present (instead of quitting) [#132] 161 | - All nodes (according to the free resources) will be used now [#130] 162 | - Fixed logging outputs where highest/lowest were mixed-up [#129] 163 | - Stop balancing
when movement would get worse (new force param to enforce for affinity rules) [#128] 164 | - Added requested documentation regarding Proxmox HA groups [#127] 165 | - Rewrite of the whole affinity/anti-affinity rules evaluation and placement [#123] 166 | - Fixed the `ignore` parameter for nodes where the node and guests on the node will be untouched [#102] 167 | 168 | 169 | ## [1.0.6] - 2024-12-24 170 | 171 | ### Fixed 172 | 173 | - Fix maintenance mode when using cli arg and config mode by using the merged list (by @CartCaved). [#119] 174 | - Fix that a scheduler time definition of 1 (int) gets wrongly interpreted as a bool (by @gyptazy). [#115] 175 | 176 | 177 | ## [1.0.5] - 2024-10-30 178 | 179 | ### Changed 180 | 181 | - Change docs to make bool usage in configs more clear (by @gyptazy). [#104] 182 | 183 | ### Fixed 184 | 185 | - Fix node (and its objects) evaluation when not reachable, e.g., maintenance (by @gyptazy). [#107] 186 | - Fix migration from local disks (by @greenlogles). [#113] 187 | - Fix evaluation of maintenance mode where comparing list & string resulted in a crash (by @glitchvern). [#106] 188 | - Fix allowed values (add DEBUG, WARNING) for log verbosity (by @gyptazy). [#98] 189 | 190 | 191 | ## [1.0.4] - 2024-10-11 192 | 193 | ### Added 194 | 195 | - Add maintenance mode to evacuate a node and move workloads to other nodes in the cluster. [#58] 196 | - Add feature to make API timeout configurable. [#91] 197 | - Add version output cli arg. [#89] 198 | 199 | ### Changed 200 | 201 | - Run storage balancing only on supported shared storages. [#79] 202 | - Run storage balancing only when needed to save time. [#79] 203 | 204 | ### Fixed 205 | 206 | - Fix CPU balancing where calculations are done in float instead of int. (by @glitchvern) [#75] 207 | - Fix documentation for the underlying infrastructure.
[#81] 208 | 209 | 210 | ## [1.0.3] - 2024-09-12 211 | 212 | ### Added 213 | 214 | - Add cli arg `-b` to return the next best node for next VM/CT placement. [#8] 215 | - Add a convert function to cast all bool-like options from configparser to bools. [#53] 216 | - Add config parser options for future features. [#53] 217 | - Add a config version schema that must be supported by ProxLB. [#53] 218 | - Add feature to allow the API hosts being provided as a comma separated list. [#60] 219 | - Add doc how to add dedicated user for authentication. (by @Dulux-Oz) 220 | - Add storage balancing function. [#51] 221 | 222 | ### Changed 223 | 224 | - Provide a more reasonable output when HA services are not active in a Proxmox cluster. [#68] 225 | - Improve the underlying code base for future implementations. [#53] 226 | 227 | ### Fixed 228 | 229 | - Fix anti-affinity rules not evaluating a new and different node. [#67] 230 | - Fixed `master_only` function by inverting the condition. 231 | - Fix documentation for the master_only parameter placed in the wrong config section. [#74] 232 | - Fix bug in the `proxlb.conf` in the vm_balancing section. 233 | - Fix handling of unset `ignore_nodes` and `ignore_vms` which resulted in an attribute error. [#71] 234 | - Improved the overall validation and error handling. [#64] 235 | 236 | 237 | ## [1.0.2] - 2024-08-13 238 | 239 | ### Added 240 | 241 | - Add option to run ProxLB only on the Proxmox's master node in the cluster (reg. HA feature). [#40] 242 | - Add option to run migrations in parallel or sequentially. [#41] 243 | 244 | ### Changed 245 | 246 | - Fix daemon timer to use hours instead of minutes. [#45] 247 | 248 | ### Fixed 249 | 250 | - Fix CMake packaging for Debian package to avoid overwriting the config file. [#49] 251 | 252 | 253 | ## [1.0.0] - 2024-08-01 254 | 255 | ### Added 256 | 257 | - Add feature to prevent VMs from being relocated by defining the 'plb_ignore_vm' tag.
[#7] 258 | - Add feature to prevent VMs from being relocated by defining a wildcard pattern. [#7] 259 | - Add Docker/Podman support. [#10 by @daanbosch] 260 | - Add option to rebalance by assigned VM resources to avoid overprovisioning. [#16] 261 | - Add feature to make log verbosity configurable [#17]. 262 | - Add dry-run support to see what kind of rebalancing would be done. [#6] 263 | - Add LXC/Container integration. [#27] 264 | - Add exclude grouping feature to rebalance VMs from being located together to new nodes. [#4] 265 | - Add include grouping feature to rebalance VMs bundled to new nodes. [#3] 266 | - Add option_mode to rebalance by node's free resources in percent (instead of bytes). [#29] 267 | 268 | ### Changed 269 | 270 | - Adjusted general logging and log more details. 271 | 272 | 273 | ## [0.9.9] - 2024-07-06 274 | 275 | ### Added 276 | 277 | - Initial public development release of ProxLB. 278 | 279 | 280 | ## [0.9.0] - 2024-02-01 281 | 282 | ### Added 283 | 284 | - Development release of ProxLB. 285 | -------------------------------------------------------------------------------- /proxlb/models/tags.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Tags class retrieves and processes tags from guests of type VM or CT running 3 | in a Proxmox cluster. It provides methods to fetch tags from the Proxmox API and 4 | evaluate them for affinity, anti-affinity, and ignore tags, which are used during 5 | balancing calculations. 
__author__ = "Florian Paul Azim Hoberg "
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
__license__ = "GPL-3.0"


import time
from typing import List
from typing import Dict, Any
from utils.logger import SystemdLogger
from utils.helper import Helper

logger = SystemdLogger()


class Tags:
    """
    Retrieve guest tags from the Proxmox API and evaluate them, together with
    pool memberships and HA rules, into affinity groups, anti-affinity groups,
    ignore flags and node pinnings.

    Methods:
        __init__:
            Initializes the Tags class.

        get_tags_from_guests(proxmox_api, node, guest_id, guest_type) -> List[str]:
            Retrieves all tags for a given guest from the Proxmox API.

        get_affinity_groups(tags, pools, ha_rules, proxlb_config) -> List[str]:
            Returns all affinity groups derived from tags, pools and HA rules.

        get_anti_affinity_groups(tags, pools, ha_rules, proxlb_config) -> List[str]:
            Returns all anti-affinity groups derived from tags, pools and HA rules.

        get_ignore(tags) -> bool:
            Returns whether one of the tags marks the guest as ignored.

        get_node_relationships(tags, nodes, pools, ha_rules, proxlb_config) -> List[str]:
            Returns the node names a guest is pinned to by tags, pools or HA rules.
    """
    def __init__(self):
        """
        Initializes the tags class.
        """

    @staticmethod
    def get_tags_from_guests(proxmox_api: Any, node: str, guest_id: int, guest_type: str) -> List[str]:
        """
        Get tags for a guest from the Proxmox cluster by the API.

        The tags are read from the "tags" key of the guest config. When the
        value is a string it is split on ";" and empty entries are dropped.

        Args:
            proxmox_api (Any): The Proxmox API client instance.
            node (str): The node name where the given guest is located.
            guest_id (int): The internal Proxmox ID of the guest.
            guest_type (str): The type (vm or ct) of the guest.

        Returns:
            List: A list of all tags associated with the given guest. The list is
                  empty when no tags are set or when the guest type is unknown.
        """
        logger.debug("Starting: get_tags_from_guests.")
        # NOTE(review): fixed pause before every config request; presumably a
        # throttle for the API - confirm before changing or removing it.
        time.sleep(0.1)

        # Always bind `tags`; an unexpected guest type previously ended in an
        # UnboundLocalError further down.
        tags = []
        if guest_type == 'vm':
            guest_config = proxmox_api.nodes(node).qemu(guest_id).config.get()
            tags = guest_config.get("tags", [])
        elif guest_type == 'ct':
            guest_config = proxmox_api.nodes(node).lxc(guest_id).config.get()
            tags = guest_config.get("tags", [])
        else:
            logger.warning(f"Unknown guest type {guest_type} for guest {guest_id} on node {node}. No tags retrieved.")

        if isinstance(tags, str):
            tags = [tag for tag in tags.split(";") if tag]
        elif not tags:
            # Covers a "tags" key that is present but holds None.
            tags = []

        logger.debug("Finished: get_tags_from_guests.")
        return tags

    @staticmethod
    def _collect_groups(tags: List[str], pools: List[str], ha_rules: List[Dict[str, Any]], proxlb_config: Dict[str, Any], tag_prefix: str, group_type: str) -> List[str]:
        """
        Collect the groups of one kind (affinity or anti-affinity) for a guest.

        Args:
            tags (List): A list holding all defined tags for a given guest.
            pools (List): A list holding all defined pools for a given guest.
            ha_rules (List): A list of dicts holding the ha_rules for a given guest.
            proxlb_config (Dict): A dict holding the ProxLB configuration.
            tag_prefix (str): Prefix a tag must start with to count for this kind.
            group_type (str): Either 'affinity' or 'anti-affinity'; compared with the
                              'type' of pools and ha-rules and used in log messages.

        Returns:
            List: Matching tags, pool names and ha-rule names, in that order.
        """
        groups = []

        # Tag based groups
        for tag in tags:
            if tag.startswith(tag_prefix):
                logger.debug(f"Adding {group_type} group for tag {tag}.")
                groups.append(tag)
            else:
                logger.debug(f"Skipping {group_type} group for tag {tag}.")

        # Pool based groups; the config is only consulted when the guest has pools
        if len(pools) > 0:
            configured_pools = proxlb_config['balancing'].get('pools') or {}
            for pool in pools:
                if pool in configured_pools:
                    if configured_pools[pool].get('type', None) == group_type:
                        logger.debug(f"Adding {group_type} group for pool {pool}.")
                        groups.append(pool)
                else:
                    logger.debug(f"Skipping {group_type} group for pool {pool}.")

        # HA rule based groups
        for ha_rule in ha_rules:
            if ha_rule.get('type', None) == group_type:
                logger.debug(f"Adding {group_type} group for ha-rule {ha_rule}.")
                groups.append(ha_rule['rule'])

        return groups

    @staticmethod
    def get_affinity_groups(tags: List[str], pools: List[str], ha_rules: List[Dict[str, Any]], proxlb_config: Dict[str, Any]) -> List[str]:
        """
        Get the affinity groups of a guest.

        A group is added for every tag starting with "plb_affinity", for every
        pool configured in ProxLB with type 'affinity' and for every ha-rule of
        type 'affinity'.

        Args:
            tags (List): A list holding all defined tags for a given guest.
            pools (List): A list holding all defined pools for a given guest.
            ha_rules (List): A list of dicts holding the ha_rules for a given guest.
            proxlb_config (Dict): A dict holding the ProxLB configuration.

        Returns:
            List: A list including all affinity groups for the given guest.
        """
        logger.debug("Starting: get_affinity_groups.")
        affinity_tags = Tags._collect_groups(tags, pools, ha_rules, proxlb_config, "plb_affinity", "affinity")
        logger.debug("Finished: get_affinity_groups.")
        return affinity_tags

    @staticmethod
    def get_anti_affinity_groups(tags: List[str], pools: List[str], ha_rules: List[Dict[str, Any]], proxlb_config: Dict[str, Any]) -> List[str]:
        """
        Get the anti-affinity groups of a guest.

        A group is added for every tag starting with "plb_anti_affinity", for
        every pool configured in ProxLB with type 'anti-affinity' and for every
        ha-rule of type 'anti-affinity'.

        Args:
            tags (List): A list holding all defined tags for a given guest.
            pools (List): A list holding all defined pools for a given guest.
            ha_rules (List): A list of dicts holding the ha_rules for a given guest.
            proxlb_config (Dict): A dict holding the ProxLB configuration.

        Returns:
            List: A list including all anti-affinity groups for the given guest.
        """
        logger.debug("Starting: get_anti_affinity_groups.")
        anti_affinity_tags = Tags._collect_groups(tags, pools, ha_rules, proxlb_config, "plb_anti_affinity", "anti-affinity")
        logger.debug("Finished: get_anti_affinity_groups.")
        return anti_affinity_tags

    @staticmethod
    def get_ignore(tags: List[str]) -> bool:
        """
        Evaluate whether a guest carries an ignore tag.

        Args:
            tags (List): A list holding all defined tags for a given guest.

        Returns:
            Bool: True when at least one tag starts with "plb_ignore", else False.
        """
        logger.debug("Starting: get_ignore.")
        ignore_tag = any(tag.startswith("plb_ignore") for tag in tags)
        logger.debug("Finished: get_ignore.")
        return ignore_tag

    @staticmethod
    def get_node_relationships(tags: List[str], nodes: Dict[str, Any], pools: List[str], ha_rules: List[Dict[str, Any]], proxlb_config: Dict[str, Any]) -> List[str]:
        """
        Get the nodes a guest is pinned to by tags, by pools defined in the
        ProxLB configuration or by ha-rules.

        Tag and pool based pinnings are only applied when
        Helper.validate_node_presence accepts the node; nodes listed in
        ha-rules of type 'affinity' are added without that validation.

        Args:
            tags (List): A list holding all defined tags for a given guest.
            nodes (Dict): A dictionary holding all available nodes in the cluster.
            pools (List): A list holding all defined pools for a given guest.
            ha_rules (List): A list of dicts holding the ha_rules for a given guest.
            proxlb_config (Dict): A dict holding the ProxLB configuration.

        Returns:
            List: The related hypervisor node name(s); empty when nothing is pinned.
        """
        logger.debug("Starting: get_node_relationships.")
        node_relationship_tags = []
        pin_prefix = "plb_pin_"

        # Tag based node relationship
        if len(tags) > 0:
            logger.debug("Validating node pinning by tags.")
            for tag in tags:
                if tag.startswith("plb_pin"):
                    # Strip only the leading marker; str.replace would also remove
                    # the marker if it appeared again inside the node name.
                    if tag.startswith(pin_prefix):
                        node_relationship_tag = tag[len(pin_prefix):]
                    else:
                        node_relationship_tag = tag

                    # Validate if the node to pin is present in the cluster
                    if Helper.validate_node_presence(node_relationship_tag, nodes):
                        logger.debug(f"Tag {node_relationship_tag} is valid! Defined node exists in the cluster.")
                        logger.debug(f"Setting node relationship because of tag {tag} to {node_relationship_tag}.")
                        node_relationship_tags.append(node_relationship_tag)
                    else:
                        logger.warning(f"Tag {node_relationship_tag} is invalid! Defined node does not exist in the cluster. Not applying pinning.")

        # Pool based node relationship
        if len(pools) > 0:
            logger.debug("Validating node pinning by pools.")
            configured_pools = proxlb_config['balancing'].get('pools') or {}
            for pool in pools:
                if pool in configured_pools:
                    # A pool without a 'pin' entry simply pins to no node
                    pool_nodes = configured_pools[pool].get('pin', None) or []
                    for node in pool_nodes:

                        # Validate if the node to pin is present in the cluster
                        if Helper.validate_node_presence(node, nodes):
                            logger.debug(f"Pool pinning tag {node} is valid! Defined node exists in the cluster.")
                            logger.debug(f"Setting node relationship because of pool {pool} to {node}.")
                            node_relationship_tags.append(node)
                        else:
                            logger.warning(f"Pool pinning tag {node} is invalid! Defined node does not exist in the cluster. Not applying pinning.")

                else:
                    logger.debug(f"Skipping pinning for pool {pool}. Pool is not defined in ProxLB configuration.")

        # HA rule based node relationship
        if len(ha_rules) > 0:
            logger.debug("Validating node pinning by ha-rules.")
            for ha_rule in ha_rules:
                # A rule without a 'nodes' entry used to raise TypeError via len(0)
                rule_nodes = ha_rule.get("nodes") or []
                if len(rule_nodes) > 0:
                    if ha_rule.get("type", None) == "affinity":
                        logger.debug(f"ha-rule {ha_rule['rule']} is of type affinity.")
                        for node in rule_nodes:
                            logger.debug(f"Adding {node} as node relationship because of ha-rule {ha_rule['rule']}.")
                            node_relationship_tags.append(node)
                    else:
                        logger.debug(f"ha-rule {ha_rule['rule']} is of type anti-affinity. Skipping node relationship addition.")

        logger.debug("Finished: get_node_relationships.")
        return node_relationship_tags
class Balancing:
    """
    Moves guests to their calculated target nodes within a Proxmox cluster.

    The previously generated data (held in proxlb_data) is processed and every guest
    whose current node differs from its target node is migrated, honoring the values
    defined by an operator in the balancing settings.

    Methods:
        __init__(self, proxmox_api, proxlb_data):
            Walks over all guests in chunks and triggers the required migrations.

        exec_rebalancing_vm(self, proxmox_api, proxlb_data, guest_name):
            Starts the migration of a virtual machine (VM) and returns the job id.

        exec_rebalancing_ct(self, proxmox_api, proxlb_data, guest_name):
            Starts the migration of a container (CT) and returns the job id.

        get_rebalancing_job_status(self, proxmox_api, proxlb_data, guest_name, guest_current_node, job_id, retry_counter=1) -> bool:
            Polls a migration job until it finished or the validation limit is reached.
    """

    def __init__(self, proxmox_api: Any, proxlb_data: Dict[str, Any]):
        """
        Runs the rebalancing for all guests held in the provided ProxLB data.

        Guests are processed in chunks. All migrations of one chunk are started first
        and awaited afterwards, so the chunk size is the upper bound of migrations
        running at the same time.

        Args:
            proxmox_api (object): The Proxmox API client instance used to interact with the Proxmox cluster.
            proxlb_data (dict): A dictionary containing data related to the ProxLB load balancing configuration.
        """
        def chunk_dict(data, size):
            """
            Splits a dictionary into chunks of a specified size.

            Args:
                data (dict): The dictionary to be split into chunks.
                size (int): The size of each chunk (must be >= 1).

            Yields:
                dict: A chunk of the original dictionary with at most `size` items.
            """
            logger.debug("Starting: chunk_dict.")
            it = iter(data.items())
            for _ in range(0, len(data), size):
                yield dict(islice(it, size))

        balancing_config = proxlb_data["meta"]["balancing"]
        balance_types = balancing_config.get("balance_types", [])

        # Validate if balancing should be performed in parallel or sequentially.
        # If parallel balancing is enabled, set the number of parallel jobs.
        parallel_jobs = balancing_config.get("parallel_jobs", 5)
        if not balancing_config.get("parallel", False):
            parallel_jobs = 1
            logger.debug("Balancing: Parallel balancing is disabled. Running sequentially.")
        elif not isinstance(parallel_jobs, int) or parallel_jobs < 1:
            # A chunk size below 1 would make range() in chunk_dict fail.
            logger.warning(f"Balancing: Invalid value for parallel jobs: {parallel_jobs}. Running sequentially.")
            parallel_jobs = 1
        else:
            logger.debug(f"Balancing: Parallel balancing is enabled. Running with {parallel_jobs} parallel jobs.")

        # Maps the guest type to the method that starts its migration.
        migrators = {
            "vm": self.exec_rebalancing_vm,
            "ct": self.exec_rebalancing_ct,
        }

        for chunk in chunk_dict(proxlb_data["guests"], parallel_jobs):
            jobs_to_wait = []

            for guest_name, guest_meta in chunk.items():

                # Nothing to do when the guest already runs on its target node
                if guest_meta["node_current"] == guest_meta["node_target"]:
                    logger.debug(f"Balancing: Guest {guest_name} is already on the target node {guest_meta['node_target']} and will not be rebalanced.")
                    continue

                if guest_meta["ignore"]:
                    logger.debug(f"Balancing: Guest {guest_name} is ignored and will not be rebalanced.")
                    continue

                guest_type = guest_meta["type"]
                migrate = migrators.get(guest_type)

                # Just in case we get a new type of guest in the future
                if migrate is None:
                    logger.critical(f"Balancing: Got unexpected guest type: {guest_type}. Cannot proceed guest: {guest_name}.")
                    continue

                if guest_type not in balance_types:
                    logger.debug(
                        f"Balancing: Balancing for guest {guest_name} will not be performed. "
                        f"Guest is of type {guest_type.upper()} which is not included in allowed balancing types.")
                    continue

                logger.debug(f"Balancing: Balancing for guest {guest_name} of type {guest_type.upper()} started.")
                job_id = migrate(proxmox_api, proxlb_data, guest_name)

                # A missing job id means the migration could not be started
                if job_id:
                    jobs_to_wait.append((guest_name, guest_meta["node_current"], job_id))

            # Wait for all jobs in the current chunk to complete
            for guest_name, node, job_id in jobs_to_wait:
                self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, node, job_id)

    def exec_rebalancing_vm(self, proxmox_api: Any, proxlb_data: Dict[str, Any], guest_name: str) -> Any:
        """
        Starts the migration of a virtual machine (VM) to its target node.

        The migration options 'online' and 'with-local-disks' are derived from the
        balancing settings 'live' and 'with_local_disks' (both enabled when unset).

        Args:
            proxmox_api (object): The Proxmox API client instance used to interact with the Proxmox cluster.
            proxlb_data (dict): A dictionary containing data related to the ProxLB load balancing configuration.
            guest_name (str): The name of the guest VM to be migrated.

        Returns:
            The job id returned by the Proxmox API for the migration, or None when the
            API rejected the request (proxmoxer.core.ResourceException is logged, not raised).
        """
        logger.debug("Starting: exec_rebalancing_vm.")
        guest = proxlb_data["guests"][guest_name]
        guest_id = guest["id"]
        guest_node_current = guest["node_current"]
        guest_node_target = guest["node_target"]
        balancing_config = proxlb_data["meta"]["balancing"]
        job_id = None

        migration_options = {
            'target': guest_node_target,
            'online': 1 if balancing_config.get("live", True) else 0,
            'with-local-disks': 1 if balancing_config.get("with_local_disks", True) else 0,
        }

        # Conntrack state aware migrations are not supported in older PVE versions,
        # so the option is only sent when it is enabled in the balancing settings.
        # NOTE(review): the fallback used here is True when the key is unset - confirm
        # that the setting gets disabled elsewhere for clusters that do not support it.
        if balancing_config.get("with_conntrack_state", True):
            migration_options['with-conntrack-state'] = 1

        try:
            logger.info(f"Balancing: Starting to migrate VM guest {guest_name} from {guest_node_current} to {guest_node_target}.")
            job_id = proxmox_api.nodes(guest_node_current).qemu(guest_id).migrate().post(**migration_options)
        except proxmoxer.core.ResourceException as proxmox_api_error:
            logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors. Please check if resource is locked or similar.")
            logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors: {proxmox_api_error}")

        logger.debug("Finished: exec_rebalancing_vm.")
        return job_id

    def exec_rebalancing_ct(self, proxmox_api: Any, proxlb_data: Dict[str, Any], guest_name: str) -> Any:
        """
        Starts the migration of a container (CT) to its target node.

        The migration is requested with 'restart=1'.

        Args:
            proxmox_api (object): The Proxmox API client instance used to interact with the Proxmox cluster.
            proxlb_data (dict): A dictionary containing data related to the ProxLB load balancing configuration.
            guest_name (str): The name of the guest CT to be migrated.

        Returns:
            The job id returned by the Proxmox API for the migration, or None when the
            API rejected the request (proxmoxer.core.ResourceException is logged, not raised).
        """
        logger.debug("Starting: exec_rebalancing_ct.")
        guest = proxlb_data["guests"][guest_name]
        guest_id = guest["id"]
        guest_node_current = guest["node_current"]
        guest_node_target = guest["node_target"]
        job_id = None

        try:
            logger.info(f"Balancing: Starting to migrate CT guest {guest_name} from {guest_node_current} to {guest_node_target}.")
            job_id = proxmox_api.nodes(guest_node_current).lxc(guest_id).migrate().post(target=guest_node_target, restart=1)
        except proxmoxer.core.ResourceException as proxmox_api_error:
            logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors. Please check if resource is locked or similar.")
            logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors: {proxmox_api_error}")

        logger.debug("Finished: exec_rebalancing_ct.")
        return job_id

    def get_rebalancing_job_status(self, proxmox_api: Any, proxlb_data: Dict[str, Any], guest_name: str, guest_current_node: str, job_id: str, retry_counter: int = 1) -> bool:
        """
        Monitors the status of a rebalancing job on a Proxmox node until it completes or a timeout is reached.

        The job is polled every 10 seconds in a loop (not by recursion), so the number
        of polls is not limited by the interpreter's recursion depth and the final
        result is always handed back to the caller.

        Args:
            proxmox_api (object): The Proxmox API client instance.
            proxlb_data (dict): The ProxLB configuration data. The balancing setting
                'max_job_validation' (default 1800) limits the number of polls.
            guest_name (str): The name of the guest being rebalanced.
            guest_current_node (str): The node the job was started on.
            job_id (str): The ID of the rebalancing job to monitor.
            retry_counter (int, optional): The poll count to start from. Defaults to 1.

        Returns:
            bool: True if the job completed successfully, False otherwise.
        """
        logger.debug("Starting: get_rebalancing_job_status.")
        max_job_validation = proxlb_data["meta"]["balancing"].get("max_job_validation", 1800)
        job = proxmox_api.nodes(guest_current_node).tasks(job_id).status().get()

        # Watch job id until it finalizes
        while job["status"] == "running":
            # Do not hammer the API while watching the job status
            time.sleep(10)
            retry_counter += 1

            # Stop watching when the soft-limit of maximum migration time for a guest is hit
            if retry_counter >= max_job_validation:
                logger.warning(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration took too long. Please check manually.")
                logger.debug("Finished: get_rebalancing_job_status.")
                return False

            logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration is still running... (Run: {retry_counter})")
            job = proxmox_api.nodes(guest_current_node).tasks(job_id).status().get()

        # Validate job output for errors when finished
        if job["status"] == "stopped":
            if job.get("exitstatus") == "OK":
                logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) was successfully.")
                logger.debug("Finished: get_rebalancing_job_status.")
                return True

            logger.critical(f"Balancing: Job ID {job_id} (guest: {guest_name}) went into an error! Please check manually.")
            logger.debug("Finished: get_rebalancing_job_status.")
            return False

        # Neither running nor stopped: the outcome is unknown, so it is not reported as success
        logger.warning(f"Balancing: Job ID {job_id} (guest: {guest_name}) reported an unexpected status: {job['status']}. Please check manually.")
        logger.debug("Finished: get_rebalancing_job_status.")
        return False
Please check manually.") 254 | logger.debug("Finished: get_rebalancing_job_status.") 255 | return False 256 | -------------------------------------------------------------------------------- /proxlb/utils/helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Helper class provides some basic helper functions to not mess up the code in other 3 | classes. 4 | """ 5 | 6 | __author__ = "Florian Paul Azim Hoberg " 7 | __copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)" 8 | __license__ = "GPL-3.0" 9 | 10 | 11 | import json 12 | import uuid 13 | import re 14 | import socket 15 | import sys 16 | import time 17 | import utils.version 18 | from utils.logger import SystemdLogger 19 | from typing import Dict, Any 20 | from types import FrameType 21 | 22 | logger = SystemdLogger() 23 | 24 | 25 | class Helper: 26 | """ 27 | The Helper class provides some basic helper functions to not mess up the code in other 28 | classes. 29 | 30 | Methods: 31 | __init__(): 32 | Initializes the general Helper class. 33 | 34 | get_uuid_string() -> str: 35 | Generates a random uuid and returns it as a string. 36 | 37 | log_node_metrics(proxlb_data: Dict[str, Any], init: bool = True) -> None: 38 | Logs the memory, CPU, and disk usage metrics of nodes in the provided proxlb_data dictionary. 39 | 40 | get_version(print_version: bool = False) -> None: 41 | Returns the current version of ProxLB and optionally prints it to stdout. 42 | 43 | get_daemon_mode(proxlb_config: Dict[str, Any]) -> None: 44 | Checks if the daemon mode is active and handles the scheduling accordingly. 45 | """ 46 | proxlb_reload = False 47 | 48 | def __init__(self): 49 | """ 50 | Initializes the general Helper clas. 51 | """ 52 | 53 | @staticmethod 54 | def get_uuid_string() -> str: 55 | """ 56 | Generates a random uuid and returns it as a string. 57 | 58 | Args: 59 | None 60 | 61 | Returns: 62 | Str: Returns a random uuid as a string. 
class Helper:
    """
    The Helper class provides some basic helper functions to not mess up the code in other
    classes.

    Attributes:
        proxlb_reload: Class-level flag which is set to True by handler_sighup.

    Methods:
        get_uuid_string() -> str:
            Generates a random uuid and returns it as a string.

        log_node_metrics(proxlb_data, init=True) -> None:
            Logs the memory, CPU, and disk usage of all nodes and stores it in the statistics.

        get_version(print_version=False) -> None:
            Prints the version information and exits when requested.

        get_daemon_mode(proxlb_config) -> None:
            Sleeps until the next scheduled run or exits when daemon mode is off.

        get_service_delay(proxlb_config) -> None:
            Sleeps for the configured start up delay.

        print_json(proxlb_config, print_json=False) -> None:
            Prints the given data without its 'meta' key as JSON.

        handler_sighup(signum, frame) / handler_sigint(signum, frame) -> None:
            Signal handlers for SIGHUP and SIGINT.

        get_host_port_from_string(host_object) -> tuple:
            Splits a host string into host and port.

        validate_node_presence(node, nodes) -> bool:
            Checks whether a node is part of the given cluster nodes.

        tcp_connect_test(addr_family, host, port, timeout) -> tuple:
            Tests the TCP reachability of a host and port.
    """
    proxlb_reload = False

    def __init__(self):
        """
        Initializes the general Helper class.
        """

    @staticmethod
    def get_uuid_string() -> str:
        """
        Generates a random uuid and returns it as a string.

        Returns:
            str: A random (version 4) uuid as a string.
        """
        logger.debug("Starting: get_uuid_string.")
        generated_uuid = uuid.uuid4()
        logger.debug("Finished: get_uuid_string.")
        return str(generated_uuid)

    @staticmethod
    def log_node_metrics(proxlb_data: Dict[str, Any], init: bool = True) -> None:
        """
        Logs the memory, CPU, and disk usage metrics of nodes in the provided proxlb_data dictionary.

        The usage of every node is rendered as "<node>: <percent>%" and joined by " | ".
        The result is written to the 'statistics' field in the 'meta' section of proxlb_data.

        Args:
            proxlb_data (Dict[str, Any]): A dictionary containing node metrics and metadata.
            init (bool): True (re)creates the statistics with the 'before' values and empty
                'after' values, False only replaces the 'after' values. Default is True.
        """
        logger.debug("Starting: log_node_metrics.")
        usage = {
            resource: " | ".join(
                f"{name}: {metrics[f'{resource}_used_percent']:.2f}%"
                for name, metrics in proxlb_data["nodes"].items()
            )
            for resource in ("memory", "cpu", "disk")
        }

        if init:
            proxlb_data["meta"]["statistics"] = {"before": usage, "after": {"memory": "", "cpu": "", "disk": ""}}
        else:
            proxlb_data["meta"]["statistics"]["after"] = usage

        logger.debug(f"Nodes usage memory: {usage['memory']}")
        logger.debug(f"Nodes usage cpu: {usage['cpu']}")
        logger.debug(f"Nodes usage disk: {usage['disk']}")
        logger.debug("Finished: log_node_metrics.")

    @staticmethod
    def get_version(print_version: bool = False) -> None:
        """
        Prints the current version of ProxLB to stdout and exits when requested.

        Args:
            print_version (bool): If True, prints the version information to stdout and
                exits the program with exit code 0. Nothing happens otherwise.

        Returns:
            None
        """
        if print_version:
            print(f"{utils.version.__app_name__} version: {utils.version.__version__}\n(C) 2025 by {utils.version.__author__}\n{utils.version.__url__}")
            sys.exit(0)

    @staticmethod
    def get_daemon_mode(proxlb_config: Dict[str, Any]) -> None:
        """
        Checks if the daemon mode is active and handles the scheduling accordingly.

        With daemon mode active (the default when unset) the method sleeps for the
        configured schedule and returns afterwards. Without daemon mode the program
        exits with exit code 0. A missing or invalid schedule exits with exit code 1.

        Args:
            proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.

        Returns:
            None
        """
        logger.debug("Starting: get_daemon_mode.")
        # A missing or empty service section is handled like an empty dict
        service_config = proxlb_config.get("service") or {}

        if not service_config.get("daemon", True):
            logger.debug("Successfully executed ProxLB. Daemon mode not active - stopping.")
            print("Daemon mode not active - stopping.")
            sys.exit(0)

        # Validate schedule format which changed in v1.1.1
        schedule = service_config.get("schedule", None)
        if not isinstance(schedule, dict):
            logger.error("Invalid format for schedule. Please use 'hours' or 'minutes'.")
            sys.exit(1)

        schedule_format = schedule.get("format", "hours")
        if schedule_format == "hours":
            interval = schedule.get("interval", 12)
            sleep_seconds = interval * 3600
        elif schedule_format == "minutes":
            interval = schedule.get("interval", 720)
            sleep_seconds = interval * 60
        else:
            logger.error("Invalid format for schedule. Please use 'hours' or 'minutes'.")
            sys.exit(1)

        # Report the interval that is really applied, including the per-format default
        logger.info(f"Daemon mode active: Next run in: {interval} {schedule_format}.")
        time.sleep(sleep_seconds)

        logger.debug("Finished: get_daemon_mode.")

    @staticmethod
    def get_service_delay(proxlb_config: Dict[str, Any]) -> None:
        """
        Checks if a start up delay for the service is defined and waits to proceed until
        the time is up.

        Args:
            proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.

        Returns:
            None
        """
        logger.debug("Starting: get_service_delay.")
        # Missing or empty sections are handled like empty dicts
        delay = (proxlb_config.get("service") or {}).get("delay") or {}

        if delay.get("enable", False):
            delay_format = delay.get("format", "hours")
            if delay_format == "hours":
                delay_time = delay.get("time", 1)
                sleep_seconds = delay_time * 3600
            elif delay_format == "minutes":
                delay_time = delay.get("time", 60)
                sleep_seconds = delay_time * 60
            else:
                logger.error("Invalid format for service delay. Please use 'hours' or 'minutes'.")
                sys.exit(1)

            # Report the delay that is really applied, including the per-format default
            logger.info(f"Service delay active: First run in: {delay_time} {delay_format}.")
            time.sleep(sleep_seconds)
        else:
            logger.debug("Service delay not active. Proceeding without delay.")

        logger.debug("Finished: get_service_delay.")

    @staticmethod
    def print_json(proxlb_config: Dict[str, Any], print_json: bool = False) -> None:
        """
        Prints the given data as a JSON output to stdout.

        Args:
            proxlb_config (Dict[str, Any]): The dictionary to print. Its top-level
                'meta' key is left out of the output.
            print_json (bool): Nothing is printed unless this is True.

        Returns:
            None
        """
        logger.debug("Starting: print_json.")
        if print_json:
            # Strip the 'meta' key to make sure that no credentials are leaked.
            filtered_data = {k: v for k, v in proxlb_config.items() if k != "meta"}
            print(json.dumps(filtered_data, indent=4))

        logger.debug("Finished: print_json.")

    @staticmethod
    def handler_sighup(signum: int, frame: FrameType) -> None:
        """
        Signal handler for SIGHUP.

        Sets the `proxlb_reload` class variable to True.
        NOTE(review): the flag is presumably evaluated by the main loop to reload the
        configuration - confirm against the caller.

        Args:
            signum (int): The signal number (unused).
            frame (FrameType): Current stack frame (unused but required by the signal handler signature).
        """
        logger.debug("Starting: handle_sighup.")
        logger.debug("Got SIGHUP signal. Reloading...")
        Helper.proxlb_reload = True
        logger.debug("Finished: handle_sighup.")

    @staticmethod
    def handler_sigint(signum: int, frame: FrameType) -> None:
        """
        Signal handler for SIGINT (triggered by CTRL+C).

        Prints a termination message and exits the program with exit code 0.

        Args:
            signum (int): The signal number (unused).
            frame (FrameType): The current stack frame when the signal was received (unused).

        Returns:
            None
        """
        exit_message = "ProxLB has been successfully terminated by user."
        logger.debug(exit_message)
        print(f"\n {exit_message}")
        sys.exit(0)

    @staticmethod
    def get_host_port_from_string(host_object):
        """
        Parses a string containing a host (IPv4, IPv6, or hostname) and an optional port, and returns a tuple of (host, port).

        Supported formats:
            - Hostname or IPv4 without port: "example.com" or "192.168.0.1"
            - Hostname or IPv4 with port: "example.com:8006" or "192.168.0.1:8006"
            - IPv6 in brackets with optional port: "[fc00::1]" or "[fc00::1]:8006"
            - IPv6 without brackets, port is assumed after last colon: "fc00::1:8006"

        If no port is specified, port 8006 is used as the default.

        An IPv6 address without brackets and without a port is ambiguous: when its last
        group is numeric (e.g. "fc00::1") that group is read as the port. Use the
        bracket notation to avoid this.

        Args:
            host_object (str): A string representing a host with or without a port.

        Returns:
            tuple: A tuple (host: str, port: int)

        Raises:
            ValueError: If a hostname or IPv4 address is followed by a non-numeric port.
        """
        logger.debug("Starting: get_host_port_from_string.")
        default_port = 8006

        # IPv6 (with or without port, written in brackets)
        bracketed = re.match(r'^\[(.+)\](?::(\d+))?$', host_object)
        colon_count = host_object.count(':')

        if bracketed:
            host = bracketed.group(1)
            port = int(bracketed.group(2)) if bracketed.group(2) else default_port
        elif colon_count == 0:
            # IPv4 or hostname without port
            host, port = host_object, default_port
        elif colon_count == 1:
            # IPv4 or hostname with port
            host, port_text = host_object.split(':')
            port = int(port_text)
        else:
            # IPv6 without brackets: assume the part after the last colon is the port
            # and fall back to the whole string as host when it is not numeric
            candidate_host, _, candidate_port = host_object.rpartition(':')
            try:
                host, port = candidate_host, int(candidate_port)
            except ValueError:
                host, port = host_object, default_port

        logger.debug("Finished: get_host_port_from_string.")
        return host, port

    @staticmethod
    def validate_node_presence(node: str, nodes: Dict[str, Any]) -> bool:
        """
        Validates whether a given node exists in the provided cluster nodes dictionary.

        Args:
            node (str): The name of the node to validate.
            nodes (Dict[str, Any]): A dictionary containing cluster information.
                Must include a "nodes" key mapping to a dict of available nodes.

        Returns:
            bool: True if the node exists in the cluster, False otherwise.
        """
        logger.debug("Starting: validate_node_presence.")
        node_present = node in nodes["nodes"]

        if node_present:
            logger.info(f"Node {node} found in cluster. Applying pinning.")
        else:
            logger.warning(f"Node {node} not found in cluster. Not applying pinning!")

        logger.debug("Finished: validate_node_presence.")
        return node_present

    @staticmethod
    def tcp_connect_test(addr_family: int, host: str, port: int, timeout: int) -> tuple[bool, int | None]:
        """
        Attempt a TCP connection to the specified host and port to test the reachability.

        The socket is always closed again, also when setting it up fails.

        Args:
            addr_family (int): Address family for the socket (e.g., socket.AF_INET for IPv4, socket.AF_INET6 for IPv6).
            host (str): The hostname or IP address to connect to.
            port (int): The port number to connect to.
            timeout (int): Connection timeout in seconds.

        Returns:
            tuple[bool, int | None]: A tuple containing:
                - bool: True if the connection was successful, False otherwise.
                - int | None: None if the connection was successful, otherwise the error
                  code returned by connect_ex.
        """
        with socket.socket(addr_family, socket.SOCK_STREAM) as test_socket:
            test_socket.settimeout(timeout)
            rc = test_socket.connect_ex((host, port))

        return (rc == 0, rc if rc != 0 else None)
11 | 12 | set_node_maintenance(proxlb_config: Dict[str, Any], node_name: str) -> Dict[str, Any]: 13 | Sets Proxmox nodes to a maintenance mode if required. 14 | 15 | set_node_ignore(proxlb_config: Dict[str, Any], node_name: str) -> Dict[str, Any]: 16 | Sets Proxmox nodes to be ignored if requested. 17 | """ 18 | 19 | __author__ = "Florian Paul Azim Hoberg " 20 | __copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)" 21 | __license__ = "GPL-3.0" 22 | 23 | 24 | import time 25 | from typing import Dict, Any 26 | from utils.logger import SystemdLogger 27 | 28 | logger = SystemdLogger() 29 | 30 | 31 | class Nodes: 32 | """ 33 | The Nodes class retrieves all running nodes in a Proxmox cluster 34 | and collects their resource metrics. 35 | """ 36 | def __init__(self): 37 | """ 38 | Initializes the Nodes class with the provided ProxLB data. 39 | """ 40 | 41 | @staticmethod 42 | def get_nodes(proxmox_api: any, proxlb_config: Dict[str, Any]) -> Dict[str, Any]: 43 | """ 44 | Get metrics of all nodes in a Proxmox cluster. 45 | 46 | This method retrieves metrics for all available nodes in the Proxmox cluster. 47 | It iterates over each node and collects resource metrics including CPU, memory, and disk usage. 48 | 49 | Args: 50 | proxmox_api (any): The Proxmox API client instance. 51 | proxmox_config (Dict[str, Any]): A dictionary containing the ProxLB configuration. 52 | nodes (Dict[str, Any]): A dictionary containing information about the nodes in the Proxmox cluster. 53 | 54 | Returns: 55 | Dict[str, Any]: A dictionary containing metrics and information for all running nodes. 56 | """ 57 | logger.debug("Starting: get_nodes.") 58 | nodes = {"nodes": {}} 59 | 60 | for node in proxmox_api.nodes.get(): 61 | # Ignoring a node results into ignoring all placed guests on the ignored node! 
62 | if node["status"] == "online" and not Nodes.set_node_ignore(proxlb_config, node["node"]): 63 | nodes["nodes"][node["node"]] = {} 64 | nodes["nodes"][node["node"]]["name"] = node["node"] 65 | nodes["nodes"][node["node"]]["pve_version"] = Nodes.get_node_pve_version(proxmox_api, node["node"]) 66 | nodes["nodes"][node["node"]]["pressure_hot"] = False 67 | nodes["nodes"][node["node"]]["maintenance"] = False 68 | nodes["nodes"][node["node"]]["cpu_total"] = node["maxcpu"] 69 | nodes["nodes"][node["node"]]["cpu_assigned"] = 0 70 | nodes["nodes"][node["node"]]["cpu_used"] = node["cpu"] * node["maxcpu"] 71 | nodes["nodes"][node["node"]]["cpu_free"] = (node["maxcpu"]) - (node["cpu"] * node["maxcpu"]) 72 | nodes["nodes"][node["node"]]["cpu_assigned_percent"] = nodes["nodes"][node["node"]]["cpu_assigned"] / nodes["nodes"][node["node"]]["cpu_total"] * 100 73 | nodes["nodes"][node["node"]]["cpu_free_percent"] = nodes["nodes"][node["node"]]["cpu_free"] / node["maxcpu"] * 100 74 | nodes["nodes"][node["node"]]["cpu_used_percent"] = nodes["nodes"][node["node"]]["cpu_used"] / node["maxcpu"] * 100 75 | nodes["nodes"][node["node"]]["cpu_pressure_some_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "some") 76 | nodes["nodes"][node["node"]]["cpu_pressure_full_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "full") 77 | nodes["nodes"][node["node"]]["cpu_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "some", spikes=True) 78 | nodes["nodes"][node["node"]]["cpu_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "full", spikes=True) 79 | nodes["nodes"][node["node"]]["cpu_pressure_hot"] = False 80 | nodes["nodes"][node["node"]]["memory_total"] = node["maxmem"] 81 | nodes["nodes"][node["node"]]["memory_assigned"] = 0 82 | nodes["nodes"][node["node"]]["memory_used"] = node["mem"] 83 | nodes["nodes"][node["node"]]["memory_free"] = node["maxmem"] - node["mem"] 84 | 
nodes["nodes"][node["node"]]["memory_assigned_percent"] = nodes["nodes"][node["node"]]["memory_assigned"] / nodes["nodes"][node["node"]]["memory_total"] * 100 85 | nodes["nodes"][node["node"]]["memory_free_percent"] = nodes["nodes"][node["node"]]["memory_free"] / node["maxmem"] * 100 86 | nodes["nodes"][node["node"]]["memory_used_percent"] = nodes["nodes"][node["node"]]["memory_used"] / node["maxmem"] * 100 87 | nodes["nodes"][node["node"]]["memory_pressure_some_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "memory", "some") 88 | nodes["nodes"][node["node"]]["memory_pressure_full_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "memory", "full") 89 | nodes["nodes"][node["node"]]["memory_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "memory", "some", spikes=True) 90 | nodes["nodes"][node["node"]]["memory_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "memory", "full", spikes=True) 91 | nodes["nodes"][node["node"]]["memory_pressure_hot"] = False 92 | nodes["nodes"][node["node"]]["disk_total"] = node["maxdisk"] 93 | nodes["nodes"][node["node"]]["disk_assigned"] = 0 94 | nodes["nodes"][node["node"]]["disk_used"] = node["disk"] 95 | nodes["nodes"][node["node"]]["disk_free"] = node["maxdisk"] - node["disk"] 96 | nodes["nodes"][node["node"]]["disk_assigned_percent"] = nodes["nodes"][node["node"]]["disk_assigned"] / nodes["nodes"][node["node"]]["disk_total"] * 100 97 | nodes["nodes"][node["node"]]["disk_free_percent"] = nodes["nodes"][node["node"]]["disk_free"] / node["maxdisk"] * 100 98 | nodes["nodes"][node["node"]]["disk_used_percent"] = nodes["nodes"][node["node"]]["disk_used"] / node["maxdisk"] * 100 99 | nodes["nodes"][node["node"]]["disk_pressure_some_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "disk", "some") 100 | nodes["nodes"][node["node"]]["disk_pressure_full_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "disk", "full") 
101 | nodes["nodes"][node["node"]]["disk_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "disk", "some", spikes=True) 102 | nodes["nodes"][node["node"]]["disk_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "disk", "full", spikes=True) 103 | nodes["nodes"][node["node"]]["disk_pressure_hot"] = False 104 | 105 | # Evaluate if node should be set to maintenance mode 106 | if Nodes.set_node_maintenance(proxmox_api, proxlb_config, node["node"]): 107 | nodes["nodes"][node["node"]]["maintenance"] = True 108 | 109 | logger.debug(f"Node metrics collected: {nodes}") 110 | logger.debug("Finished: get_nodes.") 111 | return nodes 112 | 113 | @staticmethod 114 | def set_node_maintenance(proxmox_api, proxlb_config: Dict[str, Any], node_name: str) -> Dict[str, Any]: 115 | """ 116 | Set nodes to maintenance mode based on the provided configuration. 117 | 118 | This method updates the nodes dictionary to mark certain nodes as being in maintenance mode 119 | based on the configuration provided in proxlb_config. 120 | 121 | Args: 122 | proxmox_api (any): The Proxmox API client instance. 123 | proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration, including maintenance nodes. 124 | node_name: (str): The current node name within the outer iteration. 125 | 126 | Returns: 127 | Bool: Returns a bool if the provided node name is present in the maintenance section of the config file. 
128 | """ 129 | logger.debug("Starting: set_node_maintenance.") 130 | 131 | # Evaluate maintenance mode by config 132 | if proxlb_config.get("proxmox_cluster", None).get("maintenance_nodes", None) is not None: 133 | if len(proxlb_config.get("proxmox_cluster", {}).get("maintenance_nodes", [])) > 0: 134 | if node_name in proxlb_config.get("proxmox_cluster", {}).get("maintenance_nodes", []): 135 | logger.info(f"Node: {node_name} has been set to maintenance mode (by ProxLB config).") 136 | return True 137 | else: 138 | logger.debug(f"Node: {node_name} is not in maintenance mode by ProxLB config.") 139 | 140 | # Evaluate maintenance mode by Proxmox HA 141 | for ha_element in proxmox_api.cluster.ha.status.current.get(): 142 | if ha_element.get("status"): 143 | if "maintenance mode" in ha_element.get("status"): 144 | if ha_element.get("node") == node_name: 145 | logger.info(f"Node: {node_name} has been set to maintenance mode (by Proxmox HA API).") 146 | return True 147 | else: 148 | logger.debug(f"Node: {node_name} is not in maintenance mode by Proxmox HA API.") 149 | 150 | logger.debug("Finished: set_node_maintenance.") 151 | 152 | @staticmethod 153 | def set_node_ignore(proxlb_config: Dict[str, Any], node_name: str) -> Dict[str, Any]: 154 | """ 155 | Set nodes to be ignored based on the provided configuration. 156 | 157 | This method updates the nodes dictionary to mark certain nodes as being ignored 158 | based on the configuration provided in proxlb_config. 159 | 160 | Args: 161 | proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration, including maintenance nodes. 162 | node_name: (str): The current node name within the outer iteration. 163 | 164 | Returns: 165 | Bool: Returns a bool if the provided node name is present in the ignore section of the config file. 
166 | """ 167 | logger.debug("Starting: set_node_ignore.") 168 | 169 | if proxlb_config.get("proxmox_cluster", None).get("ignore_nodes", None) is not None: 170 | if len(proxlb_config.get("proxmox_cluster", {}).get("ignore_nodes", [])) > 0: 171 | if node_name in proxlb_config.get("proxmox_cluster", {}).get("ignore_nodes", []): 172 | logger.info(f"Node: {node_name} has been set to be ignored. Not adding node!") 173 | return True 174 | 175 | logger.debug("Finished: set_node_ignore.") 176 | 177 | @staticmethod 178 | def get_node_rrd_data(proxmox_api, node_name: str, object_name: str, object_type: str, spikes=False) -> float: 179 | """ 180 | Retrieves the rrd data metrics for a specific resource (CPU, memory, disk) of a node. 181 | 182 | Args: 183 | proxmox_api (Any): The Proxmox API client instance. 184 | node_name (str): The name of the node hosting the guest. 185 | object_name (str): The resource type to query (e.g., 'cpu', 'memory', 'disk'). 186 | object_type (str, optional): The pressure type ('some', 'full') or None for average usage. 187 | spikes (bool, optional): Whether to consider spikes in the calculation. Defaults to False. 188 | 189 | Returns: 190 | float: The calculated average usage value for the specified resource. 191 | """ 192 | logger.debug("Starting: get_node_rrd_data.") 193 | time.sleep(0.1) 194 | 195 | try: 196 | if spikes: 197 | logger.debug(f"Getting spike RRD data for {object_name} from node: {node_name}.") 198 | node_data_rrd = proxmox_api.nodes(node_name).rrddata.get(timeframe="hour", cf="MAX") 199 | else: 200 | logger.debug(f"Getting average RRD data for {object_name} from node: {node_name}.") 201 | node_data_rrd = proxmox_api.nodes(node_name).rrddata.get(timeframe="hour", cf="AVERAGE") 202 | 203 | except Exception: 204 | logger.error(f"Failed to retrieve RRD data for guest: {node_name}. 
@staticmethod
def get_node_pve_version(proxmox_api, node_name: str) -> float:
    """
    Return the Proxmox VE (PVE) version for a given node by querying the Proxmox API.

    This function calls proxmox_api.nodes(node_name).version.get() and returns the
    'version' field of the returned mapping exactly as delivered by the API. No type
    conversion is performed here.

    Args:
        proxmox_api (Any): The Proxmox API client instance.
        node_name (str): The name of the node to query.

    Returns:
        The value of the 'version' field reported for the specified node.

    Raises:
        Exception: Any error raised by the proxmox_api call is logged and re-raised.
        KeyError: If the returned mapping has no 'version' field.
    """
    logger.debug("Starting: get_node_pve_version.")
    # NOTE(review): fixed 100 ms pause before every request, presumably to throttle API calls; confirm.
    time.sleep(0.1)

    try:
        logger.debug(f"Trying to get PVE version for node: {node_name}.")
        version = proxmox_api.nodes(node_name).version.get()
    except Exception:
        logger.error(f"Failed to get PVE version for node: {node_name}.")
        logger.debug("Finished: get_node_pve_version.")
        # Propagate the real cause instead of failing later on an unbound variable.
        raise

    pve_version = version["version"]
    logger.debug(f"Got version {pve_version} for node {node_name}.")
    logger.debug("Finished: get_node_pve_version.")
    return pve_version
class Guests:
    """
    The Guests class retrieves all running guests on the Proxmox cluster across all available nodes.
    It handles both VM and CT guest types, collecting their resource metrics.

    Methods:
        __init__:
            Initializes the Guests class.

        get_guests(proxmox_api, pools, ha_rules, nodes, meta, proxlb_config) -> Dict[str, Any]:
            Retrieves metrics for all running guests (both VMs and CTs) across all nodes in the Proxmox cluster.
            It collects resource metrics such as CPU, memory, and disk usage, as well as tags and affinity/anti-affinity groups.

        get_guest_rrd_data(proxmox_api, node_name, vm_id, vm_name, object_name, object_type, spikes, guest_type) -> float:
            Retrieves a single usage or pressure value of a guest from its RRD data.
    """
    def __init__(self):
        """
        Initializes the Guests class. No instance state is set up; all functionality
        is provided by static methods.
        """

    @staticmethod
    def get_guests(proxmox_api: Any, pools: Dict[str, Any], ha_rules: Dict[str, Any], nodes: Dict[str, Any], meta: Dict[str, Any], proxlb_config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get metrics of all guests in a Proxmox cluster.

        This method retrieves metrics for all running guests (both VMs and CTs) across all nodes in the Proxmox cluster.
        It iterates over each node and collects resource metrics for each running guest, including CPU, memory, and disk usage.
        Additionally, it retrieves tags and affinity/anti-affinity groups for each guest.

        The result is keyed by guest name, so a guest that shares its name with an
        earlier processed guest replaces that earlier entry.

        Args:
            proxmox_api (Any): The Proxmox API client instance.
            pools (Dict[str, Any]): A dictionary containing information about the pools in the Proxmox cluster.
            ha_rules (Dict[str, Any]): A dictionary containing information about the HA rules in the Proxmox cluster.
            nodes (Dict[str, Any]): A dictionary containing information about the nodes in the Proxmox cluster.
            meta (Dict[str, Any]): A dictionary containing metadata information. Currently not used by this method.
            proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.

        Returns:
            Dict[str, Any]: A dictionary containing metrics and information for all running guests.
        """
        logger.debug("Starting: get_guests.")
        guests = {"guests": {}}

        # Guest objects are always only in the scope of a node.
        # Therefore, we need to iterate over all nodes to get all guests.
        for node in nodes['nodes'].keys():

            # VM objects are listed by the qemu API object, CT objects by the lxc API object.
            # Unlike the nodes we need to keep guests even when being ignored to create proper
            # resource metrics for rebalancing to ensure that we do not overprovision the node.
            for guest_type, endpoint in (('vm', 'qemu'), ('ct', 'lxc')):
                label = guest_type.upper()

                for guest in getattr(proxmox_api.nodes(node), endpoint).get():
                    name = guest['name']

                    if guest['status'] != 'running':
                        logger.debug(f'Metric for {label} {name} ignored because {label} is not running.')
                        continue

                    guests['guests'][name] = Guests._collect_guest(proxmox_api, node, guest, guest_type, pools, ha_rules, nodes, proxlb_config)
                    logger.debug(f"Resources of Guest {name} (type {label}) added: {guests['guests'][name]}")

        logger.debug("Finished: get_guests.")
        return guests

    @staticmethod
    def _collect_guest(proxmox_api: Any, node: str, guest: Dict[str, Any], guest_type: str, pools: Dict[str, Any], ha_rules: Dict[str, Any], nodes: Dict[str, Any], proxlb_config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Build the metrics entry for a single running guest.

        Args:
            proxmox_api (Any): The Proxmox API client instance.
            node (str): The name of the node hosting the guest.
            guest (Dict[str, Any]): The guest object as returned by the qemu or lxc listing of the node.
            guest_type (str): 'vm' for guests listed by qemu, 'ct' for guests listed by lxc.
            pools (Dict[str, Any]): A dictionary containing information about the pools in the Proxmox cluster.
            ha_rules (Dict[str, Any]): A dictionary containing information about the HA rules in the Proxmox cluster.
            nodes (Dict[str, Any]): A dictionary containing information about the nodes in the Proxmox cluster.
            proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.

        Returns:
            Dict[str, Any]: The entry stored for this guest in the result of get_guests.
        """
        name = guest['name']
        vmid = guest['vmid']
        entry: Dict[str, Any] = {}

        def rrd(resource, pressure, spikes=False):
            # All RRD lookups of one guest share node, id, name and guest type.
            return Guests.get_guest_rrd_data(proxmox_api, node, vmid, name, resource, pressure, spikes=spikes, guest_type=guest_type)

        def add_pressure(resource):
            # Order: some/full averages first, then some/full spikes, then the hot flag.
            for suffix, spikes in (('', False), ('_spikes', True)):
                for pressure in ('some', 'full'):
                    entry[f'{resource}_pressure_{pressure}{suffix}_percent'] = rrd(resource, pressure, spikes)
            entry[f'{resource}_pressure_hot'] = False

        entry['name'] = name

        entry['cpu_total'] = int(guest['cpus'])
        entry['cpu_used'] = rrd('cpu', None)
        add_pressure('cpu')

        entry['memory_total'] = guest['maxmem']
        entry['memory_used'] = guest['mem']
        add_pressure('memory')

        entry['disk_total'] = guest['maxdisk']
        entry['disk_used'] = guest['disk']
        add_pressure('disk')

        entry['id'] = vmid
        entry['node_current'] = node
        entry['node_target'] = node
        entry['processed'] = False
        entry['pressure_hot'] = False

        # Tags, pools and HA rules are resolved first because the group and
        # relationship lookups below are derived from them.
        entry['tags'] = Tags.get_tags_from_guests(proxmox_api, node, vmid, guest_type)
        entry['pools'] = Pools.get_pools_for_guest(name, pools)
        entry['ha_rules'] = HaRules.get_ha_rules_for_guest(name, ha_rules, vmid)
        entry['affinity_groups'] = Tags.get_affinity_groups(entry['tags'], entry['pools'], entry['ha_rules'], proxlb_config)
        entry['anti_affinity_groups'] = Tags.get_anti_affinity_groups(entry['tags'], entry['pools'], entry['ha_rules'], proxlb_config)
        entry['ignore'] = Tags.get_ignore(entry['tags'])
        entry['node_relationships'] = Tags.get_node_relationships(entry['tags'], nodes, entry['pools'], entry['ha_rules'], proxlb_config)
        entry['type'] = guest_type

        return entry

    @staticmethod
    def get_guest_rrd_data(proxmox_api, node_name: str, vm_id: int, vm_name: str, object_name: str, object_type: str, spikes=False, guest_type: str = 'vm') -> float:
        """
        Retrieves the rrd data metrics for a specific resource (CPU, memory, disk) of a guest VM or CT.

        With an object_type the RRD series "pressure" + object_name + object_type is
        evaluated: the mean over all entries (spikes=False) or the maximum of the last
        six entries that carry a value (spikes=True). Without an object_type the mean of
        the "cpu" series is returned, independent of object_name.

        Args:
            proxmox_api (Any): The Proxmox API client instance.
            node_name (str): The name of the node hosting the guest.
            vm_id (int): The ID of the guest VM or CT.
            vm_name (str): The name of the guest VM or CT.
            object_name (str): The resource type to query (e.g., 'cpu', 'memory', 'disk').
            object_type (str, optional): The pressure type ('some', 'full') or None for average usage.
            spikes (bool, optional): Whether to consider spikes in the calculation. Defaults to False.
            guest_type (str, optional): 'ct' to query the lxc endpoint; any other value queries
                the qemu endpoint. Defaults to 'vm'.

        Returns:
            float: The calculated value for the specified resource. 0.0 is returned when the
                   API request fails or when no RRD entries are available.
        """
        logger.debug("Starting: get_guest_rrd_data.")
        # NOTE(review): fixed 100 ms pause before every request, presumably to throttle API calls; confirm.
        time.sleep(0.1)

        try:
            node_api = proxmox_api.nodes(node_name)
            # Containers are served by the lxc endpoint; asking qemu for a CT id fails.
            guest_api = node_api.lxc(vm_id) if guest_type == 'ct' else node_api.qemu(vm_id)

            if spikes:
                logger.debug(f"Getting spike RRD data for {object_name} from guest: {vm_name}.")
                guest_data_rrd = guest_api.rrddata.get(timeframe="hour", cf="MAX")
            else:
                logger.debug(f"Getting average RRD data for {object_name} from guest: {vm_name}.")
                guest_data_rrd = guest_api.rrddata.get(timeframe="hour", cf="AVERAGE")
        except Exception:
            # Deliberate best effort: a guest without metrics must not abort the whole run.
            logger.error(f"Failed to retrieve RRD data for guest: {vm_name} (ID: {vm_id}) on node: {node_name}. Using 0.0 as value.")
            logger.debug("Finished: get_guest_rrd_data.")
            return float(0.0)

        # An empty result would otherwise divide by zero in the average calculations.
        if not guest_data_rrd:
            logger.debug(f"No RRD data returned for {object_name} from guest: {vm_name}. Using 0.0 as value.")
            logger.debug("Finished: get_guest_rrd_data.")
            return float(0.0)

        if object_type:

            lookup_key = f"pressure{object_name}{object_type}"
            if spikes:
                # RRD data is collected every minute, so we look at the last 6 entries
                # and take the maximum value to represent the spike
                logger.debug(f"Getting RRD data (spike: {spikes}) of pressure for {object_name} {object_type} from guest: {vm_name}.")
                spike_values = [row.get(lookup_key) for row in guest_data_rrd if row.get(lookup_key) is not None]
                rrd_data_value = max(spike_values[-6:], default=0.0)
            else:
                # Calculate the average value from the RRD data entries. Entries whose value
                # is missing or None contribute 0.0 but still count towards the divisor.
                logger.debug(f"Getting RRD data (spike: {spikes}) of pressure for {object_name} {object_type} from guest: {vm_name}.")
                rrd_data_value = sum(entry.get(lookup_key) or 0.0 for entry in guest_data_rrd) / len(guest_data_rrd)

        else:
            logger.debug(f"Getting RRD data of cpu usage from guest: {vm_name}.")
            rrd_data_value = sum(entry.get("cpu") or 0.0 for entry in guest_data_rrd) / len(guest_data_rrd)

        logger.debug(f"RRD data (spike: {spikes}) for {object_name} from guest: {vm_name}: {rrd_data_value}")
        logger.debug("Finished: get_guest_rrd_data.")
        return rrd_data_value