├── data
│   ├── software_stack
│   │   ├── alliance.yaml
│   │   ├── eessi.yaml
│   │   └── computecanada.yaml
│   ├── os
│   │   └── RedHat
│   │       ├── 8.yaml
│   │       └── 9.yaml
│   ├── cloud
│   │   ├── openstack
│   │   │   ├── jusuf-cloud.yaml
│   │   │   └── arbutus.yaml
│   │   └── azure.yaml
│   └── site.yaml
├── .puppet-lint.rc
├── site
│   └── profile
│       ├── files
│       │   ├── efa
│       │   │   ├── efa.pp
│       │   │   └── efa.te
│       │   ├── slurm
│       │   │   ├── slurmd.pp
│       │   │   ├── cgroup_allowed_devices_file.conf
│       │   │   ├── munge_socket.pp
│       │   │   ├── pam_slurm_adopt.pp
│       │   │   ├── slurm_mail
│       │   │   ├── epilog
│       │   │   ├── munge_socket.te
│       │   │   ├── pam_slurm_adopt.te
│       │   │   ├── slurmd.te
│       │   │   └── nvidia_gres.sh
│       │   ├── gpu
│       │   │   ├── nvidia-gpu.pp
│       │   │   └── nvidia-gpu.te
│       │   ├── reverse_proxy
│       │   │   ├── caddy.pp
│       │   │   └── caddy.te
│       │   ├── freeipa
│       │   │   ├── dse-init.ldif
│       │   │   ├── kinit_wrapper
│       │   │   ├── ignore-systemd-session-slice.conf
│       │   │   ├── mc-ipa-client-install
│       │   │   └── 27e9181bdc684915a7f9f15631f4c3dd6ac5f884.patch
│       │   ├── fail2ban
│       │   │   ├── fail2ban_route.pp
│       │   │   └── fail2ban_route.te
│       │   ├── nfs
│       │   │   └── nfs.conf
│       │   ├── vector
│       │   │   └── default_config.yaml
│       │   ├── accounts
│       │   │   ├── mkhome.service
│       │   │   └── mkproject.service
│       │   ├── base
│       │   │   ├── opensshserver.config
│       │   │   ├── opensshserver-9.config
│       │   │   └── prepare4image.sh
│       │   ├── software_stack
│       │   │   └── z-00-rsnt_arch.sh.ctmpl
│       │   ├── consul
│       │   │   └── puppet_event_handler.sh
│       │   └── users
│       │       └── ipa_create_user.py
│       ├── lib
│       │   └── facter
│       │       └── nameservers.rb
│       ├── facts.d
│       │   ├── nvidia_gpu_count.sh
│       │   ├── dev_disk.sh
│       │   ├── is_unprivileged_container.sh
│       │   ├── ipa.sh
│       │   ├── cpu_ext.sh
│       │   ├── nvidia_grid_vgpu.sh
│       │   └── letsencrypt.sh
│       ├── templates
│       │   ├── freeipa
│       │   │   ├── zzz-puppet.conf.epp
│       │   │   ├── ipa-rewrite.conf.epp
│       │   │   ├── group_rules.py.epp
│       │   │   ├── hbac_rules.py.epp
│       │   │   └── mokey.yaml.epp
│       │   ├── sssd
│       │   │   └── sssd.conf.epp
│       │   ├── slurm
│       │   │   ├── cgroup.conf.epp
│       │   │   ├── slurmdbd.conf.epp
│       │   │   ├── job_submit.lua.epp
│       │   │   ├── nodes.conf.epp
│       │   │   ├── sacct.cfg.epp
│       │   │   ├── gres.conf.epp
│       │   │   └── slurm.conf.epp
│       │   ├── cvmfs
│       │   │   ├── alien_cache.conf.epp
│       │   │   └── default.local.epp
│       │   ├── base
│       │   │   ├── hosts.epp
│       │   │   └── postrun.epp
│       │   ├── software_stack
│       │   │   └── z-01-site.sh.epp
│       │   ├── prometheus
│       │   │   └── prometheus-slurm-exporter.service.epp
│       │   ├── reverse_proxy
│       │   │   └── subdomain.conf.epp
│       │   ├── jupyterhub
│       │   │   └── login.html.epp
│       │   └── accounts
│       │       ├── mkhome.sh.epp
│       │       └── mkproject.sh.epp
│       ├── functions
│       │   ├── getcidr.pp
│       │   ├── getlocalinterface.pp
│       │   ├── is_grid_vgpu.pp
│       │   ├── getptrrecord.pp
│       │   ├── getnetmask.pp
│       │   ├── getreversezone.pp
│       │   ├── gethostnames_with_class.pp
│       │   └── generate_slurm_node_line.pp
│       └── manifests
│           ├── globus.pp
│           ├── swap.pp
│           ├── fail2ban.pp
│           ├── vector.pp
│           ├── metrix.pp
│           ├── efa.pp
│           ├── puppetserver.pp
│           ├── squid.pp
│           ├── rsyslog.pp
│           ├── sssd.pp
│           ├── software_stack.pp
│           ├── consul.pp
│           ├── jupyterhub.pp
│           ├── ceph.pp
│           ├── accounts.pp
│           ├── reverse_proxy.pp
│           ├── base.pp
│           ├── nfs.pp
│           ├── prometheus.pp
│           ├── ssh.pp
│           ├── mail.pp
│           ├── cvmfs.pp
│           ├── volumes.pp
│           ├── users.pp
│           └── gpu.pp
├── .gitignore
├── lib
│   └── puppet
│       └── functions
│           ├── ssh_split_options.rb
│           ├── terraform_self.rb
│           └── slurm_compute_weights.rb
├── Gemfile
├── .github
│   └── workflows
│       └── test.yaml
├── manifests
│   └── site.pp
├── Rakefile
├── environment.conf
├── LICENSE
├── bootstrap.sh
├── Puppetfile
├── hiera.yaml
├── Gemfile.lock
├── local-tests
│   └── puppet-missing-files
└── extra.md

/data/software_stack/alliance.yaml:
--------------------------------------------------------------------------------
computecanada.yaml
--------------------------------------------------------------------------------

/data/os/RedHat/8.yaml:
--------------------------------------------------------------------------------
---
os::redhat::python3::version: 3.6
--------------------------------------------------------------------------------

/data/os/RedHat/9.yaml:
--------------------------------------------------------------------------------
---
os::redhat::python3::version: 3.9
--------------------------------------------------------------------------------

/.puppet-lint.rc:
--------------------------------------------------------------------------------
--no-documentation-check
--no-parameter_documentation
--------------------------------------------------------------------------------

/site/profile/files/efa/efa.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/efa/efa.pp
--------------------------------------------------------------------------------

/site/profile/files/slurm/slurmd.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/slurm/slurmd.pp
--------------------------------------------------------------------------------

/site/profile/files/gpu/nvidia-gpu.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/gpu/nvidia-gpu.pp
--------------------------------------------------------------------------------

/site/profile/files/slurm/cgroup_allowed_devices_file.conf:
--------------------------------------------------------------------------------
#
/dev/vda*
/dev/cpu/*
/dev/pts/*
# only on gpu nodes
/dev/nvidia*
--------------------------------------------------------------------------------

/site/profile/files/reverse_proxy/caddy.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/reverse_proxy/caddy.pp
--------------------------------------------------------------------------------

/site/profile/files/slurm/munge_socket.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/slurm/munge_socket.pp
--------------------------------------------------------------------------------

/site/profile/files/freeipa/dse-init.ldif:
--------------------------------------------------------------------------------
dn: cn=config
changetype: modify
replace: nsslapd-accesslog-compress
nsslapd-accesslog-compress: on
--------------------------------------------------------------------------------

/site/profile/files/slurm/pam_slurm_adopt.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/slurm/pam_slurm_adopt.pp
--------------------------------------------------------------------------------

/site/profile/files/fail2ban/fail2ban_route.pp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComputeCanada/puppet-magic_castle/HEAD/site/profile/files/fail2ban/fail2ban_route.pp
--------------------------------------------------------------------------------
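
[Editor's note] The .pp entries above (and the other SELinux policy modules in
this listing) are binary policy packages, so the dump replaces them with raw
URLs. A minimal sketch of how such modules are conventionally built from the
.te sources included later in this repository, assuming the standard
checkpolicy toolchain (the repository itself may build them differently):

    checkmodule -M -m -o caddy.mod caddy.te    # compile the type-enforcement source
    semodule_package -o caddy.pp -m caddy.mod  # package it as a loadable .pp module
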
/site/profile/files/nfs/nfs.conf:
--------------------------------------------------------------------------------
[nfsd]
udp=n
tcp=y
vers2=n
vers3=n
vers4=y
vers4.0=n
vers4.1=n
vers4.2=y
--------------------------------------------------------------------------------

/site/profile/files/slurm/slurm_mail:
--------------------------------------------------------------------------------
#!/bin/bash

email=${@: -1}
subject=${@: 2:$#-2}
cat << EOF | /usr/sbin/sendmail $email
To: ${email}
Subject: ${subject}
EOF
--------------------------------------------------------------------------------

/site/profile/lib/facter/nameservers.rb:
--------------------------------------------------------------------------------
require 'resolv'

Facter.add("nameservers") do
  setcode do
    Resolv::DNS::Config.default_config_hash[:nameserver]
  end
end
--------------------------------------------------------------------------------

/site/profile/files/slurm/epilog:
--------------------------------------------------------------------------------
#!/bin/bash
rm -rf "/localscratch/$SLURM_JOB_USER.$SLURM_JOBID.0"
rm -rf "/dev/shm/$SLURM_JOB_USER.$SLURM_JOBID.0"
rm -rf "/tmp/$SLURM_JOB_USER.$SLURM_JOBID.0"
exit 0
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
terraform
.vscode
.librarian/
.tmp/
data/bootstrap.yaml
data/terraform_data.yaml
data/user_data.yaml
modules/
site/profile/facts.d/terraform_facts.yaml
--------------------------------------------------------------------------------

/site/profile/facts.d/nvidia_gpu_count.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# NVIDIA vendor id is 0x10de
# List all devices with that vendor id then count the number of lines
echo "{ 'nvidia_gpu_count' : $(lspci -d 0x10de: | wc -l) }"
--------------------------------------------------------------------------------

/site/profile/facts.d/dev_disk.sh:
--------------------------------------------------------------------------------
#!/bin/bash
echo "---"
echo \"/dev/disk\":
if [ -e /dev/disk ]; then
    for i in $(find /dev/disk -type l); do
        echo " "\"$i\": \"$(readlink -f $i)\"
    done
fi
--------------------------------------------------------------------------------

/site/profile/files/vector/default_config.yaml:
--------------------------------------------------------------------------------
sources:
  in:
    type: "stdin"

sinks:
  out:
    inputs:
      - "in"
    type: "console"
    encoding:
      codec: "text"
--------------------------------------------------------------------------------

/site/profile/facts.d/is_unprivileged_container.sh:
--------------------------------------------------------------------------------
#!/bin/sh
is_unprivileged_container=$(test "nobody" == $(stat -c "%U" /proc/) && echo true || echo false)
echo "---"
echo "is_unprivileged_container: ${is_unprivileged_container}"
--------------------------------------------------------------------------------

/site/profile/templates/freeipa/zzz-puppet.conf.epp:
--------------------------------------------------------------------------------
# File managed by puppet
[main]
dns=default

[global-dns]
searches=<%= $int_domain_name %>

[global-dns-domain-*]
servers=<%= join($nameservers, ',') -%>
--------------------------------------------------------------------------------

/site/profile/templates/sssd/sssd.conf.epp:
--------------------------------------------------------------------------------
[domain/<%= $domain %>]
<% $config.map |$ki, $vi| { -%>
<% if $vi.is_a(Array) { -%>
<%= $ki %> = <%= join($vi, ',') %>
<% } elsif $vi != undef { -%>
<%= $ki %> = <%= $vi %>
<% } -%>
<% } -%>
--------------------------------------------------------------------------------

/site/profile/facts.d/ipa.sh:
--------------------------------------------------------------------------------
#!/bin/sh
echo "---"
echo '"ipa":'
echo '  "installed":' $(test -f /etc/ipa/default.conf && echo "true" || echo "false")
echo '  "domain":' $(test -f /etc/ipa/default.conf && grep -oP 'domain\s*=\s*\K(.*)' /etc/ipa/default.conf)
--------------------------------------------------------------------------------

/site/profile/files/reverse_proxy/caddy.te:
--------------------------------------------------------------------------------

module caddy 1.0;

require {
    type sysctl_net_t;
    type httpd_t;
    class file { open read };
}

#============= httpd_t ==============
allow httpd_t sysctl_net_t:file { open read };
--------------------------------------------------------------------------------

/site/profile/files/freeipa/kinit_wrapper:
--------------------------------------------------------------------------------
#!/bin/bash
trap kdestroy EXIT

export KRB5CCNAME=$(mktemp)

if [ -z "${IPA_ADMIN_PASSWD}" ]; then
    kinit admin
else
    echo $IPA_ADMIN_PASSWD | kinit admin &> /dev/null
fi
$1 "${@:2}"
--------------------------------------------------------------------------------

/data/cloud/openstack/jusuf-cloud.yaml:
--------------------------------------------------------------------------------
profile::gpu::install::vgpu::installer: bin
profile::gpu::install::vgpu::bin::source: https://hpsrepo.fz-juelich.de/jusuf/nvidia/NVIDIA-Driver.latest
profile::gpu::install::vgpu::bin::gridd_source: https://hpsrepo.fz-juelich.de/jusuf/nvidia/gridd.conf
--------------------------------------------------------------------------------

/site/profile/files/slurm/munge_socket.te:
--------------------------------------------------------------------------------
module munge_socket 1.0;

require {
    type user_t;
    type var_run_t;
    class sock_file { getattr write };
}

#============= user_t ==============
allow user_t var_run_t:sock_file { getattr write };
--------------------------------------------------------------------------------

/site/profile/templates/slurm/cgroup.conf.epp:
--------------------------------------------------------------------------------
CgroupMountpoint="/sys/fs/cgroup"
ConstrainCores=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
ConstrainDevices=yes
AllowedRamSpace=100
AllowedSwapSpace=0
MaxRAMPercent=100
MaxSwapPercent=100
MinRAMSpace=30
--------------------------------------------------------------------------------

/site/profile/templates/cvmfs/alien_cache.conf.epp:
--------------------------------------------------------------------------------
# Create repo to store alien cache instead of having them created/stored in the default.local file
CVMFS_ALIEN_CACHE="/<%= $alien_fs_root %>/<%= $alien_folder_name %>/@fqrn@"
CVMFS_SHARED_CACHE='no'
CVMFS_QUOTA_LIMIT=-1
CVMFS_CLAIM_OWNERSHIP=yes
--------------------------------------------------------------------------------

/site/profile/files/efa/efa.te:
--------------------------------------------------------------------------------
module efa 1.0;


require {
    type user_t;
    type infiniband_device_t;
    class chr_file { open ioctl map read write };
}

#============= user_t ==============
allow user_t infiniband_device_t:chr_file { open ioctl map read write };
--------------------------------------------------------------------------------

/lib/puppet/functions/ssh_split_options.rb:
--------------------------------------------------------------------------------
Puppet::Functions.create_function(:ssh_split_options) do
  dispatch :ssh_split_options do
    param 'String', :options
    return_type 'Array'
  end
  def ssh_split_options(options)
    return options.scan(/(\w+=".*?"|[\w-]+)/).flatten
  end
end
--------------------------------------------------------------------------------

/site/profile/functions/getcidr.pp:
--------------------------------------------------------------------------------
function profile::getcidr() >> String {
  $interface = profile::getlocalinterface()
  $masklen = extlib::netmask_to_cidr(profile::getnetmask())
  $ip = $networking['interfaces'][$interface]['ip']
  $network = extlib::cidr_to_network("${ip}/${masklen}")
  "${network}/${masklen}"
}
--------------------------------------------------------------------------------

/site/profile/files/slurm/pam_slurm_adopt.te:
--------------------------------------------------------------------------------
module sshd_pam_slurm_adopt 1.0;

require {
    type sshd_t;
    type var_spool_t;
    class sock_file write;
}

#============= sshd_t ==============

#!!!! WARNING: 'var_spool_t' is a base type.
allow sshd_t var_spool_t:sock_file write;
--------------------------------------------------------------------------------

/site/profile/files/accounts/mkhome.service:
--------------------------------------------------------------------------------
[Unit]
Description=Create home directory for new IPA users
BindsTo=ipa.service
After=ipa.service

[Service]
Type=simple
StandardOutput=journal
StandardError=journal
SyslogIdentifier=mkhome
ExecStart=/sbin/mkhome.sh

[Install]
WantedBy=multi-user.target
--------------------------------------------------------------------------------

/site/profile/functions/getlocalinterface.pp:
--------------------------------------------------------------------------------
function profile::getlocalinterface() >> String {
  $local_ip = lookup('terraform.self.local_ip')
  $interfaces = keys($facts['networking']['interfaces'])
  $search = $interfaces.filter | $interface | {
    $facts['networking']['interfaces'][$interface]['ip'] == $local_ip
  }
  $search[0]
}
--------------------------------------------------------------------------------

/site/profile/files/accounts/mkproject.service:
--------------------------------------------------------------------------------
[Unit]
Description=Create /project directories for IPA users
BindsTo=ipa.service
After=ipa.service

[Service]
Type=simple
StandardOutput=journal
StandardError=journal
SyslogIdentifier=mkproject
ExecStart=/sbin/mkproject.sh

[Install]
WantedBy=multi-user.target
--------------------------------------------------------------------------------

/site/profile/functions/is_grid_vgpu.pp:
--------------------------------------------------------------------------------
function profile::is_grid_vgpu() >> Boolean {
  if $facts['nvidia_grid_vgpu'] {
    true
  } else {
    $grid_vgpu_types = lookup('profile::gpu::install::vgpu::grid_vgpu_types', undef, undef, [])
    $type = lookup('terraform.self.specs.type')
    $grid_vgpu_types.any|$regex| { $type =~ Regexp($regex) }
  }
}
--------------------------------------------------------------------------------

/site/profile/templates/slurm/slurmdbd.conf.epp:
--------------------------------------------------------------------------------
AuthType=auth/munge
LogFile=/var/log/slurm/slurmdbd.log
PidFile=/var/run/slurmdbd/slurmdbd.pid
DbdHost=<%= $dbd_host %>
DbdPort=<%= $dbd_port %>
SlurmUser=slurm
StorageHost=localhost
StorageUser=slurm
StoragePass=<%= $storage_pass %>
StorageLoc=slurm_acct_db
StorageType=accounting_storage/mysql
--------------------------------------------------------------------------------

/site/profile/templates/cvmfs/default.local.epp:
--------------------------------------------------------------------------------
<% if ! $repositories.empty { -%>
CVMFS_REPOSITORIES="<%= $repositories.join(',') %>"
<% } -%>
CVMFS_STRICT_MOUNT="<%= $strict_mount %>"
CVMFS_QUOTA_LIMIT=<%= $quota_limit %>
{{ if service "squid" -}}
CVMFS_HTTP_PROXY='{{ range $i, $s := service "squid" }}{{if $i}}|{{end}}http://{{.Address}}:{{.Port}}{{end}}'
{{ end -}}
--------------------------------------------------------------------------------

/site/profile/files/freeipa/ignore-systemd-session-slice.conf:
--------------------------------------------------------------------------------
if $programname == "systemd" and ($msg contains "Starting Session" or $msg contains "Started Session" or $msg contains "Created slice" or $msg contains "Starting user-" or $msg contains "Starting User Slice of" or $msg contains "Removed session" or $msg contains "Removed slice User Slice of" or $msg contains "Stopping User Slice of") then stop
--------------------------------------------------------------------------------

/data/cloud/openstack/arbutus.yaml:
--------------------------------------------------------------------------------
profile::gpu::install::vgpu::installer: rpm
profile::gpu::install::vgpu::rpm::source: http://repo.arbutus.cloud.computecanada.ca/pulp/repos/alma%{facts.os.release.major}/Packages/a/arbutus-cloud-vgpu-repo-1.0-1.el%{facts.os.release.major}.noarch.rpm
profile::gpu::install::vgpu::rpm::packages:
  - nvidia-vgpu-kmod
  - nvidia-vgpu-gridd
  - nvidia-vgpu-tools
--------------------------------------------------------------------------------

/Gemfile:
--------------------------------------------------------------------------------
source "https://rubygems.org"

gem "rake"
gem "openvox", ENV['PUPPET_VERSION'] || '~> 8.0'
gem 'puppet-lint', '~> 5.1', '>= 5.1.1'
gem 'puppet-lint-param-docs'

gem "rspec"
gem "rspec-core"
gem "rspec-puppet"

gem 'puppet-syntax', '~> 7.0', '>= 7.0.1'
gem "puppetlabs_spec_helper"
gem "hiera"
gem 'rspec-puppet-facts'

gem "syslog"
--------------------------------------------------------------------------------

/site/profile/functions/getptrrecord.pp:
--------------------------------------------------------------------------------
function profile::getptrrecord() >> String {
  $interface = profile::getlocalinterface()
  $ip = $networking['interfaces'][$interface]['ip']
  $ip_list = split($ip, '[.]')
  $netmask_list = split(profile::getnetmask(), '[.]')

  $filtered_ip = $ip_list.filter |$i, $v| { $netmask_list[$i] == '0' }

  join(reverse($filtered_ip), '.')
}
--------------------------------------------------------------------------------

/site/profile/templates/slurm/job_submit.lua.epp:
--------------------------------------------------------------------------------
function slurm_job_submit(job_desc, part_list, submit_uid)
<% if $selinux_enabled and $selinux_context { -%>
    job_desc.selinux_context = "<%= $selinux_context %>"
<% } -%>
    return slurm.SUCCESS
end

function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
    return slurm.SUCCESS
end

return slurm.SUCCESS
--------------------------------------------------------------------------------

/site/profile/functions/getnetmask.pp:
--------------------------------------------------------------------------------
function profile::getnetmask() >> String {
  if $facts['gce'] {
    # GCP instances netmask is set to /32 but the network netmask is available
    $netmask = $gce['instance']['networkInterfaces'][0]['subnetmask']
  } else {
    $interface = profile::getlocalinterface()
    $netmask = $networking['interfaces'][$interface]['netmask']
  }
  $netmask
}
--------------------------------------------------------------------------------

/data/cloud/azure.yaml:
--------------------------------------------------------------------------------
profile::gpu::install::vgpu::installer: bin
profile::gpu::install::vgpu::bin::source: https://go.microsoft.com/fwlink/?linkid=874272
profile::gpu::install::vgpu::bin::gridd_content: |
  IgnoreSP=FALSE
  EnableUI=FALSE
profile::gpu::install::vgpu::grid_vgpu_types:
  - "^Standard_NV(6|12|18|36|72)ad[m]*s_A10_v5$"
  - "^Standard_NV(12|24|48)s_v3$"
  - "^Standard_NC(4|8|16|64)as_T4_v3$"
--------------------------------------------------------------------------------

/site/profile/files/freeipa/mc-ipa-client-install:
--------------------------------------------------------------------------------
#!/bin/bash
# keep previous installation logs
mv /var/log/ipaclient-install.log{,.$(ls /var/log/ipaclient-install.log* | wc -l)}
if /sbin/ipa-client-install "${@}"; then
    if grep -q "nsupdate failed" /var/log/ipaclient-install.log; then
        /sbin/ipa-client-install --uninstall -U
        exit 1
    else
        exit 0
    fi
else
    /sbin/ipa-client-install --uninstall -U
    exit $?
fi
--------------------------------------------------------------------------------

/site/profile/functions/getreversezone.pp:
--------------------------------------------------------------------------------
function profile::getreversezone() >> String {
  $interface = profile::getlocalinterface()
  $network = $networking['interfaces'][$interface]['network']
  $network_list = split($network, '[.]')
  $netmask_list = split(profile::getnetmask(), '[.]')

  $filtered_network = $network_list.filter |$i, $v| { $netmask_list[$i] != '0' }

  $zone = join(reverse($filtered_network), '.')
  "${zone}.in-addr.arpa."
}
--------------------------------------------------------------------------------

/site/profile/templates/base/hosts.epp:
--------------------------------------------------------------------------------
# This file is managed by Puppet do not edit manually
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6

<% if $facts['cloud']['provider'] == "gcp" { -%>
169.254.169.254 metadata.google.internal
<% } -%>

<% $instances.each|$key, $values| { -%>
<%= $values['local_ip'] %> <%= $key %>.<%= $int_domain_name%> <%= $key %>
<% } -%>
--------------------------------------------------------------------------------

/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
name: Validate Puppet code

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: ruby/setup-ruby@v1
        with:
          ruby-version: '3.4'
          bundler-cache: true # runs 'bundle install' and caches installed gems automatically
      - name: Test with Rake
        run: |
          bundle exec rake test
          local-tests/puppet-missing-files ./site
--------------------------------------------------------------------------------

/site/profile/files/gpu/nvidia-gpu.te:
--------------------------------------------------------------------------------
module nvidia-gpu 1.0;

require {
    type device_t;
    type user_t;
    class capability mknod;
    class chr_file { create getattr ioctl open read setattr write };
    class dir { add_name remove_name write };
}

#============= user_t ==============

allow user_t device_t:chr_file { create getattr ioctl open read setattr write };
allow user_t device_t:dir { add_name remove_name write };

allow user_t self:capability mknod;
--------------------------------------------------------------------------------

/site/profile/templates/software_stack/z-01-site.sh.epp:
--------------------------------------------------------------------------------
#!/bin/bash

if [[ $UID -ge <%= $min_uid %> ]]; then
<% if $lmod_default_modules { -%>
    export LMOD_SYSTEM_DEFAULT_MODULES="<%= $lmod_default_modules.join(' ') %>"
<% } -%>
<% if $extra_site_env_vars { $extra_site_env_vars.each | $key, $value| { -%>
    export <%= $key %>=<%= $value %>
<% }} -%>
<% if $initial_profile { -%>
    if [[ -r <%= $initial_profile %> ]]; then
        source <%= $initial_profile %>
    fi
<% } -%>
fi
--------------------------------------------------------------------------------

/site/profile/facts.d/cpu_ext.sh:
--------------------------------------------------------------------------------
#!/bin/sh
cpu_ext=$(grep -m1 flags /proc/cpuinfo | tr " " "\n" | tac | grep -m 1 -P '^(avx512f|avx2|avx|pni)$')
case "$cpu_ext" in
    avx512f)
        cpu_ext="avx512"
        ;;
    pni)
        cpu_ext="sse3"
        ;;
esac

case "$cpu_ext" in
    avx512)
        cpu_microarch="x86-64-v4"
        ;;
    avx2)
        cpu_microarch="x86-64-v3"
        ;;
esac

echo "{ 'cpu_ext' : '${cpu_ext}', 'cpu_microarch': '${cpu_microarch}' }"
--------------------------------------------------------------------------------

/site/profile/templates/prometheus/prometheus-slurm-exporter.service.epp:
--------------------------------------------------------------------------------
[Unit]
Description=Prometheus exporter of Slurm metrics
After=network-online.target

[Service]
User=slurm
Group=slurm
Type=simple
ExecStart=/usr/bin/prometheus-slurm-exporter <%= $collectors %> --listen-address=":<%= $port %>"
PIDFile=/run/prometheus-slurm-exporter/prometheus-slurm-exporter.pid
KillMode=process
Environment=PATH=/usr/bin:/opt/software/slurm/bin
Restart=always
RestartSec=15s

[Install]
WantedBy=multi-user.target
--------------------------------------------------------------------------------

/site/profile/templates/slurm/nodes.conf.epp:
--------------------------------------------------------------------------------
# Default value for all types of nodes
NodeName=DEFAULT MemSpecLimit=<%= $memlimit %> State=CLOUD

# Always online compute nodes
<% $nodes.each |$name, $attr| { -%>
<% if !('pool' in $attr['tags']) { -%>
<%= profile::generate_slurm_node_line($name, $attr, $weights[$name]) %>
<% }} -%>

# On-demand pool compute nodes
<% $nodes.each |$name, $attr| { -%>
<% if 'pool' in $attr['tags'] { -%>
<%= profile::generate_slurm_node_line($name, $attr, $weights[$name]) %>
<% }} -%>
--------------------------------------------------------------------------------

/site/profile/manifests/globus.pp:
--------------------------------------------------------------------------------
class profile::globus {
  package { 'wget':
    ensure => installed,
  }

  $public_ip = lookup('terraform.self.public_ip')
  class { 'globus':
    display_name  => $globus::display_name,
    client_id     => $globus::client_id,
    client_secret => $globus::client_secret,
    contact_email => $globus::contact_email,
    ip_address    => $public_ip,
    organization  => $globus::organization,
    owner         => $globus::owner,
    require       => Package['wget'],
  }
}
--------------------------------------------------------------------------------

/data/software_stack/eessi.yaml:
--------------------------------------------------------------------------------
profile::software_stack::initial_profile: "/cvmfs/software.eessi.io/versions/2023.06/init/Magic_Castle/bash"
profile::software_stack::lmod_default_modules:
  - GCC

jupyterhub::kernel::venv::python: /cvmfs/software.eessi.io/versions/2023.06/init/Magic_Castle/eessi_python3
jupyterhub::kernel::venv::prefix: /opt/ipython-kernel-eessi

jupyterhub::jupyterhub_config_hash:
  SlurmFormSpawner:
    ui_args:
      rstudio:
        modules: ['RStudio-Server']
      code-server:
        modules: ['code-server']
--------------------------------------------------------------------------------

/site/profile/functions/gethostnames_with_class.pp:
--------------------------------------------------------------------------------
function profile::gethostnames_with_class($class_name) >> Array[String] {
  $instances = lookup('terraform.instances')
  $site_all = lookup('magic_castle::site::all')
  $site_tags = lookup('magic_castle::site::tags')

  if $class_name in $site_all {
    return $instances.keys()
  } else {
    $tags = keys($site_tags).filter |$tag| {
      $class_name in $site_tags[$tag]
    }
    return keys($instances).filter |$hostname| {
      !intersection($tags, $instances[$hostname]['tags']).empty
    }
  }
}
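
[Editor's note] A hedged smoke test for the function above, assuming the
terraform hiera data described in data/site.yaml is available to the compiler
and that profile::fail2ban is mapped to a tag in magic_castle::site::tags
(both assumptions, not guaranteed by the repository):

    puppet apply --modulepath site:modules \
      -e 'notice(profile::gethostnames_with_class("profile::fail2ban"))'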
--------------------------------------------------------------------------------

/lib/puppet/functions/terraform_self.rb:
--------------------------------------------------------------------------------
require 'yaml'

Puppet::Functions.create_function(:terraform_self) do
  dispatch :terraform_self do
    param 'Hash', :options
    param 'Puppet::LookupContext', :context
  end

  def terraform_self(options, context)
    path = options['path']
    hostname = options['hostname']
    data = context.cached_file_data(path) do |content|
      begin
        Puppet::Util::Yaml.safe_load(content, [Symbol], path)
      end
    end
    return { 'terraform' => { 'self' => data['terraform']['instances'][hostname] || {} } }
  end
end
--------------------------------------------------------------------------------

/site/profile/files/base/opensshserver.config:
--------------------------------------------------------------------------------
CRYPTO_POLICY='-oCiphers=chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr -oMACs=hmac-sha2-256-etm@openssh.com,hmac-sha2-512-etm@openssh.com,umac-128-etm@openssh.com -oGSSAPIKexAlgorithms=gss-curve25519-sha256- -oKexAlgorithms=curve25519-sha256,curve25519-sha256@libssh.org,diffie-hellman-group16-sha512,diffie-hellman-group18-sha512,diffie-hellman-group-exchange-sha256 -oHostKeyAlgorithms=ssh-ed25519,ssh-ed25519-cert-v01@openssh.com,rsa-sha2-256,rsa-sha2-512 -oPubkeyAcceptedKeyTypes=ssh-ed25519,ssh-ed25519-cert-v01@openssh.com,rsa-sha2-256,rsa-sha2-512'
--------------------------------------------------------------------------------

/site/profile/files/base/opensshserver-9.config:
--------------------------------------------------------------------------------
Ciphers chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr
MACs hmac-sha2-256-etm@openssh.com,hmac-sha2-512-etm@openssh.com,umac-128-etm@openssh.com
GSSAPIKexAlgorithms gss-curve25519-sha256-
KexAlgorithms curve25519-sha256,curve25519-sha256@libssh.org,diffie-hellman-group16-sha512,diffie-hellman-group18-sha512,diffie-hellman-group-exchange-sha256
HostKeyAlgorithms ssh-ed25519,ssh-ed25519-cert-v01@openssh.com,rsa-sha2-256,rsa-sha2-512
PubkeyAcceptedKeyTypes ssh-ed25519,ssh-ed25519-cert-v01@openssh.com,rsa-sha2-256,rsa-sha2-512
--------------------------------------------------------------------------------

/manifests/site.pp:
--------------------------------------------------------------------------------
node default {
  $instance_tags = lookup('terraform.self.tags')

  $include_all = lookup('magic_castle::site::all', undef, undef, [])

  $include_tags = flatten(
    $instance_tags.map | $tag | {
      lookup("magic_castle::site::tags.${tag}", undef, undef, [])
    }
  )

  if lookup('magic_castle::site::enable_chaos', undef, undef, false) {
    $classes = shuffle($include_all + $include_tags)
    notify { 'Chaos order':
      message => String($classes),
    }
  } else {
    $classes = $include_all + $include_tags
  }
  include($classes)
}
--------------------------------------------------------------------------------

/site/profile/manifests/swap.pp:
--------------------------------------------------------------------------------
class profile::swap (
  String $size = '1 GB',
  Integer $swappiness = 10,
) {
  if $facts['virtual'] !~ /^(container|lxc).*$/ {
    if '/mnt/ephemeral0' in $facts['mountpoints'] {
      $swapfile = '/mnt/ephemeral0/swap'
    } else {
      $swapfile = '/mnt/swap'
    }
    $swapfilesize = $size
    swap_file::files { 'default':
      ensure       => present,
      swapfile     => $swapfile,
      swapfilesize => $swapfilesize,
    }
    sysctl { 'vm.swappiness':
      ensure => 'present',
      value  => $swappiness,
    }
  }
}
--------------------------------------------------------------------------------

/site/profile/files/fail2ban/fail2ban_route.te:
--------------------------------------------------------------------------------
module fail2ban_route 1.0;

require {
    type sysfs_t;
    type fail2ban_t;
    type ifconfig_exec_t;
    class capability net_admin;
    class netlink_route_socket nlmsg_write;
    class file { execute execute_no_trans getattr open read };
}

#============= fail2ban_t ==============

allow fail2ban_t ifconfig_exec_t:file { execute execute_no_trans getattr open read };
allow fail2ban_t self:capability net_admin;

allow fail2ban_t self:netlink_route_socket nlmsg_write;

allow fail2ban_t sysfs_t:file { open read };
--------------------------------------------------------------------------------

/site/profile/templates/reverse_proxy/subdomain.conf.epp:
--------------------------------------------------------------------------------
<%= $subdomain %>.<%= $domain %> {
    import tls
<% if $remote_ip != '' { -%>
    @allowed_ips remote_ip <%= join($remote_ip, ' ') %>
<% } -%>
    route <% if $remote_ip != '' { %>@allowed_ips <% } %>{
        respond /robots.txt 200 {
            body "<%= $robots_txt %>"
            close
        }
        reverse_proxy <%= $server %> <% if $server =~ /^https/ { -%> {
            transport http {
                tls_insecure_skip_verify
            }
        }<% } %>
    }
<% if $remote_ip != '' { -%>
    route {
        respond "Unauthorized" 403
    }
<% } -%>
}
--------------------------------------------------------------------------------

/site/profile/templates/freeipa/ipa-rewrite.conf.epp:
--------------------------------------------------------------------------------
# VERSION 6 - DO NOT REMOVE THIS LINE

RewriteEngine on

# By default forward all requests to /ipa. If you don't want IPA
# to be the default on your web server comment this line out.
RewriteRule ^/$ /ipa/ui [L,NC,R=301]

# Rewrite for plugin index, make it like it's a static file
RewriteRule ^/ipa/ui/js/freeipa/plugins.js$ /ipa/wsgi/plugins.py [PT]

RequestHeader edit Referer ^https://<%= regsubst("${external_hostname}", '\.', '\.', 'G') %> https://<%= $referee %>
RequestHeader edit Referer ^https://<%= regsubst("${internal_hostname}", '\.', '\.', 'G') %> https://<%= $referee %>
--------------------------------------------------------------------------------

/site/profile/templates/base/postrun.epp:
--------------------------------------------------------------------------------
#!/bin/sh

email='<%= $email %>'

# Check if puppet has done anything
actions=$(journalctl -u puppet -n 2 | grep -c -P "(Starting Puppet client|Applied catalog in)")

# Send email with results after modifications were applied
if [[ $actions -lt 2 ]] && [[ ! -z "$email" ]]; then
-z "$email" ]]; then 10 | lines=($(journalctl -u puppet | grep -n -E 'Starting Puppet client|Applied catalog in' | cut -f 1 -d : | tail -n 2)) 11 | cat << EOF | /usr/sbin/sendmail $email 12 | To: $email 13 | From: $(hostname -s)-puppet-noreply 14 | Subject: $(hostname -f) is online 15 | $(journalctl -u puppet | sed -n "${lines[0]},${lines[1]}p") 16 | EOF 17 | fi 18 | -------------------------------------------------------------------------------- /site/profile/templates/slurm/sacct.cfg.epp: -------------------------------------------------------------------------------- 1 | # 2 | Cluster - '<%= $cluster %>':<%= join($cluster_options.map|$key, $value| { "${key}=${value}" }, ':')%> 3 | Parent - root 4 | User - root:AdminLevel=Administrator 5 | <% $admins.each |$username| { -%> 6 | User - <%= $username %>:AdminLevel=Administrator 7 | <% } -%> 8 | 9 | <% $accounts.each |$key, $values| { -%> 10 | Account - <%= $key %>:<%= join($values.map|$key, $value| { "${key}=${value}" }, ':')%> 11 | <% } -%> 12 | <% $users.each |$user, $accounts| { -%> 13 | <% $accounts.each |$index, $account| { -%> 14 | Parent - <%= $account %> 15 | User - <%= $user %><% if $index == 0 { %>:DefaultAccount=<%= $account %><% } %> 16 | <% } -%> 17 | <% } -%> 18 | -------------------------------------------------------------------------------- /site/profile/templates/freeipa/group_rules.py.epp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | api.Command.batch( 3 | <% if $hbac_rules != undef { -%> 4 | <% $hbac_rules.each |$rule| { -%> 5 | { 'method': 'hbacrule_add_user', 'params': [['<%= $rule %>'], {'group': '<%= $group %>'}] }, 6 | <% } -%> 7 | <% } -%> 8 | <% if $automember { -%> 9 | { 'method': 'automember_add', 'params': [[], {'cn': '<%= $group %>', 'type': 'group', 'all': False, 'raw': False}]}, 10 | { 'method': 'automember_add_condition', 'params': [[], {'cn': '<%= $group %>', 'key': 'mail', 'type': 'group', 'all': False, 'raw': False, 'automemberinclusiveregex': '^(?!\s*$).+'}]}, 11 | { 'method': 'automember_rebuild', 'params': [[], {'type': 'group'}] }, 12 | <% } -%> 13 | ) 14 | -------------------------------------------------------------------------------- /site/profile/files/software_stack/z-00-rsnt_arch.sh.ctmpl: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | {{ scratch.Set "sse3" 1 -}} 3 | {{ scratch.Set "avx" 2 -}} 4 | {{ scratch.Set "avx2" 3 -}} 5 | {{ scratch.Set "avx512" 4 -}} 6 | 7 | {{ scratch.MapSet "arch_index_map" "1" "sse3" -}} 8 | {{ scratch.MapSet "arch_index_map" "2" "avx" -}} 9 | {{ scratch.MapSet "arch_index_map" "3" "avx2" -}} 10 | {{ scratch.MapSet "arch_index_map" "4" "avx512" -}} 11 | 12 | {{ scratch.Set "arch_index" 5 -}} 13 | 14 | {{ range service "software_stack" -}} 15 | {{ scratch.Set "arch_index" ( minimum (scratch.Get "arch_index") (scratch.Get .ServiceMeta.arch) ) -}} 16 | {{ end -}} 17 | 18 | {{ if lt (scratch.Get "arch_index") 5 -}} 19 | export RSNT_ARCH="{{index (scratch.Get "arch_index_map") (scratch.Get "arch_index" | print)}}" 20 | {{ end -}} -------------------------------------------------------------------------------- /lib/puppet/functions/slurm_compute_weights.rb: -------------------------------------------------------------------------------- 1 | Puppet::Functions.create_function(:slurm_compute_weights) do 2 | dispatch :slurm_compute_weights do 3 | param 'Hash', :instances 4 | return_type 'Hash' 5 | end 6 | 7 | def slurm_compute_weights(instances) 8 | require 'set' 9 | 
    unique_specs = Set.new(instances.values.map {|i| i['specs']})
    sorted_specs = unique_specs.sort_by{|spec| [spec['gpus'], spec['ram'], spec['cpus']]}
    weights = Hash.new
    for i in 0..sorted_specs.size-1
      weights[sorted_specs[i]] = i+1
    end
    weights_per_node = Hash.new
    for inst in instances
      weights_per_node[inst[0]] = weights[inst[1]['specs']]
    end
    return weights_per_node
  end
end
--------------------------------------------------------------------------------

/site/profile/files/slurm/slurmd.te:
--------------------------------------------------------------------------------

module slurmd 1.0;

require {
    type devpts_t;
    type fusefs_t;
    type unconfined_service_t;
    type user_t;
    type var_spool_t;
    class chr_file { getattr ioctl read write };
    class file entrypoint;
    class process { sigchld transition };
    class process2 nosuid_transition;
}

#============= unconfined_service_t ==============

allow unconfined_service_t user_t:process transition;
allow unconfined_service_t user_t:process2 nosuid_transition;

#============= user_t ==============

allow user_t unconfined_service_t:process sigchld;
allow user_t devpts_t:chr_file { ioctl read write };
allow user_t var_spool_t:file entrypoint;
allow user_t fusefs_t:file entrypoint;
allow user_t devpts_t:chr_file getattr;
--------------------------------------------------------------------------------

/Rakefile:
--------------------------------------------------------------------------------
require 'bundler/setup'
require 'puppetlabs_spec_helper/rake_tasks'
require 'puppet-lint/tasks/puppet-lint'
require 'puppet-lint-param-docs/tasks'
require 'puppet-syntax/tasks/puppet-syntax'

PuppetLint.configuration.send("disable_140chars")
PuppetLint.configuration.log_format = "::%{kind} file=%{path},line=%{line},col=%{column},title=%{check}::%{message}"
PuppetLint.configuration.fail_on_warnings = true
PuppetLint.configuration.send('disable_autoloader_layout')

exclude_paths = [
  "pkg/**/*",
  "vendor/**/*",
  "spec/**/*",
  "site/profile/files/**/*"
]
PuppetLint.configuration.ignore_paths = exclude_paths
PuppetSyntax.exclude_paths = exclude_paths

desc "Run syntax, lint, and spec tests."
task :test => [
  :syntax,
  :lint,
]
--------------------------------------------------------------------------------

/site/profile/templates/slurm/gres.conf.epp:
--------------------------------------------------------------------------------
###########################################################
# Slurm's Generic Resource (GRES) configuration file
###########################################################
AutoDetect=off
<% $nodes.each |$name, $attr| { -%>
<% if $attr['specs']['gpus'] > 0 { -%>
<% if $attr['specs']['mig'] and ! $attr['specs']['mig'].empty { -%>
<% $attr['specs']['mig'].map|$key,$value| { -%>
NodeName=<%= $name %> Name=gpu Type=<%= $key %> Count=<%= $value * $attr['specs']['gpus'] %> File=<%= join( range(0, $value * $attr['specs']['gpus'] - 1).map|$i| { "/dev/nvidia-mig-${key}-${i}" } , ',') %>
<% }} else { -%>
NodeName=<%= $name %> Name=gpu Count=<%= $attr['specs']['gpus'] %> File=<%= join( range(0, $attr['specs']['gpus']-1).map|$i| { "/dev/nvidia${i}" } , ',') %>
<% }}} -%>
--------------------------------------------------------------------------------

/site/profile/manifests/fail2ban.pp:
--------------------------------------------------------------------------------
class profile::fail2ban (
  Array[String] $ignoreip = [],
) {
  include epel

  class { 'fail2ban' :
    whitelist => ['127.0.0.1/8', profile::getcidr()] + $ignoreip,
  }

  file_line { 'fail2ban_sshd_recv_disconnect':
    ensure  => present,
    path    => '/etc/fail2ban/filter.d/sshd.conf',
    line    => ' ^Received disconnect from %(__on_port_opt)s:\s*11:( Bye Bye)?%(__suff)s$',
    after   => '^mdre-extra\ \=*',
    notify  => Service['fail2ban'],
    require => Class['fail2ban::install'],
  }

  Yumrepo['epel'] -> Class['fail2ban::install']

  selinux::module { 'fail2ban_route':
    ensure    => 'present',
    source_pp => 'puppet:///modules/profile/fail2ban/fail2ban_route.pp',
  }
}
--------------------------------------------------------------------------------

/site/profile/facts.d/nvidia_grid_vgpu.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# NVIDIA Vendor ID is : 0x10de
# Grep the GPU memory size from the product description and convert to bytes
GPU_MEM_SIZE=$(/usr/sbin/lspci -d 10de: | grep -m1 -oP '\[*\K[0-9]*[T|G](?=B)' | numfmt --from=iec)
# Grep the actual GPU memory size from lspci verbose and convert to bytes
AVAIL_MEM_SIZE=$(/usr/sbin/lspci -d 10de: -v | grep -m1 -oP 'Memory.*\(64-bit, prefetchable\).*\[size=\K([0-9]*[T|G])(?=])' | numfmt --from=iec)

# If the memory available from the product description is greater than the available memory
# we conclude it must be a virtual GPU
if [[ ! -z "${GPU_MEM_SIZE}" ]] && [[ ! -z "${AVAIL_MEM_SIZE}" ]]; then
-z "${AVAIL_MEM_SIZE}" ]]; then 12 | IS_VGPU=$(test "${GPU_MEM_SIZE}" -gt "${AVAIL_MEM_SIZE}" && echo true || echo false) 13 | else 14 | IS_VGPU='false' 15 | fi 16 | 17 | echo "{ 'nvidia_grid_vgpu' : $IS_VGPU }" -------------------------------------------------------------------------------- /site/profile/files/slurm/nvidia_gres.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | /usr/bin/nvidia-smi --query-gpu=index,mig.mode.current --format=csv,noheader | sed 's/,//' | while read GPU_INDEX MIG_ENABLED; do 4 | if [ "${MIG_ENABLED}" == "Enabled" ]; then 5 | /usr/bin/nvidia-smi mig -lgi -i ${GPU_INDEX} | grep MIG | awk '{gsub("[|]", ""); print $3,$5}' | while read MIG_PROFILE MIG_ID; do 6 | GPU_CAP_ID=$(grep -oP 'DeviceFileMinor: \K([0-9]+)' /proc/driver/nvidia/capabilities/gpu${GPU_INDEX}/mig/gi${MIG_ID}/access) 7 | echo "Name=gpu Type=${MIG_PROFILE} MultipleFiles=/dev/nvidia${GPU_INDEX},/dev/nvidia-caps/nvidia-cap${GPU_CAP_ID}" 8 | done 9 | else 10 | GPU_TYPE=$(/usr/bin/nvidia-smi -i ${GPU_INDEX} --query-gpu=gpu_name --format=csv,noheader | awk '{print $2}') 11 | echo "Name=gpu Type=${GPU_TYPE} File=/dev/nvidia${GPU_INDEX}" 12 | fi 13 | done 14 | -------------------------------------------------------------------------------- /environment.conf: -------------------------------------------------------------------------------- 1 | # Each environment can have an environment.conf file. Its settings will only 2 | # affect its own environment. See docs for more info: 3 | # https://puppet.com/docs/puppet/latest/config_file_environment.html 4 | 5 | # Any unspecified settings use default values; some of those defaults are based 6 | # on puppet.conf settings. 7 | 8 | # If these settings include relative file paths, they'll be resolved relative to 9 | # this environment's directory. 10 | 11 | # Allowed settings and default values: 12 | 13 | modulepath = site:modules:$basemodulepath 14 | # manifest = (default_manifest from puppet.conf, which defaults to ./manifests) 15 | # config_version = (no script; Puppet will use the time the catalog was compiled) 16 | # environment_timeout = (environment_timeout from puppet.conf, which defaults to 0) 17 | # Note: unless you have a specific reason, we recommend only setting 18 | # environment_timeout in puppet.conf. 
--------------------------------------------------------------------------------

/site/profile/facts.d/letsencrypt.sh:
--------------------------------------------------------------------------------
#!/bin/sh

echo "---"
if [ -d /etc/letsencrypt ]; then
    echo "letsencrypt:"
    for domain in $(ls /etc/letsencrypt/live); do
        path=/etc/letsencrypt/live/$domain
        echo "  $domain:"
        echo "    fullchain:" $(test -e $path/fullchain.pem && echo true || echo false)
        echo "    cert:" $(test -e $path/cert.pem && echo true || echo false)
        echo "    privkey:" $(test -e $path/privkey.pem && echo true || echo false)
        echo "    chain:" $(test -e $path/chain.pem && echo true || echo false)
        if [ -e $path/fullchain.pem ]; then
            echo "    startdate:" $(openssl x509 -in $path/fullchain.pem -startdate -noout | cut -d= -f2)
            echo "    enddate:" $(openssl x509 -in $path/fullchain.pem -enddate -noout | cut -d= -f2)
            echo "    willexpire:" $(openssl x509 -in $path/fullchain.pem -checkend 1800 | grep -q "will expire" && echo true || echo false)
        fi
    done
else
    echo "letsencrypt: {}"
fi
--------------------------------------------------------------------------------

/site/profile/manifests/vector.pp:
--------------------------------------------------------------------------------
class profile::vector (
  String $config = file('profile/vector/default_config.yaml')
) {
  tag 'mc_bootstrap'

  yumrepo { 'vector':
    ensure        => present,
    enabled       => true,
    baseurl       => "https://yum.vector.dev/stable/vector-0/${::facts['os']['architecture']}/",
    gpgcheck      => 1,
    gpgkey        => [
      'https://keys.datadoghq.com/DATADOG_RPM_KEY_CURRENT.public',
      'https://keys.datadoghq.com/DATADOG_RPM_KEY_B01082D3.public',
      'https://keys.datadoghq.com/DATADOG_RPM_KEY_FD4BF915.public',
    ],
    repo_gpgcheck => 1,
  }

  package { 'vector':
    ensure  => 'installed',
    require => [Yumrepo['vector']],
  }

  service { 'vector':
    ensure  => running,
    enable  => true,
    require => [Package['vector']],
  }

  file { '/etc/vector/vector.yaml':
    notify  => Service['vector'],
    content => $config,
    require => [Package['vector']],
  }
}
--------------------------------------------------------------------------------

/site/profile/functions/generate_slurm_node_line.pp:
--------------------------------------------------------------------------------
function profile::generate_slurm_node_line($name, $attr, $comp_weight) >> String {
  if $attr['specs']['gpus'] > 0 {
    if $attr['specs']['mig'] and ! $attr['specs']['mig'].empty {
      $gpu = $attr['specs']['mig'].map|$key,$value| {
        ['gpu', $key, $value * $attr['specs']['gpus']].join(':')
      }.join(',')
    } else {
      $gpu = "gpu:${attr['specs']['gpus']}"
    }
    if $attr['specs']['shard'] and ! $attr['specs']['shard'].empty {
      $shard = ",shard:${attr['specs']['shard']}"
    } else {
      $shard = ''
    }
    $gres = "${gpu}${shard}"
  } else {
    $gres = 'gpu:0'
  }
  $weight = pick($attr['specs']['weight'], $comp_weight)
  if $attr['specs']['features'] and ! $attr['specs']['features'].empty {
    $features = $attr['specs']['features'].join(',')
    $features_option = "Features=${features}"
  }
  else {
    $features_option = ''
  }
  "NodeName=${name} CPUs=${attr['specs']['cpus']} RealMemory=${attr['specs']['ram']} Gres=${gres} Weight=${weight} ${features_option}"
}
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Félix-Antoine Fortin, Université Laval

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/site/profile/manifests/metrix.pp:
--------------------------------------------------------------------------------
class profile::metrix {
  $instances = lookup('terraform.instances')
  $logins = keys($instances.filter |$keys, $values| { 'login' in $values['tags'] })

  $domain_name = lookup('terraform.data.domain_name')
  $int_domain_name = lookup('profile::freeipa::base::ipa_domain')
  $base_dn = join(split($int_domain_name, '[.]').map |$dc| { "dc=${dc}" }, ',')

  class { 'metrix':
    root_api_token  => lookup('metrix::root_api_token'),
    password        => lookup('metrix::password'),
    prometheus_ip   => lookup('metrix::prometheus_ip'),
    prometheus_port => lookup('metrix::prometheus_port'),
    db_ip           => lookup('metrix::db_ip'),
    db_port         => lookup('metrix::db_port'),
    ldap_password   => lookup('metrix::ldap_password'),
    slurm_password  => lookup('metrix::slurm_password'),
    cluster_name    => lookup('metrix::cluster_name'),
    subdomain       => lookup('metrix::subdomain'),
    logins          => $logins,
    base_dn         => $base_dn,
    domain_name     => $domain_name,
  }
  Class['metrix'] ~> Service['httpd']
}
--------------------------------------------------------------------------------

/bootstrap.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Initialize random hieradata values
set -e
PATH=$PATH:/opt/puppetlabs/puppet/bin
PKCS7_KEY="/etc/puppetlabs/puppet/eyaml/boot_public_key.pkcs7.pem"
ENC_CMD="eyaml encrypt -o block --pkcs7-public-key=${PKCS7_KEY}"
(
    $ENC_CMD -l 'jupyterhub::prometheus_token' -s $(uuidgen)
    $ENC_CMD -l 'profile::consul::acl_api_token' -s $(uuidgen)
    $ENC_CMD -l 'profile::slurm::base::munge_key' -s $(openssl rand 1024 | openssl enc -A -base64)
    $ENC_CMD -l 'profile::slurm::accounting::password' -s $(openssl rand -base64 9)
    $ENC_CMD -l 'profile::freeipa::mokey::password' -s $(openssl rand -base64 9)
    $ENC_CMD -l 'profile::freeipa::server::ds_password' -s $(openssl rand -base64 9)
    $ENC_CMD -l 'profile::freeipa::server::admin_password' -s $(openssl rand -base64 9)
    $ENC_CMD -l 'metrix::password' -s $(openssl rand -base64 9)
    $ENC_CMD -l 'metrix::root_api_token' -s $(openssl rand -hex 20)
) > /etc/puppetlabs/code/environments/production/data/bootstrap.yaml

# Apply bootstrap classes if any
puppet apply /etc/puppetlabs/code/environments/production/manifests/site.pp --tags mc_bootstrap
--------------------------------------------------------------------------------

/site/profile/templates/jupyterhub/login.html.epp:
--------------------------------------------------------------------------------
 1 | {% extends "templates/login.html" %}
 2 | {% block footer %}
 8 | {% endblock %}
 9 | 
10 | {% block stylesheet %}
11 | {{ super() }}
25 | {% endblock %}
26 | 
27 | {% block script %}
28 | <% if $register_url != '' or $reset_pw_url != '' { -%>
46 | <% } -%>
47 | {% endblock %}
--------------------------------------------------------------------------------

/site/profile/files/consul/puppet_event_handler.sh:
--------------------------------------------------------------------------------
#!/bin/bash
INPUT=$(cat -)
logger ${INPUT}

# No event, dry run of handler
if [[ "${INPUT}" == "[]" ]]; then
    exit 0;
fi

event_type=$(echo $INPUT | jq -r '.[-1] | .Name')

if [[ "$event_type" != "puppet" ]]; then
    exit 0
fi

payload=$(echo $INPUT | jq -r '.[-1] | .Payload' | base64 -d)

if [ -f /opt/puppetlabs/puppet/cache/state/agent_catalog_run.lock ]; then
    # Puppet is already running, we check if the event precedes the start of the current run
    # If it is, we ignore it, otherwise we wait for the run to complete and then we restart
    puppet_begin=$(stat -c %W /opt/puppetlabs/puppet/cache/state/agent_catalog_run.lock)
    if [ "${puppet_begin}" -gt "${payload}" ]; then
        exit 0
    fi
    while [ -f /opt/puppetlabs/puppet/cache/state/agent_catalog_run.lock ]; do sleep 30; done

elif [ -f /opt/puppetlabs/puppet/cache/state/last_run_summary.yaml ]; then
    # If the last puppet run began after the event timestamp, we ignore the event
    puppet_begin=$(grep ' config:' /opt/puppetlabs/puppet/cache/state/last_run_summary.yaml | cut -d: -f 2 | sed 's/ //')
    if [ "${puppet_begin}" -gt "${payload}" ]; then
        exit 0
    fi
fi

sudo systemctl reload puppet
--------------------------------------------------------------------------------

/site/profile/manifests/efa.pp:
--------------------------------------------------------------------------------
class profile::efa (
  String $version = 'latest'
) {
  package { 'libibverbs-utils':
    ensure => 'installed',
  }

  package { 'rdma-core-devel':
    ensure => 'installed',
  }

  package { 'librdmacm-utils':
    ensure => 'installed',
  }

  archive { 'download-efa-driver':
    path         => "/opt/puppetlabs/puppet/cache/puppet-archive/aws-efa-installer-${version}.tar.gz",
    extract      => true,
    extract_path => '/tmp/',
    source       => "https://efa-installer.amazonaws.com/aws-efa-installer-${version}.tar.gz"
  }
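
  # Editor's note (added comment, not in the original source): the exec below
  # is idempotent thanks to 'creates'; /etc/modprobe.d/efa.conf is assumed to
  # be written by efa_installer.sh, so later Puppet runs skip the install.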
22 | 23 | exec { 'install-efa-driver': 24 | command => 'bash efa_installer.sh -y --minimal && rm -rf /tmp/aws-efa-installer', 25 | cwd => '/tmp/aws-efa-installer', 26 | require => [ 27 | Archive['download-efa-driver'], 28 | Package['libibverbs-utils'], 29 | Package['rdma-core-devel'], 30 | Package['librdmacm-utils'], 31 | ], 32 | path => ['/bin', '/usr/bin', '/sbin','/usr/sbin'], 33 | creates => '/etc/modprobe.d/efa.conf', 34 | } 35 | 36 | kmod::load { 'efa': 37 | require => Exec['install-efa-driver'] 38 | } 39 | 40 | selinux::module { 'efa': 41 | ensure => 'present', 42 | source_pp => 'puppet:///modules/profile/efa/efa.pp', 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /site/profile/templates/freeipa/hbac_rules.py.epp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 1. Create a hostgroup for each tag 3 | # 2. Create an automember rule for each hostgroup 4 | # 3. Add a condition to the automember rule for each prefix with that tag 5 | # 4. Rebuild the automember rules 6 | api.Command.batch( 7 | <% $hbac_services.each |$service| { -%> 8 | { 'method': 'hbacsvc_add', 'params': [['<%= $service %>'], {}] }, 9 | <% } -%> 10 | <% $tags.each |$tag| { -%> 11 | { 'method': 'hostgroup_add', 'params': [['<%= $tag %>'], {}] }, 12 | { 'method': 'automember_add', 'params': [['<%= $tag %>'], {'type': 'hostgroup'}] }, 13 | <% $hbac_services.each |$service| { -%> 14 | { 'method': 'hbacrule_add', 'params': [['<%= $tag %>:<%= $service %>'], {'accessruletype': 'allow'}] }, 15 | { 'method': 'hbacrule_add_host', 'params': [['<%= $tag %>:<%= $service %>'], {'hostgroup': '<%= $tag %>'}] }, 16 | { 'method': 'hbacrule_add_service', 'params': [['<%= $tag %>:<%= $service %>'], {'hbacsvc': '<%= $service %>'}] }, 17 | <% }} -%> 18 | <% $prefixes_tags.each |$prefix, $tags| { -%> 19 | <% $tags.each |$tag| { -%> 20 | { 'method': 'automember_add_condition', 'params': [['<%= $tag %>'], {'type': 'hostgroup', 'key': 'fqdn', 'automemberinclusiveregex': "^<%= $prefix %>\d+.<%= $ipa_domain %>$"}] }, 21 | <% }} -%> 22 | { 'method': 'automember_rebuild', 'params': [[], {'type': 'hostgroup'}] }, 23 | ) 24 | -------------------------------------------------------------------------------- /site/profile/files/freeipa/27e9181bdc684915a7f9f15631f4c3dd6ac5f884.patch: -------------------------------------------------------------------------------- 1 | From 27e9181bdc684915a7f9f15631f4c3dd6ac5f884 Mon Sep 17 00:00:00 2001 2 | From: Christian Heimes 3 | Date: Apr 18 2023 10:13:47 +0000 4 | Subject: Speed up installer by restarting DS after DNA plugin 5 | 6 | 7 | DS does not enable plugins unless nsslapd-dynamic-plugins is enabled or 8 | DS is restarted. The DNA plugin creates its configuration entries with 9 | some delay after the plugin is enabled. 10 | 11 | DS is now restarted after the DNA plugin is enabled so it can create the 12 | entries while Dogtag and the rest of the system is installing. The 13 | updater `update_dna_shared_config` no longer blocks and waits for two 14 | times 60 seconds for `posix-ids` and `subordinate-ids`. 
15 | 16 | Fixes: https://pagure.io/freeipa/issue/9358 17 | Signed-off-by: Christian Heimes 18 | Reviewed-By: Rob Crittenden 19 | 20 | --- 21 | 22 | diff --git a/ipaserver/install/dsinstance.py b/ipaserver/install/dsinstance.py 23 | index 157e21e..cbacfae 100644 24 | --- a/ipaserver/install/dsinstance.py 25 | +++ b/ipaserver/install/dsinstance.py 26 | @@ -269,6 +269,9 @@ class DsInstance(service.Service): 27 | self.step("activating extdom plugin", self._add_extdom_plugin) 28 | 29 | self.step("configuring directory to start on boot", self.__enable) 30 | + # restart to enable plugins 31 | + # speeds up creation of DNA plugin entries in cn=dna,cn=ipa,cn=etc 32 | + self.step("restarting directory server", self.__restart_instance) 33 | 34 | def init_info(self, realm_name, fqdn, domain_name, dm_password, 35 | subject_base, ca_subject, 36 | 37 | -------------------------------------------------------------------------------- /Puppetfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | forge "https://forgeapi.puppetlabs.com" 4 | 5 | mod 'cmdntrf-consul_template', '2.3.8' 6 | mod 'derdanne-nfs', '2.1.11' 7 | mod 'heini-wait_for', '2.2.0' 8 | mod 'puppet-alternatives', '6.0.0' 9 | mod 'puppet-augeasproviders_core', '4.0.1' 10 | mod 'puppet-augeasproviders_pam', '4.0.0' 11 | mod 'puppet-augeasproviders_shellvar', '6.0.0' 12 | mod 'puppet-augeasproviders_ssh', '6.0.0' 13 | mod 'puppet-augeasproviders_sysctl', '3.1.0' 14 | mod 'puppet-archive', '4.6.0' 15 | mod 'puppet-consul', '7.3.1' 16 | mod 'puppet-epel', '5.0.0' 17 | mod 'puppet-extlib', '7.0.0' 18 | mod 'puppet-fail2ban', '4.2.0' 19 | mod 'puppet-healthcheck', '1.0.1' 20 | mod 'puppet-kmod', '4.0.0' 21 | mod 'puppet-logrotate', '5.0.0' 22 | mod 'puppet-postfix', '5.1.0' 23 | mod 'puppet-prometheus', '12.5.0' 24 | mod 'puppet-prometheus_reporter', '2.0.0' 25 | mod 'puppet-rsyslog', '7.1.0' 26 | mod 'puppet-selinux', '3.4.1' 27 | mod 'puppet-squid', '3.0.0' 28 | mod 'puppet-swap_file', '5.0.0' 29 | mod 'puppet-systemd', '3.10.0' 30 | mod 'puppetlabs-concat', '7.4.0' 31 | mod 'puppetlabs-firewall', '5.0.0' 32 | mod 'puppetlabs-inifile', '6.1.0' 33 | mod 'puppetlabs-lvm', '1.4.0' 34 | mod 'puppetlabs-mailalias_core', '1.2.0' 35 | mod 'puppetlabs-motd', '7.1.0' 36 | mod 'puppetlabs-mount_core', '1.0.4' 37 | mod 'puppetlabs-mysql', '13.3.0' 38 | mod 'puppetlabs-stdlib', '5.2.0' 39 | mod 'puppetlabs-transition', '0.1.3' 40 | mod 'treydock-globus', '9.0.0' 41 | mod 'saz-limits', '3.0.4' 42 | 43 | mod 'computecanada-jupyterhub', 44 | :git => 'https://github.com/ComputeCanada/puppet-jupyterhub.git', 45 | :ref => 'v7.0.1' 46 | 47 | mod 'computecanada-metrix', 48 | :git => 'https://github.com/ComputeCanada/puppet-metrix.git', 49 | :ref => 'v1.0.3' 50 | 51 | mod 'computecanada-uv', 52 | :git => 'https://github.com/ComputeCanada/puppet-uv.git', 53 | :ref => 'v0.1.0' 54 | -------------------------------------------------------------------------------- /data/site.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | lookup_options: 3 | magic_castle::site::all: 4 | merge: 'first' 5 | magic_castle::site::tags: 6 | merge: 'hash' 7 | terraform: 8 | merge: 'hash' 9 | 10 | magic_castle::site::all: 11 | - profile::base 12 | - profile::consul 13 | - profile::freeipa 14 | - profile::users::local 15 | - profile::sssd::client 16 | - profile::prometheus::node_exporter 17 | - profile::rsyslog::client 18 | - profile::volumes 19 | - profile::mail 20 | 21 | 
magic_castle::site::tags: 22 | dtn: 23 | - profile::globus 24 | - profile::nfs 25 | login: 26 | - motd 27 | - profile::fail2ban 28 | - profile::slurm::submitter 29 | - profile::ssh::hostbased_auth::client 30 | - profile::nfs 31 | - profile::software_stack 32 | - profile::swap 33 | mgmt: 34 | - mysql::server 35 | - prometheus::server 36 | - prometheus::alertmanager 37 | - profile::prometheus::slurm_exporter 38 | - profile::prometheus::apache_exporter 39 | - profile::rsyslog::server 40 | - profile::squid::server 41 | - profile::slurm::controller 42 | - profile::slurm::accounting 43 | - profile::accounts 44 | - profile::nfs 45 | - metrix::slurm_jobscripts 46 | - profile::metrix 47 | - profile::swap 48 | node: 49 | - profile::gpu 50 | - profile::jupyterhub::node 51 | - profile::slurm::node 52 | - profile::ssh::hostbased_auth::client 53 | - profile::ssh::hostbased_auth::server 54 | - profile::prometheus::slurm_job_exporter 55 | - profile::nfs 56 | - profile::software_stack 57 | nfs: 58 | - profile::nfs 59 | - profile::cvmfs::alien_cache 60 | proxy: 61 | - profile::jupyterhub::hub 62 | - profile::jupyterhub::hub::keytab 63 | - profile::reverse_proxy 64 | - profile::prometheus::caddy_exporter 65 | efa: 66 | - profile::efa 67 | puppet: 68 | - profile::puppetserver 69 | - profile::swap -------------------------------------------------------------------------------- /data/software_stack/computecanada.yaml: -------------------------------------------------------------------------------- 1 | # for the default cuda/12.6, need at least 560 2 | profile::gpu::install::passthrough::nvidia_driver_stream: '575-dkms' 3 | profile::gpu::install::lib_symlink_path: '/usr/lib64/nvidia' 4 | 5 | profile::software_stack::initial_profile: "/cvmfs/soft.computecanada.ca/config/profile/bash.sh" 6 | profile::software_stack::lmod_default_modules: 7 | - StdEnv/2023 8 | - mii 9 | 10 | profile::software_stack::extra_site_env_vars: 11 | CC_CLUSTER: "magic_castle" 12 | # ensure that packages installed in the local kernel are found, if the same version of python is used in both cases 13 | EBPYTHONPREFIXES_PRIORITY: "%{alias('jupyterhub::kernel::venv::prefix')}" 14 | lookup_options: 15 | profile::software_stack::extra_site_env_vars: 16 | merge: hash 17 | jupyterhub::kernel::venv::pip_environment: 18 | merge: "deep" 19 | 20 | jupyterhub::kernel::install_method: 'venv' 21 | jupyterhub::kernel::venv::python: /cvmfs/soft.computecanada.ca/easybuild/software/2023/%{facts.cpu_microarch}/Compiler/gcccore/python/3.11.5/bin/python 22 | jupyterhub::kernel::venv::prefix: /opt/ipython-kernel-computecanada 23 | 24 | jupyterhub::kernel::venv::pip_environment: 25 | PYTHONPATH: ["/cvmfs/soft.computecanada.ca/easybuild/python/site-packages:/cvmfs/soft.computecanada.ca/custom/python/site-packages"] 26 | PIP_CONFIG_FILE: "/cvmfs/soft.computecanada.ca/config/python/pip-%{facts.cpu_microarch}-gentoo2023.conf" 27 | jupyterhub::kernel::venv::kernel_environment: 28 | "PYTHONPATH": "/cvmfs/soft.computecanada.ca/easybuild/python/site-packages:${PYTHONPATH}" 29 | "EBPYTHONPREFIXES": "${SLURM_TMPDIR}:${EBPYTHONPREFIXES}" 30 | 31 | jupyterhub::jupyterhub_config_hash: 32 | SlurmFormSpawner: 33 | ui_args: 34 | rstudio: 35 | modules: ['rstudio-server'] 36 | code-server: 37 | modules: ['code-server'] 38 | openrefine: 39 | modules: ['openrefine'] 40 | 41 | -------------------------------------------------------------------------------- /site/profile/manifests/puppetserver.pp: 
-------------------------------------------------------------------------------- 1 | class profile::puppetserver ( 2 | Integer $jruby_max_active_instances = 1, 3 | Integer $java_heap_size = 1024, 4 | ) { 5 | $eyaml_path = '/opt/puppetlabs/puppet/bin/eyaml' 6 | $boot_private_key_path = '/etc/puppetlabs/puppet/eyaml/boot_private_key.pkcs7.pem' 7 | $boot_eyaml = '/etc/puppetlabs/code/environments/production/data/bootstrap.yaml' 8 | $local_users = lookup('profile::users::local::users', undef, undef, {}) 9 | $local_users.each | $user, $attrs | { 10 | if pick($attrs['sudoer'], false) { 11 | file_line { "${user}_eyamlbootstrap": 12 | path => "/${user}/.bashrc", 13 | line => "alias eyamlbootstrap=\"sudo ${eyaml_path} decrypt --pkcs7-private-key ${boot_private_key_path} -f ${boot_eyaml} | less\"", 14 | require => User[$user], 15 | } 16 | } 17 | } 18 | 19 | file_line { 'puppetserver_java_heap_size': 20 | path => '/etc/sysconfig/puppetserver', 21 | match => '^JAVA_ARGS=', 22 | line => "JAVA_ARGS=\"-Xms${java_heap_size}m -Xmx${java_heap_size}m -Djruby.logger.class=com.puppetlabs.jruby_utils.jruby.Slf4jLogger\"", #lint:ignore:140chars 23 | notify => Service['puppetserver'], 24 | tag => ['mc_bootstrap'], 25 | } 26 | 27 | file_line { 'puppetserver_max_active_instances': 28 | path => '/etc/puppetlabs/puppetserver/conf.d/puppetserver.conf', 29 | match => '^ #max-active-instances:', 30 | line => " max-active-instances: ${jruby_max_active_instances}", 31 | notify => Service['puppetserver'], 32 | tag => ['mc_bootstrap'], 33 | } 34 | 35 | file { '/etc/puppetlabs/puppet/prometheus.yaml': 36 | owner => 'root', 37 | group => 'root', 38 | content => "---\ntextfile_directory: /var/lib/node_exporter", 39 | tag => ['mc_bootstrap'], 40 | } 41 | 42 | @user { 'puppet': 43 | ensure => present, 44 | notify => Service['puppetserver'], 45 | } 46 | 47 | service { 'puppetserver': 48 | ensure => running, 49 | enable => true, 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /site/profile/manifests/squid.pp: -------------------------------------------------------------------------------- 1 | class profile::squid::server ( 2 | Integer $port, 3 | Integer $cache_size, 4 | Array[String] $cvmfs_acl_regex, 5 | ) { 6 | class { 'squid': } 7 | squid::http_port { String($port): } 8 | squid::acl { 'SSL_ports': 9 | type => 'port', 10 | entries => ['443'], 11 | } 12 | squid::acl { 'Safe_ports': 13 | type => 'port', 14 | entries => ['80', '443', '1025-65535'], 15 | } 16 | squid::acl { 'CONNECT': 17 | type => 'method', 18 | entries => ['CONNECT'], 19 | } 20 | squid::acl { 'CLUSTER_NETWORK': 21 | type => 'src', 22 | entries => [profile::getcidr()], 23 | } 24 | # How can we have multiple regex entries under the same ACL name? 25 | # From Squid documentation: 26 | # You can put different values for the same ACL name on different lines. 27 | # Squid combines them into one list. 
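# For illustration only (hypothetical entries, not values shipped by this repo):
# with $cvmfs_acl_regex = ['(cvmfs.*\.example\.org)', '(.*\.cvmfs\.example\.net)'],
# the rendered squid.conf would contain one line per array entry,
#   acl CVMFS dstdom_regex (cvmfs.*\.example\.org)
#   acl CVMFS dstdom_regex (.*\.cvmfs\.example\.net)
# and Squid evaluates them as a single CVMFS ACL, matched in the
# 'CLUSTER_NETWORK CVMFS' http_access rule below.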
28 | squid::acl { 'CVMFS': 29 | type => 'dstdom_regex', 30 | entries => $cvmfs_acl_regex, 31 | } 32 | squid::http_access { 'manager localhost': 33 | action => 'allow', 34 | } 35 | squid::http_access { 'manager': 36 | action => 'deny', 37 | } 38 | squid::http_access { '!Safe_ports': 39 | action => 'deny', 40 | } 41 | squid::http_access { 'CONNECT !SSL_ports': 42 | action => 'deny', 43 | } 44 | squid::http_access { 'localhost': 45 | action => 'allow', 46 | } 47 | squid::http_access { 'all': 48 | action => 'deny', 49 | } 50 | squid::http_access { 'CLUSTER_NETWORK CVMFS': 51 | action => 'allow', 52 | } 53 | squid::cache_dir { '/var/spool/squid': 54 | type => 'ufs', 55 | options => "${cache_size} 16 256", 56 | } 57 | squid::extra_config_section { 'log': 58 | config_entries => { 59 | cache_store_log => '/var/log/squid/store.log', 60 | cache_log => '/var/log/squid/cache.log', 61 | }, 62 | } 63 | squid::refresh_pattern { '.': 64 | min => 0, 65 | max => 4320, 66 | percent => 20, 67 | } 68 | 69 | @consul::service { 'squid': 70 | port => $port, 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /site/profile/files/base/prepare4image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | systemctl stop puppet 3 | systemctl stop slurmd &> /dev/null || true 4 | systemctl stop consul &> /dev/null || true 5 | systemctl stop consul-template &> /dev/null || true 6 | systemctl disable puppet 7 | systemctl disable slurmd &> /dev/null || true 8 | systemctl disable consul &> /dev/null || true 9 | systemctl disable consul-template &> /dev/null || true 10 | 11 | /sbin/ipa-client-install -U --uninstall 12 | rm -f /var/log/ipaclient-uninstall.log 13 | rm -f /var/log/ipaclient-install.log 14 | rm -rf /etc/sssd/sssd.conf.deleted 15 | 16 | rm -rf /etc/puppetlabs 17 | rm -rf /opt/puppetlabs/puppet/cache/{clientbucket,client_data,client_yaml,state} 18 | rm /opt/consul/node-id /opt/consul/checkpoint-signature /opt/consul/serf/local.snapshot 19 | 20 | # Turn off swap 21 | swapoff -a 22 | grep -q "swap" /etc/fstab && rm -f $(grep "swap" /etc/fstab | cut -f 1) 23 | # Unmount filesystems 24 | umount -a --types cephfs,nfs4 25 | # for xfs, we unmount only what's in /mnt, not things like / or /boot 26 | grep xfs /etc/fstab | cut -f 2 | grep /mnt | xargs --no-run-if-empty umount 27 | grep -P '(ext4|xfs|vfat|^#|^$)' /etc/fstab | grep -v /mnt > /etc/fstab.new 28 | mv -f /etc/fstab.new /etc/fstab 29 | systemctl daemon-reload 30 | 31 | systemctl stop rsyslog 32 | : > /var/log/messages 33 | test -d /var/log/munge && : > /var/log/munge/munged.log 34 | : > /var/log/secure 35 | : > /var/log/cron 36 | test -d /var/log/audit && : > /var/log/audit/audit.log 37 | 38 | if [ -f /etc/cloud/cloud-init.disabled ]; then 39 | # This is for GCP where we install cloud-init on first boot 40 | rm /etc/cloud/cloud-init.disabled 41 | yum install -y cloud-init 42 | systemctl disable cloud-init 43 | fi 44 | cloud-init clean --logs 45 | rm -rf /var/lib/cloud 46 | 47 | # Remove all internal domain host entries 48 | grep -v $(hostname -d) /etc/hosts > /etc/hosts.clean 49 | mv /etc/hosts.clean /etc/hosts 50 | 51 | rm -f /etc/hostname 52 | rm -f /etc/udev/rules.d/70-persistent-net.rules 53 | : > /etc/sysconfig/network 54 | : > /etc/machine-id 55 | 56 | rm /etc/NetworkManager/conf.d/zzz-puppet.conf 57 | : > /etc/resolv.conf 58 | 59 | cat > /etc/sysconfig/network-scripts/ifcfg-eth0 << EOF 60 | DEVICE=eth0 61 | TYPE=Ethernet 62 | ONBOOT=yes 63 | BOOTPROTO=dhcp 64 
| EOF 65 | halt -p 66 | -------------------------------------------------------------------------------- /site/profile/templates/accounts/mkhome.sh.epp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source /sbin/account_functions.sh 3 | 4 | PROJECT_REGEX="<%= $project_regex %>" 5 | 6 | MKHOME_PROJECT_DIR="/var/lib/mkhome_project/" 7 | MKHOME_RETRY_PIPE="${MKHOME_PROJECT_DIR}/mkhome_retry.pipe" 8 | MKHOME_MODPROJECT_PIPE="${MKHOME_PROJECT_DIR}/mkhome_modproject.pipe" 9 | LAST_TIMESTAMP_FILE=${MKHOME_PROJECT_DIR}/mkhome.last_timestamp 10 | BASEDN=$(grep -o -P "basedn = \K(.*)" /etc/ipa/default.conf) 11 | 12 | trap "rm -f ${MKHOME_RETRY_PIPE}; rm -f ${MKHOME_MODPROJECT_PIPE}" EXIT 13 | 14 | mkdir -p ${MKHOME_PROJECT_DIR} 15 | 16 | if [[ ! -p ${MKHOME_RETRY_PIPE} ]]; then 17 | mkfifo -m 600 ${MKHOME_RETRY_PIPE} 18 | fi 19 | 20 | if [[ ! -p ${MKHOME_MODPROJECT_PIPE} ]]; then 21 | mkfifo -m 600 ${MKHOME_MODPROJECT_PIPE} 22 | fi 23 | 24 | if [ -e $LAST_TIMESTAMP_FILE ]; then 25 | LAST_TIMESTAMP=$(cat $LAST_TIMESTAMP_FILE) 26 | start_index=$(sed -n "/${LAST_TIMESTAMP//\//\\\/}/=" /var/log/dirsrv/slapd-*/access) 27 | start_index=$(($start_index+1)) 28 | else 29 | start_index=1 30 | fi 31 | 32 | ( 33 | tail -n +${start_index} -F /var/log/dirsrv/slapd-*/access | 34 | grep --line-buffered -P 'ADD dn=\"uid=\K(.*)(?=,cn=users)' | 35 | sed -u -r 's/^\[(.*) \+[0-9]{4}\].*uid=(.*),cn=users.*$/\1 \2/' & 36 | tail -n+1 -F ${MKHOME_RETRY_PIPE} 37 | ) | 38 | while read TIMESTAMP USERNAME; do 39 | if [[ "${TIMESTAMP}" != "retry" ]]; then 40 | echo $TIMESTAMP > $LAST_TIMESTAMP_FILE 41 | fi 42 | if [[ "${USERNAME}" == "admin" ]]; then 43 | continue 44 | fi 45 | <% if $manage_home { -%> 46 | if ! mkhome $USERNAME; then 47 | echo "retry" $USERNAME > ${MKHOME_RETRY_PIPE} & 48 | continue 49 | fi 50 | <% } -%> 51 | <% if $manage_scratch { -%> 52 | if ! mkscratch $USERNAME <%= $manage_home %>; then 53 | echo "retry" $USERNAME > ${MKHOME_RETRY_PIPE} & 54 | continue 55 | fi 56 | <% } -%> 57 | 58 | for PROJECT in $(kexec ldapsearch -o ldif-wrap=no -LLL -b "uid=${USERNAME},cn=users,cn=accounts,${BASEDN}" memberOf 2> /dev/null | grep -P -o ${PROJECT_REGEX}); do 59 | if [[ ! -z "${PROJECT}" ]]; then 60 | echo 0 ${PROJECT} <%= $manage_project %> ${USERNAME} > ${MKHOME_MODPROJECT_PIPE} & 61 | fi 62 | done 63 | done 64 | -------------------------------------------------------------------------------- /hiera.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 5 3 | defaults: 4 | # The default value for "datadir" is "data" under the same directory as the hiera.yaml 5 | # file (this file) 6 | # When specifying a datadir, make sure the directory exists. 7 | # See https://puppet.com/docs/puppet/latest/environments_about.html for further details on environments. 
8 | # datadir: data 9 | # data_hash: yaml_data 10 | hierarchy: 11 | - name: "Per hostname" 12 | globs: 13 | - "user_data/hostnames/%{facts.networking.hostname}/*.yaml" 14 | - "user_data/hostnames/%{facts.networking.hostname}.yaml" 15 | lookup_key: eyaml_lookup_key # eyaml backend 16 | options: 17 | pkcs7_private_key: /etc/puppetlabs/puppet/eyaml/private_key.pkcs7.pem 18 | - name: "Per prefix" 19 | globs: 20 | - "user_data/prefixes/%{facts.prefix}/*.yaml" 21 | - "user_data/prefixes/%{facts.prefix}.yaml" 22 | lookup_key: eyaml_lookup_key # eyaml backend 23 | options: 24 | pkcs7_private_key: /etc/puppetlabs/puppet/eyaml/private_key.pkcs7.pem 25 | - name: "Rest of user data" 26 | globs: 27 | - "user_data/*.yaml" 28 | - "user_data.yaml" 29 | lookup_key: eyaml_lookup_key # eyaml backend 30 | options: 31 | pkcs7_private_key: /etc/puppetlabs/puppet/eyaml/private_key.pkcs7.pem 32 | - name: "Terraform data" 33 | path: "terraform_data.yaml" 34 | lookup_key: eyaml_lookup_key # eyaml backend 35 | options: 36 | pkcs7_private_key: /etc/puppetlabs/puppet/eyaml/private_key.pkcs7.pem 37 | - name: "Terraform self" 38 | data_hash: terraform_self 39 | path: "terraform_data.yaml" 40 | options: 41 | hostname: "%{facts.networking.hostname}" 42 | - name: "Software stack" 43 | path: "software_stack/%{facts.software_stack}.yaml" 44 | - name: "Cloud provider region" 45 | path: "cloud/%{facts.cloud.provider}/%{facts.cloud.region}.yaml" 46 | - name: "Cloud provider" 47 | path: "cloud/%{facts.cloud.provider}.yaml" 48 | - name: "OS version" 49 | path: "os/%{facts.os.family}/%{facts.os.release.major}.yaml" 50 | - name: "Other YAML hierarchy levels" 51 | paths: 52 | - "common.yaml" 53 | - name: "hieradata generated by bootstrap.sh" 54 | lookup_key: eyaml_lookup_key # eyaml backend 55 | paths: 56 | - "bootstrap.yaml" 57 | options: 58 | pkcs7_private_key: /etc/puppetlabs/puppet/eyaml/boot_private_key.pkcs7.pem 59 | - name: "site.pp definition" 60 | path: "site.yaml" 61 | -------------------------------------------------------------------------------- /site/profile/manifests/rsyslog.pp: -------------------------------------------------------------------------------- 1 | class profile::rsyslog::base { 2 | class { 'rsyslog': 3 | purge_config_files => false, 4 | override_default_config => false, 5 | } 6 | } 7 | 8 | class profile::rsyslog::client { 9 | include profile::rsyslog::base 10 | include rsyslog::config 11 | 12 | $remote_host_conf = @(EOT) 13 | {{ with $local := node -}} 14 | {{ range service "rsyslog" -}} 15 | {{ if ne $local.Node.Address .Address -}} 16 | *.* @@{{.Address}}:{{.Port}} 17 | {{ end -}} 18 | {{ end -}} 19 | {{ end -}} 20 | | EOT 21 | file { '/etc/rsyslog.d/remote_host.conf.ctmpl': 22 | content => $remote_host_conf, 23 | notify => Service['consul-template'], 24 | } 25 | 26 | consul_template::watch { 'remote_host.conf.ctmpl': 27 | require => File['/etc/rsyslog.d/remote_host.conf.ctmpl'], 28 | config_hash => { 29 | perms => '0644', 30 | source => '/etc/rsyslog.d/remote_host.conf.ctmpl', 31 | destination => '/etc/rsyslog.d/99-remote_host.conf', 32 | command => 'systemctl restart rsyslog || true', 33 | }, 34 | } 35 | } 36 | 37 | class profile::rsyslog::server { 38 | include profile::rsyslog::base 39 | 40 | @consul::service { 'rsyslog': 41 | port => 514, 42 | } 43 | 44 | file { '/etc/rsyslog.d/98-remotelogs.conf': 45 | notify => Service['rsyslog'], 46 | content => @(EOT) 47 | $template RemoteLogs,"/var/log/instances/%HOSTNAME%/%PROGRAMNAME%.log" 48 | if $fromhost-ip != '127.0.0.1' then -?RemoteLogs 49 | & stop 50 | |EOT 51 | } 52 | 53 | logrotate::rule { 'rsyslog_instances': 54 | path => '/var/log/instances/*/*.log', 55 | rotate => 5, 56 | ifempty => false, 57 | copytruncate => false, 58 | olddir => false, 59 | size => '5M', 60 | compress => true, 61 | create => true, 62 | create_mode => '0600', 63 | create_owner => 'root', 64 | create_group => 'root', 65 | postrotate => '/usr/bin/systemctl kill -s HUP rsyslog.service >/dev/null 2>&1 || true', 66 | } 67 | 68 | file_line { 'rsyslog_modload_imtcp': 69 | ensure => present, 70 | path => '/etc/rsyslog.conf', 71 | match => '^#\$ModLoad imtcp', 72 | line => '$ModLoad imtcp', 73 | notify => Service['rsyslog'], 74 | } 75 | 76 | file_line { 'rsyslog_InputTCPServerRun': 77 | ensure => present, 78 | path => '/etc/rsyslog.conf', 79 | match => '^#\$InputTCPServerRun 514', 80 | line => '$InputTCPServerRun 514', 81 | notify => Service['rsyslog'], 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /site/profile/manifests/sssd.pp: -------------------------------------------------------------------------------- 1 | class profile::sssd::client( 2 | Hash[String, Any] $domains = {}, 3 | Array[String] $access_tags = ['login', 'node'], 4 | Optional[Boolean] $deny_access = undef, 5 | Optional[String] $ldapclient_domain = undef, 6 | ){ 7 | ensure_resource('service', 'sssd', { 'ensure' => running, 'enable' => true }) 8 | 9 | package { 'sssd-ldap': } 10 | 11 | if $deny_access =~ Undef { 12 | $_deny_access = intersection(lookup('terraform.self.tags'), $access_tags).empty 13 | } else { 14 | $_deny_access = $deny_access 15 | } 16 | if $_deny_access { 17 | $extra_config = { 18 | 'access_provider' => 'deny' 19 | } 20 | } else { 21 | $extra_config = {} 22 | } 23 | 24 | $domains.each | $domain, $config | { 25 | file { "/etc/sssd/conf.d/${domain}.conf": 26 | ensure => 'present', 27 | owner => 'root', 28 | group => 'root', 29 | mode => '0600', 30 | content => epp('profile/sssd/sssd.conf', { 31 | 'domain' => $domain, 32 | 'config' => $config + $extra_config, 33 | }), 34 | seltype => 'sssd_conf_t', 35 | notify => Service['sssd'] 36 | } 37 | } 38 | 39 | if $ldapclient_domain in $domains { 40 | $domain_values = $domains[$ldapclient_domain] 41 | $uris = join($domain_values['ldap_uri'], ' ') 42 | $ldap_conf_template = @("EOT") 43 | # Managed by puppet 44 | SASL_NOCANON on 45 | URI ${uris} 46 | BASE ${domain_values['ldap_search_base']} 47 | EOT 48 | file { '/etc/openldap/ldap.conf': 49 | content => $ldap_conf_template, 50 | owner => 'root', 51 | group => 'root', 52 | mode => '0644', 53 | } 54 | # ipa-client-install creates /etc/openldap/ldap.conf 55 | # We make sure that if it has to be executed, it is executed 56 | # before we create our own version of the file. 57 | Exec <| tag == profile::freeipa |> -> File['/etc/openldap/ldap.conf'] 58 | } 59 | 60 | if $facts['ipa']['installed'] { 61 | $domain_list = join([$facts['ipa']['domain']] + keys($domains), ',') 62 | } else { 63 | $domain_list = join(keys($domains), ',') 64 | } 65 | 66 | if ! $domain_list.empty { 67 | $augeas_domains = "set target[ . = 'sssd']/domains ${domain_list}" 68 | } else { 69 | $augeas_domains = '' 70 | } 71 | 72 | file { '/etc/sssd/sssd.conf': 73 | ensure => 'file', 74 | owner => 'root', 75 | group => 'root', 76 | mode => '0600', 77 | notify => Service['sssd'], 78 | } 79 | 80 | augeas { 'sssd.conf': 81 | lens => 'sssd.lns', 82 | incl => '/etc/sssd/sssd.conf', 83 | changes => [ 84 | "set target[ . = 'sssd'] 'sssd'", 85 | "set target[ . 
= 'sssd']/services 'nss, sudo, pam, ssh, ifp'", 86 | $augeas_domains, 87 | ], 88 | require => File['/etc/sssd/sssd.conf'], 89 | notify => Service['sssd'], 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /site/profile/manifests/software_stack.pp: -------------------------------------------------------------------------------- 1 | class profile::software_stack ( 2 | Integer $min_uid = 1000, 3 | Optional[String] $initial_profile = undef, 4 | Optional[Array[String]] $lmod_default_modules = undef, 5 | Optional[Hash[String, String]] $extra_site_env_vars = undef, 6 | ) { 7 | include profile::consul 8 | include profile::cvmfs::client 9 | 10 | package { 'cvmfs-config-eessi': 11 | ensure => 'installed', 12 | provider => 'rpm', 13 | require => Package['cvmfs'], 14 | source => 'https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi-latest.noarch.rpm', 15 | } 16 | 17 | package { 'computecanada-release-2.0-1.noarch': 18 | ensure => 'installed', 19 | provider => 'rpm', 20 | source => 'https://package.computecanada.ca/yum/cc-cvmfs-public/prod/RPM/computecanada-release-2.0-1.noarch.rpm', 21 | } 22 | 23 | package { 'cvmfs-config-computecanada': 24 | ensure => 'installed', 25 | require => [ 26 | Package['cvmfs'], 27 | Package['computecanada-release-2.0-1.noarch'] 28 | ], 29 | } 30 | 31 | if $facts['software_stack'] == 'computecanada' or $facts['software_stack'] == 'alliance' { 32 | if $facts['os']['architecture'] != 'x86_64' { 33 | fail("${facts['software_stack']} software stack does not support: ${facts['os']['architecture']}") 34 | } 35 | 36 | file { '/etc/consul-template/z-00-rsnt_arch.sh.ctmpl': 37 | source => 'puppet:///modules/profile/software_stack/z-00-rsnt_arch.sh.ctmpl', 38 | notify => Service['consul-template'], 39 | } 40 | 41 | consul_template::watch { 'z-00-rsnt_arch.sh': 42 | require => File['/etc/consul-template/z-00-rsnt_arch.sh.ctmpl'], 43 | config_hash => { 44 | perms => '0644', 45 | source => '/etc/consul-template/z-00-rsnt_arch.sh.ctmpl', 46 | destination => '/etc/profile.d/z-00-rsnt_arch.sh', 47 | command => '/usr/bin/true', 48 | }, 49 | } 50 | $software_stack_meta = { arch => $facts['cpu_ext'] } 51 | } else { 52 | file { '/etc/profile.d/z-00-rsnt_arch.sh': 53 | ensure => 'absent', 54 | } 55 | $software_stack_meta = {} 56 | } 57 | 58 | $ensure_stack = $facts['software_stack'] ? 
{ 59 | 'computecanada' => 'present', 60 | 'alliance' => 'present', 61 | 'eessi' => 'present', 62 | default => 'absent', 63 | } 64 | 65 | file { '/etc/profile.d/z-01-site.sh': 66 | ensure => $ensure_stack, 67 | content => epp('profile/software_stack/z-01-site.sh', 68 | { 69 | 'min_uid' => $min_uid, 70 | 'lmod_default_modules' => $lmod_default_modules, 71 | 'initial_profile' => $initial_profile, 72 | 'extra_site_env_vars' => $extra_site_env_vars, 73 | } 74 | ), 75 | } 76 | 77 | @consul::service { 'software_stack': 78 | ensure => $ensure_stack, 79 | meta => $software_stack_meta, 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | base64 (0.3.0) 5 | benchmark (0.5.0) 6 | concurrent-ruby (1.3.6) 7 | deep_merge (1.2.2) 8 | diff-lcs (1.6.2) 9 | facterdb (4.2.0) 10 | jgrep (~> 1.5, >= 1.5.4) 11 | fast_gettext (4.1.1) 12 | prime 13 | racc 14 | forwardable (1.4.0) 15 | getoptlong (0.2.1) 16 | hiera (3.12.0) 17 | hocon (1.4.0) 18 | jgrep (1.5.4) 19 | locale (2.1.4) 20 | logger (1.7.0) 21 | mocha (3.0.1) 22 | ruby2_keywords (>= 0.0.5) 23 | multi_json (1.18.0) 24 | openfact (5.2.1) 25 | base64 (>= 0.1, < 0.4) 26 | hocon (~> 1.3) 27 | logger (~> 1.5) 28 | thor (>= 1.0.1, < 2) 29 | openvox (8.24.2) 30 | base64 (>= 0.1, < 0.4) 31 | benchmark (>= 0.2, < 0.6) 32 | concurrent-ruby (~> 1.0) 33 | deep_merge (~> 1.0) 34 | fast_gettext (>= 2.1, < 5) 35 | getoptlong (~> 0.2.0) 36 | locale (~> 2.1) 37 | multi_json (~> 1.13) 38 | openfact (~> 5.0) 39 | ostruct (>= 0.5.5, < 0.7) 40 | puppet-resource_api (~> 2.0) 41 | racc (~> 1.5) 42 | scanf (~> 1.0) 43 | semantic_puppet (~> 1.0) 44 | ostruct (0.6.3) 45 | prime (0.1.4) 46 | forwardable 47 | singleton 48 | puppet-lint (5.1.1) 49 | puppet-lint-param-docs (3.0.0) 50 | puppet-lint (~> 5.1) 51 | puppet-resource_api (2.0.0) 52 | hocon (>= 1.0) 53 | puppet-syntax (7.0.1) 54 | openvox (>= 8, < 9) 55 | rake (~> 13.1) 56 | puppetlabs_spec_helper (1.1.1) 57 | mocha 58 | puppet-lint 59 | puppet-syntax 60 | rake 61 | rspec-puppet 62 | racc (1.8.1) 63 | rake (13.3.1) 64 | rspec (3.13.2) 65 | rspec-core (~> 3.13.0) 66 | rspec-expectations (~> 3.13.0) 67 | rspec-mocks (~> 3.13.0) 68 | rspec-core (3.13.6) 69 | rspec-support (~> 3.13.0) 70 | rspec-expectations (3.13.5) 71 | diff-lcs (>= 1.2.0, < 2.0) 72 | rspec-support (~> 3.13.0) 73 | rspec-mocks (3.13.7) 74 | diff-lcs (>= 1.2.0, < 2.0) 75 | rspec-support (~> 3.13.0) 76 | rspec-puppet (5.0.0) 77 | rspec (~> 3.0) 78 | rspec-puppet-facts (6.0.0) 79 | deep_merge (~> 1.2) 80 | facterdb (>= 3.1, < 5.0) 81 | openfact (~> 5.0) 82 | rspec-support (3.13.6) 83 | ruby2_keywords (0.0.5) 84 | scanf (1.0.0) 85 | semantic_puppet (1.1.1) 86 | singleton (0.3.0) 87 | syslog (0.3.0) 88 | logger 89 | thor (1.4.0) 90 | 91 | PLATFORMS 92 | ruby 93 | x86_64-linux 94 | 95 | DEPENDENCIES 96 | hiera 97 | openvox (~> 8.0) 98 | puppet-lint (~> 5.1, >= 5.1.1) 99 | puppet-lint-param-docs 100 | puppet-syntax (~> 7.0, >= 7.0.1) 101 | puppetlabs_spec_helper 102 | rake 103 | rspec 104 | rspec-core 105 | rspec-puppet 106 | rspec-puppet-facts 107 | syslog 108 | 109 | BUNDLED WITH 110 | 2.6.9 111 | -------------------------------------------------------------------------------- /site/profile/templates/slurm/slurm.conf.epp: -------------------------------------------------------------------------------- 1 | include /etc/slurm/nodes.conf 2 | 3 | <% if ! 
$slurmctl.empty { -%> 4 | <% $slurmctl.each | $hostname| { -%> 5 | SlurmctldHost=<%= $hostname %> 6 | <% }} -%> 7 | SlurmctldPort=6817 8 | 9 | ## Accounting 10 | <% if ! $slurmdb.empty { -%> 11 | AccountingStorageHost=<%= $slurmdb[0] %> 12 | <% if length($slurmdb) > 1 { -%> 13 | AccountingStorageBackupHost=<%= $slurmdb[1] %> 14 | <% } -%> 15 | AccountingStorageType=accounting_storage/slurmdbd 16 | AccountingStorageTRES=gres/gpu,cpu,mem 17 | AccountingStorageEnforce=associations 18 | JobAcctGatherType=jobacct_gather/cgroup 19 | JobAcctGatherFrequency=task=30 20 | JobAcctGatherParams=NoOverMemoryKill,UsePSS 21 | <% } -%> 22 | 23 | # MANAGEMENT POLICIES 24 | ClusterName=<%= $cluster_name %> 25 | AuthType=auth/munge 26 | CryptoType=crypto/munge 27 | SlurmUser=slurm 28 | # SCHEDULER CONFIGURATIONS 29 | SchedulerType=sched/backfill 30 | SelectType=select/cons_tres 31 | SelectTypeParameters=CR_Core_Memory 32 | 33 | # NODE CONFIGURATIONS 34 | GresTypes=gpu,shard 35 | 36 | TreeWidth=<%= max($nb_nodes, 1) %> 37 | ReturnToService=2 # A DOWN node will become available for use upon registration with a valid configuration. 38 | RebootProgram=/usr/sbin/reboot 39 | ResumeProgram=/usr/bin/slurm_resume 40 | SuspendProgram=/usr/bin/slurm_suspend 41 | ResumeFailProgram=/usr/bin/slurm_resume_fail 42 | ResumeTimeout=<%= $resume_timeout %> 43 | SuspendTime=<%= $suspend_time %> 44 | SuspendRate=<%= $suspend_rate %> 45 | ResumeRate=<%= $resume_rate %> 46 | <% if $suspend_exc_nodes != '' { -%> 47 | SuspendExcNodes=<%= $suspend_exc_nodes %> 48 | <% } -%> 49 | 50 | SchedulerParameters=salloc_wait_nodes 51 | SlurmctldParameters=idle_on_node_suspend,power_save_interval=30 52 | DebugFlags=Power 53 | 54 | # PARTITION CONFIGURATIONS 55 | DisableRootJobs=YES 56 | PartitionName=DEFAULT DefaultTime=1:00:00 DefMemPerCPU=256 OverSubscribe=YES 57 | PartitionName=cpubase_bycore_b1 Default=YES Nodes=ALL 58 | <% $partitions.map|$name, $values| { -%> 59 | PartitionName=<%= $name %> Nodes=<%= join($values['nodes'], ',') %> 60 | <% } -%> 61 | 62 | SlurmdPort=6818 63 | 64 | SlurmctldDebug=<%= $log_level %> 65 | SlurmctldLogFile=/var/log/slurm/slurmctld.log 66 | SlurmdDebug=<%= $log_level %> 67 | SlurmdLogFile=/var/log/slurm/slurmd.log 68 | 69 | SlurmctldPidFile=/var/run/slurmctld/slurmctld.pid 70 | SlurmdPidFile=/var/run/slurmd/slurmd.pid 71 | 72 | # JOBS AND TASKS/RESOURCES CONTROL 73 | TmpFS=/localscratch 74 | <% if $enable_x11_forwarding { -%> 75 | PrologFlags=alloc,contain,x11 76 | X11Parameters=home_xauthority 77 | <% } else { -%> 78 | PrologFlags=alloc,contain 79 | <% } -%> 80 | <% if $enable_scrontab { -%> 81 | ScronParameters=enable 82 | <% } -%> 83 | # Prolog=/etc/slurm/prolog 84 | Epilog=/etc/slurm/epilog 85 | PlugStackConfig=/etc/slurm/plugstack.conf 86 | MpiDefault=pmi2 87 | ProctrackType=proctrack/cgroup 88 | TaskPlugin=task/affinity,task/cgroup 89 | PropagateResourceLimits=NONE 90 | MailProg=/usr/sbin/slurm_mail 91 | 92 | StateSaveLocation=/var/spool/slurm 93 | InteractiveStepOptions="--interactive --mem-per-cpu=0 --preserve-env --pty $SHELL" 94 | LaunchParameters=use_interactive_step,disable_send_gids 95 | JobSubmitPlugins=lua 96 | AccountingStoreFlags=job_script 97 | 98 | 99 | include /etc/slurm/slurm-addendum.conf 100 | -------------------------------------------------------------------------------- /site/profile/manifests/consul.pp: -------------------------------------------------------------------------------- 1 | class profile::consul ( 2 | Array[String] $servers, 3 | String $acl_api_token, 4 | ) { 5 | tag 
'mc_bootstrap' 6 | 7 | include consul_template 8 | 9 | $ipaddress = lookup('terraform.self.local_ip') 10 | if $ipaddress in $servers { 11 | $is_server = true 12 | $bootstrap_expect = length($servers) 13 | $retry_join = $servers.filter | $ip | { $ip != $ipaddress } 14 | } else { 15 | $is_server = false 16 | $bootstrap_expect = 0 17 | $retry_join = $servers 18 | } 19 | 20 | class { 'consul': 21 | config_mode => '0640', 22 | acl_api_token => $acl_api_token, 23 | config_hash => { 24 | 'bootstrap_expect' => $bootstrap_expect, 25 | 'bind_addr' => $ipaddress, 26 | 'data_dir' => '/opt/consul', 27 | 'log_level' => 'INFO', 28 | 'node_name' => $facts['networking']['hostname'], 29 | 'server' => $is_server, 30 | 'retry_join' => $retry_join, 31 | 'acl' => { 32 | 'enabled' => true, 33 | 'default_policy' => 'deny', 34 | 'tokens' => { 35 | 'initial_management' => $acl_api_token, 36 | 'agent' => $acl_api_token, 37 | }, 38 | }, 39 | }, 40 | } 41 | 42 | if ! $is_server { 43 | $consul_validators = $servers.map | $index, $server_ip | { 44 | tcp_conn_validator { "${server_ip}:8300": 45 | host => $server_ip, 46 | port => 8300, 47 | try_sleep => 5, 48 | timeout => 120, 49 | require => Service['consul'], 50 | } 51 | } 52 | } else { 53 | $consul_validators = [] 54 | } 55 | 56 | tcp_conn_validator { '127.0.0.1:8500': 57 | try_sleep => 5, 58 | timeout => 60, 59 | require => [Service['consul']] + $consul_validators, 60 | } 61 | 62 | include profile::consul::puppet_watch 63 | Consul::Service <| |> { token => $acl_api_token, require => Tcp_conn_validator['127.0.0.1:8500'] } 64 | Consul::Watch <| |> { token => $acl_api_token, require => Tcp_conn_validator['127.0.0.1:8500'] } 65 | } 66 | 67 | class profile::consul::puppet_watch { 68 | # jq can be used to easily retrieve the token from 69 | # consul config file like this: 70 | # jq -r .acl_agent_token /etc/consul/config.json 71 | include epel 72 | ensure_packages(['jq'], { ensure => 'present', require => Yumrepo['epel'] }) 73 | 74 | # Ensure consul can read the state of agent_catalog_run.lock 75 | file { '/opt/puppetlabs/puppet/cache': 76 | ensure => directory, 77 | mode => '0751', 78 | } 79 | 80 | $consul_sudoer = "consul ALL=(root) NOPASSWD: /usr/bin/systemctl reload puppet\n" 81 | file { '/etc/sudoers.d/99-consul': 82 | owner => 'root', 83 | group => 'root', 84 | mode => '0440', 85 | content => $consul_sudoer, 86 | } 87 | 88 | file { '/usr/bin/puppet_event_handler.sh': 89 | mode => '0755', 90 | owner => 'root', 91 | group => 'root', 92 | source => 'puppet:///modules/profile/consul/puppet_event_handler.sh', 93 | } 94 | 95 | @consul::watch { 'puppet_event': 96 | ensure => present, 97 | type => 'event', 98 | event_name => 'puppet', 99 | args => ['/usr/bin/puppet_event_handler.sh'], 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /site/profile/manifests/jupyterhub.pp: -------------------------------------------------------------------------------- 1 | class profile::jupyterhub::hub ( 2 | String $register_url = '', # lint:ignore:params_empty_string_assignment 3 | String $reset_pw_url = '', # lint:ignore:params_empty_string_assignment 4 | ) { 5 | contain jupyterhub 6 | 7 | Service <| tag == profile::sssd |> ~> Service['jupyterhub'] 8 | Yumrepo['epel'] -> Class['jupyterhub'] 9 | 10 | file { '/etc/jupyterhub/templates/login.html': 11 | content => epp('profile/jupyterhub/login.html', { 12 | 'register_url' => $register_url, 13 | 'reset_pw_url' => $reset_pw_url, 14 | } 15 | ), 16 | } 17 | include profile::slurm::submitter 18 | 
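# Exporting the hub as a Consul service makes it discoverable by other nodes
# (for instance by the reverse proxy's consul-template configuration,
# presumably); 8081 is JupyterHub's default hub API port.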
19 | @consul::service { 'jupyterhub': 20 | port => 8081, 21 | tags => ['jupyterhub'], 22 | } 23 | 24 | file { "${jupyterhub::prefix}/bin/ipa_create_user.py": 25 | source => 'puppet:///modules/profile/users/ipa_create_user.py', 26 | mode => '0755', 27 | require => Jupyterhub::Uv::Venv['hub'], 28 | } 29 | 30 | file { "${jupyterhub::prefix}/bin/kinit_wrapper": 31 | source => 'puppet:///modules/profile/freeipa/kinit_wrapper', 32 | mode => '0755', 33 | require => Jupyterhub::Uv::Venv['hub'], 34 | } 35 | } 36 | 37 | class profile::jupyterhub::node { 38 | include jupyterhub::node 39 | if lookup('jupyterhub::kernel::install_method') == 'venv' and lookup('jupyterhub::kernel::venv::python') =~ /^\/cvmfs.*/ { 40 | Class['profile::software_stack'] -> Class['jupyterhub::kernel::venv'] 41 | } 42 | } 43 | 44 | class profile::jupyterhub::hub::keytab { 45 | $ipa_domain = lookup('profile::freeipa::base::ipa_domain') 46 | $jupyterhub_prefix = lookup('jupyterhub::prefix', undef, undef, '/opt/jupyterhub') 47 | 48 | $fqdn = "${facts['networking']['hostname']}.${ipa_domain}" 49 | $service_name = "jupyterhub/${fqdn}" 50 | $service_register_script = @("EOF") 51 | api.Command.batch( 52 | { 'method': 'service_add', 'params': [['${service_name}'], {}]}, 53 | { 'method': 'service_add_principal', 'params': [['${service_name}', 'jupyterhub/jupyterhub'], {}]}, 54 | { 'method': 'role_add', 'params': [['JupyterHub'], {'description' : 'JupyterHub User management'}]}, 55 | { 'method': 'role_add_privilege', 'params': [['JupyterHub'], {'privilege' : 'Group Administrators'}]}, 56 | { 'method': 'role_add_privilege', 'params': [['JupyterHub'], {'privilege' : 'User Administrators'}]}, 57 | { 'method': 'role_add_member', 'params': [['JupyterHub'], {'service' : '${service_name}'}]}, 58 | ) 59 | |EOF 60 | 61 | file { "${jupyterhub_prefix}/bin/ipa_register_service.py": 62 | content => $service_register_script, 63 | require => Jupyterhub::Uv::Venv['hub'], 64 | } 65 | 66 | $ipa_passwd = lookup('profile::freeipa::server::admin_password') 67 | $keytab_command = @("EOT") 68 | kinit_wrapper ipa console ${jupyterhub_prefix}/bin/ipa_register_service.py && \ 69 | kinit_wrapper ipa-getkeytab -p jupyterhub/jupyterhub -k /etc/jupyterhub/jupyterhub.keytab 70 | |EOT 71 | exec { 'jupyterhub_keytab': 72 | command => $keytab_command, 73 | creates => '/etc/jupyterhub/jupyterhub.keytab', 74 | require => [ 75 | File['/etc/jupyterhub'], 76 | File["${jupyterhub_prefix}/bin/kinit_wrapper"], 77 | Exec['ipa-install'], 78 | ], 79 | subscribe => File["${jupyterhub_prefix}/bin/ipa_register_service.py"], 80 | environment => ["IPA_ADMIN_PASSWD=${ipa_passwd}"], 81 | path => ['/bin', '/usr/bin', '/sbin','/usr/sbin', "${jupyterhub_prefix}/bin"], 82 | } 83 | 84 | file { '/etc/jupyterhub/jupyterhub.keytab': 85 | owner => 'root', 86 | group => 'jupyterhub', 87 | mode => '0640', 88 | subscribe => Exec['jupyterhub_keytab'], 89 | require => Group['jupyterhub'], 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /site/profile/manifests/ceph.pp: -------------------------------------------------------------------------------- 1 | type BindMount = Struct[{ 2 | 'src' => Stdlib::Unixpath, 3 | 'dst' => Stdlib::Unixpath, 4 | 'type' => Optional[Enum['file', 'directory']], 5 | }] 6 | 7 | type CephFS = Struct[ 8 | { 9 | 'share_name' => String, 10 | 'access_key' => String, 11 | 'export_path' => Stdlib::Unixpath, 12 | 'bind_mounts' => Optional[Array[BindMount]], 13 | 'binds_fcontext_equivalence' => Optional[Stdlib::Unixpath], 14 | } 15 | 
] 16 | 17 | class profile::ceph::client ( 18 | Array[String] $mon_host, 19 | Hash[String, CephFS] $shares, 20 | ) { 21 | require profile::ceph::client::install 22 | 23 | $mon_host_string = join($mon_host, ',') 24 | $ceph_conf = @("EOT") 25 | [client] 26 | client quota = true 27 | mon host = ${mon_host_string} 28 | | EOT 29 | 30 | file { '/etc/ceph/ceph.conf': 31 | content => $ceph_conf, 32 | } 33 | 34 | ensure_resources(profile::ceph::client::share, $shares, { 'mon_host' => $mon_host, 'bind_mounts' => [] }) 35 | } 36 | 37 | class profile::ceph::client::install ( 38 | String $release = 'reef', 39 | Optional[String] $version = undef, 40 | ) { 41 | include epel 42 | 43 | if $version != undef and $version != '' { 44 | $repo = "rpm-${version}" 45 | } else { 46 | $repo = "rpm-${release}" 47 | } 48 | 49 | yumrepo { 'ceph-stable': 50 | ensure => present, 51 | enabled => true, 52 | baseurl => "https://download.ceph.com/${repo}/el${$::facts['os']['release']['major']}/${::facts['os']['architecture']}/", 53 | gpgcheck => 1, 54 | gpgkey => 'https://download.ceph.com/keys/release.asc', 55 | repo_gpgcheck => 0, 56 | } 57 | 58 | package { 59 | [ 60 | 'libcephfs2', 61 | 'python-cephfs', 62 | 'ceph-common', 63 | 'python3-ceph-argparse', 64 | # 'ceph-fuse', 65 | ]: 66 | ensure => installed, 67 | require => [Yumrepo['epel'], Yumrepo['ceph-stable']], 68 | } 69 | } 70 | 71 | define profile::ceph::client::share ( 72 | Array[String] $mon_host, 73 | String $share_name, 74 | String $access_key, 75 | Stdlib::Unixpath $export_path, 76 | Array[BindMount] $bind_mounts, 77 | Optional[Stdlib::Unixpath] $binds_fcontext_equivalence = undef, 78 | ) { 79 | $client_fullkey = @("EOT") 80 | [client.${name}] 81 | key = ${access_key} 82 | | EOT 83 | 84 | file { "/etc/ceph/client.fullkey.${name}": 85 | content => $client_fullkey, 86 | mode => '0600', 87 | owner => 'root', 88 | group => 'root', 89 | } 90 | 91 | file { "/etc/ceph/client.keyonly.${name}": 92 | content => Sensitive($access_key), 93 | mode => '0600', 94 | owner => 'root', 95 | group => 'root', 96 | } 97 | file { "/mnt/${name}": 98 | ensure => directory, 99 | } 100 | 101 | $mon_host_string = join($mon_host, ',') 102 | mount { "/mnt/${name}": 103 | ensure => 'mounted', 104 | fstype => 'ceph', 105 | device => "${mon_host_string}:${export_path}", 106 | options => "name=${share_name},secretfile=/etc/ceph/client.keyonly.${name}", 107 | require => File['/etc/ceph/ceph.conf'], 108 | } 109 | 110 | $bind_mounts.each |$mount| { 111 | file { $mount['dst']: 112 | ensure => pick($mount['type'], 'directory'), 113 | } 114 | mount { $mount['dst']: 115 | ensure => 'mounted', 116 | fstype => 'none', 117 | options => 'rw,bind', 118 | device => "/mnt/${name}${mount['src']}", 119 | require => [ 120 | File[$mount['dst']], 121 | Mount["/mnt/${name}"] 122 | ], 123 | } 124 | 125 | if ($binds_fcontext_equivalence and $binds_fcontext_equivalence != $mount['dst']) { 126 | selinux::fcontext::equivalence { $mount['dst']: 127 | ensure => 'present', 128 | target => $binds_fcontext_equivalence, 129 | require => Mount[$mount['dst']], 130 | } 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /site/profile/manifests/accounts.pp: -------------------------------------------------------------------------------- 1 | # @summary Class configuring services to bridge LDAP users, Slurm accounts and users' folders in filesystems 2 | # @param project_regex Regex identifying FreeIPA groups that require a corresponding Slurm account 3 | # @param manage_home 
4 | # @param manage_scratch 5 | # @param manage_project 6 | # @param skel_archives Archives extracted in each FreeIPA user's home when created 7 | class profile::accounts ( 8 | String $project_regex, 9 | Boolean $manage_home = true, 10 | Boolean $manage_scratch = true, 11 | Boolean $manage_project = true, 12 | Array[Struct[{ filename => String[1], source => String[1] }]] $skel_archives = [], 13 | ) { 14 | ensure_packages(['rsync'], { ensure => 'installed' }) 15 | 16 | file { 'account_functions.sh': 17 | path => '/sbin/account_functions.sh', 18 | source => 'puppet:///modules/profile/accounts/account_functions.sh', 19 | } 20 | 21 | file { '/sbin/mkhome.sh': 22 | content => epp('profile/accounts/mkhome.sh', 23 | { 24 | manage_home => $manage_home, 25 | manage_scratch => $manage_scratch, 26 | project_regex => $project_regex, 27 | manage_project => $manage_project, 28 | } 29 | ), 30 | mode => '0755', 31 | owner => 'root', 32 | } 33 | 34 | file { 'mkhome.service': 35 | path => '/lib/systemd/system/mkhome.service', 36 | source => 'puppet:///modules/profile/accounts/mkhome.service', 37 | } 38 | 39 | file { '/etc/skel.ipa': 40 | ensure => directory, 41 | owner => 'root', 42 | group => 'root', 43 | mode => '0755', 44 | } 45 | 46 | file { '/etc/skel.ipa/.bash_logout': 47 | source => 'file:///etc/skel/.bash_logout', 48 | owner => 'root', 49 | group => 'root', 50 | mode => '0644', 51 | require => File['/etc/skel.ipa'], 52 | } 53 | 54 | file { '/etc/skel.ipa/.bash_profile': 55 | source => 'file:///etc/skel/.bash_profile', 56 | owner => 'root', 57 | group => 'root', 58 | mode => '0644', 59 | require => File['/etc/skel.ipa'], 60 | } 61 | 62 | file { '/etc/skel.ipa/.bashrc': 63 | source => 'file:///etc/skel/.bashrc', 64 | owner => 'root', 65 | group => 'root', 66 | mode => '0644', 67 | require => File['/etc/skel.ipa'], 68 | } 69 | 70 | ensure_resource('file', '/opt/puppetlabs/puppet/cache/puppet-archive', { 'ensure' => 'directory' }) 71 | $skel_archives.each |$index, Hash $archive| { 72 | $filename = $archive['filename'] 73 | archive { "skel_${index}": 74 | path => "/opt/puppetlabs/puppet/cache/puppet-archive/${filename}", 75 | extract => true, 76 | extract_path => '/etc/skel.ipa', 77 | source => $archive['source'], 78 | require => File['/etc/skel.ipa'], 79 | notify => Exec['chown -R root:root /etc/skel.ipa'], 80 | } 81 | } 82 | 83 | exec { 'chown -R root:root /etc/skel.ipa': 84 | refreshonly => true, 85 | path => ['/bin/', '/usr/bin'], 86 | } 87 | 88 | $mkhome_running = $manage_home or $manage_scratch 89 | @service { 'mkhome': 90 | ensure => $mkhome_running, 91 | enable => $mkhome_running, 92 | require => Package['rsync'], 93 | subscribe => [ 94 | File['/sbin/mkhome.sh'], 95 | File['/sbin/account_functions.sh'], 96 | File['mkhome.service'], 97 | ], 98 | } 99 | 100 | file { 'mkproject.service': 101 | path => '/lib/systemd/system/mkproject.service', 102 | source => 'puppet:///modules/profile/accounts/mkproject.service', 103 | } 104 | 105 | file { '/sbin/mkproject.sh': 106 | content => epp('profile/accounts/mkproject.sh', 107 | { 108 | project_regex => $project_regex, 109 | manage_folder => $manage_project, 110 | } 111 | ), 112 | mode => '0755', 113 | owner => 'root', 114 | } 115 | 116 | # mkproject is always running even if /project does not exist 117 | # because it also handles the creation of Slurm accounts 118 | @service { 'mkproject': 119 | ensure => running, 120 | enable => true, 121 | require => Package['slurm'], 122 | subscribe => [ 123 | File['/sbin/mkproject.sh'], 124 | 
File['/sbin/account_functions.sh'], 125 | File['mkproject.service'], 126 | ], 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /site/profile/manifests/reverse_proxy.pp: -------------------------------------------------------------------------------- 1 | class profile::reverse_proxy ( 2 | String $domain_name, 3 | Hash[String, String] $subdomains, 4 | Hash[String, Array[String]] $remote_ips = {}, 5 | String $main2sub_redir = 'jupyter', 6 | String $robots_txt = "User-agent: *\nDisallow: /", 7 | ) { 8 | selinux::boolean { 'httpd_can_network_connect': } 9 | 10 | selinux::module { 'caddy': 11 | ensure => 'present', 12 | source_pp => 'puppet:///modules/profile/reverse_proxy/caddy.pp', 13 | } 14 | 15 | firewall { '200 httpd public': 16 | chain => 'INPUT', 17 | dport => [80, 443], 18 | proto => 'tcp', 19 | source => '0.0.0.0/0', 20 | action => 'accept', 21 | } 22 | 23 | yumrepo { 'caddy-copr-repo': 24 | enabled => true, 25 | descr => 'Copr repo for caddy', 26 | baseurl => "https://download.copr.fedorainfracloud.org/results/@caddy/caddy/epel-\$releasever-\$basearch/", 27 | skip_if_unavailable => true, 28 | gpgcheck => 1, 29 | gpgkey => 'https://download.copr.fedorainfracloud.org/results/@caddy/caddy/pubkey.gpg', 30 | repo_gpgcheck => 0, 31 | } 32 | 33 | package { 'caddy': 34 | ensure => 'installed', 35 | require => Yumrepo['caddy-copr-repo'], 36 | } 37 | 38 | if $domain_name in $::facts['letsencrypt'] { 39 | $fullchain_exists = $::facts['letsencrypt'][$domain_name]['fullchain'] 40 | $privkey_exists = $::facts['letsencrypt'][$domain_name]['privkey'] 41 | } else { 42 | $fullchain_exists = false 43 | $privkey_exists = false 44 | } 45 | 46 | $configure_tls = ($privkey_exists and $fullchain_exists) 47 | 48 | if $privkey_exists { 49 | file { "/etc/letsencrypt/live/${domain_name}/privkey.pem": 50 | owner => 'root', 51 | group => 'caddy', 52 | mode => '0640', 53 | links => 'follow', 54 | require => Package['caddy'], 55 | before => Service['caddy'], 56 | } 57 | } 58 | 59 | if $configure_tls { 60 | $tls_string = "tls /etc/letsencrypt/live/${domain_name}/fullchain.pem /etc/letsencrypt/live/${domain_name}/privkey.pem" 61 | } else { 62 | $tls_string = '' 63 | } 64 | 65 | file { '/etc/caddy/conf.d': 66 | ensure => directory, 67 | owner => 'root', 68 | group => 'root', 69 | mode => '0644', 70 | seltype => 'httpd_config_t', 71 | require => Package['caddy'], 72 | } 73 | 74 | $caddyfile_content = @("EOT") 75 | { 76 | admin off 77 | metrics { 78 | per_host 79 | } 80 | } 81 | (tls) { 82 | ${tls_string} 83 | } 84 | import conf.d/* 85 | | EOT 86 | file { '/etc/caddy/Caddyfile': 87 | owner => 'root', 88 | group => 'root', 89 | mode => '0644', 90 | seltype => 'httpd_config_t', 91 | require => Package['caddy'], 92 | content => $caddyfile_content, 93 | } 94 | 95 | $host_conf_template = @("END") 96 | ${domain_name} { 97 | import tls 98 | respond /robots.txt 200 { 99 | body "${robots_txt}" 100 | close 101 | } 102 | <% if '${main2sub_redir}' != '' { -%> 103 | redir / https://${main2sub_redir}.${domain_name} 104 | <% } -%> 105 | } 106 | |END 107 | 108 | file { '/etc/caddy/conf.d/host.conf': 109 | owner => 'root', 110 | group => 'root', 111 | mode => '0644', 112 | seltype => 'httpd_config_t', 113 | require => File['/etc/caddy/conf.d'], 114 | content => inline_epp($host_conf_template), 115 | } 116 | 117 | $subdomains.each | $key, $value | { 118 | file { "/etc/caddy/conf.d/${key}.conf": 119 | owner => 'root', 120 | group => 'root', 121 | mode => '0644', 122 | seltype => 
'httpd_config_t', 123 | require => File['/etc/caddy/conf.d'], 124 | content => epp( 125 | 'profile/reverse_proxy/subdomain.conf', 126 | { 127 | 'domain' => $domain_name, 128 | 'subdomain' => $key, 129 | 'server' => $value, 130 | 'remote_ip' => $remote_ips.get($key, ''), 131 | 'robots_txt' => $robots_txt 132 | } 133 | ), 134 | } 135 | } 136 | 137 | service { 'caddy': 138 | ensure => running, 139 | enable => true, 140 | require => [ 141 | Package['caddy'], 142 | Selinux::Module['caddy'], 143 | ], 144 | subscribe => [ 145 | File['/etc/caddy/Caddyfile'], 146 | File['/etc/caddy/conf.d/host.conf'], 147 | ] + $subdomains.map |$key, $value| { File["/etc/caddy/conf.d/${key}.conf"] }, 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /local-tests/puppet-missing-files: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Test puppet sourced files and templates for existence.""" 3 | 4 | import os.path,re 5 | import subprocess 6 | import sys 7 | 8 | def main(module_paths): 9 | """The main flow.""" 10 | owd = os.getcwd() 11 | for path in module_paths: 12 | os.chdir(path) 13 | manifests = get_manifests() 14 | paths = get_paths(manifests) 15 | check_paths(paths) 16 | os.chdir(owd) 17 | 18 | def check_paths(paths): 19 | """Check the set of paths for existence (or symlinked existence).""" 20 | retval = 0 21 | 22 | for path in paths: 23 | if not os.path.exists(path) and not os.path.islink(path): 24 | print("%s does not exist." % format(path)) 25 | retval = 1 26 | 27 | if retval == 1: 28 | sys.exit('Files missing - Fatal Error') 29 | else: 30 | print('No missing files - test PASSED') 31 | return 32 | 33 | def get_manifests(): 34 | """Find all .pp files in the current working directory and subfolders.""" 35 | 36 | try: 37 | manifests = subprocess.check_output(["find", ".", "-type", "f", 38 | "-name", "*.pp"]) 39 | except subprocess.CalledProcessError as error: 40 | sys.exit(error) 41 | 42 | manifests = manifests.strip().splitlines() 43 | return manifests 44 | 45 | def get_paths(manifests): 46 | """Extract and construct paths to check.""" 47 | 48 | paths = set() 49 | 50 | for line in manifests: 51 | try: 52 | results = subprocess.check_output(["grep", "puppet:/", line]) 53 | # we don't care if grep does not find any matches in a file 54 | except subprocess.CalledProcessError: 55 | pass 56 | else: 57 | hits = results.splitlines() 58 | 59 | for hit in hits: 60 | working_copy = hit.decode().strip() 61 | # Exclude files with variables and things that aren't files 62 | if re.search(r'\${?',working_copy): 63 | continue 64 | if re.search('<%=',working_copy): 65 | continue 66 | if re.search('class puppet',working_copy): 67 | continue 68 | quotesplit = re.compile("puppet://(.*)['\"]") 69 | working_copy = quotesplit.split(working_copy) 70 | if len(working_copy) > 1: 71 | working_copy = working_copy[1] 72 | else: 73 | working_copy = working_copy[0] 74 | working_copy = working_copy.replace("puppet://", ".") 75 | 76 | segments = working_copy.split("/", 3) 77 | del segments[1] 78 | 79 | # If this is just a module by itself, we need to delete differently 80 | if len(sys.argv) == 1: 81 | segments.insert(2, 'files') 82 | else: 83 | if sys.argv[1] == 'module': 84 | del segments[1] 85 | segments.insert(1,'files') 86 | else: 87 | segments.insert(2, 'files') 88 | path = "/".join(segments) 89 | paths.add('.'+path) 90 | 91 | try: 92 | results = subprocess.check_output(["grep", "template(", line]) 93 | # we don't 
care if grep does not find any matches in a file 94 | except subprocess.CalledProcessError: 95 | pass 96 | else: 97 | hits = results.splitlines() 98 | 99 | for hit in hits: 100 | working_copy = hit.decode().strip() 101 | if re.search(r'\${?',working_copy): 102 | continue 103 | if re.search(r'<%=',working_copy): 104 | continue 105 | quotesplit = re.compile(r"[\"']") 106 | working_copy = quotesplit.split(working_copy) 107 | if len(working_copy) > 1: 108 | working_copy = working_copy[1] 109 | else: 110 | working_copy = working_copy[0] 111 | segments = working_copy.split("/", 1) 112 | 113 | # If it's a solo module this needs to be handled differently 114 | if len(sys.argv) == 1: 115 | segments.insert(0, ".") 116 | segments.insert(2, "templates") 117 | else: 118 | if sys.argv[1] == 'module': 119 | del segments[0] 120 | segments.insert(0, ".") 121 | segments.insert(0, "templates") 122 | else: 123 | segments.insert(0, ".") 124 | segments.insert(2, "templates") 125 | 126 | path = "/".join(segments) 127 | paths.add(path) 128 | 129 | return paths 130 | 131 | if __name__ == "__main__": 132 | main(sys.argv[1:]) 133 | -------------------------------------------------------------------------------- /site/profile/manifests/base.pp: -------------------------------------------------------------------------------- 1 | class profile::base ( 2 | String $version, 3 | Array[String] $packages, 4 | Optional[String] $admin_email = undef, 5 | ) { 6 | include stdlib 7 | include epel 8 | include selinux 9 | include profile::base::etc_hosts 10 | include profile::base::powertools 11 | include profile::ssh::base 12 | 13 | package { 'selinux-policy': } 14 | Package['selinux-policy'] -> Class['selinux::config'] 15 | 16 | file { '/etc/magic-castle-release': 17 | content => "Magic Castle release ${version}", 18 | } 19 | 20 | file { '/usr/sbin/prepare4image.sh': 21 | source => 'puppet:///modules/profile/base/prepare4image.sh', 22 | mode => '0755', 23 | } 24 | 25 | file { '/etc/localtime': 26 | ensure => link, 27 | target => '/usr/share/zoneinfo/UTC', 28 | } 29 | 30 | if $admin_email { 31 | file { '/opt/puppetlabs/bin/postrun': 32 | mode => '0700', 33 | content => epp('profile/base/postrun', 34 | { 35 | 'email' => $admin_email, 36 | } 37 | ), 38 | } 39 | } 40 | 41 | # Allow users to run TCP servers - activated to allow users 42 | # to run MPI jobs. 43 | selinux::boolean { 'selinuxuser_tcp_server': } 44 | 45 | file { '/etc/puppetlabs/puppet/csr_attributes.yaml': 46 | ensure => absent, 47 | } 48 | 49 | package { 'pciutils': 50 | ensure => 'installed', 51 | } 52 | 53 | package { 'vim': 54 | ensure => 'installed', 55 | } 56 | 57 | package { 'unzip': 58 | ensure => 'installed', 59 | } 60 | 61 | package { 'firewalld': 62 | ensure => 'absent', 63 | } 64 | 65 | class { 'firewall': 66 | tag => 'mc_bootstrap', 67 | } 68 | 69 | # Sometimes systemd-tmpfiles-setup.service fails to create 70 | # /run/lock/subsys folder which is required by iptables. 71 | # This exec runs the command that should have created the folder 72 | # if it is missing.
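# Note: the 'unless' guard below makes this exec a no-op once /run/lock/subsys exists.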
73 | exec { 'systemd-tmpfiles --create --prefix=/run/lock/subsys': 74 | unless => 'test -d /run/lock/subsys', 75 | path => ['/bin'], 76 | notify => [Service['iptables'], Service['ip6tables']], 77 | } 78 | 79 | firewall { '001 accept all from local network': 80 | chain => 'INPUT', 81 | proto => 'all', 82 | source => profile::getcidr(), 83 | action => 'accept', 84 | tag => 'mc_bootstrap', 85 | } 86 | 87 | firewall { '001 drop access to metadata server': 88 | chain => 'OUTPUT', 89 | proto => 'tcp', 90 | destination => '169.254.169.254', 91 | action => 'drop', 92 | uid => '! root', 93 | tag => 'mc_bootstrap', 94 | } 95 | 96 | package { 'clustershell': 97 | ensure => 'installed', 98 | require => Yumrepo['epel'], 99 | } 100 | 101 | if versioncmp($::facts['os']['release']['major'], '8') == 0 { 102 | # haveged service is no longer required for kernel >= 5.4 103 | # RHEL 8 is the last release with a kernel < 5.4 104 | package { 'haveged': 105 | ensure => 'installed', 106 | require => Yumrepo['epel'], 107 | } 108 | 109 | service { 'haveged': 110 | ensure => running, 111 | enable => true, 112 | require => Package['haveged'], 113 | } 114 | } 115 | 116 | ensure_packages($packages, { ensure => 'installed', require => Yumrepo['epel'] }) 117 | 118 | if $::facts.dig('cloud', 'provider') == 'azure' { 119 | include profile::base::azure 120 | } 121 | 122 | # Remove scripts leftover by terraform remote-exec provisioner 123 | file { glob('/tmp/terraform_*.sh'): 124 | ensure => absent, 125 | } 126 | 127 | sysctl { 'kernel.dmesg_restrict': 128 | ensure => 'present', 129 | value => 1, 130 | } 131 | } 132 | 133 | class profile::base::azure { 134 | package { 'WALinuxAgent': 135 | ensure => purged, 136 | } 137 | 138 | file { '/etc/udev/rules.d/66-azure-storage.rules': 139 | source => 'https://raw.githubusercontent.com/Azure/WALinuxAgent/v2.2.48.1/config/66-azure-storage.rules', 140 | require => Package['WALinuxAgent'], 141 | owner => 'root', 142 | group => 'root', 143 | mode => '0644', 144 | checksum => 'md5', 145 | checksum_value => '51e26bfa04737fc1e1f14cbc8aeebece', 146 | } 147 | 148 | exec { 'udevadm trigger --action=change': 149 | refreshonly => true, 150 | subscribe => File['/etc/udev/rules.d/66-azure-storage.rules'], 151 | path => ['/usr/bin'], 152 | } 153 | } 154 | 155 | # build /etc/hosts 156 | class profile::base::etc_hosts { 157 | $ipa_domain = lookup('profile::freeipa::base::ipa_domain') 158 | $instances = lookup('terraform.instances') 159 | 160 | # build /etc/hosts 161 | # Make sure the /etc/hosts entry for the current host is managed by Puppet, or 162 | # that at least it is entered in the right format.
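# A generated entry follows the conventional "IP FQDN shortname" layout, e.g. "10.0.0.5 mgmt1.int.example.org mgmt1" (illustrative values only; the real lines come from the hosts.epp template and the Terraform instance data).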
163 | file { '/etc/hosts': 164 | mode => '0644', 165 | content => epp('profile/base/hosts', 166 | { 167 | 'instances' => $instances, 168 | 'int_domain_name' => $ipa_domain, 169 | } 170 | ), 171 | } 172 | } 173 | 174 | class profile::base::powertools { 175 | if versioncmp($::facts['os']['release']['major'], '8') == 0 { 176 | $repo_name = 'powertools' 177 | } else { 178 | $repo_name = 'crb' 179 | } 180 | package { 'dnf-plugins-core': } 181 | exec { 'enable_powertools': 182 | command => "dnf config-manager --set-enabled ${repo_name}", 183 | unless => "dnf config-manager --dump ${repo_name} | grep -q \'enabled = 1\'", 184 | path => ['/usr/bin'], 185 | require => Package['dnf-plugins-core'], 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /site/profile/files/users/ipa_create_user.py: -------------------------------------------------------------------------------- 1 | #!/usr/libexec/platform-python 2 | import argparse 3 | import logging 4 | import os 5 | import time 6 | import grp 7 | import pwd 8 | 9 | from ipalib import api, errors 10 | from ipalib.cli import cli 11 | from ipapython import ipautil 12 | from ipaplatform.paths import paths 13 | 14 | from six import text_type 15 | 16 | # TODO: get this value from /etc/login.defs 17 | UID_MAX = 60000 18 | 19 | iau_logger = logging.getLogger("IPA_CREATE_USER.PY") 20 | iau_logger.setLevel(logging.INFO) 21 | formatter = logging.Formatter( 22 | fmt="%(asctime)s.%(msecs)03d %(levelname)s {%(module)s} [%(funcName)s] %(message)s", 23 | datefmt="%Y-%m-%d,%H:%M:%S", 24 | ) 25 | handler = logging.StreamHandler() 26 | handler.setFormatter(fmt=formatter) 27 | iau_logger.addHandler(handler) 28 | 29 | 30 | def init_api(): 31 | api.bootstrap_with_global_options(context="cli") 32 | api.add_plugin(cli) 33 | api.finalize() 34 | api.Backend.cli.create_context() 35 | 36 | 37 | def user_add(uid, first, last, password, shell, sshpubkeys): 38 | kargs = dict() 39 | kargs['uid'] = text_type(uid) 40 | kargs['givenname'] = text_type(first) 41 | kargs['sn'] = text_type(last) 42 | if password: 43 | kargs['userpassword'] = text_type(password) 44 | if sshpubkeys: 45 | kargs['ipasshpubkey'] = list(map(text_type, sshpubkeys)) 46 | kargs['loginshell'] = text_type(shell) 47 | 48 | try: 49 | uidnumber = os.stat("/mnt/home/" + uid).st_uid 50 | except OSError: 51 | pass 52 | else: 53 | if uidnumber > UID_MAX: 54 | kargs["uidnumber"] = uidnumber 55 | 56 | # Try up to 5 times to add user to the database 57 | for i in range(1, 6): 58 | try: 59 | iau_logger.info("adding user {uid} (Try {i} / 5)".format(uid=uid, i=i)) 60 | return api.Command.user_add(**kargs) 61 | except errors.DuplicateEntry: 62 | iau_logger.warning( 63 | "User {uid} already in database (Try {i} / 5)".format(uid=uid, i=i) 64 | ) 65 | return 66 | except errors.DatabaseError as err: 67 | iau_logger.error( 68 | "Database error while trying to create user: {uid} (Try {i} / 5). Exception: {err}".format( 69 | uid=uid, i=i, err=err 70 | ) 71 | ) 72 | # Give time to slapd to cleanup 73 | time.sleep(1.0) 74 | else: 75 | raise Exception("Could not add user: {uid}".format(**kargs)) 76 | 77 | 78 | def group_add_members(group, members, page_size=50): 79 | for i in range(0, len(members), page_size): 80 | iau_logger.info("adding members {begin}:{end} to {group}".format(begin=members[i], end=members[min(i+page_size, len(members)-1)], group=group)) 81 | api.Command.group_add_member( 82 | cn=text_type(group), user=list(map(text_type, members[i:i+page_size])) 83 | ) 84 | 85 | 86 | def kinit(username, password): 87 | ipautil.run([paths.KINIT, username], stdin=password + "\n") 88 | 89 | 90 | def kdestroy(): 91 | ipautil.run([paths.KDESTROY]) 92 | 93 | 94 | def dry_run(users, groups): 95 | "Return True if changes are required." 96 | users = set(users) 97 | if groups: 98 | for group in groups: 99 | try: 100 | members = set(grp.getgrnam(group).gr_mem) 101 | except KeyError: 102 | return True 103 | if not members.issuperset(users): 104 | return True 105 | else: 106 | for user in users: 107 | try: 108 | pwd.getpwnam(user) 109 | except KeyError: 110 | return True 111 | return False 112 | 113 | 114 | def main(users, groups, passwd, sshpubkeys): 115 | init_api() 116 | added_users = set() 117 | for username in users: 118 | user = user_add( 119 | username, 120 | first=username, 121 | last=username, 122 | password=passwd, 123 | shell="/bin/bash", 124 | sshpubkeys=sshpubkeys 125 | ) 126 | if user is not None: 127 | added_users.add(username) 128 | 129 | if groups: 130 | for group in groups: 131 | group_add_members(group, users) 132 | 133 | if passwd: 134 | # configure user password 135 | for username in added_users: 136 | kinit(username, "\n".join([passwd] * 3)) 137 | kdestroy() 138 | 139 | 140 | if __name__ == "__main__": 141 | parser = argparse.ArgumentParser( 142 | description="Add a batch of users with a common password and groups" 143 | ) 144 | parser.add_argument("users", nargs="+", help="list of usernames to create") 145 | parser.add_argument("--group", action='append', help="group the users will be member of (can be specified multiple times)") 146 | parser.add_argument("--passwd", help="users' password") 147 | parser.add_argument("--sshpubkey", action="append", help="SSH public key (can be specified multiple times)") 148 | parser.add_argument("--dry", help="determine if changes are required", action='store_true') 149 | args = parser.parse_args() 150 | 151 | if args.passwd is not None: 152 | passwd = args.passwd 153 | elif "IPA_USER_PASSWD" in os.environ: 154 | passwd = os.environ["IPA_USER_PASSWD"] 155 | else: 156 | passwd = None 157 | 158 | if args.dry: 159 | if dry_run(args.users, args.group): 160 | exit(1) 161 | else: 162 | main( 163 | users=args.users, 164 | groups=args.group, 165 | passwd=passwd, 166 | sshpubkeys=args.sshpubkey 167 | ) 168 | -------------------------------------------------------------------------------- /site/profile/manifests/nfs.pp: -------------------------------------------------------------------------------- 1 | class profile::nfs (String $domain) { 2 | $server_ip = lookup('profile::nfs::client::server_ip') 3 | $ipaddress = lookup('terraform.self.local_ip') 4 | 5 | if $ipaddress == $server_ip { 6 | include profile::nfs::server 7 | } else { 8 | include profile::nfs::client 9 | } 10 | } 11 | 12 | class profile::nfs::client ( 13 | String $server_ip, 14 | Optional[Array[String]] $share_names = [], 15 | ) { 16 | $nfs_domain =
lookup('profile::nfs::domain') 17 | class { 'nfs': 18 | client_enabled => true, 19 | nfs_v4_client => true, 20 | nfs_v4_idmap_domain => $nfs_domain, 21 | } 22 | 23 | $instances = lookup('terraform.instances') 24 | $nfs_server = Hash($instances.map| $key, $values | { [$values['local_ip'], $key] })[$server_ip] 25 | $nfs_volumes = $instances.dig($nfs_server, 'volumes', 'nfs') 26 | $shares_to_mount = keys($nfs_volumes) + $share_names 27 | 28 | 29 | $self_volumes = lookup('terraform.self.volumes') 30 | if $facts['virtual'] =~ /^(container|lxc).*$/ { 31 | # automount relies on a kernel module that currently does not support namespaces. 32 | # Therefore it is not compatible with containers. 33 | # https://superuser.com/a/1372700 34 | $mount_options = 'x-systemd.mount-timeout=infinity,retry=10000,fg' 35 | } else { 36 | $mount_options = 'x-systemd.automount,x-systemd.mount-timeout=30' 37 | } 38 | 39 | ensure_resource('systemd::daemon_reload', 'nfs-client') 40 | exec { 'systemctl restart remote-fs.target': 41 | subscribe => Systemd::Daemon_reload['nfs-client'], 42 | refreshonly => true, 43 | tries => 20, # try to connect the nfs mounts for up to 5 minutes 44 | try_sleep => 15, 45 | path => ['/bin', '/usr/bin'], 46 | } 47 | 48 | $options_nfsv4 = "proto=tcp,nosuid,nolock,noatime,actimeo=3,nfsvers=4.2,seclabel,_netdev,${mount_options}" 49 | $shares_to_mount.each | String $share_name | { 50 | # If the instance has a volume mounted under the same name as the nfs share, 51 | # we mount the nfs share under /nfs/${share_name}. 52 | if $self_volumes.any |$tag, $volume_hash| { $share_name in $volume_hash } { 53 | $mount_point = "/nfs/${share_name}" 54 | } else { 55 | $mount_point = "/${share_name}" 56 | } 57 | nfs::client::mount { $mount_point: 58 | ensure => present, 59 | server => $server_ip, 60 | share => $share_name, 61 | options_nfsv4 => $options_nfsv4, 62 | notify => Systemd::Daemon_reload['nfs-client'], 63 | } 64 | } 65 | } 66 | 67 | class profile::nfs::server ( 68 | Array[String] $no_root_squash_tags = ['mgmt'], 69 | Boolean $enable_client_quotas = false, 70 | Optional[Array[String]] $export_paths = [], 71 | ) { 72 | include profile::volumes 73 | 74 | $nfs_domain = lookup('profile::nfs::domain') 75 | class { 'nfs': 76 | server_enabled => true, 77 | nfs_v4 => true, 78 | storeconfigs_enabled => false, 79 | nfs_v4_export_root => '/export', 80 | nfs_v4_export_root_clients => "*.${nfs_domain}(ro,fsid=root,insecure,no_subtree_check,async,root_squash)", 81 | nfs_v4_idmap_domain => $nfs_domain, 82 | } 83 | 84 | file { '/etc/nfs.conf': 85 | owner => 'root', 86 | group => 'root', 87 | mode => '0644', 88 | source => 'puppet:///modules/profile/nfs/nfs.conf', 89 | notify => Service[$nfs::server_service_name], 90 | } 91 | 92 | if $enable_client_quotas { 93 | package { 'quota-rpc': 94 | ensure => 'installed', 95 | } 96 | service { 'rpc-rquotad': 97 | ensure => 'running', 98 | enable => true, 99 | require => [Service['rpcbind'], Service['rpcbind.socket'], Package['quota-rpc']] 100 | } 101 | service { ['rpcbind', 'rpcbind.socket']: 102 | enable => true, 103 | notify => Service[$nfs::server_service_name], 104 | } 105 | } 106 | else { 107 | service { ['rpcbind', 'rpcbind.socket']: 108 | ensure => stopped, 109 | enable => mask, 110 | notify => Service[$nfs::server_service_name], 111 | } 112 | } 113 | service { 'rpc-statd': 114 | ensure => stopped, 115 | enable => mask, 116 | notify => Service[$nfs::server_service_name], 117 | } 118 | 119 | $devices = lookup('terraform.self.volumes.nfs', Hash, undef, {}) 120 | if $devices =~
Hash[String, Hash] { 121 | $export_path_list = $export_paths + $devices.map | String $key, $glob | { "/mnt/nfs/${key}" } 122 | } else { 123 | $export_path_list = $export_paths 124 | } 125 | $export_paths.each |$path| { 126 | ensure_resource('file', $path, { ensure => directory, before => Nfs::Server::Export[$path] }) 127 | } 128 | 129 | unless $export_path_list.empty { 130 | # Allow instances with specific tags to mount NFS without root squash 131 | $instances = lookup('terraform.instances') 132 | $common_options = 'rw,async,no_all_squash,security_label' 133 | $prefixes = $instances.filter|$key, $values| { ! intersection($values['tags'], $no_root_squash_tags ).empty }.map|$key, $values| { $values['prefix'] }.unique 134 | $prefix_rules = $prefixes.map|$string| { "${string}*.${nfs_domain}(${common_options},no_root_squash)" }.join(' ') 135 | $clients = "${prefix_rules} *.${nfs_domain}(${common_options},root_squash)" 136 | $export_path_list.each | String $path| { 137 | nfs::server::export { $path: 138 | ensure => 'mounted', 139 | clients => $clients, 140 | notify => Service[$nfs::server_service_name], 141 | require => Class['nfs'], 142 | } 143 | } 144 | } 145 | Profile::Volumes::Volume<| |> -> Nfs::Server::Export <| |> 146 | Mount <| |> -> Service <| tag == 'profile::accounts' and title == 'mkhome' |> 147 | Mount <| |> -> Service <| tag == 'profile::accounts' and title == 'mkproject' |> 148 | } 149 | -------------------------------------------------------------------------------- /site/profile/manifests/prometheus.pp: -------------------------------------------------------------------------------- 1 | # Configure a Prometheus exporter that exports server usage metrics, for example: 2 | # - CPU usage 3 | # - memory usage 4 | # It should run on every server of the cluster. 5 | class profile::prometheus::node_exporter { 6 | include prometheus::node_exporter 7 | @consul::service { 'node_exporter': 8 | port => 9100, 9 | tags => ['exporter'], 10 | } 11 | 12 | file { '/var/lib/node_exporter': 13 | ensure => directory, 14 | owner => 'node-exporter', 15 | group => 'node-exporter', 16 | mode => '0775', 17 | } 18 | 19 | # In cases where the puppet user exists, we add it to 20 | # node-exporter group so it can write in /var/lib/node_exporter. 21 | # If the resource does not exist, the following statement is simply 22 | # ignored. Puppet needs to be added to node-exporter group before 23 | # the group of /var/lib/node_exporter is changed from puppet to 24 | # node-exporter. Otherwise, we risk not being able to write reports. 25 | User <| title == 'puppet' |> { 26 | groups +> 'node-exporter', 27 | before => File['/var/lib/node_exporter'] 28 | } 29 | } 30 | 31 | # Configure a Prometheus exporter that exports the Slurm compute node metrics, for example: 32 | # - job memory usage 33 | # - job memory max 34 | # - job memory limit 35 | # - job core usage total 36 | # - job process count 37 | # - job threads count 38 | # - job GPU power usage 39 | # This exporter needs to run on compute nodes.
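# @example Hieradata pinning the exporter version (version value is illustrative only):
#   profile::prometheus::slurm_job_exporter::version: '0.3.0'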
40 | # @param version The version of the slurm job exporter to install 41 | class profile::prometheus::slurm_job_exporter ( 42 | String $version, 43 | String $nvidia_ml_py_version = '11.515.75', 44 | ) { 45 | @consul::service { 'slurm-job-exporter': 46 | port => 9798, 47 | tags => ['slurm', 'exporter'], 48 | } 49 | 50 | $el = $facts['os']['release']['major'] 51 | ensure_packages(['python3'], { ensure => 'present' }) 52 | package { 'python3-prometheus_client': 53 | require => Yumrepo['epel'], 54 | } 55 | package { 'slurm-job-exporter': 56 | source => "https://github.com/guilbaults/slurm-job-exporter/releases/download/v${version}/slurm-job-exporter-${version}-1.el${el}.noarch.rpm", 57 | provider => 'yum', 58 | } 59 | 60 | if $facts['nvidia_gpu_count'] > 0 and profile::is_grid_vgpu() { 61 | # Used by slurm-job-exporter to export GPU metrics 62 | # DCGM does not work with GRID VGPU, most of the stats are missing 63 | ensure_packages(['python3-pip'], { ensure => 'present' }) 64 | $py3_version = lookup('os::redhat::python3::version') 65 | 66 | exec { 'pip install nvidia-ml-py': 67 | command => "/usr/bin/pip${py3_version} install --force-reinstall nvidia-ml-py==${nvidia_ml_py_version}", 68 | creates => "/usr/local/lib/python${py3_version}/site-packages/pynvml.py", 69 | notify => Service['slurm-job-exporter'], 70 | require => [ 71 | Package['python3'], 72 | Package['python3-pip'], 73 | ], 74 | } 75 | } 76 | 77 | service { 'slurm-job-exporter': 78 | ensure => 'running', 79 | enable => true, 80 | require => [ 81 | Package['slurm-job-exporter'], 82 | Package['python3-prometheus_client'], 83 | ], 84 | } 85 | 86 | @exec { 'stop_slurm-job-exporter': 87 | command => 'systemctl stop slurm-job-exporter', 88 | onlyif => 'systemctl is-active slurm-job-exporter', 89 | refreshonly => true, 90 | path => ['/usr/bin'], 91 | } 92 | } 93 | 94 | # Configure a Prometheus exporter that exports the Slurm scheduling metrics, for example: 95 | # - allocated nodes 96 | # - allocated gpus 97 | # - pending jobs 98 | # - completed jobs 99 | # This exporter typically runs on the Slurm controller server, but it can run on any server 100 | # with a functional Slurm command-line installation. 
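# @example Hieradata overriding the default listening port (value is illustrative only):
#   profile::prometheus::slurm_exporter::port: 8082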
101 | class profile::prometheus::slurm_exporter ( 102 | Integer $port = 8081, 103 | Array[String] $collectors = ['partition'], 104 | ) { 105 | @consul::service { 'slurm-exporter': 106 | port => $port, 107 | tags => ['slurm', 'exporter'], 108 | } 109 | 110 | $slurm_exporter_url = 'https://download.copr.fedorainfracloud.org/results/cmdntrf/prometheus-slurm-exporter/' 111 | yumrepo { 'prometheus-slurm-exporter-copr-repo': 112 | enabled => true, 113 | descr => 'Copr repo for prometheus-slurm-exporter owned by cmdntrf', 114 | baseurl => "${slurm_exporter_url}/epel-\$releasever-\$basearch/", 115 | skip_if_unavailable => true, 116 | gpgcheck => 1, 117 | gpgkey => "${slurm_exporter_url}/pubkey.gpg", 118 | repo_gpgcheck => 0, 119 | } 120 | -> package { 'prometheus-slurm-exporter': } 121 | 122 | file { '/etc/systemd/system/prometheus-slurm-exporter.service': 123 | content => epp('profile/prometheus/prometheus-slurm-exporter.service', 124 | { 125 | port => $port, 126 | collectors => $collectors.map |$collector| { "--collector.${collector}" }.join(' '), 127 | } 128 | ), 129 | notify => Service['prometheus-slurm-exporter'], 130 | } 131 | 132 | service { 'prometheus-slurm-exporter': 133 | ensure => 'running', 134 | enable => true, 135 | require => [ 136 | Package['prometheus-slurm-exporter'], 137 | Package['slurm'], 138 | File['/etc/systemd/system/prometheus-slurm-exporter.service'], 139 | ], 140 | } 141 | } 142 | 143 | class profile::prometheus::apache_exporter { 144 | include prometheus::apache_exporter 145 | @consul::service { 'apache_exporter': 146 | port => 9117, 147 | tags => ['exporter'], 148 | } 149 | File<| title == '/etc/httpd/conf.d/server-status.conf' |> 150 | } 151 | 152 | class profile::prometheus::caddy_exporter (Integer $port = 2020) { 153 | include profile::consul 154 | @consul::service { 'caddy_exporter': 155 | port => $port, 156 | tags => ['exporter'], 157 | } 158 | 159 | $caddy_metrics_content = @("EOT") 160 | :${port} { 161 | metrics 162 | } 163 | | EOT 164 | file { '/etc/caddy/conf.d/local_metrics.conf': 165 | owner => 'root', 166 | group => 'root', 167 | mode => '0644', 168 | seltype => 'httpd_config_t', 169 | require => Package['caddy'], 170 | content => $caddy_metrics_content, 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /site/profile/manifests/ssh.pp: -------------------------------------------------------------------------------- 1 | class profile::ssh::base ( 2 | Boolean $disable_passwd_auth = false, 3 | ) { 4 | service { 'sshd': 5 | ensure => running, 6 | enable => true, 7 | } 8 | 9 | sshd_config { 'Include': 10 | ensure => present, 11 | value => '/etc/ssh/sshd_config.d/*', 12 | notify => Service['sshd'], 13 | } 14 | 15 | sshd_config { 'PermitRootLogin': 16 | ensure => present, 17 | value => 'no', 18 | notify => Service['sshd'], 19 | } 20 | 21 | $password_auth = $disable_passwd_auth ? 
{ true => 'no', false => 'yes' } 22 | sshd_config { 'PasswordAuthentication': 23 | ensure => present, 24 | value => $password_auth, 25 | notify => Service['sshd'], 26 | } 27 | 28 | file { '/etc/ssh/ssh_host_ed25519_key': 29 | mode => '0640', 30 | owner => 'root', 31 | group => 'ssh_keys', 32 | } 33 | 34 | file { '/etc/ssh/ssh_host_ed25519_key.pub': 35 | mode => '0644', 36 | owner => 'root', 37 | group => 'ssh_keys', 38 | } 39 | 40 | file { '/etc/ssh/ssh_host_rsa_key': 41 | mode => '0640', 42 | owner => 'root', 43 | group => 'ssh_keys', 44 | } 45 | 46 | file { '/etc/ssh/ssh_host_rsa_key.pub': 47 | mode => '0644', 48 | owner => 'root', 49 | group => 'ssh_keys', 50 | } 51 | 52 | if versioncmp($::facts['os']['release']['major'], '8') == 0 { 53 | # sshd hardening in RedHat 8 requires fiddling with crypto-policies 54 | # instead of modifying /etc/ssh/sshd_config 55 | # https://sshaudit.com/hardening_guides.html#rhel8 56 | # We replace the file in /usr/share/crypto-policies instead of 57 | # /etc/crypto-policies as suggested by sshaudit.com, because the script 58 | # update-crypto-policies can be called by RPM scripts and overwrites the 59 | # config in /etc by what's in /usr/share. The files in /etc/crypto-policies 60 | # are just symlinks to /usr/share. 61 | file { '/usr/share/crypto-policies/DEFAULT/opensshserver.txt': 62 | source => 'puppet:///modules/profile/base/opensshserver.config', 63 | notify => Service['sshd'], 64 | } 65 | } elsif versioncmp($::facts['os']['release']['major'], '9') >= 0 { 66 | # In RedHat 9, the sshd policies are defined as an include of the 67 | # crypto policies. Parameters defined before the include supersede 68 | # the crypto policy. The include is done in a file named 50-redhat.conf. 69 | file { '/etc/ssh/sshd_config.d/49-magic_castle.conf': 70 | mode => '0700', 71 | owner => 'root', 72 | group => 'root', 73 | source => 'puppet:///modules/profile/base/opensshserver-9.config', 74 | notify => Service['sshd'], 75 | } 76 | } 77 | 78 | sshd_config { 'tf_sshd_AuthenticationMethods': 79 | ensure => present, 80 | condition => 'User tf', 81 | key => 'AuthenticationMethods', 82 | value => 'publickey', 83 | target => '/etc/ssh/sshd_config.d/50-authenticationmethods.conf', 84 | notify => Service['sshd'], 85 | } 86 | 87 | sshd_config { 'tf_sshd_AuthorizedKeysFile': 88 | ensure => present, 89 | condition => 'User tf', 90 | key => 'AuthorizedKeysFile', 91 | value => '/etc/ssh/authorized_keys.%u', 92 | target => '/etc/ssh/sshd_config.d/50-authenticationmethods.conf', 93 | notify => Service['sshd'], 94 | } 95 | 96 | $tf_public_key = lookup('terraform.data.tf_public_key') 97 | $tags = lookup('terraform.self.tags') 98 | $puppetserver_ips = lookup('terraform.tag_ip.puppet') 99 | 100 | if 'puppet' in $tags { 101 | $tf_authorized_keys_options = 'pty' 102 | } else { 103 | $permitopen = $puppetserver_ips.map |$ip| { "permitopen=\"${ip}:22\"" }.join(',') 104 | $tf_authorized_keys_options = "${permitopen},port-forwarding,command=\"/sbin/nologin\"" 105 | } 106 | 107 | $tf_authorized_keys = "restrict,${tf_authorized_keys_options} ${tf_public_key}" 108 | file { '/etc/ssh/authorized_keys.tf': 109 | content => $tf_authorized_keys, 110 | mode => '0644', 111 | owner => 'root', 112 | group => 'root', 113 | } 114 | } 115 | 116 | # building /etc/ssh/ssh_known_hosts 117 | # for host based authentication 118 | class profile::ssh::known_hosts { 119 | $instances = lookup('terraform.instances') 120 | $ipa_domain = lookup('profile::freeipa::base::ipa_domain') 121 | 122 | file {
'/etc/ssh/ssh_known_hosts': 123 | content => '# This file is managed by Puppet', 124 | owner => 'root', 125 | group => 'root', 126 | mode => '0644', 127 | replace => false, 128 | } 129 | 130 | $type = 'ed25519' 131 | $sshkey_to_add = Hash( 132 | $instances.map |$k, $v| { 133 | [ 134 | $k, 135 | { 136 | 'key' => split($v['hostkeys'][$type], /\s/)[1], 137 | 'type' => "ssh-${type}", 138 | 'host_aliases' => ["${k}.${ipa_domain}"] + ( $v['local_ip'] != '' ? { true => [$v['local_ip']], false => [] }), 139 | 'require' => File['/etc/ssh/ssh_known_hosts'], 140 | } 141 | ] 142 | }) 143 | ensure_resources('sshkey', $sshkey_to_add) 144 | } 145 | 146 | # @summary Enable SSH hostbased authentication on the instance including this class 147 | # @param shosts_tags Tags of instances that can connect this server using hostbased authentication 148 | class profile::ssh::hostbased_auth::server ( 149 | Array[String] $shosts_tags = ['login', 'node'] 150 | ) { 151 | include profile::ssh::known_hosts 152 | 153 | $instances = lookup('terraform.instances') 154 | $ipa_domain = lookup('profile::freeipa::base::ipa_domain') 155 | $hosts = $instances.filter |$k, $v| { ! intersection($v['tags'], $shosts_tags).empty } 156 | $shosts = join($hosts.map |$k, $v| { "${k}.${ipa_domain}" }, "\n") 157 | 158 | file { '/etc/ssh/shosts.equiv': 159 | content => $shosts, 160 | } 161 | 162 | sshd_config { 'HostbasedAuthentication': 163 | ensure => present, 164 | value => 'yes', 165 | notify => Service['sshd'], 166 | } 167 | 168 | sshd_config { 'UseDNS': 169 | ensure => present, 170 | value => 'yes', 171 | notify => Service['sshd'], 172 | } 173 | 174 | selinux::boolean { 'ssh_keysign': } 175 | } 176 | 177 | class profile::ssh::hostbased_auth::client { 178 | include profile::ssh::known_hosts 179 | 180 | ssh_config { 'EnableSSHKeysign': 181 | ensure => present, 182 | host => '*', 183 | value => 'yes', 184 | } 185 | 186 | ssh_config { 'HostbasedAuthentication': 187 | ensure => present, 188 | host => '*', 189 | value => 'yes', 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /site/profile/manifests/mail.pp: -------------------------------------------------------------------------------- 1 | class profile::mail::base ( 2 | String $origin, 3 | Array[String] $authorized_submit_users = ['root', 'slurm'], 4 | ) { 5 | postfix::config { 'authorized_submit_users': 6 | ensure => present, 7 | value => join($authorized_submit_users, ','), 8 | } 9 | 10 | file { '/etc/mailname': 11 | content => $origin, 12 | owner => 'root', 13 | group => 'root', 14 | mode => '0644', 15 | seltype => 'postfix_etc_t', 16 | } 17 | } 18 | 19 | class profile::mail { 20 | $relayhosts = lookup('profile::mail::sender::relayhosts') 21 | $ipaddress = lookup('terraform.self.local_ip') 22 | 23 | include profile::mail::base 24 | if $ipaddress in $relayhosts { 25 | include profile::mail::relayhost 26 | } else { 27 | include profile::mail::sender 28 | } 29 | } 30 | 31 | class profile::mail::sender ( 32 | Array[String] $relayhosts, 33 | ) { 34 | $origin = lookup('profile::mail::base::origin') 35 | class { 'postfix': 36 | inet_protocols => 'ipv4', 37 | relayhost => join($relayhosts, ','), 38 | myorigin => $origin, 39 | satellite => true, 40 | manage_mailx => false, 41 | manage_conffiles => false, 42 | manage_mailname => false, 43 | } 44 | } 45 | 46 | class profile::mail::relayhost { 47 | if lookup('profile::mail::dkim::private_key', undef, undef, '') != '' { 48 | include profile::mail::dkim 49 | } 50 | 51 | $cidr = profile::getcidr() 
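# getcidr() (site-local function) returns the cluster subnet in CIDR notation; it feeds mynetworks below so any instance on that subnet may relay mail through this host.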
52 | $ipaddress = lookup('terraform.self.local_ip') 53 | $origin = lookup('profile::mail::base::origin') 54 | 55 | class { 'postfix': 56 | inet_interfaces => "127.0.0.1, ${ipaddress}", 57 | inet_protocols => 'ipv4', 58 | mynetworks => "127.0.0.0/8, ${cidr}", 59 | myorigin => $origin, 60 | mta => true, 61 | relayhost => 'direct', 62 | smtp_listen => 'all', 63 | manage_mailx => false, 64 | manage_conffiles => false, 65 | manage_mailname => false, 66 | } 67 | 68 | postfix::config { 'myhostname': 69 | ensure => present, 70 | value => "${facts['networking']['hostname']}.${origin}", 71 | } 72 | } 73 | 74 | # profile::mail::dkim class 75 | # 76 | # This class manages OpenDKIM installation and service. 77 | # It is meant to be used in conjunction with profile::mail::relayhost. 78 | # OpenDKIM signs emails with a private key and email providers can 79 | # verify the email signature authenticity using the DKIM dns record. 80 | 81 | # The class assumes the dkim public key is published as a TXT DNS record 82 | # under default._domainkey.${domain_name}. 83 | # 84 | # @param private_key Private RSA key for DKIM 85 | class profile::mail::dkim ( 86 | String $private_key, 87 | ) { 88 | $domain_name = lookup('profile::mail::base::origin') 89 | $cidr = profile::getcidr() 90 | 91 | user { 'postfix': 92 | ensure => present, 93 | groups => ['opendkim'], 94 | membership => minimum, 95 | require => Package['opendkim'], 96 | } 97 | 98 | package { 'opendkim': 99 | ensure => 'installed', 100 | require => Yumrepo['epel'], 101 | } 102 | 103 | file { '/etc/opendkim/keys/default.private': 104 | content => $private_key, 105 | owner => 'opendkim', 106 | group => 'opendkim', 107 | mode => '0600', 108 | require => Package['opendkim'], 109 | } 110 | 111 | service { 'opendkim': 112 | ensure => running, 113 | enable => true, 114 | require => [ 115 | Package['opendkim'], 116 | File['/etc/opendkim/keys/default.private'], 117 | ], 118 | } 119 | 120 | file_line { 'opendkim-Mode': 121 | ensure => present, 122 | path => '/etc/opendkim.conf', 123 | line => 'Mode sv', 124 | match => '^Mode', 125 | notify => Service['opendkim'], 126 | require => Package['opendkim'], 127 | } 128 | 129 | file_line { 'opendkim-Canonicalization': 130 | ensure => present, 131 | path => '/etc/opendkim.conf', 132 | line => 'Canonicalization relaxed/simple', 133 | match => '^#?Canonicalization', 134 | notify => Service['opendkim'], 135 | require => Package['opendkim'], 136 | } 137 | 138 | file_line { 'opendkim-KeyFile': 139 | ensure => present, 140 | path => '/etc/opendkim.conf', 141 | line => '#KeyFile /etc/opendkim/keys/default.private', 142 | match => '^KeyFile', 143 | notify => Service['opendkim'], 144 | require => Package['opendkim'], 145 | } 146 | 147 | file_line { 'opendkim-KeyTable': 148 | ensure => present, 149 | path => '/etc/opendkim.conf', 150 | line => 'KeyTable refile:/etc/opendkim/KeyTable', 151 | match => '^#?KeyTable', 152 | notify => Service['opendkim'], 153 | require => Package['opendkim'], 154 | } 155 | 156 | file_line { 'opendkim-SigningTable': 157 | ensure => present, 158 | path => '/etc/opendkim.conf', 159 | line => 'SigningTable refile:/etc/opendkim/SigningTable', 160 | match => '^#?SigningTable', 161 | notify => Service['opendkim'], 162 | require => Package['opendkim'], 163 | } 164 | 165 | file_line { 'opendkim-ExternalIgnoreList': 166 | ensure => present, 167 | path => '/etc/opendkim.conf', 168 | line => 'ExternalIgnoreList refile:/etc/opendkim/TrustedHosts', 169 | match => '^#?ExternalIgnoreList', 170 | notify =>
Service['opendkim'], 171 | require => Package['opendkim'], 172 | } 173 | 174 | file_line { 'opendkim-InternalHosts': 175 | ensure => present, 176 | path => '/etc/opendkim.conf', 177 | line => 'InternalHosts refile:/etc/opendkim/TrustedHosts', 178 | match => '^#?InternalHosts', 179 | notify => Service['opendkim'], 180 | require => Package['opendkim'], 181 | } 182 | 183 | file_line { 'opendkim-KeyTable-content': 184 | ensure => present, 185 | path => '/etc/opendkim/KeyTable', 186 | line => "default._domainkey.${domain_name} ${domain_name}:default:/etc/opendkim/keys/default.private", 187 | notify => Service['opendkim'], 188 | require => Package['opendkim'], 189 | } 190 | 191 | file_line { 'opendkim-SigningTable-content': 192 | ensure => present, 193 | path => '/etc/opendkim/SigningTable', 194 | line => "*@${domain_name} default._domainkey.${domain_name}", 195 | notify => Service['opendkim'], 196 | require => Package['opendkim'], 197 | } 198 | 199 | file_line { 'opendkim-TrustedHosts': 200 | ensure => present, 201 | path => '/etc/opendkim/TrustedHosts', 202 | line => $cidr, 203 | notify => Service['opendkim'], 204 | require => Package['opendkim'], 205 | } 206 | 207 | postfix::config { 'smtpd_milters': 208 | ensure => present, 209 | value => 'local:/run/opendkim/opendkim.sock', 210 | } 211 | 212 | postfix::config { 'non_smtpd_milters': 213 | ensure => present, 214 | value => '$smtpd_milters', 215 | } 216 | 217 | postfix::config { 'milter_default_action': 218 | ensure => present, 219 | value => 'accept', 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /site/profile/manifests/cvmfs.pp: -------------------------------------------------------------------------------- 1 | class profile::cvmfs::client ( 2 | Integer $quota_limit, 3 | Variant[Boolean, String] $disable_autofs = false, 4 | Boolean $strict_mount = false, 5 | Array[String] $repositories = [], 6 | Array[String] $alien_cache_repositories = [], 7 | String $cvmfs_root = '/cvmfs', 8 | ) { 9 | include profile::consul 10 | include profile::cvmfs::local_user 11 | $alien_fs_root_raw = lookup('profile::cvmfs::alien_cache::alien_fs_root', undef, undef, 'scratch') 12 | $alien_fs_root = regsubst($alien_fs_root_raw, '^/|/$', '', 'G') 13 | $alien_folder_name_raw = lookup('profile::cvmfs::alien_cache::alien_folder_name', undef, undef, 'cvmfs_alien_cache') 14 | $alien_folder_name = regsubst($alien_folder_name_raw, '^/|/$', '', 'G') 15 | 16 | package { 'cvmfs-repo': 17 | ensure => 'installed', 18 | provider => 'rpm', 19 | name => 'cvmfs-release-3-2.noarch', 20 | source => 'https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-3-2.noarch.rpm', 21 | } 22 | 23 | package { 'cvmfs': 24 | ensure => 'installed', 25 | require => [Package['cvmfs-repo']], 26 | } 27 | 28 | file { $cvmfs_root: 29 | ensure => directory, 30 | seltype => 'root_t', 31 | } 32 | 33 | file_line { 'cvmfs_default': 34 | path => '/etc/cvmfs/default.conf', 35 | match => '^CVMFS_HTTP_PROXY', 36 | line => 'CVMFS_HTTP_PROXY=DIRECT', 37 | require => Package['cvmfs'], 38 | } 39 | 40 | file_line { 'cvmfs_mount_dir': 41 | ensure => present, 42 | path => '/etc/cvmfs/default.conf', 43 | line => " readonly CVMFS_MOUNT_DIR=${cvmfs_root}", 44 | match => '^ readonly CVMFS_MOUNT_DIR=/cvmfs$', 45 | require => Package['cvmfs'] 46 | } 47 | 48 | file { '/etc/cvmfs/default.local.ctmpl': 49 | content => epp('profile/cvmfs/default.local', { 50 | 'strict_mount' => $strict_mount ? 
{ true => 'yes', false => 'no' }, # lint:ignore:selector_inside_resource 51 | 'quota_limit' => $quota_limit, 52 | 'repositories' => $repositories + $alien_cache_repositories, 53 | }), 54 | notify => Service['consul-template'], 55 | require => Package['cvmfs'], # the 'cvmfs' package provides /etc/cvmfs 56 | } 57 | 58 | $alien_cache_repositories.each |$repo| { 59 | file { "/etc/cvmfs/config.d/${repo}.conf": 60 | content => epp('profile/cvmfs/alien_cache.conf.epp', { 61 | 'alien_fs_root' => $alien_fs_root, 62 | 'alien_folder_name' => $alien_folder_name, 63 | }), 64 | require => Package['cvmfs'], # the 'cvmfs' package provides /etc/cvmfs/config.d 65 | } 66 | } 67 | 68 | consul_template::watch { '/etc/cvmfs/default.local': 69 | require => File['/etc/cvmfs/default.local.ctmpl'], 70 | config_hash => { 71 | perms => '0644', 72 | source => '/etc/cvmfs/default.local.ctmpl', 73 | destination => '/etc/cvmfs/default.local', 74 | command => '/usr/bin/cvmfs_config reload', 75 | }, 76 | } 77 | 78 | if Boolean($disable_autofs) { 79 | $repositories.each|$repository| { 80 | file { "/cvmfs/${repository}": 81 | ensure => directory, 82 | require => File[$cvmfs_root], 83 | } 84 | -> mount { "/cvmfs/${repository}": 85 | ensure => 'mounted', 86 | device => $repository, 87 | fstype => 'cvmfs', 88 | require => [ 89 | Package['cvmfs'], 90 | File_line['cvmfs_default'], 91 | ], 92 | } 93 | Package<| tag == profile::software_stack |> -> Mount["/cvmfs/${repository}"] 94 | } 95 | } else { 96 | file { '/etc/auto.master.d/cvmfs.autofs': 97 | notify => Service['autofs'], 98 | require => [ 99 | Package['cvmfs'], 100 | File['/cvmfs'], 101 | ], 102 | content => @("EOF") 103 | # generated by Puppet for CernVM-FS 104 | ${cvmfs_root} /etc/auto.cvmfs 105 | |EOF 106 | } 107 | 108 | service { 'autofs': 109 | ensure => running, 110 | enable => true, 111 | } 112 | 113 | # Make sure CVMFS repos are mounted when requiring this class 114 | exec { 'init_default.local': 115 | command => 'consul-template -template="/etc/cvmfs/default.local.ctmpl:/etc/cvmfs/default.local" -once', 116 | environment => ["CONSUL_TOKEN=${lookup('profile::consul::acl_api_token')}"], 117 | path => ['/bin', '/usr/bin', $consul_template::bin_dir], 118 | unless => 'test -f /etc/cvmfs/default.local', 119 | require => [ 120 | File['/etc/cvmfs/default.local.ctmpl'], 121 | Service['consul'], 122 | Service['autofs'], 123 | ], 124 | } 125 | Package<| tag == profile::software_stack |> ~> Service['autofs'] 126 | } 127 | 128 | # Fix issue with BASH_ENV, SSH and lmod where 129 | # ssh client would get a "Permission denied" when 130 | # trying to connect to a server. The error 131 | # results from the SELinux context type of 132 | # /cvmfs/soft.computecanada.ca/nix/var/nix/profiles/16.09/lmod/lmod/init/bash 133 | # To be authorized in the ssh context, it would need 134 | # to be a bin_t type, but it is a fusefs_t and since 135 | # CVMFS is a read-only filesystem, the context cannot be changed. 136 | # The 'use_fusefs_home_dirs' policy fixes that issue.
137 | selinux::boolean { 'use_fusefs_home_dirs': } 138 | } 139 | 140 | # Create an alien source that refers to the uid and gid of cvmfs user 141 | class profile::cvmfs::alien_cache ( 142 | String $alien_fs_root_raw = 'scratch', 143 | String $alien_folder_name_raw = 'cvmfs_alien_cache', 144 | ) { 145 | $uid = lookup('profile::cvmfs::local_user::uid', undef, undef, 13000004) 146 | $gid = lookup('profile::cvmfs::local_user::gid', undef, undef, 8000131) 147 | $alien_fs_root = regsubst($alien_fs_root_raw, '^/|/$', '', 'G') 148 | $alien_folder_name = regsubst($alien_folder_name_raw, '^/|/$', '', 'G') 149 | 150 | # Ensure the alien cache parent folder exists 151 | ensure_resource('file', "/mnt/${alien_fs_root}", { 'ensure' => 'directory', 'seltype' => 'home_root_t' }) 152 | 153 | file { "/mnt/${alien_fs_root}/${alien_folder_name}": 154 | ensure => directory, 155 | group => $gid, 156 | owner => $uid, 157 | require => File["/mnt/${alien_fs_root}"], 158 | seluser => 'unconfined_u', 159 | } 160 | } 161 | 162 | # Create a local cvmfs user 163 | class profile::cvmfs::local_user ( 164 | String $uname = 'cvmfs', 165 | String $group = 'cvmfs-reserved', 166 | Integer $uid = 13000004, 167 | Integer $gid = 8000131, 168 | String $selinux_user = 'unconfined_u', 169 | String $mls_range = 's0-s0:c0.c1023', 170 | ) { 171 | group { $group: 172 | ensure => present, 173 | gid => $gid, 174 | before => Package['cvmfs'], 175 | } 176 | user { $uname: 177 | ensure => present, 178 | forcelocal => true, 179 | uid => $uid, 180 | gid => $gid, 181 | managehome => false, 182 | home => '/var/lib/cvmfs', 183 | shell => '/usr/sbin/nologin', 184 | require => Group[$group], 185 | before => Package['cvmfs'], 186 | } 187 | if $group != 'cvmfs' { 188 | # The cvmfs rpm creates a user and a group 'cvmfs' if they do not exist. 189 | # If the group created for the local user 'cvmfs' is not named 'cvmfs', 190 | # we make sure the group 'cvmfs' is assigned the same gid before installing 191 | # the cvmfs package.
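# allowdupe lets this 'cvmfs' group share the gid already assigned to $group.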
192 | group { 'cvmfs': 193 | allowdupe => true, 194 | gid => $gid, 195 | require => Group[$group], 196 | before => Package['cvmfs'], 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /site/profile/manifests/volumes.pp: -------------------------------------------------------------------------------- 1 | # lookup_options: 2 | # profile::volumes::devices: 3 | # merge: 'deep' 4 | 5 | ## common.yaml 6 | # profile::volumes::devices: %{alias('terraform.self.volumes')} 7 | 8 | ## Provided by the user 9 | # profile::volumes::devices: 10 | # nfs: 11 | # home: 12 | # mode: '0600' 13 | # owner: 'root' 14 | # group: 'root' 15 | # quota: '5g' 16 | 17 | class profile::volumes ( 18 | Hash[String, Hash[String, Hash]] $devices, 19 | ) { 20 | package { 'lvm2': 21 | ensure => installed, 22 | } 23 | $devices.each | String $volume_tag, $device_map | { 24 | ensure_resource('file', "/mnt/${volume_tag}", { 'ensure' => 'directory' }) 25 | $device_map.each | String $key, $values | { 26 | profile::volumes::volume { "${volume_tag}-${key}": 27 | volume_name => $key, 28 | volume_tag => $volume_tag, 29 | * => $values, 30 | } 31 | } 32 | } 33 | } 34 | 35 | type QuotaSpec = Struct[ 36 | { 37 | 'bsoft' => Optional[Variant[String[1],Integer]], 38 | 'bhard' => Optional[Variant[String[1],Integer]], 39 | 'isoft' => Optional[Variant[String[1],Integer]], 40 | 'ihard' => Optional[Variant[String[1],Integer]], 41 | } 42 | ] 43 | define profile::volumes::volume ( 44 | String[1] $volume_name, 45 | String[1] $volume_tag, 46 | String[1] $glob, 47 | Integer[1] $size, 48 | String[1] $owner = 'root', 49 | String[1] $group = 'root', 50 | String[3,4] $mode = '0755', 51 | String[1] $seltype = 'home_root_t', 52 | Boolean $bind_mount = true, 53 | Boolean $enable_resize = false, 54 | Enum['xfs', 'ext4'] $filesystem = 'xfs', 55 | Optional[String[1]] $bind_target = undef, 56 | Optional[String[1]] $type = undef, 57 | Optional[Variant[String[1],QuotaSpec]] $quota = undef, 58 | Optional[String[1]] $mkfs_options = undef, 59 | Optional[Boolean] $managed = undef, 60 | ) { 61 | $regex = Regexp(regsubst($glob, /[?*]/, { '?' => '.', '*' => '.*' })) 62 | $bind_target_ = pick($bind_target, "/${volume_name}") 63 | 64 | file { "/mnt/${volume_tag}/${volume_name}": 65 | ensure => 'directory', 66 | owner => $owner, 67 | group => $group, 68 | mode => $mode, 69 | seltype => $seltype, 70 | } 71 | 72 | $device = (values($::facts['/dev/disk'].filter |$k, $v| { $k =~ $regex }).unique)[0] 73 | $dev_mapper_id = "/dev/mapper/${volume_tag}--${volume_name}_vg-${volume_tag}--${volume_name}" 74 | 75 | exec { "vgchange-${name}_vg": 76 | command => "vgchange -ay ${name}_vg", 77 | onlyif => ["test ! -d /dev/${name}_vg", "vgscan -t | grep -q '${name}_vg'"], 78 | require => [Package['lvm2']], 79 | path => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'], 80 | } 81 | 82 | if $device != undef { 83 | physical_volume { $device: 84 | ensure => present, 85 | } 86 | } else { 87 | notify { "error_${volume_name}": 88 | message => @("EOT") 89 | WARNING: Could not find device ${glob} associated with ${volume_tag}-${volume_name}. 90 | This will cause errors with resources related to ${volume_tag}-${volume_name}. 
91 | | EOT 92 | } 93 | } 94 | 95 | volume_group { "${name}_vg": 96 | ensure => present, 97 | physical_volumes => $device, 98 | createonly => true, 99 | followsymlinks => true, 100 | } 101 | 102 | if $filesystem == 'xfs' { 103 | $options = 'defaults,usrquota' 104 | } else { 105 | $options = 'defaults' 106 | } 107 | 108 | lvm::logical_volume { $name: 109 | ensure => present, 110 | volume_group => "${name}_vg", 111 | fs_type => $filesystem, 112 | mkfs_options => $mkfs_options, 113 | mountpath => "/mnt/${volume_tag}/${volume_name}", 114 | mountpath_require => true, 115 | options => $options, 116 | } 117 | 118 | exec { "chown ${owner}:${group} /mnt/${volume_tag}/${volume_name}": 119 | onlyif => "test \"$(stat -c%U:%G /mnt/${volume_tag}/${volume_name})\" != \"${owner}:${group}\"", 120 | refreshonly => true, 121 | subscribe => Lvm::Logical_volume[$name], 122 | path => ['/bin'], 123 | } 124 | 125 | exec { "chmod ${mode} /mnt/${volume_tag}/${volume_name}": 126 | onlyif => "test \"$(stat -c0%a /mnt/${volume_tag}/${volume_name})\" != \"${mode}\"", 127 | refreshonly => true, 128 | subscribe => Lvm::Logical_volume[$name], 129 | path => ['/bin'], 130 | } 131 | 132 | if $enable_resize { 133 | $logical_volume_size_cmd = "pvs --noheadings -o pv_size ${device} | sed -nr 's/^.*[ <]([0-9]+)\\..*g$/\\1/p'" 134 | $physical_volume_size_cmd = "pvs --noheadings -o dev_size ${device} | sed -nr 's/^ *([0-9]+)\\..*g/\\1/p'" 135 | exec { "pvresize ${device}": 136 | onlyif => "test `${logical_volume_size_cmd}` -lt `${physical_volume_size_cmd}`", 137 | path => ['/usr/bin', '/bin', '/usr/sbin'], 138 | require => Lvm::Logical_volume[$name], 139 | } 140 | 141 | $pv_freespace_cmd = "pvs --noheading -o pv_free ${device} | sed -nr 's/^ *([0-9]*)\\..*g/\\1/p'" 142 | exec { "lvextend -l '+100%FREE' -r /dev/${name}_vg/${name}": 143 | onlyif => "test `${pv_freespace_cmd}` -gt 0", 144 | path => ['/usr/bin', '/bin', '/usr/sbin'], 145 | require => Exec["pvresize ${device}"], 146 | } 147 | } 148 | 149 | selinux::fcontext::equivalence { "/mnt/${volume_tag}/${volume_name}": 150 | ensure => 'present', 151 | target => '/home', 152 | require => Mount["/mnt/${volume_tag}/${volume_name}"], 153 | notify => Selinux::Exec_restorecon["/mnt/${volume_tag}/${volume_name}"], 154 | } 155 | 156 | selinux::exec_restorecon { "/mnt/${volume_tag}/${volume_name}": } 157 | 158 | if $bind_mount { 159 | ensure_resource('file', $bind_target_, { 'ensure' => 'directory', 'seltype' => $seltype }) 160 | mount { $bind_target_: 161 | ensure => mounted, 162 | device => "/mnt/${volume_tag}/${volume_name}", 163 | fstype => none, 164 | options => 'rw,bind', 165 | require => [ 166 | File[$bind_target_], 167 | Lvm::Logical_volume[$name], 168 | ], 169 | } 170 | } elsif ( 171 | $facts['mountpoints'][$bind_target_] != undef and 172 | $facts['mountpoints'][$bind_target_]['device'] == $dev_mapper_id 173 | ) { 174 | mount { $bind_target_: 175 | ensure => absent, 176 | } 177 | } 178 | 179 | if $quota and $filesystem == 'xfs' { 180 | ensure_resource('file', '/etc/xfs_quota', { 'ensure' => 'directory' }) 181 | # Save the xfs quota setting to avoid applying at every iteration 182 | if $quota.is_a(QuotaSpec) { 183 | # ensure defaults of no quota is set 184 | $quotas = {'bsoft' => '0', 'bhard' => '0', 'ihard' => '0', 'isoft' => '0'} + $quota 185 | $quota_options = "bsoft=${quotas['bsoft']} bhard=${quotas['bhard']} isoft=${quotas['isoft']} ihard=${quotas['ihard']}" 186 | } 187 | else { 188 | $quota_options = "bsoft=${quota} bhard=${quota}" 189 | } 190 | file { 
"/etc/xfs_quota/${volume_tag}-${volume_name}": 191 | ensure => 'file', 192 | content => "#FILE TRACKED BY PUPPET DO NOT EDIT MANUALLY\n${quota_options}", 193 | require => File['/etc/xfs_quota'], 194 | } 195 | 196 | exec { "apply-quota-${name}": 197 | command => "xfs_quota -x -c 'limit ${quota_options} -d' /mnt/${volume_tag}/${volume_name}", 198 | require => Mount["/mnt/${volume_tag}/${volume_name}"], 199 | path => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'], 200 | refreshonly => true, 201 | subscribe => [File["/etc/xfs_quota/${volume_tag}-${volume_name}"]], 202 | } 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /extra.md: -------------------------------------------------------------------------------- 1 | # Configuring multifactor authentication with Duo Unix 2 | ## Adding `duo_unix` to your `Puppetfile` 3 | In order to support multifactor authentication with Duo, you will first need to add the `duo_unix` Puppet module to your `Puppetfile` 4 | and define it in your [`main.tf`](https://github.com/ComputeCanada/magic_castle/tree/main/docs#419-puppetfile-optional). If you want to 5 | use the original version, you would add 6 | ``` 7 | mod 'iu-duo_unix', '4.0.1' 8 | ``` 9 | to your `Puppetfile`. 10 | 11 | ## Adding `duo_unix` to your instances 12 | You need to add the `duo_unix` module to your instances using Magic Castle [tags](https://github.com/ComputeCanada/puppet-magic_castle/tree/main?tab=readme-ov-file#magic_castlesite). 13 | To do so, define a new tag, and apply it to your instances through the `main.tf`: 14 | ``` 15 | magic_castle::site::tags: 16 | worldssh: 17 | - duo_unix 18 | ``` 19 | and then in your `main.tf`, add the `worldssh` tag to your `login` instance: 20 | ``` 21 | login = { type = "...", tags = ["login", "public", "worldssh"], count = 1 } 22 | ``` 23 | 24 | ## Adding your Duo configuration 25 | In your hieradata file, add the following: 26 | ``` 27 | duo_unix::usage: 'pam' 28 | duo_unix::ikey: 29 | duo_unix::skey: 30 | duo_unix::host: 31 | duo_unix::motd: 'yes' 32 | duo_unix::groups: '*,!centos' 33 | duo_unix::pam_ssh_config::keyonly: true # optional 34 | ``` 35 | where the last line is if you want to restrict the primary authentication to SSH keys only. Since this configuration contains 36 | secrets, it is strongly recommended generate and upload [eyaml certificates](https://github.com/ComputeCanada/magic_castle/tree/main/docs#1013-generate-and-replace-puppet-hieradata-encryption-keys) 37 | and use them to [encrypt your data](https://simp.readthedocs.io/en/master/HOWTO/20_Puppet/Hiera_eyaml.html). 38 | 39 | # Configuring `sudo` 40 | ## Adding `saz-sudo` to your `Puppetfile` 41 | If you want to configure `sudo` commands on your cluster, you will want to add the [`saz-sudo`](https://forge.puppet.com/modules/saz/sudo/readme) Puppet module to your `Puppetfile` 42 | and define it in your [`main.tf`](https://github.com/ComputeCanada/magic_castle/tree/main/docs#419-puppetfile-optional). You would add 43 | ``` 44 | mod 'saz-sudo', '8.0.0' 45 | ``` 46 | to your `Puppetfile`. 47 | 48 | ## Adding `sudo` to your instances 49 | You need to add the `sudo` module to your instances using Magic Castle [tags](https://github.com/ComputeCanada/puppet-magic_castle/tree/main?tab=readme-ov-file#magic_castlesite). 
50 | To do so, define a new tag and apply it to your instances through the `main.tf`: 51 | ``` 52 | magic_castle::site::tags: 53 | sudo: 54 | - sudo 55 | ``` 56 | and then in your `main.tf`, add the `sudo` tag to your instance: 57 | ``` 58 | login = { type = "...", tags = ["login", "public", "sudo"], count = 1 } 59 | ``` 60 | 61 | ## Adding your `sudo` configuration 62 | Add the content of `sudoers` files to your hieradata. For example: 63 | ``` 64 | sudo::ldap_enable: true 65 | sudo::config_file_replace: false 66 | sudo::prefix: '10-mysudoers_' 67 | sudo::purge_ignore: '[!10-mysudoers_]*' 68 | sudo::configs: 69 | 'general': 70 | 'content': | 71 | Cmnd_Alias ADMIN_ROOTCMD = /bin/cat *, /bin/ls *, /bin/chmod *, /bin/vim *, /usr/bin/su -, /bin/yum *, /bin/less *, /bin/grep *, /bin/kill *, /usr/sbin/reboot 72 | %admin ALL=(ALL) NOPASSWD: ADMIN_ROOTCMD 73 | ``` 74 | 75 | # Configuring a system's `cron` 76 | ## Adding `puppet-cron` to your `Puppetfile` 77 | If you want to configure `cron` commands on your cluster, you will want to add the [`puppet-cron`](https://github.com/voxpupuli/puppet-cron) Puppet module to your `Puppetfile` 78 | and define it in your [`main.tf`](https://github.com/ComputeCanada/magic_castle/tree/main/docs#419-puppetfile-optional). You would add 79 | ``` 80 | mod 'puppet-cron', '2.0.0' 81 | ``` 82 | to your `Puppetfile`. 83 | 84 | ## Adding `cron` to your instances 85 | You need to add the `cron` module to your instances using Magic Castle [tags](https://github.com/ComputeCanada/puppet-magic_castle/tree/main?tab=readme-ov-file#magic_castlesite). 86 | Define a new tag, and apply it to your instances through the `main.tf`: 87 | ``` 88 | magic_castle::site::tags: 89 | cron: 90 | - cron 91 | ``` 92 | and then in your `main.tf`, add the `cron` tag to your instance: 93 | ``` 94 | login = { type = "...", tags = ["login", "public", "cron"], count = 1 } 95 | ``` 96 | 97 | ## Adding your `cron` configuration 98 | Add the configuration to your hieradata. For example: 99 | ``` 100 | cron::job: 101 | mii_cache: 102 | command: 'source $HOME/.bashrc; /etc/rsnt/generate_mii_index.py --arch sse3 avx avx2 avx512 &>> /home/ebuser/crontab_mii.log' 103 | minute: '*/10' 104 | hour: '*' 105 | date: '*' 106 | month: '*' 107 | weekday: '*' 108 | user: ebuser 109 | description: 'Generate Mii cache' 110 | ``` 111 | 112 | # Creating a HAProxy instance 113 | If you are using external LDAP replicas instead of the local FreeIPA, you may wish to configure an instance to run a [HAProxy](https://www.haproxy.org/) 114 | load balancer. This can be useful, for example, if you want to route all LDAP queries through a single instance, so that the LDAP servers' firewalls only need 115 | to be opened for a single IP address. 116 | 117 | ## Adding `puppetlabs-haproxy` to your `Puppetfile` 118 | If you want to configure a HAProxy instance in your cluster, you will want to add the [`puppetlabs-haproxy`](https://forge.puppet.com/modules/puppetlabs/haproxy/readme) Puppet module to your `Puppetfile` 119 | and define it in your [`main.tf`](https://github.com/ComputeCanada/magic_castle/tree/main/docs#419-puppetfile-optional). You would add 120 | ``` 121 | mod 'puppetlabs-haproxy', '8.0.0' 122 | ``` 123 | to your `Puppetfile`. 124 | 125 | ## Adding a HAProxy instance 126 | You need to add an instance with the `haproxy` module.
Create a new tag, and apply it to your instances through the `main.tf`: 127 | ``` 128 | magic_castle::site::tags: 129 | haproxy: 130 | - haproxy 131 | ``` 132 | and then in your `main.tf`, add the `haproxy` tag to your instance: 133 | ``` 134 | haproxy = { type = "p2-3gb", tags = ["haproxy"], count = 1 } 135 | ``` 136 | 137 | ## Configuring your HAProxy instance 138 | Add the HAProxy configuration to your hieradata, for example: 139 | ``` 140 | haproxy::merge_options: false 141 | haproxy::defaults_options: 142 | log: global 143 | option: ['tcplog', 'tcpka'] 144 | balance: first 145 | timeout server: 1800s 146 | timeout connect: 2s 147 | mode: tcp 148 | 149 | haproxy::custom_fragment: | 150 | 151 | frontend ldaps_service_front 152 | mode tcp 153 | bind %{lookup('terraform.self.local_ip')}:636 154 | description LDAPS Service 155 | option socket-stats 156 | option tcpka 157 | timeout client 3600s 158 | default_backend ldaps_service_back 159 | 160 | backend ldaps_service_back 161 | server ldap-1 :636 check fall 1 rise 1 inter 2s 162 | server ldap-2 :636 check fall 1 rise 1 inter 2s 163 | option ssl-hello-chk 164 | ``` 165 | 166 | 167 | ## Configuring your other instances to query the HAProxy 168 | For an LDAP HAProxy, you will then want to configure your other instances to use that proxy as their LDAP source: 169 | ``` 170 | profile::sssd::client::domains: 171 | MYLDAP: 172 | id_provider: ldap 173 | auth_provider: ldap 174 | ldap_schema: rfc2307 175 | ldap_uri: 176 | - ldaps://haproxy1 177 | ..... 178 | ``` 179 | -------------------------------------------------------------------------------- /site/profile/manifests/users.pp: -------------------------------------------------------------------------------- 1 | class profile::users::ldap ( 2 | Hash $users, 3 | Hash $groups, 4 | ) { 5 | Exec <| title == 'ipa-install' |> -> Profile::Users::Ldap_group <| |> 6 | Service <| |> -> Profile::Users::Ldap_group <| |> 7 | Profile::Users::Ldap_group <| |> -> Profile::Users::Ldap_user <| |> 8 | 9 | file { '/sbin/ipa_create_user.py': 10 | source => 'puppet:///modules/profile/users/ipa_create_user.py', 11 | mode => '0755', 12 | } 13 | 14 | # After adding a user or a group, we need to invalidate the SSS cache 15 | # that could otherwise return that the user or the group does not exist 16 | # when probing the Unix user account and password database or 17 | # the Unix group database. The command is only executed when a user or 18 | # a group has been created during a Puppet run.
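# The ldap_group and ldap_user execs below notify this resource, so the cache flush only runs when a user or a group was actually created.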
19 | exec { 'sss_cache -E': 20 | refreshonly => true, 21 | path => ['/bin', '/usr/bin', '/sbin','/usr/sbin'], 22 | } 23 | 24 | $users_groups = Hash(unique(flatten($users.map |$key, $values| { pick($values['groups'], []) })).map|$group_name| { [$group_name, {}] }) 25 | ensure_resources(profile::users::ldap_group, $users_groups + $groups) 26 | ensure_resources(profile::users::ldap_user, $users) 27 | } 28 | 29 | class profile::users::local ( 30 | Hash $users 31 | ) { 32 | file { '/etc/sudoers.d/90-puppet-users': 33 | ensure => file, 34 | mode => '0440', 35 | owner => 'root', 36 | group => 'root', 37 | } 38 | 39 | # file { '/etc/sudoers.d/90-cloud-init-users': 40 | # ensure => absent, 41 | # require => $users.map | $k, $v | { Profile::Users::Local_user[$k] }, 42 | # } 43 | 44 | ensure_resources(profile::users::local_user, $users) 45 | } 46 | 47 | define profile::users::ldap_group ( 48 | Boolean $posix = true, 49 | Boolean $automember = false, 50 | Optional[Array[String]] $hbac_rules = undef, 51 | ) { 52 | $admin_password = lookup('profile::freeipa::server::admin_password') 53 | $environment = ["IPA_ADMIN_PASSWD=${admin_password}"] 54 | if $posix { 55 | $arg = '' 56 | } 57 | else { 58 | $arg = '--nonposix' 59 | } 60 | exec { "ldap_group_${name}": 61 | command => "kinit_wrapper ipa group-add ${name} ${arg}", 62 | environment => $environment, 63 | path => ['/bin', '/usr/bin', '/sbin','/usr/sbin'], 64 | unless => "getent group ${name}", 65 | notify => Exec['sss_cache -E'], 66 | require => [ 67 | Exec['ipa-install'], 68 | File['kinit_wrapper'], 69 | ], 70 | } 71 | 72 | if $hbac_rules != undef or $automember { 73 | file { "/etc/ipa/group_rules_${name}.py": 74 | mode => '0700', 75 | content => epp( 76 | 'profile/freeipa/group_rules.py', 77 | { 78 | 'group' => $name, 79 | 'automember' => $automember, 80 | 'hbac_rules' => $hbac_rules, 81 | } 82 | ), 83 | } 84 | exec { "group_rules_${name}": 85 | command => "kinit_wrapper ipa console /etc/ipa/group_rules_${name}.py", 86 | refreshonly => true, 87 | require => [ 88 | File['kinit_wrapper'], 89 | ], 90 | environment => $environment, 91 | path => ['/bin', '/usr/bin', '/sbin','/usr/sbin'], 92 | subscribe => [ 93 | File["/etc/ipa/group_rules_${name}.py"], 94 | Exec['hbac_rules'], 95 | Exec["ldap_group_${name}"], 96 | ], 97 | } 98 | } 99 | } 100 | 101 | define profile::users::ldap_user ( 102 | Array[String] $groups = [], 103 | Array[String] $public_keys = [], 104 | Integer[0] $count = 1, 105 | Boolean $manage_password = true, 106 | Optional[String[1]] $passwd = undef, 107 | ) { 108 | $admin_password = lookup('profile::freeipa::server::admin_password') 109 | $group_args = join($groups.map |$group| { "--group ${group}" }, ' ') 110 | $sshpubkey_string = join($public_keys.map |$key| { "--sshpubkey '${key}'" }, ' ') 111 | $cmd_args = "${group_args} ${$sshpubkey_string}" 112 | if $count > 1 { 113 | $page_size = 50 114 | $prefix = $name 115 | $exec_name = range(1, $count, $page_size).map |$i| { 116 | "ldap_user_${name}_${i}-${min($i+$page_size, $count)}" 117 | } 118 | $command = range(1, $count, $page_size).map |$i| { 119 | "ipa_create_user.py $(seq -f'${prefix}%0${length(String($count))}g' ${i} ${min($count, $i+$page_size)}) ${cmd_args}" 120 | } 121 | $timeout = $page_size * 10 122 | } elsif $count == 1 { 123 | $exec_name = ["ldap_user_${name}"] 124 | $command = ["ipa_create_user.py ${name} ${cmd_args}"] 125 | $timeout = 10 126 | } 127 | 128 | $environment = ["IPA_ADMIN_PASSWD=${admin_password}"] 129 | 130 | if $count > 0 { 131 | $exec_name.each |Integer $i, String 
$exec_name_i| {
132 |       exec { $exec_name_i:
133 |         command     => "kinit_wrapper ${command[$i]}",
134 |         unless      => "${command[$i]} --dry",
135 |         environment => $environment,
136 |         path        => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'],
137 |         timeout     => $timeout,
138 |         require     => [
139 |           File['kinit_wrapper'],
140 |           File['/sbin/ipa_create_user.py'],
141 |         ],
142 |         notify      => Exec['sss_cache -E'],
143 |       }
144 |     }
145 | 
146 |     if $passwd {
147 |       $ds_password = lookup('profile::freeipa::server::ds_password')
148 |       $ipa_domain = lookup('profile::freeipa::base::ipa_domain')
149 |       $fqdn = "${facts['networking']['hostname']}.${ipa_domain}"
150 |       $ldap_dc_string = join(split($ipa_domain, '[.]').map |$dc| { "dc=${dc}" }, ',')
151 | 
152 |       $ldap_passwd_cmd = @("EOT")
153 |         ldappasswd -ZZ -H ldap://${fqdn} \
154 |         -x -D "cn=Directory Manager" -w "${ds_password}" \
155 |         -S "uid={},cn=users,cn=accounts,${ldap_dc_string}" \
156 |         -s "${passwd}"
157 |         |EOT
158 | 
159 |       if $count > 1 {
160 |         $set_password_cmd = range(1, $count, $page_size).map |$i| {
161 |           "seq -f'${prefix}%0${length(String($count))}g' ${i} ${min($count, $i+$page_size)} | xargs -I '{}' ${ldap_passwd_cmd}"
162 |         }
163 |         $check_password_cmd = range(1, $count, $page_size).map |$i| {
164 |           "echo ${passwd} | kinit $(seq -f'${prefix}%0${length(String($count))}g' ${i} ${min($count, $i+$page_size)} | shuf | head -n1) && kdestroy"
165 |         }
166 |       } else {
167 |         $set_password_cmd = [regsubst($ldap_passwd_cmd, '{}', $name)]
168 |         $check_password_cmd = ["echo ${passwd} | kinit ${name} && kdestroy"]
169 |       }
170 | 
171 |       $exec_name.each |Integer $i, String $exec_name_i| {
172 |         exec { "ldap_set_password_${$exec_name_i}":
173 |           command     => Sensitive($set_password_cmd[$i]),
174 |           unless      => Sensitive($check_password_cmd[$i]),
175 |           path        => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'],
176 |           refreshonly => !
$manage_password, 177 | subscribe => Exec[$exec_name_i], 178 | } 179 | } 180 | } 181 | } 182 | } 183 | 184 | define profile::users::local_user ( 185 | Array[String] $public_keys, 186 | Array[String] $groups, 187 | Boolean $sudoer = false, 188 | String $selinux_user = 'unconfined_u', 189 | String $mls_range = 's0-s0:c0.c1023', 190 | String $authenticationmethods = '', 191 | Boolean $manage_home = true, 192 | Boolean $purge_ssh_keys = true, 193 | Optional[String] $shell = undef, 194 | Optional[Integer] $uid = undef, 195 | Optional[Integer] $gid = undef, 196 | String $group = $name, 197 | String $home = "/${name}", 198 | ) { 199 | ensure_resource('group', $group, { 200 | ensure => present, 201 | gid => $gid, 202 | forcelocal => true, 203 | } 204 | ) 205 | # Configure local account and ssh keys 206 | user { $name: 207 | ensure => present, 208 | forcelocal => true, 209 | uid => $uid, 210 | gid => $group, 211 | groups => $groups, 212 | home => $home, 213 | purge_ssh_keys => $purge_ssh_keys, 214 | managehome => $manage_home, 215 | shell => $shell, 216 | require => Group[$group], 217 | } 218 | 219 | if $manage_home { 220 | selinux::exec_restorecon { $home: 221 | subscribe => User[$name] 222 | } 223 | } 224 | 225 | $public_keys.each | Integer $index, String $sshkey | { 226 | $split = split($sshkey, ' ') 227 | $key_type_index = $split.index|$value| { $value =~ /^(?:ssh|ecdsa).*$/ } 228 | 229 | $key_type = $split[$key_type_index] 230 | $key_value = $split[$key_type_index+1] 231 | 232 | if $key_type_index != 0 { 233 | $key_options = ssh_split_options($split[0, $key_type_index].join(' ')) 234 | } else { 235 | $key_options = undef 236 | } 237 | if length($split) > $key_type_index + 2 { 238 | $comment_index = $key_type_index + 2 239 | $comment = String($split[$comment_index, -1].join(' '), '%t') 240 | $key_name = "${name}_${index}:${comment}" 241 | } else { 242 | $key_name = "${name}_${index}" 243 | } 244 | ssh_authorized_key { "${name}_${index}": 245 | ensure => present, 246 | name => $key_name, 247 | user => $name, 248 | type => $key_type, 249 | key => $key_value, 250 | options => $key_options, 251 | } 252 | } 253 | 254 | # Configure user selinux mapping 255 | exec { "selinux_login_${name}": 256 | command => "semanage login -a -S targeted -s '${selinux_user}' -r '${mls_range}' ${name}", 257 | unless => "grep -q '${name}:${selinux_user}:${mls_range}' /etc/selinux/targeted/seusers", 258 | path => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'], 259 | } 260 | 261 | $ensure_sudoer = $sudoer ? 
{ true => 'present', false => 'absent' }
262 |   file_line { "sudoer_${name}":
263 |     ensure  => $ensure_sudoer,
264 |     path    => '/etc/sudoers.d/90-puppet-users',
265 |     line    => "${name} ALL=(ALL) NOPASSWD:ALL",
266 |     require => File['/etc/sudoers.d/90-puppet-users'],
267 |   }
268 | 
269 |   if $authenticationmethods != '' {
270 |     sshd_config { "${name} authenticationmethods":
271 |       ensure    => present,
272 |       condition => "User ${name}",
273 |       key       => 'AuthenticationMethods',
274 |       value     => $authenticationmethods,
275 |       target    => '/etc/ssh/sshd_config.d/50-authenticationmethods.conf',
276 |       notify    => Service['sshd']
277 |     }
278 |   }
279 | }
280 | 
--------------------------------------------------------------------------------
/site/profile/templates/accounts/mkproject.sh.epp:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # mkproject.sh watches the 389-ds access log for operations on groups
4 | # matching a regex ($PROJECT_REGEX), with the intent of automatically
5 | # manipulating the Slurm accounts and the folders under /project associated
6 | # with these groups.
7 | 
8 | # Three operations are currently supported: ADD, MOD and DEL.
9 | # - ADD triggers the creation of a Slurm account and of a directory under /project.
10 | # - MOD triggers either the addition of users to the associated Slurm
11 | #   account and the creation of their folders under /project/GID/`username`,
12 | #   or the removal of users from the associated Slurm account
13 | #   and the removal of the symlinks from the users' homes to the project folder.
14 | # - DEL triggers the removal of all users from the associated Slurm account
15 | #   and the removal of the symlink to the project from all previous members'
16 | #   home folders.
17 | 
18 | 
19 | PROJECT_REGEX="<%= $project_regex %>"
20 | BASEDN=$(grep -o -P "basedn = \K(.*)" /etc/ipa/default.conf)
21 | WITH_FOLDER="<%= $manage_folder %>"
22 | PREV_CONN=""
23 | 
24 | source /sbin/account_functions.sh
25 | 
26 | MKHOME_PROJECT_DIR="/var/lib/mkhome_project/"
27 | mkdir -p ${MKHOME_PROJECT_DIR}
28 | 
29 | MODPROJECT_CACHE=${MKHOME_PROJECT_DIR}/modproject.cache
30 | MODPROJECT_CACHE_SIZE=1000
31 | 
32 | MKPROJECT_CACHE=${MKHOME_PROJECT_DIR}/mkproject.cache
33 | MKPROJECT_CACHE_SIZE=1000
34 | 
35 | LAST_TIMESTAMP_FILE=${MKHOME_PROJECT_DIR}/mkproject.last_timestamp
36 | 
37 | # FIFO pipes could eventually be replaced by sockets
38 | # to allow input from other instances, like login nodes,
39 | # and to avoid background echo processes. A FIFO pipe can
40 | # be replaced by a UNIX socket with netcat and this command:
41 | # nc -lkU /var/tmp/dsocket
42 | # To write data to the UNIX socket:
43 | # echo $((NRETRY+1)) "${GROUP}" "${WITH_FOLDER}" "${USERNAMES}" | nc -N -U /var/tmp/dsocket
44 | MODPROJECT_PIPE=${MKHOME_PROJECT_DIR}/modproject.pipe
45 | MODPROJECT_RETRY_PIPE=${MKHOME_PROJECT_DIR}/modproject_retry.pipe
46 | MKHOME_MODPROJECT_PIPE=${MKHOME_PROJECT_DIR}/mkhome_modproject.pipe
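# Example (hypothetical group and usernames) of a message written to one of
# these pipes; the format matches the "read NRETRY GROUP WITH_FOLDER USERNAMES"
# consumer loop below:
#   echo 0 def-sponsor00 true alice bob > ${MODPROJECT_PIPE} &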
47 | 
48 | trap "rm -f ${MODPROJECT_PIPE} ${MODPROJECT_RETRY_PIPE} ${MODPROJECT_CACHE} ${MKPROJECT_CACHE}" EXIT
49 | 
50 | if [[ ! -p ${MODPROJECT_PIPE} ]]; then
51 |     mkfifo -m=600 ${MODPROJECT_PIPE}
52 | fi
53 | 
54 | if [[ ! -p ${MODPROJECT_RETRY_PIPE} ]]; then
55 |     mkfifo -m=600 ${MODPROJECT_RETRY_PIPE}
56 | fi
57 | 
58 | # Create two empty cache files that are only r+w by root
59 | install -m 600 /dev/null ${MKPROJECT_CACHE}
60 | install -m 600 /dev/null ${MODPROJECT_CACHE}
61 | 
62 | wait_for_slurm
63 | 
64 | CONSECUTIVE_FAILURES=0
65 | # Loop over three sources of project modifications:
66 | # 1. Modifications coming from the SLAPD log
67 | # 2. Modifications coming from the mkhome daemon
68 | # 3. Modifications from this loop that errored on a previous try
69 | (
70 |     tail -n+1 -F ${MODPROJECT_PIPE} &
71 |     tail -n+1 -F ${MODPROJECT_RETRY_PIPE} &
72 |     tail -n+1 -F ${MKHOME_MODPROJECT_PIPE}
73 | ) |
74 | while read NRETRY GROUP WITH_FOLDER USERNAMES; do
75 |     if [[ ! -z "${USERNAMES}" ]]; then
76 |         # Filter out group+username pairs that have already been successfully
77 |         # modified and that are still in the cache.
78 |         USERNAMES=$(
79 |             for USERNAME in ${USERNAMES}; do
80 |                 if ! grep -q "${GROUP} ${WITH_FOLDER} ${USERNAME}" ${MODPROJECT_CACHE}; then
81 |                     echo ${USERNAME}
82 |                 fi
83 |             done
84 |         )
85 |         if [[ -z "${USERNAMES}" ]]; then
86 |             continue
87 |         fi
88 |     fi
89 |     if [[ ${CONSECUTIVE_FAILURES} -gt 0 ]] && [[ ${NRETRY} -gt 0 ]]; then
90 |         # NRETRY corresponds to the number of times that modproject has been called
91 |         # with these arguments without success. If it is greater than 0, the script
92 |         # sleeps to give the system internals time to adjust before retrying.
93 |         sleep ${CONSECUTIVE_FAILURES}
94 |     fi
95 |     if [ -e "${MKHOME_PROJECT_DIR}/${GROUP}.slurm.failed" ]; then
96 |         if mkproject ${GROUP} ${WITH_FOLDER}; then
97 |             rm "${MKHOME_PROJECT_DIR}/${GROUP}.slurm.failed"
98 |             echo "${GROUP}" >> ${MKPROJECT_CACHE}
99 |             if [[ $(wc -l < ${MKPROJECT_CACHE}) -gt ${MKPROJECT_CACHE_SIZE} ]]; then
100 |                 sed -i '1d' ${MKPROJECT_CACHE}
101 |             fi
102 |             # Identify usernames that are already members of the group - most likely through automembership
103 |             USERNAMES=$(kexec ipa group-show ${GROUP} --raw | grep -o -P "uid=\K(.*)(?=,cn=users)")
104 |             if [[ ! -z ${USERNAMES} ]]; then
105 |                 echo 0 ${GROUP} ${WITH_FOLDER} ${USERNAMES} > ${MODPROJECT_PIPE} &
106 |             fi
107 |         else
108 |             CONSECUTIVE_FAILURES=$(($CONSECUTIVE_FAILURES+1))
109 |             echo $((NRETRY+1)) ${GROUP} ${WITH_FOLDER} ${USERNAMES} > ${MODPROJECT_RETRY_PIPE} &
110 |             continue
111 |         fi
112 |     fi
113 |     modproject ${GROUP} ${WITH_FOLDER} ${USERNAMES}
114 |     case $? in
115 |         0)
116 |             # modproject has been successful
117 |             if [[ ! -z "${USERNAMES}" ]]; then
118 |                 # If usernames were determined from the logs, it means we added users to a group,
119 |                 # so we add each username individually, with the group name, to
120 |                 # the cache,
121 |                 for USERNAME in ${USERNAMES}; do
122 |                     echo "${GROUP}" "${WITH_FOLDER}" "${USERNAME}" >> ${MODPROJECT_CACHE}
123 |                 done
124 |                 # then we restrict the size of the cache to its maximum value
125 |                 # by keeping the N most recently added elements.
126 |                 echo "$(tail -n ${MODPROJECT_CACHE_SIZE} ${MODPROJECT_CACHE})" > ${MODPROJECT_CACHE}
127 |             elif [[ ! -z "${REMOVED_USERNAMES}" ]]; then
128 |                 # If the usernames were not found in the logs, it means we removed users
129 |                 # from the group. The variable $REMOVED_USERNAMES is configured by modproject and
130 |                 # contains all usernames that were successfully removed from the group $GROUP.
131 |                 for USERNAME in ${REMOVED_USERNAMES}; do
132 |                     sed -i "/${GROUP} ${WITH_FOLDER} ${USERNAME}/d" ${MODPROJECT_CACHE}
133 |                 done
134 |                 unset REMOVED_USERNAMES
135 |             fi
136 |             CONSECUTIVE_FAILURES=0
137 |             ;;
138 |         1)
139 |             # modproject was not successful, but the arguments are valid, so we should try again
140 |             # later. We increment the number of retries and add the arguments to the retry pipeline.
141 |             CONSECUTIVE_FAILURES=$(($CONSECUTIVE_FAILURES+1))
142 |             if [[ ${NRETRY} -lt 10 ]]; then
143 |                 echo $((NRETRY+1)) ${GROUP} ${WITH_FOLDER} ${USERNAMES} > ${MODPROJECT_RETRY_PIPE} &
144 |             else
145 |                 echo "ERROR::modproject ${GROUP} ${WITH_FOLDER} ${USERNAMES}: giving up after 10 attempts."
146 |             fi
147 |             ;;
148 |         *)
149 |             # modproject was not successful and the return code indicates the function should not be
150 |             # called again with these arguments (invalid arguments, do not retry).
151 |             CONSECUTIVE_FAILURES=$(($CONSECUTIVE_FAILURES+1))
152 |             ;;
153 |     esac
154 | done &
155 | 
156 | # Do not reprocess previous entries if we remember the last timestamp processed
157 | if [ -e $LAST_TIMESTAMP_FILE ]; then
158 |     LAST_TIMESTAMP=$(cat $LAST_TIMESTAMP_FILE)
159 |     start_index=$(sed -n "/${LAST_TIMESTAMP//\//\\\/}/=" /var/log/dirsrv/slapd-*/access)
160 |     start_index=$(($start_index+1))
161 | else
162 |     start_index=1
163 | fi
164 | 
165 | tail -n +${start_index} -F /var/log/dirsrv/slapd-*/access |
166 | grep --line-buffered -P "dn=\"cn=${PROJECT_REGEX},cn=groups" |
167 | sed -u -r 's/^\[(.*) \+[0-9]{4}\] conn=([0-9]*) op=[0-9]* (\w+) dn="cn=(.*),cn=groups.*$/\1 \2 \3 \4/' |
168 | while read TIMESTAMP CONN OP GROUP; do
169 |     # An operation has been done on a group in LDAP.
170 |     # If we have already completed this request, only record the timestamp.
171 |     if [[ "${PREV_CONN}" == "${CONN}" ]]; then
172 |         echo "${TIMESTAMP}" > ${LAST_TIMESTAMP_FILE}
173 |         continue
174 |     fi
175 | 
176 |     # We wait for the operation $CONN to be completed.
177 |     # Taken from StackExchange:
178 |     # https://unix.stackexchange.com/questions/416150/make-tail-f-exit-on-a-broken-pipe
179 |     {
180 |         grep --line-buffered -q -m 1 "conn=${CONN} op=[0-9]* UNBIND";
181 |         kill -s PIPE "$!";
182 |     } < <(tail -n +0 -F /var/log/dirsrv/slapd-*/access 2> /dev/null)
183 | 
184 |     # We support three operations: ADD, MOD, or DEL.
185 |     case ${OP} in
186 |         ADD)
187 |             if grep -q "${GROUP}" ${MKPROJECT_CACHE}; then
188 |                 echo "${TIMESTAMP}" > ${LAST_TIMESTAMP_FILE}
189 |                 PREV_CONN="${CONN}"
190 |                 continue
191 |             fi
192 | 
193 |             if mkproject ${GROUP} ${WITH_FOLDER}; then
194 |                 echo "${GROUP}" >> ${MKPROJECT_CACHE}
195 |                 if [[ $(wc -l < ${MKPROJECT_CACHE}) -gt ${MKPROJECT_CACHE_SIZE} ]]; then
196 |                     sed -i '1d' ${MKPROJECT_CACHE}
197 |                 fi
198 |                 # Identify usernames that are already members of the group - most likely through automembership
199 |                 USERNAMES=$(kexec ipa group-show ${GROUP} --raw | grep -o -P "uid=\K(.*)(?=,cn=users)")
200 |                 if [[ !
-z ${USERNAMES} ]]; then 201 | echo 0 ${GROUP} ${WITH_FOLDER} ${USERNAMES} > ${MODPROJECT_PIPE} & 202 | fi 203 | else 204 | touch "${MKHOME_PROJECT_DIR}/${GROUP}.slurm.failed" 205 | fi 206 | ;; 207 | MOD) 208 | # One or more groups have been modified 209 | # We grep the log for all operations related to request $CONN that contain a uid 210 | USERNAMES=$(grep -oP "conn=${CONN} op=[0-9]* SRCH base=\"uid=\K(.*)(?=,cn=users)" /var/log/dirsrv/slapd-*/access | sort | uniq) 211 | # We grep all unique groups that have been modified 212 | MOD_GROUPS=$(grep -oP "conn=${CONN} op=[0-9]* MOD dn=\"cn=\K${PROJECT_REGEX}" /var/log/dirsrv/slapd-*/access | uniq) 213 | for GROUP in ${MOD_GROUPS}; do 214 | echo 0 ${GROUP} ${WITH_FOLDER} ${USERNAMES} > ${MODPROJECT_PIPE} & 215 | done 216 | ;; 217 | DEL) 218 | delproject ${GROUP} ${WITH_FOLDER} 219 | ;; 220 | *) 221 | echo "Unknown operation ${OP}" 222 | ;; 223 | esac 224 | PREV_CONN="${CONN}" 225 | echo "${TIMESTAMP}" > ${LAST_TIMESTAMP_FILE} 226 | done 227 | -------------------------------------------------------------------------------- /site/profile/templates/freeipa/mokey.yaml.epp: -------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------------------------ 3 | # Database connection 4 | #------------------------------------------------------------------------------ 5 | dsn: "<%= $user %>:<%= $password %>@/<%= $dbname %>?parseTime=true" 6 | 7 | #------------------------------------------------------------------------------ 8 | # Database driver 9 | #------------------------------------------------------------------------------ 10 | driver: "mysql" 11 | 12 | #------------------------------------------------------------------------------ 13 | # Secure webserver port to listen on 14 | #------------------------------------------------------------------------------ 15 | port: <%= $port %> 16 | 17 | #------------------------------------------------------------------------------ 18 | # Insecure redirect host and port. 
If set, will redirect http to https
19 | #------------------------------------------------------------------------------
20 | # insecure_redirect_port: 80
21 | # insecure_redirect_host: localhost
22 | 
23 | #------------------------------------------------------------------------------
24 | # Webserver interface to listen on
25 | #------------------------------------------------------------------------------
26 | # For utilizing all available IP interfaces, use:
27 | bind: "0.0.0.0"
28 | 
29 | #------------------------------------------------------------------------------
30 | # SSL certificate
31 | #------------------------------------------------------------------------------
32 | # cert: "/path/to/cert"
33 | 
34 | #------------------------------------------------------------------------------
35 | # SSL private key
36 | #------------------------------------------------------------------------------
37 | # key: "/path/to/key"
38 | 
39 | #------------------------------------------------------------------------------
40 | # Password requirements
41 | #------------------------------------------------------------------------------
42 | # min_passwd_len: 8
43 | # min_passwd_classes: 2
44 | 
45 | #------------------------------------------------------------------------------
46 | # Authentication key used for HMAC token signing and secure cookies
47 | #------------------------------------------------------------------------------
48 | auth_key: <%= $auth_key %>
49 | 
50 | #------------------------------------------------------------------------------
51 | # Encryption key used for encrypting cookies
52 | #------------------------------------------------------------------------------
53 | enc_key: <%= $enc_key %>
54 | 
55 | #------------------------------------------------------------------------------
56 | # Templates directory
57 | #------------------------------------------------------------------------------
58 | templates: /usr/share/mokey/templates
59 | 
60 | #------------------------------------------------------------------------------
61 | # Custom URL context path
62 | #------------------------------------------------------------------------------
63 | # path_prefix: "/mokey"
64 | 
65 | #------------------------------------------------------------------------------
66 | # Keytab file and username for mokey to use for operations requiring elevated
67 | # privileges (should have "Modify users and Reset passwords" privilege in
68 | # FreeIPA)
69 | #------------------------------------------------------------------------------
70 | keytab: "/etc/mokey/keytab/mokeyapp.keytab"
71 | ktuser: "mokey/mokey"
72 | 
73 | #------------------------------------------------------------------------------
74 | # Enable rate limiting based on remote ip (requires redis)
75 | #------------------------------------------------------------------------------
76 | rate_limit: false
77 | 
78 | #------------------------------------------------------------------------------
79 | # Redis server (used for rate limiting)
80 | #------------------------------------------------------------------------------
81 | # redis: ":6379"
82 | 
83 | #------------------------------------------------------------------------------
84 | # Max POST requests. This value sets a max limit on the number of POST requests
85 | # made in a given time period. The time is defined by "rate_limit_expire".
86 | #------------------------------------------------------------------------------
87 | # max_requests: 15
88 | 
89 | #------------------------------------------------------------------------------
90 | # The expire time in seconds for the max_requests counter. By default the
91 | # number of post requests from a given IP address is limited to 15 requests per
92 | # hour.
93 | #------------------------------------------------------------------------------
94 | # rate_limit_expire: 3600
95 | 
96 | #------------------------------------------------------------------------------
97 | # SMTP server
98 | #------------------------------------------------------------------------------
99 | smtp_host: "localhost"
100 | #smtp_username: "username"
101 | #smtp_password: "password"
102 | 
103 | #------------------------------------------------------------------------------
104 | # SMTP port / TLS
105 | # Possible values for TLS are:
106 | #  - on: Connection is fully encrypted with TLS
107 | #  - off: Connection is unencrypted
108 | #  - starttls: Connection is encrypted on demand via the STARTTLS command
109 | #------------------------------------------------------------------------------
110 | smtp_port: 25
111 | smtp_tls: "off"
112 | 
113 | #------------------------------------------------------------------------------
114 | # From address used when sending emails
115 | #------------------------------------------------------------------------------
116 | email_from: <%= $email_from %>
117 | 
118 | #------------------------------------------------------------------------------
119 | # Email signature used when sending emails
120 | #------------------------------------------------------------------------------
121 | email_sig: ""
122 | 
123 | #------------------------------------------------------------------------------
124 | # Base URL of mokey server. Used for links in emails
125 | #------------------------------------------------------------------------------
126 | email_link_base: <%= $email_link_base %>
127 | 
128 | #------------------------------------------------------------------------------
129 | # Subject prefix used when sending emails
130 | #------------------------------------------------------------------------------
131 | email_prefix: "magic castle"
132 | 
133 | #------------------------------------------------------------------------------
134 | # Max age (in seconds) of setup account email tokens.
135 | #------------------------------------------------------------------------------
136 | setup_max_age: 86400
137 | 
138 | #------------------------------------------------------------------------------
139 | # Max age (in seconds) of reset password email tokens.
140 | #------------------------------------------------------------------------------
141 | reset_max_age: 3600
142 | 
143 | #------------------------------------------------------------------------------
144 | # Max attempts for password resets and account setup.
145 | #------------------------------------------------------------------------------ 146 | max_attempts: 10 147 | 148 | #------------------------------------------------------------------------------ 149 | # Sign emails using PGP/Mime 150 | #------------------------------------------------------------------------------ 151 | pgp_sign: false 152 | 153 | #------------------------------------------------------------------------------ 154 | # PGP private key 155 | #------------------------------------------------------------------------------ 156 | # pgp_key: "/path/to/key.gpg" 157 | 158 | #------------------------------------------------------------------------------ 159 | # passphrase for PGP private key (if encrypted) 160 | #------------------------------------------------------------------------------ 161 | # pgp_passphrase: "secret" 162 | 163 | #------------------------------------------------------------------------------ 164 | # CAPTCHA support 165 | #------------------------------------------------------------------------------ 166 | # enable_captcha: true 167 | 168 | #------------------------------------------------------------------------------ 169 | # New User Account Signup 170 | #------------------------------------------------------------------------------ 171 | enable_user_signup: <%= $enable_user_signup %> 172 | # default_shell: "/bin/bash" 173 | # default_homedir: "/home" 174 | # 175 | #------------------------------------------------------------------------------ 176 | # Require users to verify email address. With this option enabled new accounts 177 | # are disabled by default until the user verifies their email address 178 | #------------------------------------------------------------------------------ 179 | # require_verify_email: false 180 | 181 | #------------------------------------------------------------------------------ 182 | # Require FreeIPA admin to activate the account. With this option enabled new 183 | # accounts are disabled by default until a FreeIPA admin activates them. 184 | # This option is mutually exclusive with require_verify_email. 
185 | #------------------------------------------------------------------------------ 186 | require_verify_admin: <%= $require_verify_admin %> 187 | 188 | #------------------------------------------------------------------------------ 189 | # Developer mode 190 | #------------------------------------------------------------------------------ 191 | develop: false 192 | 193 | #------------------------------------------------------------------------------ 194 | # Globus Signup 195 | #------------------------------------------------------------------------------ 196 | # globus_signup: false 197 | # globus_iss: "https://auth.globus.org" 198 | # globus_client_id: "xxx" 199 | # globus_secret: "xxx" 200 | # globus_trusted_providers: 201 | # - xxx 202 | # - xxx 203 | 204 | #------------------------------------------------------------------------------ 205 | # Hydra config 206 | #------------------------------------------------------------------------------ 207 | # hydra_admin_url: "https://localhost:4445" 208 | # hydra_consent_timeout: 86400 209 | # hydra_login_timeout: 86400 210 | # hydra_fake_tls_termination: true 211 | 212 | #------------------------------------------------------------------------------ 213 | # Public oauth2 clients for Api Key access (requires Hydra) 214 | #------------------------------------------------------------------------------ 215 | # enable_api_keys: false 216 | # enabled_api_client_ids: 217 | # - openstack-api 218 | # - mypublic-api 219 | # 220 | # openstack-api: 221 | # name: "Openstack CLI" 222 | # desc: "Access to Openstack CLI" 223 | # scopes: openid 224 | # 225 | # mypublic-api: 226 | # name: "Some other API" 227 | # desc: "Access to some other API" 228 | # scopes: openid 229 | -------------------------------------------------------------------------------- /site/profile/manifests/gpu.pp: -------------------------------------------------------------------------------- 1 | class profile::gpu ( 2 | Boolean $restrict_profiling, 3 | ) { 4 | if $facts['nvidia_gpu_count'] > 0 { 5 | include profile::gpu::install 6 | include profile::gpu::services 7 | } 8 | } 9 | 10 | class profile::gpu::install ( 11 | Optional[String] $lib_symlink_path = undef 12 | ) { 13 | $restrict_profiling = lookup('profile::gpu::restrict_profiling') 14 | ensure_resource('file', '/etc/nvidia', { 'ensure' => 'directory' }) 15 | ensure_packages(['kernel-devel'], { 'name' => "kernel-devel-${facts['kernelrelease']}" }) 16 | ensure_packages(['kernel-headers'], { 'name' => "kernel-headers-${facts['kernelrelease']}" }) 17 | ensure_packages(['dkms'], { 'require' => [Package['kernel-devel'], Yumrepo['epel']] }) 18 | $nvidia_kmod = ['nvidia', 'nvidia_modeset', 'nvidia_drm', 'nvidia_uvm'] 19 | 20 | selinux::module { 'nvidia-gpu': 21 | ensure => 'present', 22 | source_pp => 'puppet:///modules/profile/gpu/nvidia-gpu.pp', 23 | } 24 | 25 | file { '/etc/modprobe.d/nvidia.conf': 26 | ensure => file, 27 | owner => 'root', 28 | group => 'root', 29 | mode => '0755', 30 | } 31 | 32 | file_line { 'nvidia_restrict_profiling': 33 | path => '/etc/modprobe.d/nvidia.conf', 34 | match => '^options nvidia NVreg_RestrictProfilingToAdminUsers', 35 | line => "options nvidia NVreg_RestrictProfilingToAdminUsers=${Integer($restrict_profiling)}", 36 | require => File['/etc/modprobe.d/nvidia.conf'], 37 | notify => [ 38 | Exec['stop_nvidia_services'], 39 | Exec['unload_nvidia_drivers'], 40 | ], 41 | } 42 | 43 | exec { 'unload_nvidia_drivers': 44 | command => sprintf('modprobe -r %s', $nvidia_kmod.reverse.join(' ')), 45 | onlyif => 'grep 
-qE "^nvidia " /proc/modules', 46 | refreshonly => true, 47 | require => Exec['stop_nvidia_services'], 48 | notify => Kmod::Load[$nvidia_kmod], 49 | path => ['/bin', '/sbin'], 50 | } 51 | File_line['nvidia_restrict_profiling'] ~> Exec<| title == stop_slurm-job-exporter |> 52 | Exec<| title == stop_slurm-job-exporter |> -> Exec['unload_nvidia_drivers'] 53 | 54 | if ! profile::is_grid_vgpu() { 55 | include profile::gpu::install::passthrough 56 | Class['profile::gpu::install::passthrough'] -> Exec['dkms_nvidia'] 57 | } else { 58 | include profile::gpu::install::vgpu 59 | } 60 | 61 | # Binary installer do not build drivers with DKMS 62 | $installer = lookup('profile::gpu::install::vgpu::installer', undef, undef, '') 63 | if ! profile::is_grid_vgpu() or $installer != 'bin' { 64 | exec { 'dkms_nvidia': 65 | command => "dkms autoinstall -m nvidia -k ${facts['kernelrelease']}", 66 | path => ['/usr/bin', '/usr/sbin'], 67 | onlyif => "dkms status -m nvidia -k ${facts['kernelrelease']} | grep -v -q installed", 68 | timeout => 0, 69 | before => Kmod::Load[$nvidia_kmod], 70 | require => [ 71 | Package['kernel-devel'], 72 | Package['dkms'], 73 | ], 74 | } 75 | } 76 | 77 | kmod::load { $nvidia_kmod: } 78 | 79 | if $lib_symlink_path { 80 | $lib_symlink_path_split = split($lib_symlink_path, '/') 81 | $lib_symlink_dir = Hash( 82 | $lib_symlink_path_split[1,-1].map |Integer $index, String $value| { 83 | [join($lib_symlink_path_split[0, $index+2], '/'), { 'ensure' => 'directory', 'notify' => Exec['nvidia-symlink'] }] 84 | }.filter |$array| { 85 | !($array[0] in ['/lib', '/lib64', '/usr', '/usr/lib', '/usr/lib64', '/opt']) 86 | } 87 | ) 88 | $lib_symlink_dir_res = ensure_resources('file', $lib_symlink_dir) 89 | exec { 'nvidia-symlink': 90 | command => "rpm -qa *nvidia* | xargs rpm -ql | grep -P '/usr/lib64/[a-z0-9-.]*.so[0-9.]*' | xargs -I {} ln -sf {} ${lib_symlink_path}", # lint:ignore:140chars 91 | refreshonly => true, 92 | path => ['/bin', '/usr/bin'], 93 | } 94 | 95 | Package<| tag == profile::gpu::install |> ~> Exec['nvidia-symlink'] 96 | Exec<| tag == profile::gpu::install::vgpu::bin |> ~> Exec['nvidia-symlink'] 97 | } 98 | Kmod::Load[$nvidia_kmod] ~> Service<| tag == profile::gpu::services |> 99 | } 100 | 101 | class profile::gpu::install::passthrough ( 102 | Array[String] $packages, 103 | String $nvidia_driver_stream = '550-dkms' 104 | ) { 105 | $os = "rhel${::facts['os']['release']['major']}" 106 | $arch = $::facts['os']['architecture'] 107 | 108 | exec { 'cuda-repo': 109 | command => "dnf config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/${os}/${arch}/cuda-${os}.repo", 110 | creates => "/etc/yum.repos.d/cuda-${os}.repo", 111 | path => ['/usr/bin'], 112 | } 113 | 114 | package { 'nvidia-stream': 115 | ensure => $nvidia_driver_stream, 116 | name => 'nvidia-driver', 117 | provider => dnfmodule, 118 | enable_only => true, 119 | require => [ 120 | Exec['cuda-repo'], 121 | ], 122 | } 123 | 124 | $mig_profile = lookup("terraform.instances.${facts['networking']['hostname']}.specs.mig", Variant[Undef, Hash[String, Integer]], undef, {}) 125 | class { 'profile::gpu::config::mig': 126 | mig_profile => $mig_profile, 127 | require => Package[$packages], 128 | } 129 | 130 | package { $packages: 131 | ensure => 'installed', 132 | require => [ 133 | Package['nvidia-stream'], 134 | Package['kernel-devel'], 135 | Exec['cuda-repo'], 136 | Yumrepo['epel'], 137 | ], 138 | } 139 | 140 | # Used by slurm-job-exporter to export GPU metrics 141 | -> package { 'datacenter-gpu-manager': } 142 | 
143 | -> augeas { 'nvidia-persistenced.service': 144 | context => '/files/lib/systemd/system/nvidia-persistenced.service/Service', 145 | changes => [ 146 | 'set DynamicUser/value yes', 147 | 'set StateDirectory/value nvidia-persistenced', 148 | 'set RuntimeDirectory/value nvidia-persistenced', 149 | 'rm ExecStart/arguments', 150 | ], 151 | } 152 | } 153 | 154 | class profile::gpu::config::mig ( 155 | Variant[Undef, Hash] $mig_profile, 156 | String $mig_manager_version = '0.5.5', 157 | ) { 158 | $arch = $::facts['os']['architecture'] 159 | package { 'nvidia-mig-manager': 160 | ensure => 'latest', 161 | provider => 'rpm', 162 | name => 'nvidia-mig-manager', 163 | source => "https://github.com/NVIDIA/mig-parted/releases/download/v${$mig_manager_version}/nvidia-mig-manager-${mig_manager_version}-1.${arch}.rpm", 164 | } 165 | 166 | service { 'nvidia-mig-manager': 167 | ensure => stopped, 168 | enable => false, 169 | require => Package['nvidia-mig-manager'], 170 | } 171 | 172 | file { '/etc/nvidia-mig-manager/puppet-config.yaml': 173 | require => Package['nvidia-mig-manager'], 174 | content => @("EOT") 175 | version: v1 176 | mig-configs: 177 | default: 178 | - devices: all 179 | mig-enabled: true 180 | mig-devices: ${to_json($mig_profile)} 181 | |EOT 182 | } 183 | 184 | file_line { 'nvidia-persistenced.service': 185 | ensure => present, 186 | path => '/etc/nvidia-mig-manager/hooks.sh', 187 | after => 'driver_services=\(', 188 | line => ' nvidia-persistenced.service', 189 | require => Package['nvidia-mig-manager'], 190 | } 191 | 192 | file { '/etc/nvidia-mig-manager/puppet-hooks.yaml': 193 | require => Package['nvidia-mig-manager'], 194 | content => @(EOT) 195 | version: v1 196 | hooks: 197 | pre-apply-mode: 198 | - workdir: "/etc/nvidia-mig-manager" 199 | command: "/bin/bash" 200 | args: ["-x", "-c", "source hooks.sh; stop_driver_services"] 201 | - workdir: "/etc/nvidia-mig-manager" 202 | command: "/bin/sh" 203 | args: ["-c", "systemctl -q is-active slurmd && systemctl stop slurmd || true"] 204 | |EOT 205 | } 206 | 207 | if $mig_profile and ! 
$mig_profile.empty {
208 |     $mig_parted_config_name = 'default'
209 |     $mig_parted_config_file = '/etc/nvidia-mig-manager/puppet-config.yaml'
210 |   } else {
211 |     $mig_parted_config_name = 'all-disabled'
212 |     $mig_parted_config_file = '/etc/nvidia-mig-manager/config.yaml'
213 |   }
214 | 
215 |   exec { 'nvidia-mig-parted apply':
216 |     unless      => 'nvidia-mig-parted assert',
217 |     require     => [
218 |       Package['nvidia-mig-manager'],
219 |       File['/etc/nvidia-mig-manager/puppet-config.yaml'],
220 |       File['/etc/nvidia-mig-manager/puppet-hooks.yaml'],
221 |     ],
222 |     environment => [
223 |       "MIG_PARTED_CONFIG_FILE=${mig_parted_config_file}",
224 |       'MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/puppet-hooks.yaml',
225 |       "MIG_PARTED_SELECTED_CONFIG=${mig_parted_config_name}",
226 |       'MIG_PARTED_SKIP_RESET=false',
227 |     ],
228 |     path        => ['/usr/bin'],
229 |     notify      => [
230 |       Service['nvidia-persistenced'],
231 |       Service['nvidia-dcgm'],
232 |     ],
233 |   }
234 |   Kmod::Load <| tag == profile::gpu::install |> -> Exec['nvidia-mig-parted apply']
235 | }
236 | 
237 | class profile::gpu::install::vgpu (
238 |   Enum['rpm', 'bin', 'none'] $installer = 'none',
239 |   Array[String] $grid_vgpu_types = [],
240 | ) {
241 |   if $installer == 'rpm' {
242 |     include profile::gpu::install::vgpu::rpm
243 |   } elsif $installer == 'bin' {
244 |     # install from the binary installer
245 |     include profile::gpu::install::vgpu::bin
246 |   }
247 | }
248 | 
249 | class profile::gpu::install::vgpu::rpm (
250 |   String $source,
251 |   Array[String] $packages,
252 | ) {
253 |   $source_pkg_name = (split($source, '[/]')[-1]).regsubst(/\.rpm/, '', 'G')
254 |   package { 'vgpu-repo':
255 |     ensure   => 'installed',
256 |     provider => 'rpm',
257 |     name     => $source_pkg_name,
258 |     source   => $source,
259 |   }
260 | 
261 |   package { $packages:
262 |     ensure  => 'installed',
263 |     require => [
264 |       Package['kernel-devel'],
265 |       Yumrepo['epel'],
266 |       Package['vgpu-repo'],
267 |     ],
268 |   }
269 | 
270 |   # The device files /dev/nvidia* are normally created by nvidia-modprobe.
271 |   # If the permissions of nvidia-modprobe exclude setuid, some device files
272 |   # will be missing.
273 |   # https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#runfile-verifications
274 |   -> file { '/usr/bin/nvidia-modprobe':
275 |     ensure => file,
276 |     mode   => '4755',
277 |     owner  => 'root',
278 |     group  => 'root',
279 |   }
280 | }
281 | 
282 | class profile::gpu::install::vgpu::bin (
283 |   String $source,
284 |   Optional[String] $gridd_content = undef,
285 |   Optional[String] $gridd_source = undef,
286 | ) {
287 |   exec { 'vgpu-driver-install-bin':
288 |     command => "curl -L ${source} -o /tmp/NVIDIA-driver.run && sh /tmp/NVIDIA-driver.run --ui=none --no-questions --disable-nouveau && rm /tmp/NVIDIA-driver.run", # lint:ignore:140chars
289 |     path    => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'],
290 |     creates => [
291 |       '/usr/bin/nvidia-smi',
292 |       '/usr/bin/nvidia-modprobe',
293 |     ],
294 |     timeout => 300,
295 |     require => [
296 |       Package['kernel-devel'],
297 |       Package['dkms'],
298 |     ],
299 |   }
300 | 
301 |   if $gridd_content {
302 |     $gridd_definition = { 'content' => $gridd_content }
303 |   } elsif $gridd_source {
304 |     $gridd_definition = { 'source' => $gridd_source }
305 |   } else {
306 |     $gridd_definition = {}
307 |   }
308 | 
309 |   file { '/etc/nvidia/gridd.conf':
310 |     ensure => file,
311 |     mode   => '0644',
312 |     owner  => 'root',
313 |     group  => 'root',
314 |     *      => $gridd_definition,
315 |   }
316 | }
317 | 
318 | class profile::gpu::services {
319 |   if !
profile::is_grid_vgpu() { 320 | $gpu_services = ['nvidia-persistenced', 'nvidia-dcgm'] 321 | } else { 322 | $gpu_services = ['nvidia-persistenced', 'nvidia-gridd'] 323 | } 324 | service { $gpu_services: 325 | ensure => 'running', 326 | enable => true, 327 | } 328 | 329 | exec { 'stop_nvidia_services': 330 | command => sprintf('systemctl stop %s', $gpu_services.reverse.join(' ')), 331 | onlyif => sprintf('systemctl is-active %s', $gpu_services.reverse.join(' ')), 332 | refreshonly => true, 333 | path => ['/usr/bin'], 334 | } 335 | 336 | Package<| tag == profile::gpu::install |> -> Service[$gpu_services] 337 | Exec<| tag == profile::gpu::install::vgpu::bin |> -> Service[$gpu_services] 338 | } 339 | --------------------------------------------------------------------------------