├── README.md ├── make-sensitiviteit.sh ├── get-vacancies.sh ├── isolate-json.sh └── get-listing.sh /README.md: -------------------------------------------------------------------------------- 1 | # banen-rijksoverheid 2 | tools to retrieve all dutch government jobs & do statistics on them 3 | -------------------------------------------------------------------------------- /make-sensitiviteit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | (echo vacaturenummer,sensitiviteit 4 | for a in vacancies-2022-07-09/* 5 | do 6 | (grep "'Vacaturenummer'" $a | cut -f4 -d\' ; grep -ic -E 'sensitiviteit|sensitief' $a ) | tr "\n" "," | sed 's/,$//' 7 | echo 8 | done) > sensitiviteit.csv 9 | 10 | -------------------------------------------------------------------------------- /get-vacancies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | dirname=vacancies-$(date "+%Y-%m-%d") 3 | mkdir -p $dirname 4 | cd $dirname 5 | 6 | for a in $(cat ../listing/*.html | tr " " "\n" | grep href=\"/vacatures | cut -f2 -d\" | sort -u) 7 | do 8 | echo wget "https://werkenvoornederland.nl/$a" 9 | done | parallel -j 128 10 | -------------------------------------------------------------------------------- /isolate-json.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FNAME=$1 4 | STARTLINE=$[$(grep -n '" | head -1 | cut -f1 -d:) -1] 7 | #echo $LEN 8 | tail -n +$STARTLINE $FNAME | head -$LEN | jq 9 | 10 | -------------------------------------------------------------------------------- /get-listing.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | mkdir -p listing 3 | for a in $(seq 1 500) 4 | do 5 | curl "https://www.werkenvoornederland.nl/vacatures?_hn%3Atype=resource&_hn%3Aref=r35_r1_r4&_hn%3Arid=append-next-page-results&pagina=$a" -H 'authority: www.werkenvoornederland.nl' -H 'accept: */*' -H 'accept-language: en-US,en;q=0.9' -H 'cache-control: no-cache' -H 'cookie: _ga=GA1.2.1453073791.1644833991; XSRF-TOKEN=a52dadc9-3208-44b0-bbe6-9e7aacc5ba44; BACKEND=site1; _pk_ses.246.dffd=*; _gid=GA1.2.1320661302.1656797976; _pk_id.246.dffd=06d2e175a79c393e.1644833991.6.1656798082.1656797976.; JSESSIONID=7DEFC2CBF3711A9C39C96A097EBD75DF' -H 'pragma: no-cache' -H 'referer: https://www.werkenvoornederland.nl/vacatures?pagina=2' -H 'sec-ch-ua: ".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"' -H 'sec-ch-ua-mobile: ?0' -H 'sec-ch-ua-platform: "Linux"' -H 'sec-fetch-dest: empty' -H 'sec-fetch-mode: cors' -H 'sec-fetch-site: same-origin' -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36' --compressed \ 6 | > listing/$a.html 7 | done 8 | --------------------------------------------------------------------------------