├── .gitignore ├── Procfile ├── .dockerignore ├── renovate.json ├── .env-example ├── Dockerfile ├── package.json ├── README.md └── index.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .env 3 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | worker: npm run start 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .gitignore 3 | renovate.json 4 | README.md 5 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /.env-example: -------------------------------------------------------------------------------- 1 | WILLHABEN_URL=https://www.willhaben.at/iad/immobilien/mietwohnungen/steiermark/graz/ 2 | XPATH_QUERY=//x:article/x:section[3]/x:div[1]/x:a/@href 3 | INTERVAL=15000 4 | TELEGRAM_TOKEN= 5 | TELEGRAM_CHAT_ID= 6 | HIGHLIGHT_WORDS= 7 | IGNORE_WORDS= 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:14-alpine 2 | 3 | COPY * /app/ 4 | WORKDIR /app 5 | 6 | RUN apk update && apk upgrade && npm ci 7 | 8 | RUN addgroup -S appgroup && adduser -S appuser -G appgroup 9 | 10 | USER appuser 11 | 12 | ENTRYPOINT ["/usr/local/bin/npm"] 13 | CMD ["run","start"] 14 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "name": "watch-willhaben", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "node index.js", 8 | "test": "echo \"Error: no test specified\" && exit 1" 9 | }, 10 | "author": "Peter Grassberger ", 11 | "license": "MIT", 12 | "dependencies": { 13 | "dotenv": "^8.2.0", 14 | "node-fetch": "^2.6.1", 15 | "parse5": "^6.0.0", 16 | "xmldom": "^0.3.0", 17 | "xmlserializer": "^0.6.1", 18 | "xpath": "0.0.30" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | watch-willhaben 2 | =============== 3 | 4 | is a node.js app that watches a [willhaben](https://www.willhaben.at/) 5 | search and reports new results to a telegram channel. It will only 6 | watch the first page of results. 7 | 8 | In the first few intervals it may report some URLs that are not really 9 | new, but that should stop after a while. If all search results are on the 10 | same results page, false reports won't happen. 11 | 12 | run 13 | --- 14 | 15 | Requires [npm and node.js](https://nodejs.org). 16 | 17 | 1. Install dependencies: `npm ci`. 18 | 2. Copy `.env-example` to `.env` and fill in the variables: 19 | - Create a telegram bot by talking to the [botfather](https://t.me/botfather), 20 | use the token for `TELEGRAM_TOKEN`. 21 | - Talk to your new bot once. 22 | - Talk to the [userinfobot](https://telegram.me/userinfobot) to get your user id for `TELEGRAM_CHAT_ID`. 23 | - Fill `WILLHABEN_URL` with your search page. If you can, increase results per page. 24 | - Fill `INTERVAL` with the interval in milliseconds (1000 is one second) at which it should look for updates. 25 | 3. Run `npm run start` to start. 26 | 27 | 28 | Docker 29 | --- 30 | 31 | 1. Build docker image via `docker build -t watch-willhaben-test .` 32 | 2. 
/*
 * README.md, "Docker" section (continued). Preserved here because this part
 * of the README shares its source lines with index.js:
 *
 *   2. Run docker image mounting your `.env` file to get correct results
 *      `docker run --rm -ti -v "$(pwd)"/.env:/app/.env watch-willhaben-test`
 *   3. For running on server detached and with restart and some name
 *      `docker run -d --restart unless-stopped -v "$(pwd)"/.env:/app/.env --name "willhaben-1" watch-willhaben-test`
 */

// /index.js
//
// Polls a willhaben search page, extracts result links with an XPath query
// and reports links it has not seen before to a Telegram chat.

const fetch = require('node-fetch');
const parse5 = require('parse5');
const xmlser = require('xmlserializer');
const xpath = require('xpath');
const Dom = require('xmldom').DOMParser;
require('dotenv').config();

// Polling interval used when INTERVAL is missing or not a positive integer
// (same value as in .env-example).
const DEFAULT_INTERVAL_MS = 15000;

// Number of successful fetches that only fill the cache without sending
// messages, so that the results already online at start-up are not reported.
const WARMUP_FETCHES = 10;

/**
 * Turns a comma separated environment value into a list of words.
 *
 * Unset variables and empty entries produce no words. This matters because
 * `''.split(',')` is `['']` and `someString.includes('')` is always true, so
 * an empty IGNORE_WORDS would otherwise cause every link to be ignored.
 *
 * @param {string|undefined} raw - raw value, e.g. "balkon,garten"
 * @returns {string[]} trimmed, non-empty words
 */
function parseWordList(raw) {
  return (raw || '')
    .split(',')
    .map((word) => word.trim())
    .filter((word) => word.length > 0);
}

/**
 * Parses the polling interval in milliseconds.
 *
 * @param {string|undefined} raw - raw INTERVAL value
 * @returns {number} a positive number of milliseconds; DEFAULT_INTERVAL_MS
 *   when the value is missing or invalid, so the loop never runs unthrottled
 */
function parseInterval(raw) {
  const ms = Number.parseInt(raw, 10);
  return Number.isFinite(ms) && ms > 0 ? ms : DEFAULT_INTERVAL_MS;
}

const WILLHABEN_URL = process.env.WILLHABEN_URL;
const INTERVAL = parseInterval(process.env.INTERVAL);
const TELEGRAM_TOKEN = process.env.TELEGRAM_TOKEN; // bot from https://t.me/botfather
const TELEGRAM_CHAT_ID = process.env.TELEGRAM_CHAT_ID; // id from https://telegram.me/userinfobot
const XPATH_QUERY = process.env.XPATH_QUERY;
const HIGHLIGHT_WORDS = parseWordList(process.env.HIGHLIGHT_WORDS);
const IGNORE_WORDS = parseWordList(process.env.IGNORE_WORDS);

const telegramMessagePath = `https://api.telegram.org/bot${TELEGRAM_TOKEN}/sendMessage?chat_id=${TELEGRAM_CHAT_ID}&text=`;

/**
 * Downloads a search page and extracts the result links from it.
 *
 * @param {string} url - search page to fetch; also the base for relative hrefs
 * @param {string} query - XPath selecting href attribute nodes, using the
 *   prefix `x` for the XHTML namespace
 * @param {string[]} highlightWords - words to look for in each href
 * @param {string[]} ignoreWords - an href containing any of these is flagged
 * @returns {Promise<Array<{link: string, ignore: boolean, foundHighlightWords: string[]}>>}
 *   one entry per selected node; `link` is origin + pathname (no query/hash)
 * @throws {Error} when the page cannot be fetched or answers with a non-2xx status
 */
async function fetchLinks(url, query, highlightWords, ignoreWords) {
  const response = await fetch(url);
  if (!response.ok) {
    // Treat an error page as a failed fetch instead of parsing it as "no
    // results", so it does not count towards the warm-up.
    throw new Error(`fetching ${url} failed with HTTP status ${response.status}`);
  }
  const html = await response.text();

  // parse5 -> XHTML string -> xmldom document, so that xpath can query it.
  const document = parse5.parse(html);
  const xhtml = xmlser.serializeToString(document);
  const doc = new Dom().parseFromString(xhtml, 'text/html');
  const select = xpath.useNamespaces({ x: 'http://www.w3.org/1999/xhtml' });
  const nodes = select(query, doc);

  return nodes.map((node) => {
    const href = node.value;
    // Resolving against the page URL handles root-relative, relative and
    // absolute hrefs alike.
    const resolved = new URL(href, url);
    return {
      link: resolved.origin + resolved.pathname,
      ignore: ignoreWords.some((word) => href.includes(word)),
      foundHighlightWords: highlightWords.filter((word) => href.includes(word)),
    };
  });
}

/**
 * Sends a text message to the configured Telegram chat.
 *
 * Failures are logged and never thrown, so one failed notification cannot
 * abort the processing of the remaining links.
 *
 * @param {string} text - message text
 * @returns {Promise<void>}
 */
async function sendTelegramMessage(text) {
  try {
    const response = await fetch(telegramMessagePath + encodeURIComponent(text));
    if (!response.ok) {
      console.error('telegram message failed with HTTP status', response.status);
    }
  } catch (e) {
    // Only log the error kind: the error message may contain the request URL
    // and with it the bot token.
    console.error('telegram message failed', e.code || e.name);
  }
}

/**
 * Starts the endless polling loop: fetch the links, report the unseen ones,
 * wait INTERVAL milliseconds, repeat. Errors of a single round are logged and
 * the loop keeps running.
 */
async function start() {
  // Set instead of array: constant-time "seen before?" checks.
  const seenLinks = new Set();
  let countFetches = 0;

  const loop = async () => {
    try {
      console.log('check for new links', countFetches);
      const results = await fetchLinks(WILLHABEN_URL, XPATH_QUERY, HIGHLIGHT_WORDS, IGNORE_WORDS);
      // Don't message for the first WARMUP_FETCHES fetches.
      const isWarmUp = countFetches < WARMUP_FETCHES;

      for (const result of results) {
        if (seenLinks.has(result.link)) {
          continue;
        }
        seenLinks.add(result.link);
        if (isWarmUp || result.ignore) {
          continue;
        }
        // Hashtags must not contain hyphens; remove all of them (regex
        // instead of replaceAll to stay compatible with Node 14).
        const hashtags = result.foundHighlightWords
          .map((word) => `#${word}`.replace(/-/g, ''))
          .join(' ');
        const text = hashtags ? `${result.link} ${hashtags}` : result.link;
        await sendTelegramMessage(text);
        console.log('new', result.link, hashtags);
      }
      countFetches++;
    } catch (e) {
      console.error(e);
    }
    setTimeout(loop, INTERVAL);
  };

  loop();
}

start();