├── .gitignore ├── README.md ├── package.json └── src ├── App.js └── modules ├── Config.js ├── Mail.js └── Scraper.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .DS_Store 3 | package-lock.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## OLX offers scraper 2 | 3 | 1. Aby uruchomić projekt najpierw zainstaluj zależności: `npm i` 4 | 2. Następnie uruchom automat `node ./src/App.js` 5 | 6 | Adres strony z której pobierane są oferty znajdziesz w `src/modules/Config.js` 7 | 8 | Informacje na temat kodu znajdziesz na moim blogu: 9 | https://rockandcode.pl/jak-programista-szuka-mieszkania/ -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scraping", 3 | "version": "1.0.0", 4 | "description": "My webpack project", 5 | "main": "src/app.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "build": "webpack", 9 | "start": "webpack-dev-server" 10 | }, 11 | "author": "Wojciech Dasiukiewicz", 12 | "license": "ISC", 13 | "dependencies": { 14 | "lodash": "^4.17.15", 15 | "nodemailer": "^6.3.0", 16 | "puppeteer": "^1.19.0" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/App.js: -------------------------------------------------------------------------------- 1 | const Scraper = require('./modules/Scraper'); 2 | const Mail = require('./modules/Mail'); 3 | const Config = require('./modules/Config'); 4 | const _ = require('lodash'); 5 | let offers = []; 6 | 7 | function getOffers(){ 8 | Scraper().then(newOffers => { 9 | const beforeScraperOffersLength = offers.length; 10 | //Filtrujemy unikalne obiekty na podstawie adresu url z olx.pl 11 | offers = _.uniqBy([...offers, ...newOffers], 'url'); 12 | //Jeśli pojawiły się nowe oferty 13 | if(offers.length > beforeScraperOffersLength){ 14 | //Wysyłamy maila 15 | Mail(offers); 16 | } 17 | }); 18 | } 19 | 20 | setInterval(getOffers, Config.refreshTime); -------------------------------------------------------------------------------- /src/modules/Config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | refreshTime: 600000, 3 | pageURL: 'https://www.olx.pl/nieruchomosci/mieszkania/krakow/', 4 | mailConnection: { 5 | host: "smtp.mailtrap.io", 6 | port: 2525, 7 | auth: { 8 | user: "XX", 9 | pass: "XX" 10 | } 11 | }, 12 | mailMessage: { 13 | from: 'wojtek@rockandcode.pl', 14 | to: 'wojtek@rockandcode.pl', 15 | subject: 'Nowe oferty z olx' 16 | } 17 | }; -------------------------------------------------------------------------------- /src/modules/Mail.js: -------------------------------------------------------------------------------- 1 | const Config = require('./Config'); 2 | 3 | module.exports = newOffers => { 4 | const nodemailer = require('nodemailer'); 5 | const transporter = nodemailer.createTransport(Config.mailConnection); 6 | //Generuj treść HTML 7 | const html = generateMailHtml(newOffers); 8 | //Wysyłamy maila 9 | transporter.sendMail({...Config.mailMessage, html}); 10 | } 11 | 12 | //Funkcja generuje listę HTML 13 | function generateMailHtml(newOffers){ 14 | return ''; 15 | } 16 | 17 | //Funkcja generuje element listy 18 | function renderListElement(offer) { 19 | return '
  • ' + offer.title + ' - ' + offer.location + ' za ' + offer.price + '
  • '; 20 | } -------------------------------------------------------------------------------- /src/modules/Scraper.js: -------------------------------------------------------------------------------- 1 | const puppeteer = require('puppeteer'); 2 | const Config = require('./Config'); 3 | 4 | module.exports = async () => { 5 | const browser = await puppeteer.launch(); 6 | const page = await browser.newPage(); 7 | await page.goto(Config.pageURL); 8 | const fetchedOffers = await page.evaluate(() => { 9 | const offers = Array.from(document.querySelectorAll(".offer-wrapper")); 10 | const offersTitles = offers.map( offer => { 11 | return { 12 | title: offer.querySelector('strong').innerText, 13 | url: offer.querySelector('.link').getAttribute('href'), 14 | price: offer.querySelector('.price strong').innerText, 15 | location: offer.querySelector('.breadcrumb').innerText 16 | }; 17 | }); 18 | return offersTitles; 19 | }); 20 | await browser.close(); 21 | return fetchedOffers; 22 | }; --------------------------------------------------------------------------------