├── .gitignore ├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── olegshan │ │ ├── JobParserApplication.java │ │ ├── controllers │ │ ├── ErrorHandler.java │ │ └── ParseController.java │ │ ├── entity │ │ └── Job.java │ │ ├── exception │ │ └── ParserException.java │ │ ├── notifier │ │ ├── Notifier.java │ │ └── impl │ │ │ └── NotifierImpl.java │ │ ├── parser │ │ ├── Parser.java │ │ ├── Performer.java │ │ ├── impl │ │ │ └── ParserImpl.java │ │ └── siteparsers │ │ │ ├── DouUaJobParser.java │ │ │ ├── HeadHunterUaJobParser.java │ │ │ ├── JobParser.java │ │ │ ├── JobsUaJobParser.java │ │ │ ├── RabotaUaJobParser.java │ │ │ └── WorkUaJobParser.java │ │ ├── repository │ │ └── JobRepository.java │ │ ├── service │ │ ├── JobService.java │ │ ├── StatisticsService.java │ │ └── impl │ │ │ ├── JobServiceImpl.java │ │ │ └── StatisticsServiceImpl.java │ │ ├── sites │ │ ├── DouUa.java │ │ ├── HeadHunterUa.java │ │ ├── JobSite.java │ │ ├── RabotaUa.java │ │ └── WorkUa.java │ │ ├── social │ │ └── JTwitter.java │ │ ├── statistics │ │ └── Statistics.java │ │ └── util │ │ ├── PageBox.java │ │ └── TimeUtil.java └── resources │ ├── application-dev.properties │ ├── application-prod.properties │ ├── application.properties │ ├── static │ ├── favicon.ico │ ├── style.css │ └── twitter.png │ └── templates │ ├── Error.html │ ├── about.html │ ├── exception.html │ ├── index.html │ └── layout.html └── test └── java └── com └── olegshan ├── AbstractTest.java ├── controllers ├── ErrorHandlerTest.java └── ParseControllerTest.java └── service └── JobServiceTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .classpath 6 | .factorypath 7 | .project 8 | .settings 9 | .springBeans 10 | 11 | ### IntelliJ IDEA ### 12 | .idea 13 | *.iws 14 | *.iml 15 | *.ipr 16 | 17 | ### NetBeans ### 18 | nbproject/private/ 19 | build/ 20 | nbbuild/ 21 | dist/ 22 | nbdist/ 23 | 
.nb-gradle/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jParser 2 | 3 | This app parses Java developer vacancies in Kyiv, Ukraine, on four major Ukrainian job sites (Rabota.ua, Work.ua, Jobs.ua and hh.ua) and on Dou.ua, the main Ukrainian site for developers. 4 | 5 | Thanks to @Antrakos for help with implementing the Strategy pattern and for general improvements. 6 | 7 | To run it locally, use the following VM option: `-Dspring.profiles.active="dev"` and set the Maven profile in your IDE to `dev`. 8 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.olegshan 7 | jobparser 8 | 0.0.1-SNAPSHOT 9 | war 10 | 11 | jParser 12 | Java jobs aggregator 13 | 14 | 15 | org.springframework.boot 16 | spring-boot-starter-parent 17 | 1.5.7.RELEASE 18 | 19 | 20 | 21 | 22 | UTF-8 23 | UTF-8 24 | 1.8 25 | 26 | 27 | 28 | 29 | 30 | prod 31 | 32 | true 33 | 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-starter-tomcat 38 | provided 39 | 40 | 41 | 42 | org.postgresql 43 | postgresql 44 | 9.4.1212.jre7 45 | 46 | 47 | 48 | 49 | 50 | 51 | dev 52 | 53 | 54 | com.h2database 55 | h2 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | org.springframework.boot 65 | spring-boot-starter-web 66 | 67 | 68 | 69 | org.springframework.boot 70 | spring-boot-starter-thymeleaf 71 | 72 | 73 | 74 | org.springframework.boot 75 | spring-boot-starter-data-jpa 76 | 77 | 78 | 79 | org.springframework.boot 80 | spring-boot-starter-social-twitter 81 | 82 | 83 | 84 | org.springframework.boot 85 | spring-boot-starter-mail 86 | 87 | 88 | 89 | org.springframework.boot 90 | spring-boot-starter-test 91 | test 92 | 93 | 94 | 95 | org.projectlombok 96 | lombok 97 | 1.16.10 98 | 99 | 100 | 101 | org.jsoup 102 | jsoup 103 | 1.9.2 104 | 105 | 106 | 
107 | io.prometheus 108 | simpleclient 109 | 0.5.0 110 | 111 | 112 | 113 | io.prometheus 114 | simpleclient_common 115 | 0.5.0 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | org.springframework.boot 124 | spring-boot-maven-plugin 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/JobParserApplication.java: -------------------------------------------------------------------------------- 1 | package com.olegshan; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | import org.springframework.boot.builder.SpringApplicationBuilder; 6 | import org.springframework.boot.web.support.SpringBootServletInitializer; 7 | import org.springframework.scheduling.annotation.EnableScheduling; 8 | 9 | @EnableScheduling 10 | @SpringBootApplication 11 | public class JobParserApplication extends SpringBootServletInitializer { 12 | 13 | public static void main(String[] args) { 14 | SpringApplication.run(JobParserApplication.class, args); 15 | } 16 | 17 | @Override 18 | protected SpringApplicationBuilder configure(SpringApplicationBuilder builder) { 19 | return builder.sources(JobParserApplication.class); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/controllers/ErrorHandler.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.controllers; 2 | 3 | import org.springframework.ui.Model; 4 | import org.springframework.web.bind.annotation.ControllerAdvice; 5 | import org.springframework.web.bind.annotation.ExceptionHandler; 6 | 7 | @ControllerAdvice 8 | public class ErrorHandler { 9 | 10 | @ExceptionHandler(Exception.class) 11 | public String exception(Exception e, Model model) { 12 | model.addAttribute("errorMessage", e.getMessage()); 13 | return "exception"; 14 | } 15 | } 16 | 
-------------------------------------------------------------------------------- /src/main/java/com/olegshan/controllers/ParseController.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.controllers; 2 | 3 | import com.olegshan.entity.Job; 4 | import com.olegshan.service.JobService; 5 | import com.olegshan.util.PageBox; 6 | import io.prometheus.client.CollectorRegistry; 7 | import io.prometheus.client.exporter.common.TextFormat; 8 | import org.springframework.beans.factory.annotation.Autowired; 9 | import org.springframework.data.domain.Page; 10 | import org.springframework.data.domain.PageRequest; 11 | import org.springframework.data.domain.Pageable; 12 | import org.springframework.data.domain.Sort; 13 | import org.springframework.stereotype.Controller; 14 | import org.springframework.web.bind.annotation.RequestMapping; 15 | import org.springframework.web.bind.annotation.RequestMethod; 16 | import org.springframework.web.bind.annotation.RequestParam; 17 | import org.springframework.web.servlet.ModelAndView; 18 | 19 | import java.io.IOException; 20 | import java.io.Writer; 21 | 22 | @Controller 23 | public class ParseController { 24 | 25 | private static final int PAGE_SIZE = 40; 26 | private JobService jobService; 27 | 28 | @Autowired 29 | public ParseController(JobService jobService) { 30 | this.jobService = jobService; 31 | } 32 | 33 | @RequestMapping(value = "/", method = RequestMethod.GET) 34 | public ModelAndView showJobs(@RequestParam(value = "page", required = false) Integer page) { 35 | 36 | ModelAndView modelAndView = new ModelAndView("index"); 37 | int currentPageNumber = (page == null || page < 1) ? 
0 : page - 1; 38 | 39 | Pageable request = new PageRequest(currentPageNumber, PAGE_SIZE, Sort.Direction.DESC, "date"); 40 | Page jobs = jobService.getJobs(request); 41 | PageBox pageBox = new PageBox(jobs.getTotalPages(), jobs.getNumber()); 42 | 43 | modelAndView.addObject("jobs", jobs); 44 | modelAndView.addObject("pageBox", pageBox.getPageBox()); 45 | 46 | return modelAndView; 47 | } 48 | 49 | @RequestMapping("/about") 50 | public String about() { 51 | return "about"; 52 | } 53 | 54 | @RequestMapping(path = "/metrics") 55 | public void metrics(Writer responseWriter) throws IOException { 56 | TextFormat.write004(responseWriter, CollectorRegistry.defaultRegistry.metricFamilySamples()); 57 | responseWriter.close(); 58 | } 59 | } -------------------------------------------------------------------------------- /src/main/java/com/olegshan/entity/Job.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.entity; 2 | 3 | import lombok.Data; 4 | 5 | import javax.persistence.Column; 6 | import javax.persistence.Entity; 7 | import javax.persistence.Id; 8 | import java.time.LocalDateTime; 9 | 10 | import static java.time.format.DateTimeFormatter.ofPattern; 11 | 12 | @Entity 13 | @Data 14 | public class Job { 15 | 16 | @Id 17 | private String url; 18 | private String title; 19 | // Max value for PostgreSQL 20 | @Column(length = 10485760) 21 | private String description; 22 | private String company; 23 | private String source; 24 | private LocalDateTime date; 25 | private String dateToDisplay; 26 | 27 | public Job() { 28 | } 29 | 30 | public Job(String title, String description, String company, String source, String url, LocalDateTime date) { 31 | this.title = title; 32 | this.description = description; 33 | this.company = company; 34 | this.source = source; 35 | this.url = url; 36 | this.date = date; 37 | } 38 | 39 | public String getDateToDisplay() { 40 | return date.format(ofPattern("d MMMM")); 41 | } 42 | } 43 | 
/**
 * Checked exception raised when a job site cannot be fetched or an expected piece of
 * data cannot be extracted from it.
 */
public class ParserException extends Exception {

    /**
     * @param message human-readable description of what failed
     */
    public ParserException(String message) {
        super(message);
    }

    /**
     * Variant that keeps the underlying failure so its stack trace is not lost.
     *
     * @param message human-readable description of what failed
     * @param cause   the exception that triggered this one; may be {@code null}
     */
    public ParserException(String message, Throwable cause) {
        super(message, cause);
    }
}
notified about following issue: " + issue + "\n"); 33 | } 34 | 35 | private static final Logger log = LoggerFactory.getLogger(NotifierImpl.class); 36 | } -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/Parser.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser; 2 | 3 | import com.olegshan.sites.JobSite; 4 | 5 | public interface Parser { 6 | 7 | void parse(JobSite jobSite); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/Performer.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser; 2 | 3 | import com.olegshan.sites.JobSite; 4 | import io.prometheus.client.Gauge; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.scheduling.annotation.Scheduled; 7 | import org.springframework.stereotype.Component; 8 | 9 | import java.util.List; 10 | 11 | import static com.olegshan.util.TimeUtil.LOCAL_TIME_ZONE; 12 | 13 | @Component 14 | public class Performer { 15 | 16 | private List sites; 17 | private Parser parser; 18 | private boolean isParsingRunning; 19 | 20 | private static final Gauge lastRun = Gauge.build() 21 | .name("last_run") 22 | .help("Last run.") 23 | .register(); 24 | 25 | @Autowired 26 | public Performer(List sites, Parser parser) { 27 | this.sites = sites; 28 | this.parser = parser; 29 | } 30 | 31 | @Scheduled(cron = "0 1 7-23 * * *", zone = LOCAL_TIME_ZONE) 32 | public void perform() { 33 | if (isParsingRunning) 34 | return; 35 | isParsingRunning = true; 36 | for (JobSite jobSite : sites) { 37 | parser.parse(jobSite); 38 | } 39 | isParsingRunning = false; 40 | lastRun.setToCurrentTime(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/impl/ParserImpl.java: 
-------------------------------------------------------------------------------- 1 | package com.olegshan.parser.impl; 2 | 3 | import com.olegshan.entity.Job; 4 | import com.olegshan.notifier.Notifier; 5 | import com.olegshan.parser.Parser; 6 | import com.olegshan.parser.siteparsers.JobParser; 7 | import com.olegshan.service.JobService; 8 | import com.olegshan.service.StatisticsService; 9 | import com.olegshan.sites.JobSite; 10 | import org.jsoup.nodes.Document; 11 | import org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.springframework.beans.factory.annotation.Autowired; 16 | import org.springframework.stereotype.Component; 17 | 18 | import java.time.LocalDateTime; 19 | 20 | import static java.time.temporal.ChronoUnit.MINUTES; 21 | 22 | @Component 23 | public class ParserImpl implements Parser { 24 | 25 | private JobService jobService; 26 | private Notifier notifier; 27 | private StatisticsService statisticsService; 28 | 29 | @Autowired 30 | public ParserImpl(JobService jobService, Notifier notifier, StatisticsService statisticsService) { 31 | this.jobService = jobService; 32 | this.notifier = notifier; 33 | this.statisticsService = statisticsService; 34 | } 35 | 36 | public void parse(JobSite jobSite) { 37 | 38 | JobParser jobParser = jobSite.getParser(); 39 | String url = ""; 40 | 41 | try { 42 | Document doc = jobParser.getDoc(jobSite.url()); 43 | 44 | for (Element job : jobParser.getJobBlocks(doc)) { 45 | 46 | Elements titleBlock = jobParser.getTitleBlock(job); 47 | url = jobParser.getUrl(titleBlock); 48 | LocalDateTime date = jobParser.getDate(job, url).truncatedTo(MINUTES); 49 | if (isJobTooOld(date)) continue; 50 | 51 | String title = jobParser.getTitle(titleBlock); 52 | String description = jobParser.getDescription(job, url); 53 | String company = jobParser.getCompany(job, url); 54 | 55 | Job parsedJob = new Job(title, description, company, jobSite.name(), 
url, date); 56 | jobService.save(parsedJob); 57 | } 58 | 59 | statisticsService.saveStatistics(jobSite.name()); 60 | } catch (Exception e) { 61 | log.error("Error while parsing", e); 62 | notifier.notifyAdmin("Error while parsing " + url + "\nError message is: " + e.getMessage()); 63 | } 64 | } 65 | 66 | private boolean isJobTooOld(LocalDateTime date) { 67 | return LocalDateTime.now().minusMonths(2).isAfter(date); 68 | } 69 | 70 | private static final Logger log = LoggerFactory.getLogger(ParserImpl.class); 71 | } -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/siteparsers/DouUaJobParser.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser.siteparsers; 2 | 3 | import com.olegshan.exception.ParserException; 4 | import com.olegshan.sites.JobSite; 5 | import com.olegshan.util.TimeUtil; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | 9 | import java.time.LocalDate; 10 | import java.time.LocalDateTime; 11 | 12 | import static java.lang.Integer.parseInt; 13 | 14 | public class DouUaJobParser extends JobParser { 15 | 16 | public DouUaJobParser(JobSite jobSite) { 17 | super(jobSite); 18 | } 19 | 20 | @Override 21 | public LocalDateTime getDate(Element job, String url) throws ParserException { 22 | 23 | Document dateDoc = getDoc(url); 24 | 25 | String dateLine = getElements(dateDoc, jobSite.date()).text(); 26 | check(dateLine, "date line", url); 27 | String[] dateParts = dateLine.split(jobSite.split()); 28 | TimeUtil.removeZero(dateParts); 29 | 30 | int year = parseInt(dateParts[2]); 31 | int month = TimeUtil.MONTHS.get(dateParts[1].toLowerCase()); 32 | int day = parseInt(dateParts[0]); 33 | 34 | return LocalDate.of(year, month, day).atTime(getTime()); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- 
/src/main/java/com/olegshan/parser/siteparsers/HeadHunterUaJobParser.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser.siteparsers; 2 | 3 | import com.olegshan.exception.ParserException; 4 | import com.olegshan.sites.JobSite; 5 | import com.olegshan.util.TimeUtil; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | 9 | import java.time.LocalDate; 10 | import java.time.LocalDateTime; 11 | 12 | import static java.lang.Integer.parseInt; 13 | 14 | public class HeadHunterUaJobParser extends JobParser { 15 | 16 | public HeadHunterUaJobParser(JobSite jobSite) { 17 | super(jobSite); 18 | } 19 | 20 | @Override 21 | protected LocalDateTime getDateByLine(String dateLine) { 22 | String[] dateParts = dateLine.split(jobSite.split()); 23 | TimeUtil.removeZero(dateParts); 24 | 25 | int day = parseInt(dateParts[0]); 26 | int month = TimeUtil.MONTHS.get(dateParts[1].toLowerCase()); 27 | int year = getYear(month); 28 | 29 | return LocalDate.of(year, month, day).atTime(getTime()); 30 | } 31 | 32 | @Override 33 | public String getCompany(Element job, String url) throws ParserException { 34 | Document innerJob = getDoc(url); 35 | return super.getCompany(innerJob, url); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/siteparsers/JobParser.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser.siteparsers; 2 | 3 | import com.olegshan.exception.ParserException; 4 | import com.olegshan.parser.Parser; 5 | import com.olegshan.sites.JobSite; 6 | import com.olegshan.util.TimeUtil; 7 | import org.jsoup.Jsoup; 8 | import org.jsoup.nodes.Document; 9 | import org.jsoup.nodes.Element; 10 | import org.jsoup.select.Elements; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.io.IOException; 15 | import java.time.LocalDate; 
16 | import java.time.LocalDateTime; 17 | import java.time.LocalTime; 18 | 19 | import static com.olegshan.util.TimeUtil.localTimeZone; 20 | import static java.lang.Integer.parseInt; 21 | 22 | public class JobParser { 23 | 24 | public static final String NBSP = "\u00a0"; 25 | 26 | JobSite jobSite; 27 | 28 | public JobParser(JobSite jobSite) { 29 | this.jobSite = jobSite; 30 | } 31 | 32 | public Document getDoc(String siteUrl) throws ParserException { 33 | try { 34 | return Jsoup.connect(siteUrl).userAgent("Mozilla").timeout(0).get(); 35 | } catch (IOException e) { 36 | throw new ParserException("Failed connecting to " + siteUrl + "\n" + e.getMessage()); 37 | } 38 | } 39 | 40 | public String getUrl(Elements titleBlock) { 41 | return jobSite.urlPrefix() + titleBlock.attr("href"); 42 | } 43 | 44 | public Elements getJobBlocks(Document doc) throws ParserException { 45 | Elements jobBlocks = getElements(doc, jobSite.jobBox()); 46 | check(jobBlocks, "job blocks"); 47 | return jobBlocks; 48 | } 49 | 50 | public Elements getTitleBlock(Element job) throws ParserException { 51 | Elements titleBlock = getElements(job, jobSite.titleBox()); 52 | check(titleBlock, "title blocks"); 53 | return titleBlock; 54 | } 55 | 56 | public String getTitle(Elements titleBlock) { 57 | return titleBlock.text(); 58 | } 59 | 60 | public String getDescription(Element job, String url) { 61 | return getElements(job, jobSite.description()).text(); 62 | } 63 | 64 | public String getCompany(Element job, String url) throws ParserException { 65 | String company = removeNbsp(getElements(job, jobSite.company()).text()); 66 | check(company, "company", url); 67 | return company; 68 | } 69 | 70 | public LocalDateTime getDate(Element job, String url) throws Exception { 71 | String dateLine = getElements(job, jobSite.date()).text(); 72 | check(dateLine, "date", url); 73 | return getDateByLine(dateLine); 74 | } 75 | 76 | protected LocalDateTime getDateByLine(String dateLine) { 77 | String[] dateParts = 
dateLine.split(jobSite.split()); 78 | TimeUtil.removeZero(dateParts); 79 | return LocalDate.of(parseInt(dateParts[2]), parseInt(dateParts[1]), parseInt(dateParts[0])).atTime(getTime()); 80 | } 81 | 82 | protected LocalTime getTime() { 83 | return LocalTime.now(localTimeZone()); 84 | } 85 | 86 | //in case we parse in January jobs of last December. Needed for jobs.ua and hh.ua 87 | int getYear(int month) { 88 | if (month > LocalDate.now(localTimeZone()).getMonthValue()) 89 | return LocalDate.now().getYear() - 1; 90 | return LocalDate.now(localTimeZone()).getYear(); 91 | } 92 | 93 | Elements getElements(Element element, JobSite.Holder holder) { 94 | return getElements(element, holder, false); 95 | } 96 | 97 | Elements getElements(Element element, JobSite.Holder holder, boolean starting) { 98 | if (starting) 99 | return element.getElementsByAttributeValueStarting(holder.key, holder.value); 100 | return element.getElementsByAttributeValue(holder.key, holder.value); 101 | } 102 | 103 | String removeNbsp(String text) { 104 | return text.replaceAll(NBSP, ""); 105 | } 106 | 107 | void check(Object o, String data) throws ParserException { 108 | check(o, data, null); 109 | } 110 | 111 | void check(Object o, String data, String url) throws ParserException { 112 | String jobUrl = url == null ? 
"" : url; 113 | if (o == null || o.toString().trim().length() == 0) { 114 | log.error("Error getting {} from {}, {}", data, jobSite.name(), jobUrl); 115 | throw new ParserException("Error getting " + data + " from " + jobSite.name() + "\n" + jobUrl); 116 | } 117 | } 118 | 119 | private static final Logger log = LoggerFactory.getLogger(Parser.class); 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/siteparsers/JobsUaJobParser.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser.siteparsers; 2 | 3 | import com.olegshan.exception.ParserException; 4 | import com.olegshan.sites.JobSite; 5 | import com.olegshan.sites.JobSite.Holder; 6 | import com.olegshan.util.TimeUtil; 7 | import org.jsoup.nodes.Document; 8 | import org.jsoup.nodes.Element; 9 | import org.jsoup.select.Elements; 10 | 11 | import java.time.LocalDate; 12 | import java.time.LocalDateTime; 13 | 14 | import static java.lang.Integer.parseInt; 15 | 16 | public class JobsUaJobParser extends JobParser { 17 | 18 | public JobsUaJobParser(JobSite jobSite) { 19 | super(jobSite); 20 | } 21 | 22 | @Override 23 | public Elements getJobBlocks(Document doc) throws ParserException { 24 | Elements jobBlocks = getElements(doc, jobSite.jobBox()); 25 | check(jobBlocks, "job blocks"); 26 | removeAd(jobBlocks); 27 | 28 | return jobBlocks; 29 | } 30 | 31 | private void removeAd(Elements jobBlocks) { 32 | 33 | // ad block on jobs.ua has the same tags as the job blocks, so it should be removed 34 | for (int i = 0; i < jobBlocks.size(); i++) { 35 | 36 | String jobBlock = getElements( 37 | jobBlocks.get(i), 38 | Holder.of("class", "b-city__title b-city__companies-title"), 39 | true 40 | ) 41 | .text(); 42 | 43 | if (jobBlock.contains("VIP компании в Украине:")) 44 | jobBlocks.remove(i); 45 | } 46 | } 47 | 48 | @Override 49 | public LocalDateTime getDate(Element job, String url) throws 
ParserException { 50 | Document dateDoc = getDoc(url); 51 | String dateLine = getElements(dateDoc, jobSite.date()).text(); 52 | 53 | check(dateLine, "date line", url); 54 | return getDateByLine(dateLine); 55 | } 56 | 57 | @Override 58 | protected LocalDateTime getDateByLine(String dateLine) { 59 | dateLine = dateLine.substring(dateLine.indexOf(NBSP) + 1, dateLine.lastIndexOf(NBSP)).trim(); 60 | String[] dateParts = dateLine.split(jobSite.split()); 61 | TimeUtil.removeZero(dateParts); 62 | 63 | int day = parseInt(dateParts[0]); 64 | int month = TimeUtil.MONTHS.get(dateParts[1].toLowerCase()); 65 | int year = dateParts.length > 2 ? Integer.parseInt(dateParts[2]) : getYear(month); 66 | 67 | return LocalDate.of(year, month, day).atTime(getTime()); 68 | } 69 | 70 | @Override 71 | public String getCompany(Element job, String url) throws ParserException { 72 | String company = removeNbsp(getElements(job, jobSite.company()).first().text()); 73 | check(company, "company", url); 74 | return company; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/com/olegshan/parser/siteparsers/RabotaUaJobParser.java: -------------------------------------------------------------------------------- 1 | package com.olegshan.parser.siteparsers; 2 | 3 | import com.fasterxml.jackson.databind.JsonNode; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import com.olegshan.exception.ParserException; 6 | import com.olegshan.sites.JobSite; 7 | import org.jsoup.nodes.Document; 8 | import org.jsoup.nodes.Element; 9 | import org.jsoup.select.Elements; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | import org.springframework.util.StringUtils; 13 | 14 | import java.time.LocalDateTime; 15 | 16 | import static com.olegshan.util.TimeUtil.localTimeZone; 17 | 18 | public class RabotaUaJobParser extends JobParser { 19 | 20 | public RabotaUaJobParser(JobSite jobSite) { 21 | super(jobSite); 22 | } 23 | 24 | 
@Override 25 | public String getUrl(Elements titleBlock) { 26 | return jobSite.urlPrefix() + titleBlock 27 | .get(0) 28 | .getElementsByTag("a") 29 | .attr("href"); 30 | } 31 | 32 | @Override 33 | public Elements getTitleBlock(Element job) throws ParserException { 34 | Elements titleBlock = getElements(job, jobSite.titleBox(), true); 35 | check(titleBlock, "title blocks"); 36 | return titleBlock; 37 | } 38 | 39 | @Override 40 | public String getDescription(Element job, String url) { 41 | return getElements(job, jobSite.description(), true).text(); 42 | } 43 | 44 | @Override 45 | public String getCompany(Element job, String url) { 46 | String company = removeNbsp(getElements(job, jobSite.company(), true).text()); 47 | if (company.length() == 0) 48 | company = "Anonymous employer"; 49 | return company; 50 | } 51 | 52 | @Override 53 | public LocalDateTime getDate(Element job, String url) throws Exception { 54 | Document dateDoc = getDoc(url); 55 | 56 | Elements scriptElements = dateDoc.getElementsByTag("script"); 57 | 58 | String varScript = null; 59 | 60 | for (Element scriptElement : scriptElements) { 61 | if (scriptElement.data().contains("var ruavars")) 62 | varScript = scriptElement.data(); 63 | } 64 | 65 | if (StringUtils.isEmpty(varScript)) { 66 | LocalDateTime ldt = LocalDateTime.now(localTimeZone()); 67 | log.warn("There was no date for job {}, return current date {}", url, ldt); 68 | return ldt; 69 | } 70 | 71 | String json = varScript.substring(varScript.indexOf("{"), varScript.lastIndexOf("}") + 1); 72 | JsonNode jsonNode = new ObjectMapper().readTree(json); 73 | String vacancyDate = jsonNode.get("vacancy_VacancyDate").toString().replaceAll("\\\"", ""); 74 | 75 | return LocalDateTime.parse(vacancyDate); 76 | } 77 | 78 | private static final Logger log = LoggerFactory.getLogger(RabotaUaJobParser.class); 79 | } 80 | -------------------------------------------------------------------------------- 
/src/main/java/com/olegshan/parser/siteparsers/WorkUaJobParser.java:
--------------------------------------------------------------------------------
package com.olegshan.parser.siteparsers;

import com.olegshan.exception.ParserException;
import com.olegshan.sites.JobSite;
import com.olegshan.util.TimeUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.time.LocalDate;
import java.time.LocalDateTime;

import static java.lang.Integer.parseInt;

/**
 * Parser for work.ua. Title, date and company are all read from the job block itself:
 * the date comes from the {@code title} attribute of the block's links.
 */
public class WorkUaJobParser extends JobParser {

    // text that precedes the date inside the link's title attribute
    private static final String DATE_MARKER = "вакансія від ";

    public WorkUaJobParser(JobSite jobSite) {
        super(jobSite);
    }

    /**
     * @throws ParserException if {@code check} rejects the selected blocks
     */
    @Override
    public Elements getJobBlocks(Document doc) throws ParserException {
        Elements jobBlocks = getElements(doc, jobSite.jobBox(), true);
        check(jobBlocks, "job blocks");
        return jobBlocks;
    }

    /** The title block is simply every {@code <a>} inside the job block. */
    @Override
    public Elements getTitleBlock(Element job) {
        return job.getElementsByTag("a");
    }

    @Override
    public String getTitle(Elements titleBlock) {
        return titleBlock.first().text();
    }

    @Override
    public String getDescription(Element job, String url) {
        return getElements(job, jobSite.description(), true).text();
    }

    /**
     * Parses "day month year" that follows {@value #DATE_MARKER} in the link title.
     *
     * <p>A title without the marker, with fewer than three date parts, or with an
     * unknown month name now raises {@link ParserException}. Previously a missing marker
     * produced a substring from the wrong offset and the other two cases surfaced as
     * {@code ArrayIndexOutOfBoundsException} / {@code NullPointerException}.
     *
     * @throws ParserException if the date cannot be located in the title
     */
    @Override
    public LocalDateTime getDate(Element job, String url) throws ParserException {
        String title = getTitleBlock(job).attr("title");

        int markerAt = title.indexOf(DATE_MARKER);
        String dateText = markerAt < 0 ? "" : title.substring(markerAt + DATE_MARKER.length());
        check(dateText, "date parts", url);

        String[] dateParts = dateText.split(jobSite.split());
        Integer month = dateParts.length < 3 ? null : TimeUtil.MONTHS.get(dateParts[1]);
        if (month == null)
            throw new ParserException("Error getting date parts from " + jobSite.name() + "\n" + url);

        int year = parseInt(dateParts[2]);
        int day = parseInt(dateParts[0]);

        return LocalDate.of(year, month, day).atTime(getTime());
    }

    /**
     * Returns the text of the first {@code <b>} element of the job block.
     *
     * @throws ParserException if {@code check} rejects the selected elements
     */
    @Override
    public String getCompany(Element job, String url) throws ParserException {
        Elements company = job.getElementsByTag("b");
        check(company, "company", url);

        // NOTE(review): if check() throws for an empty selection, the "Anonymous company"
        // fallback below is unreachable - confirm which behaviour is intended.
        return (company != null && !company.isEmpty()) ? removeNbsp(company.get(0).text()) : "Anonymous company";
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/repository/JobRepository.java:
--------------------------------------------------------------------------------
package com.olegshan.repository;

import com.olegshan.entity.Job;
import org.springframework.data.jpa.repository.JpaRepository;

/**
 * Spring Data repository for {@link Job}.
 * Jobs are looked up by their URL, so the id type is assumed to be {@code String}
 * (NOTE(review): confirm against the {@code @Id} field of {@code Job}).
 */
public interface JobRepository extends JpaRepository<Job, String> {
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/service/JobService.java:
--------------------------------------------------------------------------------
package com.olegshan.service;

import com.olegshan.entity.Job;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;

/** Persists parsed jobs and serves them page by page. */
public interface JobService {

    /** Stores a job that is new, or whose date differs from the stored one. */
    void save(Job job);

    /** Returns the page of stored jobs described by {@code request}. */
    Page<Job> getJobs(Pageable request);
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/service/StatisticsService.java:
--------------------------------------------------------------------------------
package com.olegshan.service;

import com.olegshan.entity.Job;

/** Collects per-run counters of new and updated jobs. */
public interface StatisticsService {

    /** Publishes the counters collected so far under {@code siteName} and resets them. */
    void saveStatistics(String siteName);

    /** Counts {@code job} as new ({@code isNew == true}) or as updated. */
    void updateStatistics(Job job, boolean isNew);
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/service/impl/JobServiceImpl.java:
--------------------------------------------------------------------------------
package com.olegshan.service.impl;

import com.olegshan.entity.Job;
import com.olegshan.notifier.Notifier;
import com.olegshan.repository.JobRepository;
import com.olegshan.service.JobService;
import com.olegshan.service.StatisticsService;
import com.olegshan.social.JTwitter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;

import java.time.LocalDate;

/**
 * Saves jobs, tweets every saved job and feeds the statistics counters.
 * A job is identified by its URL.
 */
@Service
public class JobServiceImpl implements JobService {

    private final JobRepository jobRepository;
    private final StatisticsService statisticsService;
    private final JTwitter twitter;
    private final Notifier notifier;

    @Autowired
    public JobServiceImpl(
            JobRepository jobRepository,
            StatisticsService statisticsService,
            JTwitter twitter,
            Notifier notifier
    ) {
        this.jobRepository = jobRepository;
        this.statisticsService = statisticsService;
        this.twitter = twitter;
        this.notifier = notifier;
    }

    /**
     * Saves {@code job} if it is unknown; otherwise re-saves it only when its date changed.
     * The stored job is fetched once and the null result is used as the "not found" signal,
     * replacing the earlier exists-then-find pair of lookups.
     */
    @Override
    public void save(Job job) {
        Job jobFromDb = jobRepository.findOne(job.getUrl());
        if (jobFromDb != null) {
            updateIfNeeded(job, jobFromDb);
        } else {
            saveAndTweet(job);
            updateStatistics(job, true);
            log.info("New job '{}' on {} found", job.getTitle(), job.getSource());
        }
    }

    /** Re-saves and re-tweets the job when its calendar date differs from the stored one. */
    private void updateIfNeeded(Job job, Job jobFromDb) {
        LocalDate jobFromDbDate = jobFromDb.getDate().toLocalDate();
        LocalDate jobDate = job.getDate().toLocalDate();
        if (!jobFromDbDate.equals(jobDate)) {
            saveAndTweet(job);
            updateStatistics(job, false);
        }
    }

    private void saveAndTweet(Job job) {
        saveJob(job);
        twitter.tweet(job);
    }

    private void updateStatistics(Job job, boolean isNew) {
        statisticsService.updateStatistics(job, isNew);
    }

    @Override
    public Page<Job> getJobs(Pageable request) {
        return jobRepository.findAll(request);
    }

    /**
     * Best-effort save: a failure is logged and reported to the admin, not rethrown,
     * so one bad job does not abort the caller.
     */
    private void saveJob(Job job) {
        try {
            jobRepository.save(job);
        } catch (Exception e) {
            log.error("Error while saving job '{}', {} into database", job.getTitle(), job.getUrl(), e);
            notifier.notifyAdmin("Error while saving following job into database: '" +
                    job.getTitle() + "', " + job.getUrl() + "\n\n" + e.getMessage());
        }
    }

    private static final Logger log = LoggerFactory.getLogger(JobServiceImpl.class);
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/service/impl/StatisticsServiceImpl.java:
--------------------------------------------------------------------------------
package com.olegshan.service.impl;

import com.olegshan.entity.Job;
import com.olegshan.service.StatisticsService;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import org.springframework.stereotype.Service;

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Keeps two in-memory counters (new / updated jobs) and exports them as Prometheus
 * metrics labelled by site name.
 */
@Service
public class StatisticsServiceImpl implements StatisticsService {

    private final AtomicInteger newJobs = new AtomicInteger();
    private final AtomicInteger updatedJobs = new AtomicInteger();

    private static final Gauge newJobsFoundPerRun = Gauge.build()
            .name("new_jobs_per_run")
            .help("New jobs per run.")
            .labelNames("site_name")
            .register();

    private static final Gauge updatedJobsFoundPerRun = Gauge.build()
            .name("updated_jobs_per_run")
            .help("Updated jobs per run.")
            .labelNames("site_name")
            .register();

    private static final Counter totalJobsCount = Counter.build()
            .name("total_jobs_count")
            .help("Total jobs count.")
            .labelNames("site_name")
            .register();

    /** Increments the matching counter; new jobs also bump the per-source total. */
    @Override
    public void updateStatistics(Job job, boolean isNew) {
        if (isNew) {
            newJobs.incrementAndGet();
            totalJobsCount
                    .labels(job.getSource())
                    .inc();
        } else {
            updatedJobs.incrementAndGet();
        }
    }

    /**
     * Publishes both counters under {@code siteName} and resets them.
     * {@code getAndSet(0)} reads and clears in one atomic step, so an increment that
     * lands between the read and the reset is no longer lost.
     */
    @Override
    public void saveStatistics(String siteName) {

        newJobsFoundPerRun
                .labels(siteName)
                .set(newJobs.getAndSet(0));

        updatedJobsFoundPerRun
                .labels(siteName)
                .set(updatedJobs.getAndSet(0));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/olegshan/sites/DouUa.java:
--------------------------------------------------------------------------------
package com.olegshan.sites;

import com.olegshan.parser.siteparsers.DouUaJobParser;
import com.olegshan.parser.siteparsers.JobParser;
import org.springframework.stereotype.Component;

/** Selector settings for jobs.dou.ua. */
@Component
public class DouUa extends JobSite {

    @Override
    public String name() {
        return "Dou.ua";
    }

    @Override
    public String url() {
        return "https://jobs.dou.ua/vacancies/?city=%D0%9A%D0%B8%D1%97%D0%B2&category=Java";
    }

    @Override
    public String split() {
        return " ";
    }

    @Override
    public Holder jobBox() {
        return Holder.of("class", "vacancy");
    }

    @Override
    public Holder titleBox() {
        return Holder.of("class", "vt");
    }

    @Override
    public Holder company() {
        return Holder.of("class", "company");
    }

    @Override
    public Holder description() {
        return Holder.of("class", "sh-info");
    }

    @Override
    public Holder date() {
        return Holder.of("class", "date");
    }

    @Override
    public JobParser getParser() {
        return new DouUaJobParser(this);
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/sites/HeadHunterUa.java:
--------------------------------------------------------------------------------
package com.olegshan.sites;

import com.olegshan.parser.siteparsers.HeadHunterUaJobParser;
import com.olegshan.parser.siteparsers.JobParser;
import org.springframework.stereotype.Component;

import static com.olegshan.parser.siteparsers.JobParser.NBSP;

/** Selector settings for hh.ua; date parts there are separated by NBSP. */
@Component
public class HeadHunterUa extends JobSite {

    @Override
    public String name() {
        return "HeadHunter.ua";
    }

    @Override
    public String url() {
        return "https://hh.ua/search/vacancy?text=java&area=115";
    }

    @Override
    public String split() {
        return NBSP;
    }

    @Override
    public Holder jobBox() {
        return Holder.of("data-qa", "vacancy-serp__vacancy");
    }

    @Override
    public Holder titleBox() {
        return Holder.of("data-qa", "vacancy-serp__vacancy-title");
    }

    @Override
    public Holder company() {
        return Holder.of("class", "vacancy-company-name-wrapper");
    }

    @Override
    public Holder description() {
        return Holder.of("data-qa", "vacancy-serp__vacancy_snippet_requirement");
    }

    @Override
    public Holder date() {
        return Holder.of("class", "vacancy-serp-item__publication-date");
    }

    @Override
    public JobParser getParser() {
        return new HeadHunterUaJobParser(this);
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/sites/JobSite.java:
--------------------------------------------------------------------------------
package com.olegshan.sites;

import com.olegshan.parser.siteparsers.JobParser;

/**
 * Description of one job site: its name, search URL and the attribute/value pairs
 * ({@link Holder}) used to locate the parts of a vacancy. Every selector defaults to
 * an empty holder; subclasses override only what their site needs.
 */
public abstract class JobSite {

    public abstract String name();

    public abstract String url();

    /** Prefix put in front of relative vacancy links; empty by default. */
    public String urlPrefix() {
        return "";
    }

    /** Separator between the parts of a date string; empty by default. */
    public String split() {
        return "";
    }

    public Holder jobBox() {
        return Holder.empty();
    }

    public Holder titleBox() {
        return Holder.empty();
    }

    public Holder company() {
        return Holder.empty();
    }

    public Holder description() {
        return Holder.empty();
    }

    public Holder date() {
        return Holder.empty();
    }

    /** Parser to use for this site; the generic {@link JobParser} unless overridden. */
    public JobParser getParser() {
        return new JobParser(this);
    }

    /** An attribute name ({@code key}) paired with the attribute value to look for. */
    public static class Holder {
        public String key;
        public String value;

        public static Holder of(String key, String value) {
            Holder holder = new Holder();
            holder.key = key;
            holder.value = value;

            return holder;
        }

        public static Holder empty() {
            return Holder.of("", "");
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/sites/RabotaUa.java:
--------------------------------------------------------------------------------
package com.olegshan.sites;

import com.olegshan.parser.siteparsers.JobParser;
import com.olegshan.parser.siteparsers.RabotaUaJobParser;
import org.springframework.stereotype.Component;

/** Selector settings for rabota.ua; vacancy links there are relative, hence the prefix. */
@Component
public class RabotaUa extends JobSite {

    @Override
    public String name() {
        return "Rabota.ua";
    }

    @Override
    public String url() {
        return "https://rabota.ua/jobsearch/vacancy_list?regionId=1&keyWords=java";
    }

    @Override
    public String urlPrefix() {
        return "https://rabota.ua";
    }

    @Override
    public Holder jobBox() {
        return Holder.of("class", "card-body");
    }

    @Override
    public Holder titleBox() {
        return Holder.of("class", "card-title");
    }

    @Override
    public Holder company() {
        return Holder.of("class", "company-profile-name");
    }

    @Override
    public Holder description() {
        return Holder.of("class", "card-description");
    }

    @Override
    public JobParser getParser() {
        return new RabotaUaJobParser(this);
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/olegshan/sites/WorkUa.java:
--------------------------------------------------------------------------------
package com.olegshan.sites;

import com.olegshan.parser.siteparsers.JobParser;
import com.olegshan.parser.siteparsers.WorkUaJobParser;
import org.springframework.stereotype.Component;

/** Selector settings for work.ua. */
@Component
public class WorkUa extends JobSite {

    @Override
    public String name() {
        return "Work.ua";
    }

    @Override
    public String url() {
        return "https://www.work.ua/jobs-kyiv-java/";
    }

    @Override
    public String urlPrefix() {
        return "https://work.ua";
    }

    @Override
    public String split() {
        return " ";
    }

    @Override
    public Holder jobBox() {
        return Holder.of("class", "card card-hover card-visited wordwrap job-link");
    }

    @Override
    public Holder description() {
        return Holder.of("class", "overflow");
    }

    @Override
    public JobParser getParser() {
        return new WorkUaJobParser(this);
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/social/JTwitter.java:
--------------------------------------------------------------------------------
package com.olegshan.social;

import com.olegshan.entity.Job;
import com.olegshan.notifier.Notifier;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.env.Environment;
import org.springframework.social.twitter.api.Twitter;
import org.springframework.social.twitter.api.impl.TwitterTemplate;
import org.springframework.stereotype.Component;

import java.util.Arrays;

/**
 * Posts saved jobs to Twitter. No client is created when the "dev" profile is active,
 * and {@link #tweet(Job)} is then a no-op.
 */
@Component
public class JTwitter {

    private Twitter twitter;
    private Environment environment;
    private Notifier notifier;

    @Autowired
    public JTwitter(Environment environment, Notifier notifier) {
        this.environment = environment;
        this.notifier = notifier;
        initTwitter();
    }

    /**
     * Tweets the job title and URL. Any failure other than a duplicate-status error
     * is reported to the admin; nothing is rethrown.
     */
    public void tweet(Job job) {
        if (twitter == null) {
            return;
        }

        String tweet = String.format("%s %s More jobs here: http://jparser.info", job.getTitle(), job.getUrl());
        try {
            twitter.timelineOperations().updateStatus(tweet);
        } catch (Exception e) {
            boolean duplicate = "Status is a duplicate.".equals(e.getMessage());
            if (!duplicate) {
                String report = "Error while twitting following tweet:\n " + tweet +
                        "\nException was:\n" + e.getMessage();
                notifier.notifyAdmin(report);
            }
        }
    }

    /** Builds the Twitter client from four system properties unless running in dev. */
    private void initTwitter() {
        if (isDevEnv()) {
            return;
        }

        twitter = new TwitterTemplate(
                System.getProperty("CKjP"),   // consumer key
                System.getProperty("CSjP"),   // consumer secret
                System.getProperty("ATjP"),   // access token
                System.getProperty("ATSjP")   // access token secret
        );
    }

    private boolean isDevEnv() {
        String[] activeProfiles = environment.getActiveProfiles();
        return Arrays.stream(activeProfiles)
                .anyMatch(profile -> profile.equalsIgnoreCase("dev"));
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/statistics/Statistics.java:
--------------------------------------------------------------------------------
package com.olegshan.statistics;

import lombok.Data;

import javax.persistence.Entity;
import javax.persistence.Id;
import java.time.LocalDateTime;

/** JPA entity holding the new/updated job counts of one parser run for one site. */
@Entity
@Data
public class Statistics {

    @Id
    private String id;
    private String siteName;
    private LocalDateTime run;
    private int newJobsFoundByRun;
    private int updatedJobsByRun;

    /** Derives the id from the given site name and the run timestamp; {@code run} must be set first. */
    public void setId(String siteName) {
        this.id = siteName + run.toString();
    }

    public void incrementNewJobsCount() {
        newJobsFoundByRun++;
    }

    public void incrementUpdatedJobsCount() {
        updatedJobsByRun++;
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/util/PageBox.java:
--------------------------------------------------------------------------------
package com.olegshan.util;

/**
 * Computes which page buttons to show in the pager: a window of at most
 * {@code BUTTONS_TO_SHOW} pages positioned around the current page.
 */
public class PageBox {

    private static final int BUTTONS_TO_SHOW = 5;

    private int totalPages;
    private int currentPage;
    private int firstPage;
    private int lastPage;

    public PageBox(int totalPages, int currentPage) {

        this.totalPages = totalPages;
        this.currentPage = currentPage;
    }

    /**
     * Fills in the first and last page of the window and returns this object.
     * The window is clamped to page 1 on the left and to {@code totalPages} on the right.
     */
    public PageBox getPageBox() {
        final int half = BUTTONS_TO_SHOW / 2;

        // everything fits: show all pages
        if (totalPages <= BUTTONS_TO_SHOW) {
            return window(1, totalPages);
        }
        // too close to the start: pin the window to page 1
        if (currentPage - half <= 0) {
            return window(1, BUTTONS_TO_SHOW);
        }
        // the window ends exactly on the last page
        if (currentPage + half == totalPages) {
            return window(currentPage - half, totalPages);
        }
        // too close to the end: pin the window to the last page
        if (currentPage + half > totalPages) {
            return window(totalPages - BUTTONS_TO_SHOW + 1, totalPages);
        }
        // general case: centre the window on the current page
        return window(currentPage - half, currentPage + half);
    }

    /** Stores the window bounds through the public setters and returns this object. */
    private PageBox window(int first, int last) {
        setFirstPage(first);
        setLastPage(last);
        return this;
    }

    public int getFirstPage() {
        return firstPage;
    }

    public void setFirstPage(int firstPage) {
        this.firstPage = firstPage;
    }

    public int getLastPage() {
        return lastPage;
    }

    public void setLastPage(int lastPage) {
        this.lastPage = lastPage;
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/olegshan/util/TimeUtil.java:
// --------------------------------------------------------------------------------
import java.time.ZoneId;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Date helpers shared by the site parsers (package {@code com.olegshan.util}):
 * the local time zone and a lookup table from month names to month numbers.
 */
public class TimeUtil {

    public static final String LOCAL_TIME_ZONE = "Europe/Athens";

    /**
     * Month name to month number (1-12). Contains Ukrainian genitive names, Russian
     * genitive names, Russian three-letter abbreviations and lower-case English names.
     * The map is read-only; lookups of unknown names return {@code null}.
     */
    public static final Map<String, Integer> MONTHS = Collections.unmodifiableMap(buildMonths());

    /** Returns the {@link ZoneId} for {@link #LOCAL_TIME_ZONE}. */
    public static ZoneId localTimeZone() {
        return ZoneId.of(LOCAL_TIME_ZONE);
    }

    /**
     * Strips one leading '0' from every element, in place ("05" becomes "5").
     * A lone "0" is left untouched so that no element is turned into an empty string.
     *
     * @param dateParts date fragments such as day and month; modified in place
     */
    public static void removeZero(String[] dateParts) {
        for (int i = 0; i < dateParts.length; i++) {
            String part = dateParts[i];
            if (part.length() > 1 && part.startsWith("0")) {
                dateParts[i] = part.substring(1);
            }
        }
    }

    /** Builds the month table with a plain HashMap instead of double-brace initialisation. */
    private static Map<String, Integer> buildMonths() {
        Map<String, Integer> months = new HashMap<>();

        register(months, "січня", "лютого", "березня", "квітня", "травня", "червня",
                "липня", "серпня", "вересня", "жовтня", "листопада", "грудня");

        register(months, "января", "февраля", "марта", "апреля", "мая", "июня",
                "июля", "августа", "сентября", "октября", "ноября", "декабря");

        register(months, "янв", "фев", "мар", "апр", "май", "июн",
                "июл", "авг", "сен", "окт", "ноя", "дек");

        register(months, "january", "february", "march", "april", "may", "june",
                "july", "august", "september", "october", "november", "december");

        return months;
    }

    /** Maps the given names, in calendar order, to the numbers 1..n. */
    private static void register(Map<String, Integer> months, String... namesInOrder) {
        for (int i = 0; i < namesInOrder.length; i++) {
            months.put(namesInOrder[i], i + 1);
        }
    }
}
// --------------------------------------------------------------------------------
// /src/main/resources/application-dev.properties:
-------------------------------------------------------------------------------- 1 | spring.jpa.show-sql = true 2 | spring.thymeleaf.cache=false 3 | -------------------------------------------------------------------------------- /src/main/resources/application-prod.properties: -------------------------------------------------------------------------------- 1 | spring.datasource.url = jdbc:postgresql://${RDS_HOSTNAME}:${RDS_PORT}/${RDS_DB_NAME} 2 | spring.datasource.username = ${RDS_USERNAME} 3 | spring.datasource.password = ${RDS_PASSWORD} 4 | spring.datasource.driverClassName = org.postgresql.Driver 5 | spring.jpa.hibernate.ddl-auto = update 6 | -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.profiles.active = prod 2 | spring.mvc.throw-exception-if-no-handler-found = true 3 | 4 | spring.mail.host = smtp.gmail.com 5 | spring.mail.port = 465 6 | spring.mail.username = ${jMailSender} 7 | mail.recipient = ${jMailRecipient} 8 | spring.mail.password = ${jMailPassword} 9 | spring.mail.properties.smtp.auth = true 10 | spring.mail.properties.mail.smtp.ssl.enable = true -------------------------------------------------------------------------------- /src/main/resources/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olegshan/JobParser/cb9a807944b5fbab57baeb8b2316ec3e42c2b087/src/main/resources/static/favicon.ico -------------------------------------------------------------------------------- /src/main/resources/static/style.css: -------------------------------------------------------------------------------- 1 | 2 | .logo { 3 | margin: 30px 0 40px 0; 4 | padding-left: 0; 5 | } 6 | 7 | .logo h1, .logo span { 8 | display: inline-block; 9 | } 10 | 11 | .logo h2 { 12 | margin-top: -8px; 13 | font-size: 14px; 14 | } 15 | 16 | 
.logo a:hover { 17 | text-decoration: none; 18 | } 19 | 20 | .logo, .twitter { 21 | display: inline-block; 22 | float: none; 23 | } 24 | 25 | .logo small, .logo span { 26 | color: #ffc66d; 27 | } 28 | 29 | .logo h2, .title small:hover, .under, .footer { 30 | color: #cc7832; 31 | } 32 | 33 | .title small, .about small { 34 | color: #ffc66d; 35 | } 36 | 37 | .description, .about p { 38 | color: #6a8759 39 | } 40 | 41 | .company { 42 | color: #bbb529 43 | } 44 | 45 | .footer a, .about a { 46 | color: #6796a3 47 | } 48 | 49 | .pagination>li>a { 50 | background-color: transparent; 51 | color: #cc7832; 52 | border: none; 53 | } 54 | 55 | .pagination>li>a:hover { 56 | background-color: #ffc66d; 57 | } 58 | 59 | .pagination>li.active>a { 60 | background-color: #cc7832; 61 | } 62 | 63 | .pagination>li.active>a:hover { 64 | background-color: #cc7832; 65 | } 66 | 67 | body { 68 | background-color: #2b2b2b; 69 | } 70 | 71 | .twitter { 72 | vertical-align: super; 73 | } 74 | 75 | .under, .statistics { 76 | font-size: 12px; 77 | } 78 | 79 | .center-block { 80 | float: none 81 | } 82 | 83 | .jobBlock { 84 | word-wrap: break-word; 85 | } 86 | 87 | .jobBlock, .pagination { 88 | margin-top: 35px; 89 | } 90 | 91 | .footer { 92 | margin: 30px 0 30px 0; 93 | } -------------------------------------------------------------------------------- /src/main/resources/static/twitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olegshan/JobParser/cb9a807944b5fbab57baeb8b2316ec3e42c2b087/src/main/resources/static/twitter.png -------------------------------------------------------------------------------- /src/main/resources/templates/Error.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 |

6 | Sorry, an error occurred 7 |

8 |

9 | Return to homepage 10 |
11 |
12 | -------------------------------------------------------------------------------- /src/main/resources/templates/about.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 |

6 | About jParser 7 |

8 |

9 | jParser helps Java developers find a job in Kyiv. Every hour it parses vacancies on the three main Ukrainian 10 | job sites: 11 | Rabota.ua, 12 | Work.ua and 13 | Headhunter.ua 14 | and on the main Ukrainian site for developers — 15 | Dou.ua. 16 |

17 |

18 | All found jobs are being posted to jParser's 19 | Twitter account automatically. 20 |

21 |

22 | jParser was created by Java developer 23 | Oleg Shankovskyi. 24 | 25 |

26 |

27 | Source code is on Github. 28 |

29 |
30 |

Return to homepage

31 |
32 |
33 | -------------------------------------------------------------------------------- /src/main/resources/templates/exception.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 |

6 | Sorry, an error occurred 7 |

8 |

9 | Return to homepage 10 |
11 |
12 | -------------------------------------------------------------------------------- /src/main/resources/templates/index.html: -------------------------------------------------------------------------------- 1 | 3 | 4 |
5 | 6 |
7 | 8 |

9 | 10 |

11 |
12 |

13 | 14 | 15 | 16 |
17 | 18 |
19 |
    20 |
  • 21 | « 22 |
  • 23 |
  • 24 | 26 |
  • 27 |
  • 29 | 30 |
  • 31 |
  • 32 | 34 |
  • 35 |
  • 36 | » 38 |
  • 39 |
40 |
41 | 42 | 49 | 50 |
51 | 52 | -------------------------------------------------------------------------------- /src/main/resources/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | jParser 11 | 12 | 13 | 14 | 31 | 32 | 37 | 38 | 39 | 41 | 42 | 43 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
59 |
60 |
61 | 73 | 74 | 77 | 78 |
79 | 80 |
81 | 82 |
83 |
84 |
85 | 86 | 87 |
--------------------------------------------------------------------------------
/src/test/java/com/olegshan/AbstractTest.java:
--------------------------------------------------------------------------------
package com.olegshan;

import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

/** Base class for Spring Boot integration tests; runs them under the "dev" profile. */
@RunWith(SpringJUnit4ClassRunner.class)
@SpringBootTest
@ActiveProfiles("dev")
public abstract class AbstractTest {

    // one logger per concrete test class
    protected Logger logger = LoggerFactory.getLogger(getClass());
}

--------------------------------------------------------------------------------
/src/test/java/com/olegshan/controllers/ErrorHandlerTest.java:
--------------------------------------------------------------------------------
package com.olegshan.controllers;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.setup.MockMvcBuilders;

import static org.mockito.Mockito.when;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;

/** Checks that {@link ErrorHandler} turns an exception thrown by a controller into the "exception" view. */
@RunWith(MockitoJUnitRunner.class)
public class ErrorHandlerTest {

    @Mock
    private ParseController parseController;
    private MockMvc mockMvc;

    @Before
    public void setUp() throws Exception {
        // standalone setup: only the mocked controller plus the advice under test
        ErrorHandler advice = new ErrorHandler();
        mockMvc = MockMvcBuilders
                .standaloneSetup(parseController)
                .setControllerAdvice(advice)
                .build();
    }

    @Test
    public void unexpectedExceptionsAreCaught() throws Exception {

        RuntimeException failure = new RuntimeException("Unexpected exception");
        when(parseController.about()).thenThrow(failure);

        mockMvc.perform(get("/about"))
                .andExpect(status().isOk())
                .andExpect(view().name("exception"))
                .andExpect(model().attribute("errorMessage", "Unexpected exception"));
    }
}
--------------------------------------------------------------------------------
/src/test/java/com/olegshan/controllers/ParseControllerTest.java:
--------------------------------------------------------------------------------
package com.olegshan.controllers;

import com.olegshan.AbstractTest;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.setup.MockMvcBuilders;
import org.springframework.web.context.WebApplicationContext;

import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;

/** Requests "/" and "/about" against the full web application context and checks the view and model. */
public class ParseControllerTest extends AbstractTest {

    private MockMvc mockMvc;
    @Autowired
    private WebApplicationContext webApplicationContext;

    @Before
    public void setUp() {
        mockMvc = MockMvcBuilders
                .webAppContextSetup(webApplicationContext)
                .build();
    }

    @Test
    public void showJobsReturnsCorrectModelAndView() throws Exception {

        // the index page must expose both the job page and the pager model
        mockMvc.perform(get("/"))
                .andExpect(status().isOk())
                .andExpect(view().name("index"))
                .andExpect(model().attributeExists("jobs"))
                .andExpect(model().attributeExists("pageBox"));
    }

    @Test
    public void aboutPageTest() throws Exception {
        mockMvc.perform(get("/about"))
                .andExpect(status().isOk())
                .andExpect(view().name("about"));
    }
}
--------------------------------------------------------------------------------
/src/test/java/com/olegshan/service/JobServiceTest.java:
--------------------------------------------------------------------------------
package com.olegshan.service;

import com.olegshan.AbstractTest;
import com.olegshan.entity.Job;
import com.olegshan.repository.JobRepository;
import com.olegshan.social.JTwitter;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;

import java.time.LocalDateTime;
import java.util.List;
import java.util.Random;
import java.util.stream.IntStream;

import static com.olegshan.util.TimeUtil.localTimeZone;
import static java.time.LocalDateTime.now;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;

/**
 * Tests of {@link JobService} saving and paging behaviour.
 *
 * <p>Every test starts with ten jobs stored by {@link #setUp()} and ends with
 * the repository emptied by {@link #tearDown()}.
 */
public class JobServiceTest extends AbstractTest {

    private static final String JOB_URL = "http://somesite.ua/company/vacancy";
    // PageRequest page indexes are zero-based, so 1 requests the second page of PAGE_SIZE jobs.
    private static final int CURRENT_PAGE = 1;
    private static final int PAGE_SIZE = 5;

    // NOTE(review): @Mock/@InjectMocks are not initialised explicitly in this class;
    // presumably the Spring Boot test infrastructure does it - confirm.
    @Mock
    private JTwitter mockTwitter;

    @InjectMocks
    @Autowired
    private JobService jobService;
    @Autowired
    private JobRepository jobRepository;

    /** Saves ten jobs with distinct URLs, each dated 0-19 days before the current local time. */
    @Before
    public void setUp() {
        Random random = new Random();
        for (int i = 0; i < 10; i++) {
            //jobs are saved into database with random dates
            Job job = new Job("Title" + i, "Description" + i, "Company" + i, "Site" + i, JOB_URL + i,
                    now(localTimeZone()).minusDays(random.nextInt(20)));
            jobService.save(job);
        }
    }

    /** All ten jobs created in {@link #setUp()} must be present in the repository. */
    @Test
    public void jobsInSetUpMethodWereSaved() {
        assertEquals("There should be 10 elements in the database", 10, jobRepository.findAll().size());
    }

    /**
     * Saving a job whose URL already exists but whose date differs must tweet the
     * job and overwrite the stored title and date without adding a new row.
     */
    @Test
    public void savingOfNewJobWithTheSameUrlAndDifferentDateUpdatesExistingJob() {
        Job job = jobRepository.findOne(JOB_URL + 5);
        assertEquals("Title5", job.getTitle());
        LocalDateTime newDate = job.getDate().minusDays(1);
        job.setDate(newDate);
        job.setTitle("New title");
        jobService.save(job);
        verify(mockTwitter).tweet(job);

        job = jobRepository.findOne(JOB_URL + 5);
        assertEquals("New title", job.getTitle());
        assertEquals(newDate, job.getDate());
        assertEquals("There should be still 10 elements in the database after updating",
                10, jobRepository.findAll().size());
    }

    /**
     * Saving a job whose URL and date both match the stored job must neither
     * tweet nor change the stored title, and must not add a new row.
     */
    @Test
    public void savingOfJobWithTheSameUrlAndSameDateDoesNotUpdateExistingJob() {
        Job job = jobRepository.findOne(JOB_URL + 7);
        assertEquals("Title7", job.getTitle());
        job.setTitle("New title");
        jobService.save(job);
        verify(mockTwitter, never()).tweet(job);

        job = jobRepository.findOne(JOB_URL + 7);
        assertEquals("Title7", job.getTitle());
        assertEquals("There should be still 10 elements in the database", 10, jobRepository.findAll().size());
    }

    /** A page requested with descending sort on "date" must hold PAGE_SIZE jobs ordered from new to old. */
    @Test
    public void jobsAreRetrievedFromDatabaseSortedByDateInDescendingOrder() {
        Page<Job> jobs = jobService.getJobs(new PageRequest(CURRENT_PAGE, PAGE_SIZE, Sort.Direction.DESC, "date"));
        assertEquals(PAGE_SIZE + " elements should be retrieved", PAGE_SIZE, jobs.getContent().size());
        assertTrue("The jobs should be sorted from new to old", isSortedDescending(jobs));
    }

    /**
     * Tells whether the dates of the page content never increase from one element to the next.
     *
     * @param page page of jobs to inspect
     * @return {@code true} if every job's date is equal to or later than the date of the
     *         job that follows it; trivially {@code true} for fewer than two jobs
     */
    private boolean isSortedDescending(Page<Job> page) {
        List<Job> list = page.getContent();
        // Bound by the actual content size so a short page cannot cause an IndexOutOfBoundsException.
        return IntStream.range(0, list.size() - 1)
                .allMatch(i -> list.get(i).getDate().compareTo(list.get(i + 1).getDate()) >= 0);
    }

    /** Removes every job so that the next test starts from an empty repository. */
    @After
    public void tearDown() {
        jobRepository.deleteAll();
    }
}
--------------------------------------------------------------------------------