├── LICENSE ├── README.md ├── pom.xml └── src └── main ├── java └── com │ └── techshard │ └── batch │ ├── Application.java │ ├── TracePerformanceAspect.java │ ├── configuration │ ├── BatchConfiguration.java │ ├── NotificationListener.java │ ├── VoltageFieldSetMapper.java │ └── VoltageProcessor.java │ └── dao │ ├── entity │ └── Voltage.java │ └── repository │ └── IVoltageRepository.java └── resources ├── Volts.csv └── application.properties /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Swathi Prasad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # batch-processing-large-datasets-spring 2 | Batch Processing Large Data Sets with Spring Boot and Spring Batch 3 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.techshard.batch 8 | springboot-batch 9 | 1.0-SNAPSHOT 10 | 11 | 12 | org.springframework.boot 13 | spring-boot-starter-parent 14 | 2.1.6.RELEASE 15 | 16 | 17 | 18 | 19 | UTF-8 20 | UTF-8 21 | 22 | 23 | 24 | 25 | org.springframework.boot 26 | spring-boot-starter-web 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-starter-aop 31 | 32 | 33 | org.springframework.boot 34 | spring-boot-starter-batch 35 | 36 | 37 | org.springframework.boot 38 | spring-boot-starter-data-jpa 39 | 40 | 41 | com.h2database 42 | h2 43 | runtime 44 | 45 | 46 | org.slf4j 47 | slf4j-api 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/main/java/com/techshard/batch/Application.java: -------------------------------------------------------------------------------- 1 | package com.techshard.batch; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | import org.springframework.boot.web.servlet.support.SpringBootServletInitializer; 6 | 7 | @SpringBootApplication 8 | public class Application extends SpringBootServletInitializer { 9 | 10 | public static void main(String[] args) { 11 | SpringApplication.run(Application.class, args); 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/techshard/batch/TracePerformanceAspect.java: 
--------------------------------------------------------------------------------
package com.techshard.batch;

import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.reflect.MethodSignature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import java.util.concurrent.TimeUnit;

/**
 * Aspect that logs how long each intercepted method took to run.
 *
 * <p>The pointcut matches the execution of any method, with any return type and any
 * arguments, on any type located in {@code com.techshard} or one of its sub-packages.
 */
@Aspect
@Component
public class TracePerformanceAspect {

    private static final Logger LOGGER = LoggerFactory.getLogger(TracePerformanceAspect.class);

    /**
     * Runs the intercepted method and logs its duration in milliseconds.
     *
     * <p>The duration is logged in a {@code finally} block, so it is reported both when the
     * method returns normally and when it throws.
     *
     * @param joinPoint the intercepted method invocation
     * @return whatever the intercepted method returned
     * @throws Throwable whatever the intercepted method threw, rethrown unchanged
     */
    @Around("execution(* com.techshard..*.*(..))")
    public Object logTracePerformanceAspect(ProceedingJoinPoint joinPoint) throws Throwable {
        final MethodSignature methodSignature = (MethodSignature) joinPoint.getSignature();
        final String className = methodSignature.getDeclaringType().getSimpleName();
        final String methodName = methodSignature.getName();

        // nanoTime is monotonic; currentTimeMillis can jump when the system clock is adjusted.
        final long start = System.nanoTime();
        try {
            return joinPoint.proceed();
        } finally {
            final long elapsedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            LOGGER.info("Execution time of {}.{} :: {} ms", className, methodName, elapsedMs);
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/techshard/batch/configuration/BatchConfiguration.java:
--------------------------------------------------------------------------------
package com.techshard.batch.configuration;

import com.techshard.batch.dao.entity.Voltage;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider;
import org.springframework.batch.item.database.JdbcBatchItemWriter;
import org.springframework.batch.item.database.builder.JdbcBatchItemWriterBuilder;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.LineMapper;
import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder;
import org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper;
import org.springframework.batch.item.file.mapping.DefaultLineMapper;
import org.springframework.batch.item.file.transform.DelimitedLineTokenizer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.ClassPathResource;

import javax.sql.DataSource;

/**
 * Spring Batch wiring for the voltage import job.
 *
 * <p>The single step reads {@code Volts.csv} from the classpath, maps each
 * semicolon-separated line to a {@link Voltage}, passes it through
 * {@link VoltageProcessor} and inserts it into the {@code voltage} table.
 */
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {

    /** Number of items read and processed before they are handed to the writer together. */
    private static final int CHUNK_SIZE = 10;

    @Autowired
    public JobBuilderFactory jobBuilderFactory;

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    /**
     * Builds the reader for {@code Volts.csv}.
     *
     * <p>Parsing is delegated entirely to {@link #lineMapper()}. No tokenizer or field-set
     * mapper is configured on the builder, so the semicolon delimiter defined in
     * {@link #lineMapper()} is the only parsing configuration.
     *
     * @return a reader that produces one {@link Voltage} per line of the file
     */
    @Bean
    public FlatFileItemReader<Voltage> reader() {
        return new FlatFileItemReaderBuilder<Voltage>()
                .name("voltItemReader")
                .resource(new ClassPathResource("Volts.csv"))
                .lineMapper(lineMapper())
                .build();
    }

    /**
     * Builds the line mapper: a non-strict tokenizer that splits on {@code ;} into the
     * fields {@code volt} and {@code time}, followed by a {@link VoltageFieldSetMapper}.
     *
     * @return the mapper that turns one line of text into a {@link Voltage}
     */
    @Bean
    public LineMapper<Voltage> lineMapper() {
        final DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
        lineTokenizer.setDelimiter(";");
        lineTokenizer.setStrict(false);
        lineTokenizer.setNames(new String[] {"volt", "time"});

        final DefaultLineMapper<Voltage> defaultLineMapper = new DefaultLineMapper<>();
        defaultLineMapper.setLineTokenizer(lineTokenizer);
        defaultLineMapper.setFieldSetMapper(new VoltageFieldSetMapper());
        return defaultLineMapper;
    }

    /**
     * @return the item processor applied to every {@link Voltage} that was read
     */
    @Bean
    public VoltageProcessor processor() {
        return new VoltageProcessor();
    }

    /**
     * Builds the JDBC writer; the named SQL parameters {@code :volt} and {@code :time}
     * are resolved from the bean properties of each {@link Voltage}.
     *
     * @param dataSource the data source the insert statements are executed against
     * @return the writer that inserts items into the {@code voltage} table
     */
    @Bean
    public JdbcBatchItemWriter<Voltage> writer(final DataSource dataSource) {
        return new JdbcBatchItemWriterBuilder<Voltage>()
                .itemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<>())
                .sql("INSERT INTO voltage (volt, time) VALUES (:volt, :time)")
                .dataSource(dataSource)
                .build();
    }

    /**
     * Defines the {@code importVoltageJob} job: a single-step flow with a run-id
     * incrementer and a completion listener.
     *
     * @param listener the listener registered on the job
     * @param step1 the only step of the job
     * @return the assembled job
     */
    @Bean
    public Job importVoltageJob(NotificationListener listener, Step step1) {
        return jobBuilderFactory.get("importVoltageJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener)
                .flow(step1)
                .end()
                .build();
    }

    /**
     * Defines the chunk-oriented step that reads, processes and writes voltages.
     *
     * @param writer the writer receiving each processed chunk
     * @return the assembled step
     */
    @Bean
    public Step step1(JdbcBatchItemWriter<Voltage> writer) {
        return stepBuilderFactory.get("step1")
                .<Voltage, Voltage>chunk(CHUNK_SIZE)
                .reader(reader())
                .processor(processor())
                .writer(writer)
                .build();
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/techshard/batch/configuration/NotificationListener.java:
--------------------------------------------------------------------------------
package com.techshard.batch.configuration;

import com.techshard.batch.dao.entity.Voltage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.BatchStatus;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.listener.JobExecutionListenerSupport;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;

/**
 * Job listener that, once a job has completed, reads back every row of the
 * {@code voltage} table and logs it.
 */
@Component
public class NotificationListener extends JobExecutionListenerSupport {

    private static final Logger LOGGER = LoggerFactory.getLogger(NotificationListener.class);

    private final JdbcTemplate jdbcTemplate;

    /**
     * @param jdbcTemplate the template used to query the {@code voltage} table
     */
    @Autowired
    public NotificationListener(final JdbcTemplate jdbcTemplate) {
        this.jdbcTemplate = jdbcTemplate;
    }

    /**
     * Logs all stored voltages when the job status is {@link BatchStatus#COMPLETED};
     * does nothing for any other status.
     *
     * @param jobExecution the execution that has just finished
     */
    @Override
    public void afterJob(final JobExecution jobExecution) {
        if (jobExecution.getStatus() != BatchStatus.COMPLETED) {
            return;
        }

        LOGGER.info("!!! JOB FINISHED! Time to verify the results");

        jdbcTemplate
                .query("SELECT volt, time FROM voltage",
                        (rs, row) -> new Voltage(rs.getBigDecimal(1), rs.getDouble(2)))
                .forEach(voltage -> LOGGER.info("Found <{}> in the database.", voltage));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/techshard/batch/configuration/VoltageFieldSetMapper.java:
--------------------------------------------------------------------------------
package com.techshard.batch.configuration;

import com.techshard.batch.dao.entity.Voltage;
import org.springframework.batch.item.file.mapping.FieldSetMapper;
import org.springframework.batch.item.file.transform.FieldSet;
import org.springframework.stereotype.Component;

/**
 * Maps a tokenized line with the fields {@code volt} and {@code time} to a {@link Voltage}.
 */
@Component
public class VoltageFieldSetMapper implements FieldSetMapper<Voltage> {

    /**
     * @param fieldSet the tokenized line; must contain the named fields {@code volt}
     *                 and {@code time}
     * @return a new {@link Voltage} populated from those two fields
     */
    @Override
    public Voltage mapFieldSet(FieldSet fieldSet) {
        final Voltage voltage = new Voltage();
        voltage.setVolt(fieldSet.readBigDecimal("volt"));
        voltage.setTime(fieldSet.readDouble("time"));
        return voltage;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/techshard/batch/configuration/VoltageProcessor.java:
--------------------------------------------------------------------------------
package com.techshard.batch.configuration;

import com.techshard.batch.dao.entity.Voltage;

import org.springframework.batch.item.ItemProcessor;

import java.math.BigDecimal;

/**
 * Item processor that copies the incoming {@link Voltage} into a fresh instance
 * without altering its values.
 */
public class VoltageProcessor implements ItemProcessor<Voltage, Voltage> {

    /**
     * @param voltage the item produced by the reader
     * @return a new {@link Voltage} carrying the same {@code volt} and {@code time}
     */
    @Override
    public Voltage process(final Voltage voltage) {
        final BigDecimal volt = voltage.getVolt();
        final double time = voltage.getTime();
        return new Voltage(volt, time);
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/techshard/batch/dao/entity/Voltage.java:
--------------------------------------------------------------------------------
package com.techshard.batch.dao.entity;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.validation.constraints.NotNull;
import java.math.BigDecimal;

/**
 * JPA entity holding one voltage reading: a decimal {@code volt} value and the
 * {@code time} it belongs to.
 */
@Entity
public class Voltage {

    @Id
    @Column(name = "ID", nullable = false)
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private long id;

    @NotNull
    @Column(name = "volt", precision = 10, scale = 4, nullable = false)
    private BigDecimal volt;

    // No @NotNull here: a primitive double can never be null, so the constraint could not fail.
    @Column(name = "time", nullable = false)
    private double time;

    /** Creates an empty instance; the no-argument constructor is also what JPA uses. */
    public Voltage() {
    }

    /**
     * @param volt the voltage value
     * @param time the time value of the reading
     */
    public Voltage(final BigDecimal volt, final double time) {
        this.volt = volt;
        this.time = time;
    }

    public long getId() {
        return id;
    }

    public BigDecimal getVolt() {
        return volt;
    }

    public void setVolt(final BigDecimal volt) {
        this.volt = volt;
    }

    public double getTime() {
        return time;
    }

    public void setTime(final double time) {
        this.time = time;
    }

    /**
     * Returns a readable representation, so log statements that print a {@code Voltage}
     * show its values instead of an identity hash.
     */
    @Override
    public String toString() {
        return "Voltage{id=" + id + ", volt=" + volt + ", time=" + time + "}";
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/techshard/batch/dao/repository/IVoltageRepository.java:
--------------------------------------------------------------------------------
package com.techshard.batch.dao.repository;

import com.techshard.batch.dao.entity.Voltage;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;

/**
 * Spring Data JPA repository for {@link Voltage} entities, keyed by their {@code long} id.
 */
@Repository
public interface IVoltageRepository extends JpaRepository<Voltage, Long> {

}
--------------------------------------------------------------------------------
/src/main/resources/Volts.csv:
-------------------------------------------------------------------------------- 1 | 9.2128;0 2 | 8.7952;0.02 3 | 8.4175;0.04 4 | 8.0795;0.06 5 | 7.7018;0.08 6 | 7.3836;0.1 7 | 7.0655;0.12 8 | 6.7474;0.14 9 | 6.4691;0.16 10 | 6.1908;0.18 11 | 5.9323;0.2 12 | 5.6738;0.22 13 | 5.4353;0.24 14 | 5.2166;0.26 15 | 4.9979;0.28 16 | 4.7792;0.3 17 | 4.5803;0.32 18 | 4.3815;0.34 19 | 4.2026;0.36 20 | 4.0237;0.38 21 | 3.8646;0.4 22 | 3.7056;0.42 23 | 3.5465;0.44 24 | 3.4073;0.46 25 | 3.2682;0.48 26 | 3.129;0.5 27 | 3.0097;0.52 28 | 2.8904;0.54 29 | 2.7711;0.56 30 | 2.6518;0.58 31 | 2.5524;0.6 32 | 2.453;0.62 33 | 2.3735;0.64 34 | 2.2542;0.66 35 | 2.1747;0.68 36 | 2.0951;0.7 37 | 2.0156;0.72 38 | 1.9361;0.74 39 | 1.8566;0.76 40 | 1.7969;0.78 41 | 1.7174;0.8 42 | 1.6577;0.82 43 | 1.5981;0.84 44 | 1.5385;0.86 45 | 1.4788;0.88 46 | 1.4192;0.9 47 | 1.3794;0.92 48 | 1.3198;0.94 49 | 1.28;0.96 50 | 1.2402;0.98 -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.datasource.url=jdbc:h2:mem:batchdb 2 | spring.datasource.driverClassName=org.h2.Driver 3 | spring.datasource.username=sa 4 | spring.datasource.password=password 5 | spring.jpa.database-platform=org.hibernate.dialect.H2Dialect 6 | spring.h2.console.enabled=true --------------------------------------------------------------------------------