├── .gitignore ├── docker-compose.yaml ├── pom.xml ├── readme.adoc └── src └── main ├── java └── com │ └── amrut │ └── prabhu │ └── bulkdatainsert │ ├── Book.java │ ├── BookRepository.java │ └── BulkDataInsertApplication.java └── resources └── application.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | !**/src/main/**/target/ 4 | !**/src/test/**/target/ 5 | 6 | 7 | ### IntelliJ IDEA ### 8 | .idea 9 | *.iws 10 | *.iml 11 | *.ipr 12 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | mysql-server: 4 | container_name: "mysql" 5 | image: "mysql:5.7" 6 | ports: 7 | - "13306:3306" 8 | environment: 9 | MYSQL_ROOT_PASSWORD: "zZijbfa64LnL2quYCFyH5jcRn2f3iUNLSrfRwiX3" 10 | MYSQL_DATABASE: "book_db" 11 | TZ: "Europe/Berlin" -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 2.4.2 9 | 10 | 11 | com.amrut.prabhu 12 | bulk-data-insert 13 | 0.0.1-SNAPSHOT 14 | bulk-data-insert 15 | Project to demonstrate bulk data insert 16 | 17 | 18 | 11 19 | 20 | 21 | 22 | 23 | org.springframework.boot 24 | spring-boot-starter-data-jpa 25 | 26 | 27 | org.springframework.boot 28 | spring-boot-starter-web 29 | 30 | 31 | mysql 32 | mysql-connector-java 33 | 34 | 35 | org.projectlombok 36 | lombok 37 | true 38 | 39 | 40 | org.springframework.boot 41 | spring-boot-starter-test 42 | test 43 | 44 | 45 | 46 | 47 | 48 | 49 | org.springframework.boot 50 | spring-boot-maven-plugin 51 | 52 | 53 | 54 | org.projectlombok 55 | lombok 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 
-------------------------------------------------------------------------------- /readme.adoc: -------------------------------------------------------------------------------- 1 | = Spring Boot: JPA Bulk Database Insert 2 | 3 | In this project, I reduced the time to insert 10k records from 183 seconds to just 5 seconds. 4 | 5 | To achieve this, I made the following changes: 6 | 7 | ==== 1) Change the number of records while inserting. 8 | 9 | i. Set the Hibernate batch insert size with the following property. 10 | 11 | 12 | spring.jpa.properties.hibernate.jdbc.batch_size=30 13 | 14 | ii. Add connection string properties. 15 | 16 | 17 | cachePrepStmts=true 18 | &useServerPrepStmts=true 19 | &rewriteBatchedStatements=true 20 | 21 | e.g. 22 | jdbc:mysql://localhost:3306/BOOKS_DB?serverTimezone=UTC&cachePrepStmts=true&useServerPrepStmts=true&rewriteBatchedStatements=true 23 | 24 | iii. Changed the inserting code so that each `saveAll` call receives a batch of 30 records, matching the batch size set in the properties file. 25 | 26 | A very crude implementation looks something like this. 27 | 28 | for (int i = 0; i < totalObjects; i = i + batchSize) { 29 | if( i+ batchSize > totalObjects){ 30 | List books1 = books.subList(i, totalObjects - 1); 31 | repository.saveAll(books1); 32 | break; 33 | } 34 | List books1 = books.subList(i, i + batchSize); 35 | repository.saveAll(books1); 36 | } 37 | 38 | This did not reduce the time by much: it dropped from 185 secs to 153 secs. That's approximately an 18% improvement. 39 | 40 | 41 | ==== 2) Change the ID generation strategy. 42 | 43 | This made a major impact. 44 | 45 | I stopped using the `@GeneratedValue` annotation with the strategy `GenerationType.IDENTITY` on my entity class. 46 | Hibernate disables batch inserts with this strategy, because it has to make a select call to get the id from the database to insert each row. 47 | 48 | I changed the strategy to SEQUENCE and provided a sequence generator. 
49 | 50 | public class Book { 51 | @Id 52 | @GeneratedValue(strategy = SEQUENCE, generator = "seqGen") 53 | @SequenceGenerator(name = "seqGen", sequenceName = "seq", initialValue = 1) 54 | private Long id; 55 | } 56 | 57 | This change drastically improved the insert performance, as Hibernate was able to leverage bulk insert. 58 | From the previous result of 153 secs, the time to insert 10k records dropped to only 9 secs. That's an increase in performance of nearly 95%. 59 | 60 | Next, I pushed it further with higher batch sizes and noticed that doubling the batch size does not halve the time. The time to insert only gradually reduces. 61 | 62 | |=== 63 | |Batch Size | Time to insert (Secs) 64 | 65 | |30 66 | |9.5 67 | 68 | |60 69 | |6.48 70 | 71 | |200 72 | |5.04 73 | 74 | |500 75 | |4.46 76 | 77 | |1000 78 | |4.39 79 | 80 | |2000 81 | |4.5 82 | 83 | |5000 84 | |5.09 85 | 86 | |=== 87 | 88 | 89 | The optimal setting I found for my case was a batch size of 1000, which took around 4.39 secs for 10K records. Beyond that, I saw the performance degrade, as you can see in the graph. 
-------------------------------------------------------------------------------- /src/main/java/com/amrut/prabhu/bulkdatainsert/Book.java: --------------------------------------------------------------------------------
package com.amrut.prabhu.bulkdatainsert;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import javax.persistence.*;

/**
 * JPA entity for a book row.
 *
 * <p>The identifier uses {@link GenerationType#SEQUENCE} through the generator
 * named {@code seqGen}, which is mapped to the database sequence {@code seq}
 * with an initial value of 1.
 *
 * <p>Lombok generates the accessors, the builder and both constructors.
 */
@Data
@Builder
@Entity
@NoArgsConstructor
@AllArgsConstructor
public class Book {

    /** Primary key, assigned from the {@code seq} sequence. */
    @Id
    @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "seqGen")
    @SequenceGenerator(name = "seqGen", sequenceName = "seq", initialValue = 1)
    private Long id;

    /** Book name. */
    private String name;

    // NOTE(review): the capitalised name breaks Java field naming conventions, but it
    // determines the Lombok-generated builder method Price(...), which
    // BulkDataInsertApplication calls. Rename only together with that caller.
    private Integer Price;

}
-------------------------------------------------------------------------------- /src/main/java/com/amrut/prabhu/bulkdatainsert/BookRepository.java: --------------------------------------------------------------------------------
package com.amrut.prabhu.bulkdatainsert;

import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;

/**
 * Spring Data JPA repository for {@link Book} entities, keyed by their {@code Long} id.
 *
 * <p>The type arguments are required: a raw {@code JpaRepository} gives Spring Data no
 * domain or id type to work with and makes calls such as {@code saveAll} unchecked.
 */
@Repository
public interface BookRepository extends JpaRepository<Book, Long> {

}
-------------------------------------------------------------------------------- /src/main/java/com/amrut/prabhu/bulkdatainsert/BulkDataInsertApplication.java: --------------------------------------------------------------------------------
package com.amrut.prabhu.bulkdatainsert;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.event.EventListener;

import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.stream.Collectors; 13 | import java.util.stream.IntStream; 14 | 15 | @SpringBootApplication 16 | public class BulkDataInsertApplication { 17 | 18 | public static void main(String[] args) { 19 | SpringApplication.run(BulkDataInsertApplication.class, args); 20 | } 21 | 22 | @Autowired 23 | private BookRepository repository; 24 | 25 | @Value("${spring.jpa.properties.hibernate.jdbc.batch_size}") 26 | private int batchSize; 27 | 28 | @EventListener(ApplicationReadyEvent.class) 29 | public void doSomethingAfterStartup() { 30 | 31 | int totalObjects = 10000; 32 | 33 | long start = System.currentTimeMillis(); 34 | List books = IntStream.range(0, totalObjects) 35 | .mapToObj(val -> Book.builder() 36 | .name("books" + val) 37 | .Price(val) 38 | .build()) 39 | .collect(Collectors.toList()); 40 | 41 | System.out.println("Finished creating "+totalObjects+" objects in memory in:" + (System.currentTimeMillis() - start)/1000); 42 | 43 | start = System.currentTimeMillis(); 44 | System.out.println("Inserting .........."); 45 | 46 | for (int i = 0; i < totalObjects; i += batchSize) { 47 | if( i+ batchSize > totalObjects){ 48 | List books1 = books.subList(i, totalObjects - 1); 49 | repository.saveAll(books1); 50 | break; 51 | } 52 | List books1 = books.subList(i, i + batchSize); 53 | repository.saveAll(books1); 54 | } 55 | 56 | System.out.println("Finished inserting "+totalObjects+" objects in :" + (System.currentTimeMillis() - start)); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/resources/application.yaml: -------------------------------------------------------------------------------- 1 | spring: 2 | datasource: 3 | url: jdbc:mysql://localhost:13306/book_db?serverTimezone=UTC&cachePrepStmts=true&useServerPrepStmts=true&rewriteBatchedStatements=true 4 | username: root 5 | password: zZijbfa64LnL2quYCFyH5jcRn2f3iUNLSrfRwiX3 6 | jpa: 7 | # 
show-sql: true 8 | generate-ddl: true 9 | properties: 10 | hibernate: 11 | jdbc: 12 | batch_size: 1000 13 | # cache: 14 | # use_second_level_cache: true 15 | # order_updates: true 16 | # order_inserts: true 17 | # generate_statistics: true --------------------------------------------------------------------------------