├── .gitignore
├── docker-compose.yaml
├── pom.xml
├── readme.adoc
└── src
└── main
├── java
└── com
│ └── amrut
│ └── prabhu
│ └── bulkdatainsert
│ ├── Book.java
│ ├── BookRepository.java
│ └── BulkDataInsertApplication.java
└── resources
└── application.yaml
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | !.mvn/wrapper/maven-wrapper.jar
3 | !**/src/main/**/target/
4 | !**/src/test/**/target/
5 |
6 |
7 | ### IntelliJ IDEA ###
8 | .idea
9 | *.iws
10 | *.iml
11 | *.ipr
12 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | mysql-server:
4 | container_name: "mysql"
5 | image: "mysql:5.7"
6 | ports:
7 | - "13306:3306"
8 | environment:
9 | MYSQL_ROOT_PASSWORD: "zZijbfa64LnL2quYCFyH5jcRn2f3iUNLSrfRwiX3"
10 | MYSQL_DATABASE: "book_db"
11 | TZ: "Europe/Berlin"
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
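1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
4 |     <modelVersion>4.0.0</modelVersion>
5 |     <parent>
6 |         <groupId>org.springframework.boot</groupId>
7 |         <artifactId>spring-boot-starter-parent</artifactId>
8 |         <version>2.4.2</version>
9 |         <relativePath/>
10 |     </parent>
11 |     <groupId>com.amrut.prabhu</groupId>
12 |     <artifactId>bulk-data-insert</artifactId>
13 |     <version>0.0.1-SNAPSHOT</version>
14 |     <name>bulk-data-insert</name>
15 |     <description>Project to demonstrate bulk data insert</description>
16 |
17 |     <properties>
18 |         <java.version>11</java.version>
19 |     </properties>
20 |
21 |     <dependencies>
22 |         <dependency>
23 |             <groupId>org.springframework.boot</groupId>
24 |             <artifactId>spring-boot-starter-data-jpa</artifactId>
25 |         </dependency>
26 |         <dependency>
27 |             <groupId>org.springframework.boot</groupId>
28 |             <artifactId>spring-boot-starter-web</artifactId>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>mysql</groupId>
32 |             <artifactId>mysql-connector-java</artifactId>
33 |         </dependency>
34 |         <dependency>
35 |             <groupId>org.projectlombok</groupId>
36 |             <artifactId>lombok</artifactId>
37 |             <optional>true</optional>
38 |         </dependency>
39 |         <dependency>
40 |             <groupId>org.springframework.boot</groupId>
41 |             <artifactId>spring-boot-starter-test</artifactId>
42 |             <scope>test</scope>
43 |         </dependency>
44 |     </dependencies>
45 |
46 |     <build>
47 |         <plugins>
48 |             <plugin>
49 |                 <groupId>org.springframework.boot</groupId>
50 |                 <artifactId>spring-boot-maven-plugin</artifactId>
51 |                 <configuration>
52 |                     <excludes>
53 |                         <exclude>
54 |                             <groupId>org.projectlombok</groupId>
55 |                             <artifactId>lombok</artifactId>
56 |                         </exclude>
57 |                     </excludes>
58 |                 </configuration>
59 |             </plugin>
60 |         </plugins>
61 |     </build>
62 |
63 | </project>
64 |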
--------------------------------------------------------------------------------
/readme.adoc:
--------------------------------------------------------------------------------
1 | = Spring Boot: JPA Bulk Database Insert
2 |
3 | In this project, I reduced the time to insert 10k records from 183 seconds to about 5 seconds.
4 |
5 | To get there I made the following changes:
6 |
7 | ==== 1) Change the number of records while inserting.
8 |
9 | i. Set the Hibernate batch insert size with the following property.
10 |
11 |
12 | spring.jpa.properties.hibernate.jdbc.batch_size=30
13 |
14 | ii. Add connection string properties.
15 |
16 |
17 | cachePrepStmts=true
18 | &useServerPrepStmts=true
19 | &rewriteBatchedStatements=true
20 |
21 | e.g
22 | jdbc:mysql://localhost:3306/BOOKS_DB?serverTimezone=UTC&cachePrepStmts=true&useServerPrepStmts=true&rewriteBatchedStatements=true
23 |
24 | iii. Changed the insert code so that each saveAll call receives a chunk of 30 records, matching the batch size set in the properties file.
25 |
26 | A very crude implementation of something like this.
27 |
28 | for (int i = 0; i < totalObjects; i = i + batchSize) {
29 | if( i+ batchSize > totalObjects){
30 | List books1 = books.subList(i, totalObjects);
31 | repository.saveAll(books1);
32 | break;
33 | }
34 | List books1 = books.subList(i, i + batchSize);
35 | repository.saveAll(books1);
36 | }
37 |
38 | This did not reduce the time by much: it dropped from 185 secs to 153 secs. That's approximately a 17% improvement.
39 |
40 |
41 | ==== 2) Change the ID generation strategy.
42 |
43 | This made a major impact.
44 |
45 | I stopped using the `@GeneratedValue` annotation with the `GenerationType.IDENTITY` strategy on my entity class.
46 | Hibernate disables JDBC batch inserts with this strategy, because the id is only generated by the database when the row is inserted, so each insert has to be executed immediately to obtain it.
47 |
48 | I changed the strategy to SEQUENCE and provided a sequence generator.
49 |
50 | public class Book {
51 | @Id
52 | @GeneratedValue(strategy = SEQUENCE, generator = "seqGen")
53 | @SequenceGenerator(name = "seqGen", sequenceName = "seq", initialValue = 1)
54 | private Long id;
55 | }
56 |
57 | This change drastically changed the insert performance as Hibernate was able to leverage bulk insert.
58 | From the previous figure of 153 secs, the time to insert 10k records dropped to only 9 secs. That's a reduction in insert time of about 94%.
59 |
60 | Next, I pushed it further with higher batch sizes and noticed that doubling the batch size does not halve the time. The time to insert only gradually reduces.
61 |
62 | |===
63 | |Batch Size | Time to insert (Secs)
64 |
65 | |30
66 | |9.5
67 |
68 | |60
69 | |6.48
70 |
71 | |200
72 | |5.04
73 |
74 | |500
75 | |4.46
76 |
77 | |1000
78 | |4.39
79 |
80 | |2000
81 | |4.5
82 |
83 | |5000
84 | |5.09
85 |
86 | |===
87 |
88 |
89 | The optimal batch size I found for my case was 1000, which took around 4.39 secs for 10K records. Beyond that, performance started to degrade, as the figures above show.
--------------------------------------------------------------------------------
/src/main/java/com/amrut/prabhu/bulkdatainsert/Book.java:
--------------------------------------------------------------------------------
1 | package com.amrut.prabhu.bulkdatainsert;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Builder;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 |
8 | import javax.persistence.*;
9 |
10 | /**
11 |  * JPA entity for a single book row written by the bulk-insert demo.
12 |  *
13 |  * <p>The id is produced by the {@code seqGen} sequence generator (sequence
14 |  * name {@code seq}). Lombok supplies the accessors, the builder and both
15 |  * constructors.
16 |  */
17 | @Entity
18 | @Data
19 | @Builder
20 | @AllArgsConstructor
21 | @NoArgsConstructor
22 | public class Book {
23 |
24 |     /** Primary key, assigned from the {@code seq} sequence. */
25 |     @Id
26 |     @SequenceGenerator(name = "seqGen", sequenceName = "seq", initialValue = 1)
27 |     @GeneratedValue(generator = "seqGen", strategy = GenerationType.SEQUENCE)
28 |     private Long id;
29 |
30 |     /** Title of the book. */
31 |     private String name;
32 |
33 |     // The capitalised field name is kept as-is: the Lombok builder method
34 |     // Price(...) used by the application is derived from it.
35 |     private Integer Price;
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/com/amrut/prabhu/bulkdatainsert/BookRepository.java:
--------------------------------------------------------------------------------
1 | package com.amrut.prabhu.bulkdatainsert;
2 |
3 | import org.springframework.data.jpa.repository.JpaRepository;
4 | import org.springframework.stereotype.Repository;
5 |
6 | /**
7 |  * Spring Data JPA repository for {@link Book} entities, keyed by their
8 |  * {@code Long} id. No custom queries are declared; only the inherited
9 |  * {@code JpaRepository} operations are used.
10 |  */
11 | @Repository
12 | public interface BookRepository extends JpaRepository<Book, Long> {
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/amrut/prabhu/bulkdatainsert/BulkDataInsertApplication.java:
--------------------------------------------------------------------------------
1 | package com.amrut.prabhu.bulkdatainsert;
2 |
3 | import org.springframework.beans.factory.annotation.Autowired;
4 | import org.springframework.beans.factory.annotation.Value;
5 | import org.springframework.boot.SpringApplication;
6 | import org.springframework.boot.autoconfigure.SpringBootApplication;
7 | import org.springframework.boot.context.event.ApplicationReadyEvent;
8 | import org.springframework.context.event.EventListener;
9 |
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | import java.util.stream.Collectors;
13 | import java.util.stream.IntStream;
14 |
15 | /**
16 |  * Spring Boot entry point for the bulk-insert demo.
17 |  *
18 |  * <p>Once the application is ready it builds a fixed number of {@link Book}
19 |  * objects in memory and persists them through {@code BookRepository.saveAll}
20 |  * in chunks of the configured Hibernate JDBC batch size, printing how long
21 |  * each phase took.
22 |  */
23 | @SpringBootApplication
24 | public class BulkDataInsertApplication {
25 |
26 |     public static void main(String[] args) {
27 |         SpringApplication.run(BulkDataInsertApplication.class, args);
28 |     }
29 |
30 |     @Autowired
31 |     private BookRepository repository;
32 |
33 |     /** Chunk size for each saveAll call, read from the Hibernate JDBC batch size property. */
34 |     @Value("${spring.jpa.properties.hibernate.jdbc.batch_size}")
35 |     private int batchSize;
36 |
37 |     /**
38 |      * Generates the demo books and inserts them, printing timings to stdout.
39 |      */
40 |     @EventListener(ApplicationReadyEvent.class)
41 |     public void doSomethingAfterStartup() {
42 |
43 |         int totalObjects = 10000;
44 |
45 |         long start = System.currentTimeMillis();
46 |         List<Book> books = IntStream.range(0, totalObjects)
47 |                 .mapToObj(val -> Book.builder()
48 |                         .name("books" + val)
49 |                         .Price(val)
50 |                         .build())
51 |                 .collect(Collectors.toList());
52 |
53 |         System.out.println("Finished creating "+totalObjects+" objects in memory in:" + (System.currentTimeMillis() - start)/1000);
54 |
55 |         start = System.currentTimeMillis();
56 |         System.out.println("Inserting ..........");
57 |
58 |         saveInBatches(books);
59 |
60 |         System.out.println("Finished inserting "+totalObjects+" objects in :" + (System.currentTimeMillis() - start));
61 |     }
62 |
63 |     /**
64 |      * Saves every book in consecutive chunks of at most {@code batchSize} elements.
65 |      *
66 |      * @param books the books to persist; each element is passed to saveAll exactly once
67 |      */
68 |     private void saveInBatches(List<Book> books) {
69 |         int total = books.size();
70 |         // A non-positive batch size would never advance the loop (or would make
71 |         // subList throw), so fall back to saving everything in one chunk.
72 |         int chunkSize = batchSize > 0 ? batchSize : Math.max(total, 1);
73 |         for (int from = 0; from < total; from += chunkSize) {
74 |             // subList's end index is exclusive, so clamp to total (not total - 1)
75 |             // to keep the last element of a partial final chunk.
76 |             int to = Math.min(from + chunkSize, total);
77 |             repository.saveAll(books.subList(from, to));
78 |         }
79 |     }
80 | }
81 |
--------------------------------------------------------------------------------
/src/main/resources/application.yaml:
--------------------------------------------------------------------------------
1 | spring:
2 | datasource:
3 | url: jdbc:mysql://localhost:13306/book_db?serverTimezone=UTC&cachePrepStmts=true&useServerPrepStmts=true&rewriteBatchedStatements=true
4 | username: root
5 | password: zZijbfa64LnL2quYCFyH5jcRn2f3iUNLSrfRwiX3
6 | jpa:
7 | # show-sql: true
8 | generate-ddl: true
9 | properties:
10 | hibernate:
11 | jdbc:
12 | batch_size: 1000
13 | # cache:
14 | # use_second_level_cache: true
15 | # order_updates: true
16 | # order_inserts: true
17 | # generate_statistics: true
--------------------------------------------------------------------------------