The choice between forEach and Batch for MyBatis batch insertion

The choice of forEach and Batch for MyBatis batch insertion

When using the MyBatis framework, let you write a batch insert, will you only use the forEach tag loop in the mapper.xml file? Do you know the problems with using the forEach tag?

1. Prerequisite configuration

1.1, create a data table

Create a data table and set 22 fields. Maybe you are wondering why so many fields are created? Because the advantages of BATCH can only be reflected when there are multiple fields and a large amount of data. That is to say, in the case where there are few data table fields and the amount of saved data is small, the batch insertion implemented by forEach still has advantages, but there is an implicit The risk of is not shown here.

/*
 Source Server Type : MySQL
 Source Server Version : 80027
*/
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for t_user
-- ----------------------------
DROP TABLE IF EXISTS `t_user`;
CREATE TABLE `t_user` (
  `id` int NOT NULL AUTO_INCREMENT,
  `user_id` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name2` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name3` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name4` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name5` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name6` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name7` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name8` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name9` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name10` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name11` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name12` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name13` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name14` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name15` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name16` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name17` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name18` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name19` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  `user_name20` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;
1.2. Writing Java classes

As for the pom information of the MyBatis framework jar, you can find it in the maven warehouse, or paste it on the mybatis official website, so I won’t go into details here, because the author It was tested in MyBatis source code.

public class User {<!-- -->

    // ID identification
    private Integer id;
    private String userId;
    private String userName;
    private String userName2;
    private String userName3;
    private String userName4;
    private String userName5;
    private String userName6;
    private String userName7;
    private String userName8;

    public User() {<!-- -->
    }

    public User(Integer id, String userId, String userName, String userName2,
    String userName3, String userName4, String userName5, String userName6,
    String userName7, String userName8) {<!-- -->
        this.id = id;
        this. userId = userId;
        this. userName = userName;
        this. userName2 = userName2;
        this. userName3 = userName3;
        this. userName4 = userName4;
        this. userName5 = userName5;
        this. userName6 = userName6;
        this. userName7 = userName7;
        this. userName8 = userName8;
    }
    /************** The getter and setter methods of each attribute are omitted here ***************/
}
1.3, write two insertion methods in the mapper.xml file

Note: This is for testing, so write data table fields at will, please follow the field naming rules in actual projects.

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tjau.mapper.UserMapper">
    <insert id="insert" parameterType="com.tjau.pojo.User">
        insert into t_user(user_id, user_name, user_name2, user_name3, user_name4,
        user_name5, user_name6, user_name7, user_name8, user_name9, user_name10,
        user_name11, user_name12, user_name13, user_name14, user_name15,
        user_name16, user_name17, user_name18, user_name19, user_name20)
        values (#{userId, jdbcType=VARCHAR},
                #{userName, jdbcType=VARCHAR},
                #{userName2, jdbcType=VARCHAR},
                #{userName3, jdbcType=VARCHAR},
                #{userName4, jdbcType=VARCHAR},
                #{userName5, jdbcType=VARCHAR},
                #{userName6, jdbcType=VARCHAR},
                #{userName7, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR}
                )
    </insert>

    <insert id="insertBatch" parameterType="java.util.List">
        insert into t_user(user_id, user_name, user_name2, user_name3, user_name4,
        user_name5, user_name6, user_name7, user_name8, user_name9, user_name10,
        user_name11, user_name12, user_name13, user_name14, user_name15,
        user_name16, user_name17, user_name18, user_name19, user_name20)
        values
        <foreach collection="list" item="item" separator=",">
            (
                #{item. userId, jdbcType=VARCHAR},
                #{item. userName, jdbcType=VARCHAR},
                #{item.userName2, jdbcType=VARCHAR},
                #{item.userName3, jdbcType=VARCHAR},
                #{item.userName4, jdbcType=VARCHAR},
                #{item.userName5, jdbcType=VARCHAR},
                #{item.userName6, jdbcType=VARCHAR},
                #{item.userName7, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR}
            )
        </foreach>
    </insert>
</mapper>
<?xml version="1.0" encoding="UTF-8"?>
<! DOCTYPE configuration
        PUBLIC "-//mybatis.org//DTD Config 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-config.dtd">
<configuration>

    <!--Part 1: Data Source Configuration-->
    <environments default="development">
        <environment id="development">
            <!--Use jdbc transaction management -->
            <transactionManager type="JDBC"/>
            <!-- database connection pool -->
            <dataSource type="POOLED">
                <property name="driver" value="com.mysql.cj.jdbc.Driver"/>
                <property name="url"
                          value="jdbc:mysql:///mybatis?useSSL=false &amp;characterEncoding=UTF-8 &amp;serverTimezone=UTC"/>
                <property name="username" value="root"/>
                <property name="password" value="root"/>
            </dataSource>
        </environment>
    </environments>

    <!--Part 2: Import mapping configuration file -->
    <mappers>
        <mapper resource="mapper/UserMapper.xml"/>
    </mappers>

</configuration>

1.4, the last is the code test class
public class MyBatisBatchTest {<!-- -->

    public static void main(String[] args) throws IOException {<!-- -->
        // 1. Read the configuration file
        InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
        SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
        // 2. Create and insert data
        List<User> list = new ArrayList<>();
        for (int i = 0; i < 5000; i ++ ) {<!-- -->
            list.add(new User(null, "userId-" + i, "userName-" + i, "userName2-" + i,
                    "userName3-" + i, "userName4-" + i, "userName5-" + i,
                    "userName6-" + i, "userName7-" + i, "userName8-" + i));
        }
        // 3. Different insert TODO
    }
}

At this point, the basic pre-work is completed, so the next step is to face the choice between forEach and Batch.

2. Hidden risks of forEach

As mentioned earlier, there is an implicit risk in using forEach, so use code to reproduce it.
Write the test code:

public static void main(String[] args) throws IOException {<!-- -->
        // 1. Read the configuration file
        InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
        SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
        // 2. Create and insert data
        List<User> list = new ArrayList<>();
        for (int i = 0; i < 6000; i ++ ) {<!-- -->
            list.add(new User(null, "userId-" + i, "userName-" + i, "userName2-" + i,
                    "userName3-" + i, "userName4-" + i, "userName5-" + i,
                    "userName6-" + i, "userName7-" + i, "userName8-" + i));
        }
        // 3. forEach insert
        insertForEach(sqlSessionFactory, list);
    }
    
    /**
     * forEach batch insert
     * @param sqlSessionFactory sqlSession factory
     * @param list batch insert data
     */
public static void insertForEach(SqlSessionFactory sqlSessionFactory,
List<User> list){<!-- -->
        // 1. Get the mapper proxy class - here the default is SIMPLE mode
        SqlSession sqlSession = sqlSessionFactory. openSession();
        UserMapper userMapper = sqlSession. getMapper(UserMapper. class);

        // 2. Batch insert
        long start = System. currentTimeMillis();
        int count = userMapper.insertBatch(list);
        sqlSession.commit();
        long end = System. currentTimeMillis();
        System.out.println(count);
        System.out.println("ForEach time: " + (end - start));
        sqlSession. close();
    }

Results of the:

That’s right, an error was reported, because the essence of the forEach loop is to piece together the insert statements, send them to the database and execute them.
The principle of this method to improve the batch insertion speed is to combine the traditional:

INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");

Converted to:

INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2"),
                                                 ("data1", "data2"),
                                                 ("data1", "data2"),
                                                 ("data1", "data2"),
                                                 ("data1", "data2");

This will cause the data packet inserted at one time to be too large, exceeding the default value of the database. The default max_allowed_packet of the database is 4M by default. You can avoid this error by modifying the size of max_allowed_packet:

set global max_allowed_packet = 2*1024*1024*10

After restarting the MySQL database,
In this way, the batch insertion problem of forEach is solved this time, but in the actual project development, it is not realistic to modify the database parameters at will.
Therefore, if the project design can ensure that the batch insertion of data does not have a large amount of data, you can choose forEach as the solution for batch insertion. If there is a surge in data volume, use forEach There will be a risk of mine burying.

3, BATCH and forEach multi-field batch saving

Due to the use of forEach to achieve batch insertion, the database has a bottleneck in the amount of received data. Next, the time gap between the two can only be tested by reducing the amount of data. After testing, the amount of data is set to 5600 pieces of data.

public static void main(String[] args) throws IOException {<!-- -->
        // 1. Read the configuration file
        InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
        SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
        // 2. Create and insert data
        List<User> list = new ArrayList<>();
        for (int i = 0; i < 5600; i ++ ) {<!-- -->
            list.add(new User(null, "userId-" + i, "userName-" + i, "userName2-" + i,
                    "userName3-" + i, "userName4-" + i, "userName5-" + i,
                    "userName6-" + i, "userName7-" + i, "userName8-" + i));
        }
        // 3. Insertion rules
// insertBatch(sqlSessionFactory, list);
        insertForEach(sqlSessionFactory, list);
    }

    /**
     * BATCH batch insert
     * @param sqlSessionFactory sqlSession factory
     * @param list batch insert data
     */
    public static void insertBatch(SqlSessionFactory sqlSessionFactory, List<User> list){<!-- -->
        // 1. Get the mapper proxy class
        SqlSession sqlSession = sqlSessionFactory. openSession(ExecutorType. BATCH);
        UserMapper userMapper = sqlSession. getMapper(UserMapper. class);

        // 2. Batch insert
        long start = System. currentTimeMillis();
        list.forEach(userMapper::insert);
        sqlSession.commit();
        long end = System. currentTimeMillis();
        System.out.println(list.size());
        System.out.println("BATCH time: " + (end - start));
        sqlSession. close();
    }

    /**
     * forEach batch insert
     * @param sqlSessionFactory sqlSession factory
     * @param list batch insert data
     */
    public static void insertForEach(SqlSessionFactory sqlSessionFactory, List<User> list){<!-- -->
        // 1. Get the mapper proxy class
        SqlSession sqlSession = sqlSessionFactory. openSession();
        UserMapper userMapper = sqlSession. getMapper(UserMapper. class);

        // 2. Batch insert
        long start = System. currentTimeMillis();
        int count = userMapper.insertBatch(list);
        sqlSession.commit();
        long end = System. currentTimeMillis();
        System.out.println(count);
        System.out.println("ForEach time: " + (end - start));
        sqlSession. close();
    }

The following execution results are as follows:
forEach insert time-consuming:

BATCH insert time-consuming:

By comparison, we will find that max_allowed_packet defaults to the critical point of 4M, and forEach can only save 5600 pieces of data, BATCH has a slight lead in time compared to forEach, not to mention when the amount of data is much larger than 5600.

How long will it take BATCH to insert tens of thousands of data?

Try as follows:

 public static void main(String[] args) throws IOException {<!-- -->
        // 1. Read the configuration file
        InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
        SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
        // 2. Create and insert data
        List<User> list = new ArrayList<>();
        for (int i = 0; i < 16000; i ++ ) {<!-- -->
            list.add(new User(null, "userId-" + i, "userName-" + i, "userName2-" + i,
                    "userName3-" + i, "userName4-" + i, "userName5-" + i,
                    "userName6-" + i, "userName7-" + i, "userName8-" + i));
        }
        // 3. Insertion rules
        insertBatch(sqlSessionFactory, list);
    }
    /**
     * BATCH batch insert
     * @param sqlSessionFactory sqlSession factory
     * @param list batch insert data
     */
    public static void insertBatch(SqlSessionFactory sqlSessionFactory, List<User> list){<!-- -->
        // 1. Get the mapper proxy class
        SqlSession sqlSession = sqlSessionFactory. openSession(ExecutorType. BATCH);
        UserMapper userMapper = sqlSession. getMapper(UserMapper. class);

        // 2. Batch insert
        long start = System. currentTimeMillis();
        list.forEach(userMapper::insert);
        sqlSession.commit();
        long end = System. currentTimeMillis();
        System.out.println(list.size());
        System.out.println("BATCH time: " + (end - start));
        sqlSession. close();
    }

Insert result: takes 2 seconds

4, BATCH and forEach save fields in batches

Modify the mapper.xml file to introduce the insertion field as 10 fields.

<insert id="insert" parameterType="com.tjau.pojo.User">
        insert into t_user(user_id, user_name, user_name2, user_name3, user_name4, user_name5, user_name6, user_name7, user_name8)
        values (#{userId, jdbcType=VARCHAR},
                #{userName, jdbcType=VARCHAR},
                #{userName2, jdbcType=VARCHAR},
                #{userName3, jdbcType=VARCHAR},
                #{userName4, jdbcType=VARCHAR},
                #{userName5, jdbcType=VARCHAR},
                #{userName6, jdbcType=VARCHAR},
                #{userName7, jdbcType=VARCHAR},
                #{userName8, jdbcType=VARCHAR}
                )
    </insert>

    <insert id="insertBatch" parameterType="java.util.List">
        insert into t_user(user_id, user_name, user_name2, user_name3, user_name4, user_name5, user_name6, user_name7, user_name8)
        values
        <foreach collection="list" item="item" separator=",">
            (
                #{item. userId, jdbcType=VARCHAR},
                #{item. userName, jdbcType=VARCHAR},
                #{item.userName2, jdbcType=VARCHAR},
                #{item.userName3, jdbcType=VARCHAR},
                #{item.userName4, jdbcType=VARCHAR},
                #{item.userName5, jdbcType=VARCHAR},
                #{item.userName6, jdbcType=VARCHAR},
                #{item.userName7, jdbcType=VARCHAR},
                #{item.userName8, jdbcType=VARCHAR}
            )
        </foreach>
    </insert>

Adjust the amount of data that needs to be saved in batches to 12000, and you may wonder why this value is, because this is the critical value for batch insertion of forEach.

public static void main(String[] args) throws IOException {<!-- -->
        // 1. Read the configuration file
        InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
        SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
        // 2. Create and insert data
        List<User> list = new ArrayList<>();
        for (int i = 0; i < 12000; i ++ ) {<!-- -->
            list.add(new User(null, "userId-" + i, "userName-" + i, "userName2-" + i,
                    "userName3-" + i, "userName4-" + i, "userName5-" + i,
                    "userName6-" + i, "userName7-" + i, "userName8-" + i));
        }
        // 3. Insertion rules
// insertBatch(sqlSessionFactory, list);
        insertForEach(sqlSessionFactory, list);
    }


    /**
     * BATCH batch insert
     * @param sqlSessionFactory sqlSession factory
     * @param list batch insert data
     */
    public static void insertBatch(SqlSessionFactory sqlSessionFactory, List<User> list){<!-- -->
        // 1. Get the mapper proxy class
        SqlSession sqlSession = sqlSessionFactory. openSession(ExecutorType. BATCH);
        UserMapper userMapper = sqlSession. getMapper(UserMapper. class);

        // 2. Batch insert
        long start = System. currentTimeMillis();
        list.forEach(userMapper::insert);
        sqlSession.commit();
        long end = System. currentTimeMillis();
        System.out.println(list.size());
        System.out.println("BATCH time: " + (end - start));
        sqlSession. close();
    }

    /**
     * forEach batch insert
     * @param sqlSessionFactory sqlSession factory
     * @param list batch insert data
     */
    public static void insertForEach(SqlSessionFactory sqlSessionFactory, List<User> list){<!-- -->
        // 1. Get the mapper proxy class
        SqlSession sqlSession = sqlSessionFactory. openSession();
        UserMapper userMapper = sqlSession. getMapper(UserMapper. class);

        // 2. Batch insert
        long start = System. currentTimeMillis();
        int count = userMapper.insertBatch(list);
        sqlSession.commit();
        long end = System. currentTimeMillis();
        System.out.println(count);
        System.out.println("ForEach time: " + (end - start));
        sqlSession. close();
    }

forEach insert time-consuming:

BATCH insert time-consuming:

It can be clearly found here that when inserting data in a small number of field tables in batches, using forEach does not exceed the default 4M receiving package of MySQL, the performance is better than BATCH outperformed.

Supplement:

In the SpringBoot integration MyBatis project, how to change the default execution mode of MyBatis?

@Autowired
private SqlSessionFactory sqlSessionFactroy;

@Transactional
public void insertBatch(List<User> list){<!-- -->
SqlSession sqlSession = sqlSessionFactory. openSession(ExecutorType. BATCH);
UserMapper userMapper = sqlSession. getMapper(UserMapper. class);
list.forEach(userMapper::insert);
    // Key point: Don't forget to commit at the end
    sqlSession.commit();
    // sqlSession. close();
}

Adding the @Transactional annotation to the method can avoid repeated creation of different sqlSession, so that all mappers of this method class use the same sqlSession >, instead of opening a sqlSession for each method executed.
If you are not using the @Transactional annotation, remember to close the sqlSession.

5. Summary

When choosing a batch insert method, you need to consider the following three points:

  • The number of inserted data
  • The number of fields inserted into the data table
  • The content size of the inserted field

When there are many data fields or data items to save, carefully choose forEach, and give priority to BATCH;
Otherwise, forEach is preferred.