Parsing Excel files with large amounts of data using EasyExcel

Problem description:

The project needs to import Excel files on the order of 100,000 rows. When I first wrote the import I did not expect the data volume to be this large, and as a result the application easily ran into a memory overflow (OOM) during import.

Cause analysis

  1. The import uses a collection to buffer the Excel row values. After each batch was written to the database, the collection was not cleared, so it kept occupying memory.
  2. Inserts were done with MyBatis batch insertion. When MyBatis processes the parameter list it loops through the records one by one, which is clearly not very efficient. Since MyBatis ultimately talks to the database through JDBC, we can use JDBC directly for batch operations (see the sketch after this list).
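
For reference, the standard JDBC batch idiom looks like the sketch below. The table and column names (point, id, name) and the PointRow type are placeholders, not the project's real schema.

// Minimal sketch of the JDBC batch idiom: addBatch() / executeBatch().
// The table, columns, and PointRow type are hypothetical.
try (Connection conn = dataSource.getConnection();
     PreparedStatement ps = conn.prepareStatement(
             "INSERT INTO point (id, name) VALUES (?, ?)")) {
    conn.setAutoCommit(false);
    for (PointRow row : rows) {
        ps.setLong(1, row.getId());
        ps.setString(2, row.getName());
        ps.addBatch();      // queue the statement instead of executing it immediately
    }
    ps.executeBatch();      // execute all queued statements together
    conn.commit();
}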

Solution:

Based on the two causes analyzed above, the following changes were made:

EasyExcel Listener

package com.southsmart.sgeocserver.analyze.pipe.listener;

import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.alibaba.excel.exception.ExcelAnalysisException;
import lombok.SneakyThrows;
import org.springframework.util.StringUtils;

import java.lang.reflect.Field;
import java.util.*;

/**
 * The type Point read listener.
 *
 * @version 1.0
 */
public class PointReadListener extends AnalysisEventListener<ExcelPointVO> {

    private final Class<ExcelPointVO> tClass;

    protected final IPointService pointService;

    public PointReadListener(Class<ExcelPointVO> tClass, IPointService pointService) {
        this.tClass = tClass;
        this.pointService = pointService;
    }
    
    /**
     * Buffer for parsed rows; flushed to the database in batches and cleared after each flush.
     */
    List<ExcelPointVO> list = new ArrayList<>();

    @Override
    public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
        super.invokeHeadMap(headMap, context);
    }

    /**
     * Called on every listener when any one listener reports an error. If an exception is thrown
     * here, the entire read terminates.
     *
     * @param exception the exception raised during analysis
     * @param context the analysis context
     */
    @Override
    public void onException(Exception exception, AnalysisContext context) throws Exception {
        list.clear();
        super.onException(exception, context);
    }

    /**
     * Triggered for every row that is analysed.
     *
     * @param data one row's values; the same as {@link AnalysisContext#readRowHolder()}
     * @param context the analysis context
     */
    @SneakyThrows
    @Override
    public void invoke(ExcelPointVO data, AnalysisContext context) {
        Class<? extends ExcelPointVO> clazz = data.getClass();
        Field[] declaredFields = clazz.getDeclaredFields();
        for (Field declaredField : declaredFields) {
            declaredField.setAccessible(true);
            Class<?> type = declaredField.getType();
            // Normalize null/blank String fields to the empty string
            if (type.isAssignableFrom(String.class) && !StringUtils.hasText((String) declaredField.get(data))) {
                declaredField.set(data, "");
            }
        }
        list.add(data);
        if (list.size() >= Constants.GENERAL_ONCE_SAVE_TO_DB_ROWS_JDBC) {
            // Flush to the database once the batch threshold is reached;
            // saveData() clears the list afterwards so it can be GC'd
            saveData();
        }
    }

    /**
     * Save data to DB
     */
    private void saveData() {
        if (!list.isEmpty()) {
            pointService.batchSaveImportPointData(list);
            list.clear();
        }
    }

    /**
     * Called once after all rows have been analysed; flushes any remaining buffered rows.
     *
     * @param context the analysis context
     */
    @Override
    public void doAfterAllAnalysed(AnalysisContext context) {
        saveData();
    }
}
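
For completeness, here is how the listener might be wired up. This is a minimal sketch assuming the file arrives as an InputStream (for example from a Spring MultipartFile); the actual entry point in the project may differ.

import com.alibaba.excel.EasyExcel;
import java.io.InputStream;

// Hypothetical wiring; the InputStream source and service injection are assumptions.
public void importPoints(InputStream in, IPointService pointService) {
    EasyExcel.read(in, ExcelPointVO.class, new PointReadListener(ExcelPointVO.class, pointService))
            .sheet()   // read the first sheet
            .doRead(); // streams rows through the listener without loading the whole file
}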

JDBC batch insertion implementation class

    public Map<String, Object> batchSaveImportPointData(List<ExcelPointVO> dataList) {
        Map<String, Object> result = new HashMap<>();
        // Nothing to insert: return immediately so the next call can proceed
        if (dataList.isEmpty()) {
            return result;
        }
        // Group the rows by target table type
        Map<String, List<ExcelPointVO>> listMap = dataList.stream().collect(Collectors.groupingBy(ExcelPointVO::getTableType));
        for (Map.Entry<String, List<ExcelPointVO>> entry : listMap.entrySet()) {
            // JDBC multi-row insert + manual transaction, flushing up to 2000 rows at a time
            Connection conn = null;
            PreparedStatement ps1 = null;
            try {
                long startTime = System.currentTimeMillis();
                // Get table name
                String name = "your table name";
                // Build the multi-row INSERT from the business SQL helper
                StringBuilder stringBuilder = new StringBuilder();
                String sql = dealSql(entry.getValue());
                stringBuilder.append("INSERT INTO ").append(name).append(" (\"id\", \"modelid\", ...) VALUES ")
                        .append(sql).append(" ON CONFLICT (id) DO NOTHING");
                log.info("{} rows, import start time: {} ms", entry.getValue().size(), startTime);
                conn = jdbcDruidUtils.getConnection();
                // Control the transaction manually: disable auto-commit
                conn.setAutoCommit(false);
                ps1 = conn.prepareStatement(stringBuilder.toString());
                // Execute the multi-row insert
                ps1.execute();
                // Commit the transaction manually
                conn.commit();
                long endTime = System.currentTimeMillis();
                log.info("{} rows, import end time: {} ms", entry.getValue().size(), endTime);
                log.info("{} rows, import took: {} ms", entry.getValue().size(), endTime - startTime);
                result.put("success", "1");
            } catch (Exception e) {
                result.put("exception", "0");
                log.error("batch insert failed", e);
            } finally {
                // Return the statement and connection to the pool
                JDBCDruidUtils.close(ps1);
                JDBCDruidUtils.close(conn);
            }
        }
        return result;
    }
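
The dealSql helper above is project-specific and not shown. A minimal sketch of what it might look like, assuming it joins each row into a (v1, v2, ...) tuple for the multi-row INSERT; the getId/getModelId accessors are hypothetical, and in real code every value must be escaped or validated, since string-built SQL is open to injection.

// Hypothetical sketch of dealSql: joins rows into "(...), (...), ..." for a multi-row INSERT.
// Accessors are assumptions; escape or validate all values before interpolating them.
private String dealSql(List<ExcelPointVO> rows) {
    StringJoiner values = new StringJoiner(", ");
    for (ExcelPointVO row : rows) {
        values.add("('" + row.getId() + "', '" + row.getModelId() + "')");
    }
    return values.toString();
}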

JDBCDruidUtils utility class

package com.southsmart.sgeocserver.analyze.util;

import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

@Component
public class JDBCDruidUtils {
    /**
     * Druid connection pool, injected as a DataSource
     */
    @Resource
    private DataSource dataSource;

    /**
     * Get a connection object from the connection pool
     *
     * @return a pooled Connection
     * @throws SQLException thrown so that callers know when a connection could not be obtained
     */
    public Connection getConnection() throws SQLException {
        return dataSource.getConnection();
    }

    /**
     * Release (return) resources
     *
     * @param stmt Statement object to execute SQL
     * @param conn Connection database connection object
     */
    public static void close(Statement stmt, Connection conn) {
        // Delegate to the three-parameter close, passing null for the result set
        close(null, stmt, conn);
    }

    /**
     * Release (return) resources
     *
     * @param stmt Statement object to execute SQL
     */
    public static void close(Statement stmt) {
        // Delegate to the three-parameter close, passing null for the result set and connection
        close(null, stmt, null);
    }

    /**
     * Release (return) resources
     *
     * @param conn Connection database connection object
     */
    public static void close(Connection conn) {
        // Delegate to the three-parameter close, passing null for the result set and statement
        close(null, null, conn);
    }

    /**
     * Release (return) resources
     *
     * @param rs ResultSet result set
     * @param stmt Statement object to execute SQL
     * @param conn Connection database connection object
     */
    public static void close(ResultSet rs, Statement stmt, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }

        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }

        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
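
As an aside, on Java 7+ the manual close() calls can be replaced with try-with-resources, which closes the statement and the connection automatically even when an exception is thrown. A minimal sketch, assuming the SQL string has already been built:

// Equivalent resource handling with try-with-resources (Java 7+).
try (Connection conn = jdbcDruidUtils.getConnection();
     PreparedStatement ps = conn.prepareStatement(sql)) {
    conn.setAutoCommit(false);
    ps.execute();
    conn.commit();
} // ps and conn are closed here automatically, in reverse order of declaration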

Having read other people's optimization write-ups (see the reference links below), a further improvement to consider later is parallelizing the import with multiple threads.
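
One possible direction is sketched below, under the assumption that batchSaveImportPointData is safe to call concurrently (each call obtains its own connection from the pool); the pool size and chunk size are illustrative only.

// Hypothetical multi-threaded variant: split the rows into chunks and insert them in parallel.
// allRows, pointService, and the pool/chunk sizes are assumptions.
public void parallelImport(List<ExcelPointVO> allRows, IPointService pointService) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4);
    try {
        List<Future<?>> futures = new ArrayList<>();
        int chunkSize = 2000; // matches the existing batch threshold
        for (int i = 0; i < allRows.size(); i += chunkSize) {
            List<ExcelPointVO> chunk = allRows.subList(i, Math.min(i + chunkSize, allRows.size()));
            futures.add(pool.submit(() -> pointService.batchSaveImportPointData(chunk)));
        }
        for (Future<?> f : futures) {
            f.get(); // rethrows as ExecutionException if any chunk failed
        }
    } finally {
        pool.shutdown();
    }
}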

Reference links

  1. https://blog.csdn.net/weixin_36754290/article/details/123715039
  2. https://blog.csdn.net/vnjohn/article/details/125980327