Java implements file fragment upload and breakpoint resume

1. Simple multipart upload

If a large file's upload is interrupted partway through, restarting from the beginning wastes a lot of time, and you cannot tell which parts were already uploaded before the disconnect. Therefore, we should split large files into fragments first to prevent the problems mentioned above.

Front end code:

<!-- html code -->
<!DOCTYPE html>
<html>
<head>
    <title>Example of file upload</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<form>
    <input type="file" id="fileInput" multiple>
    <button type="button" onclick="upload()">Upload</button>
</form>
<script>
    function upload() {
        var fileInput = document.getElementById('fileInput');
        var files = fileInput.files;
        // Guard: nothing selected yet.
        if (!files || files.length === 0) {
            console.error('No file selected');
            return;
        }
        var file = files[0];
        var fileName = file.name;
        var chunkSize = 1024 * 10; // each chunk is 10 KB (small on purpose, to simulate large-file uploads)
        var totalChunks = Math.ceil(file.size / chunkSize); // total number of chunks for the file
        var currentChunk = 0; // index of the chunk currently being sent

        // Upload one chunk, then recurse until every chunk has been sent.
        function uploadChunk() {
            var xhr = new XMLHttpRequest();
            var formData = new FormData();

            // Tell the backend which chunk this is and how many there are in total.
            formData.append('currentChunk', currentChunk);
            formData.append('totalChunks', totalChunks);
            formData.append('fileName', fileName);

            // Slice the current chunk out of the file by byte offset.
            var start = currentChunk * chunkSize;
            var end = Math.min(file.size, start + chunkSize);
            var chunk = file.slice(start, end);
            formData.append('chunk', chunk);

            xhr.open('POST', '/file/upload');

            // Attach the handlers BEFORE send(), and check the HTTP status:
            // the original advanced to the next chunk even on a server error.
            xhr.onload = function() {
                if (xhr.status !== 200) {
                    console.error('Chunk ' + currentChunk + ' failed: HTTP ' + xhr.status);
                    return;
                }
                currentChunk++;
                if (currentChunk < totalChunks) {
                    uploadChunk(); // more chunks to send
                } else {
                    mergeChunks(); // all chunks uploaded, ask the backend to merge
                }
            };
            xhr.onerror = function() {
                console.error('Network error while uploading chunk ' + currentChunk);
            };
            xhr.send(formData);
        }

        // Ask the backend to merge all uploaded chunks into the final file.
        function mergeChunks() {
            var xhr = new XMLHttpRequest();
            xhr.open("POST", "/file/merge", true);
            xhr.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
            xhr.onreadystatechange = function() {
                if (xhr.readyState === 4) {
                    if (xhr.status === 200) {
                        console.log("File upload completed:", xhr.responseText);
                    } else {
                        console.error(xhr.responseText);
                    }
                }
            };
            // encodeURIComponent: file names may contain '&', '=', spaces, etc.
            xhr.send("fileName=" + encodeURIComponent(fileName));
        }

        // Start the upload with the first chunk.
        uploadChunk();
    }
</script>
</body>
</html>

ps: The above code is completed using html + js, and the request is sent using xhr. The address of xhr.open is its own local interface address. Since the usual test does not need to upload large files, the size of each segment is defined as 10KB to simulate large file uploads.

Backend code:

//java code
@RestController
@RequestMapping("/file")
public class FileController {<!-- -->
    @Autowired
    private ResourceLoader resourceLoader;

    @Value("${my.config.savePath}")
    private String uploadPath;

    private Map<String, List<File>> chunksMap = new ConcurrentHashMap<>();

    @PostMapping("/upload")
    public void upload(@RequestParam int currentChunk, @RequestParam int totalChunks,
                       @RequestParam MultipartFile chunk,@RequestParam String fileName) throws IOException {<!-- -->

        // save the shards to a temporary folder
        String chunkName = chunk.getOriginalFilename() + "." + currentChunk;
        File chunkFile = new File(uploadPath, chunkName);
        chunk. transferTo(chunkFile);

        // Record multipart upload status
        List<File> chunkList = chunksMap. get(fileName);
        if (chunkList == null) {<!-- -->
            chunkList = new ArrayList<>(totalChunks);
            chunksMap.put(fileName, chunkList);
        }
        chunkList.add(chunkFile);
    }

    @PostMapping("/merge")
    public String merge(@RequestParam String fileName) throws IOException {<!-- -->

        // Get all the shards and merge them into one file in the order of the shards
        List<File> chunkList = chunksMap. get(fileName);
        if (chunkList == null || chunkList. size() == 0) {<!-- -->
            throw new RuntimeException("Shard does not exist");
        }

        File outputFile = new File(uploadPath, fileName);
        try (FileChannel outChannel = new FileOutputStream(outputFile).getChannel()) {<!-- -->
            for (int i = 0; i < chunkList. size(); i ++ ) {<!-- -->
                try (FileChannel inChannel = new FileInputStream(chunkList. get(i)). getChannel()) {<!-- -->
                    inChannel. transferTo(0, inChannel. size(), outChannel);
                }
                chunkList.get(i).delete(); // Delete a fragment
            }
        }

        chunksMap.remove(fileName); // delete records
        // Get the access URL of the file
        Resource resource =
        resourceLoader.getResource("file:" + uploadPath + fileName); //Since it is a local file, it starts with "file". If it is a server, please change it to your own server prefix
        return resource. getURI(). toString();
    }
}

ps: A map records which fragments have been uploaded. Here the fragments are stored in a local folder, and after all fragments are uploaded they are merged and then deleted. ConcurrentHashMap is used instead of HashMap because it is thread-safe.
The above is just a simple file upload code, but as long as another modification is made on it, the above-mentioned problems can be solved.

2. Solve the problem

1. How to avoid a large number of hard disk reads and writes
One disadvantage of the above code is that the content of the fragment is stored in the local folder. And when merging, judging whether the upload is complete is also to read the file from the folder. A large number of read and write operations on the disk are not only slow, but also cause the server to crash, so the following code uses redis to store fragmentation information to avoid excessive reading and writing to the disk. (You can also use mysql or other middleware to store information, since reading and writing should not be in mysql, so I use redis).

2. The target file is too large, what should I do if it is disconnected during the upload process
Use redis to store the content of the fragment. After disconnection, the file information is still stored in redis. When the user uploads again, check whether redis has the content of the fragment, and skip it if so.

3. How to find out that the file data uploaded on the front-end page is inconsistent with the original file data
When the front end calls the upload interface, it first calculates the checksum of the file, and then sends the file and the checksum to the backend. The backend calculates the checksum again for the file, and compares the two checksums. If they are equal , it means that the data is consistent. If not, an error will be reported and the front end will re-upload the segment. js calculation checksum code:

// Calculate the SHA-256 checksum of the file
//javascript code
    // Computes the SHA-256 checksum of a file chunk using the browser's
    // built-in Web Crypto API, resolving with a lower-case hex string
    // (the same encoding the Java backend produces).
    function calculateHash(fileChunk) {
        return new Promise((resolve, reject) => {
            const blob = new Blob([fileChunk]);
            const reader = new FileReader();
            // Attach handlers before starting the read.
            reader.onload = () => {
                const arrayBuffer = reader.result;
                const crypto = window.crypto || window.msCrypto;
                crypto.subtle.digest("SHA-256", arrayBuffer)
                    .then(hash => {
                        // Convert the ArrayBuffer digest to lower-case hex.
                        const hashArray = Array.from(new Uint8Array(hash));
                        const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
                        resolve(hashHex);
                    })
                    .catch(reject); // bug fix: digest failures were silently dropped before
            };
            reader.onerror = () => {
                reject(new Error('Failed to calculate hash'));
            };
            reader.readAsArrayBuffer(blob);
        });
    }
//java code
public static String calculateHash(byte[] fileChunk) throws Exception {<!-- -->
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        md. update(fileChunk);
        byte[] hash = md.digest();
        ByteBuffer byteBuffer = ByteBuffer. wrap(hash);
        StringBuilder hexString = new StringBuilder();
        while (byteBuffer.hasRemaining()) {<!-- -->
            hexString.append(String.format("x", byteBuffer.get()));
        }
        return hexString.toString();
    }

be careful:

1. The checksum algorithm used by the front end and the back end must be identical, otherwise they will never produce the same result.
2. The front-end code above uses the browser's built-in Web Crypto API (window.crypto.subtle), which needs no extra library but does require a secure context (HTTPS or localhost). If you prefer a library such as crypto-js instead, you can include it via a script tag or download it directly:
<script src="//i2.wp.com/cdn.bootcss.com/crypto-js/3.1.9-1/crypto-js.min.js"></script>

The download address of crypto If github cannot be opened, you may need to use npm to download

4. If it is disconnected during the upload process, how to judge which fragments are not uploaded

For redis, detect which fragment subscript does not exist, if it does not exist, store it in the list, and finally return the list to the front end

//java code
// NOTE(review): the original looped "i < hashMap.size()", which can never
// detect chunks missing from the tail: if only chunks 0..k-1 arrived,
// size() == k and the loop finds nothing missing. The loop must run over
// the EXPECTED chunk count (sent by the front end / stored with the
// upload), not over however many chunks happen to be present.
boolean allChunksUploaded = true;
List<Integer> missingChunkIndexes = new ArrayList<>();
for (int i = 0; i < totalChunks; i++) { // totalChunks = expected number of chunks
    if (!hashMap.containsKey(String.valueOf(i))) {
        allChunksUploaded = false;
        missingChunkIndexes.add(i);
    }
}
if (!allChunksUploaded) {
    // Return the missing indexes so the front end re-uploads only those.
    return ResponseEntity.status(HttpStatus.BAD_REQUEST).body(missingChunkIndexes);
}

3. Complete code

1. Introduce dependencies

<dependency>
  <groupId>io.lettuce</groupId>
    <artifactId>lettuce-core</artifactId>
    <version>6.1.4.RELEASE</version>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>

lettuce is a Redis client; you do not need to import it separately if you just use RedisTemplate directly.

2. Front-end code




    
    File Upload Demo




<script src="//i2.wp.com/cdn.bootcss.com/crypto-js/3.1.9-1/crypto-js.min.js"></script>

3. Backend interface code

@RestController
@RequestMapping("/file2")
public class File2Controller {<!-- -->

    private static final String FILE_UPLOAD_PREFIX = "file_upload:";

    @Autowired
    private ResourceLoader resourceLoader;

    @Value("${my.config.savePath}")
    private String uploadPath;
    @Autowired
    private ThreadLocal<RedisConnection> redisConnectionThreadLocal;
    
// @Autowired
// private RedisTemplate redisTemplate;

    @PostMapping("/upload")
    public ResponseEntity<?> uploadFile(@RequestParam("chunk") MultipartFile chunk,
                                        @RequestParam("chunkIndex") Integer chunkIndex,
                                        @RequestParam("chunkSize") Integer chunkSize,
                                        @RequestParam("chunkChecksum") String chunkChecksum,
                                        @RequestParam("fileId") String fileId) throws Exception {<!-- -->
        if (StringUtils.isBlank(fileId) || StringUtils.isEmpty(fileId)) {<!-- -->
            fileId = UUID.randomUUID().toString();
        }
        String key = FILE_UPLOAD_PREFIX + fileId;
        byte[] chunkBytes = chunk. getBytes();
        String actualChecksum = calculateHash(chunkBytes);
        if (!chunkChecksum.equals(actualChecksum)) {<!-- -->
            return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("Chunk checksum does not match");
        }
// if(!redisTemplate.opsForHash().hasKey(key,String.valueOf(chunkIndex))) {<!-- -->
// redisTemplate.opsForHash().put(key, String.valueOf(chunkIndex), chunkBytes);
// }
        RedisConnection connection = redisConnectionThreadLocal.get();

        Boolean flag = connection.hExists(key.getBytes(), String.valueOf(chunkIndex).getBytes());
        if (flag==null || flag == false) {<!-- -->
            connection.hSet(key.getBytes(), String.valueOf(chunkIndex).getBytes(), chunkBytes);
        }

        return ResponseEntity.ok(fileId);

    }

    public static String calculateHash(byte[] fileChunk) throws Exception {<!-- -->
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        md. update(fileChunk);
        byte[] hash = md.digest();
        ByteBuffer byteBuffer = ByteBuffer. wrap(hash);
        StringBuilder hexString = new StringBuilder();
        while (byteBuffer.hasRemaining()) {<!-- -->
            hexString.append(String.format("x", byteBuffer.get()));
        }
        return hexString.toString();
    }

    @PostMapping("/merge")
    public ResponseEntity<?> mergeFile(@RequestParam("fileId") String fileId, @RequestParam("fileName") String fileName) throws IOException {<!-- -->
        String key = FILE_UPLOAD_PREFIX + fileId;
        RedisConnection connection = redisConnectionThreadLocal.get();
        try {<!-- -->
            Map<byte[], byte[]> chunkMap = connection.hGetAll(key.getBytes());
// Map chunkMap = redisTemplate.opsForHash().entries(key);
            if (chunkMap.isEmpty()) {<!-- -->
                return ResponseEntity.status(HttpStatus.NOT_FOUND).body("File not found");
            }

            Map<String,byte[]> hashMap = new HashMap<>();
            for(Map.Entry<byte[],byte[]> entry :chunkMap.entrySet()){<!-- -->
                hashMap.put((new String(entry.getKey())),entry.getValue());
            }
            // Check if all shards have been uploaded
            boolean allChunksUploaded = true;
            List<Integer> missingChunkIndexes = new ArrayList<>();
            for (int i = 0; i < hashMap. size(); i ++ ) {<!-- -->
                if (!hashMap.containsKey(String.valueOf(i))) {<!-- -->
                    allChunksUploaded = false;
                    missingChunkIndexes.add(i);
                }
            }
            if (!allChunksUploaded) {<!-- -->
                return ResponseEntity.status(HttpStatus.BAD_REQUEST).body(missingChunkIndexes);
            }

            File outputFile = new File(uploadPath, fileName);
            boolean flag = mergeChunks(hashMap, outputFile);
            Resource resource = resourceLoader.getResource("file:" + uploadPath + fileName);


            if (flag == true) {<!-- -->
                connection.del(key.getBytes());
// redisTemplate. delete(key);
                return ResponseEntity.ok().body(resource.getURI().toString());
            } else {<!-- -->
                return ResponseEntity.status(555).build();
            }
        } catch (Exception e) {<!-- -->
            e.printStackTrace();
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(e.getMessage());
        }
    }

    private boolean mergeChunks(Map<String, byte[]> chunkMap, File destFile) {<!-- -->
        try (FileOutputStream outputStream = new FileOutputStream(destFile)) {<!-- -->
            // Merge the shards in order
            for (int i = 0; i < chunkMap. size(); i ++ ) {<!-- -->
                byte[] chunkBytes = chunkMap. get(String. valueOf(i));
                outputStream.write(chunkBytes);
            }
            return true;
        } catch (IOException e) {<!-- -->
            e.printStackTrace();
            return false;
        }
    }
}

4. Redis configuration

@Configuration
public class RedisConfig {<!-- -->
    @Value("${spring.redis.host}")
    private String host;

    @Value("${spring.redis.port}")
    private int port;

    @Value("${spring.redis.password}")
    private String password;

    @Bean
    public RedisConnectionFactory redisConnectionFactory() {<!-- -->
        RedisStandaloneConfiguration config = new RedisStandaloneConfiguration();
        config.setHostName(host);
        config.setPort(port);
        config.setPassword(RedisPassword.of(password));
        return new LettuceConnectionFactory(config);
    }
    @Bean
    public ThreadLocal<RedisConnection> redisConnectionThreadLocal(RedisConnectionFactory redisConnectionFactory) {<!-- -->
        return ThreadLocal.withInitial(() -> redisConnectionFactory.getConnection());
    }
}

redisConnectionThreadLocal is used so that each thread reuses a single Redis connection instead of establishing a new one per request, which is time-consuming.

Summary

The above is the complete code for the feature. Remember to modify uploadPath when using the code, otherwise it will fail to find the target directory. As a further improvement, you can compute the checksum of the whole file, store the checksum, file name, file size, and file type in a database (e.g. MySQL), and check for an existing record before the next large-file upload; if it already exists, skip the upload to avoid wasting space.