Writing and using the HDFS Java API

First, you need to set up a Hadoop environment and start Hadoop.

If you haven't set it up yet, you can read this article:

Hadoop cluster construction and configuration - CSDN Blog

Here I use IntelliJ IDEA on Windows to connect to the Hadoop cluster running in a virtual machine.

(1) Install Hadoop on Windows

The Hadoop installed on Windows must be the same version as the Hadoop on the virtual machine, and the JDK versions must match as well.

You can check the versions with the following commands:

hadoop version    # print the Hadoop version
java -version     # print the JDK version
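
You can also check the client side from code: the Hadoop library on the classpath reports its own version. A minimal sketch (the class name CheckVersions is made up for this example):

package org.example;

import org.apache.hadoop.util.VersionInfo;

public class CheckVersions {
    public static void main(String[] args) {
        // Version of the Hadoop client libraries on the classpath
        System.out.println("Hadoop: " + VersionInfo.getVersion());
        // Version of the JDK running this program
        System.out.println("Java:   " + System.getProperty("java.version"));
    }
}

Both values should match what the two commands above print on the virtual machine.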

(2) HDFS Java API

1. Write data to HDFS

The reference code is as follows:

Create a new class:

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

public class WriteFileOnHDFS {
    @Test
    public void write1() throws Exception {
        // Create the configuration object
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        // Get a FileSystem handle for the cluster
        FileSystem fs = FileSystem.get(new URI(uri), conf);
        Path path = new Path(uri + "/lyf/hadoop/lyf0316.txt");
        // create() returns an output stream to the new file
        // (and overwrites the file if it already exists)
        FSDataOutputStream out = fs.create(path);
        out.write("habse".getBytes());
        out.close();
        fs.close();
        System.out.println("File [" + path + "] was written successfully!");
    }
}
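
Note that fs.create(path) overwrites the file if it already exists. If the call fails with a permission error instead, connect as a user that owns the target directory, either with the three-argument FileSystem.get(new URI(uri), conf, "root") used in the read example below, or by setting System.setProperty("HADOOP_USER_NAME", "root") as the upload example does.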

2. Read data from HDFS

The reference code is as follows:

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

public class ReadFileOnHDFS {
    @Test
    public void read1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        // Access the file system as the "root" user
        FileSystem fs = FileSystem.get(new URI(uri), conf, "root");
        Path path = new Path(uri + "/lyf/hadoop/lyf0316.txt");
        // Open the file and print it line by line
        FSDataInputStream in = fs.open(path);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String nextLine;
        while ((nextLine = br.readLine()) != null) {
            System.out.println(nextLine);
        }
        br.close();
        in.close();
        fs.close();
    }
}
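
If you only need to dump the file to the console, Hadoop's IOUtils can do the stream copy for you. A minimal sketch along the same lines as read1() above (the class and method names are placeholders):

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

import java.net.URI;

public class ReadFileWithIOUtils {
    @Test
    public void read2() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        FileSystem fs = FileSystem.get(new URI(uri), conf, "root");
        FSDataInputStream in = fs.open(new Path("/lyf/hadoop/lyf0316.txt"));
        // Copy the stream to the console; the last argument tells IOUtils
        // not to close System.out when it is done
        IOUtils.copyBytes(in, System.out, 4096, false);
        in.close();
        fs.close();
    }
}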

3. Get all files under HDFS

The reference code is as follows:

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.text.SimpleDateFormat;

public class HDFSOutputFile {
    // Recursively list every file under the given HDFS directory
    public static void lsDir(Configuration conf, String remoteDir)
            throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);

        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(
                dirPath, true);
        while (remoteIterator.hasNext()) {
            FileStatus s = remoteIterator.next();
            System.out.println("Path:" + s.getPath().toString());
            System.out.println("Permission:" + s.getPermission().toString());
            System.out.println("Size:" + s.getLen());
            long timeStamp = s.getModificationTime();
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String date = format.format(timeStamp);
            System.out.println("Time:" + date);
            System.out.println();
        }
        fs.close();
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        String remoteDir = "/"; // HDFS path
        try {
            System.out.println("(Recursively) reading information about all files in the directory: " + remoteDir);
            HDFSOutputFile.lsDir(conf, remoteDir);
            System.out.println("Reading completed");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
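
listFiles(dirPath, true) walks the directory tree recursively and returns files only. If you just want the direct children of a single directory, including subdirectories, fs.listStatus is enough; a minimal sketch (the class name ListDirOnce is made up for this example):

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListDirOnce {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        FileSystem fs = FileSystem.get(conf);
        // listStatus returns only the direct children of the directory
        for (FileStatus s : fs.listStatus(new Path("/"))) {
            System.out.println((s.isDirectory() ? "d " : "- ") + s.getPath());
        }
        fs.close();
    }
}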

4. Upload files to HDFS (from Windows to the Linux VM)

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        // Access HDFS as the "root" user
        System.setProperty("HADOOP_USER_NAME", "root");
        FileSystem fs = FileSystem.get(conf);
        // Copy the local Windows file into the /lyf directory on HDFS
        fs.copyFromLocalFile(new Path("e:/test/test.txt"), new Path("/lyf"));
        fs.close();
    }
}
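
copyFromLocalFile also has an overload copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) if you want to delete the local source after the copy or control whether an existing HDFS file is overwritten.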

5. Create a new folder

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

public class CreateNewFolder {
    @Test
    public void createDir01() {
        String HDFS_PATH = "hdfs://192.168.170.80:8020";
        Configuration configuration = new Configuration();
        FileSystem fileSystem = null;

        try {
            fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
            // Create the /linux directory on HDFS
            fileSystem.mkdirs(new Path("/linux"));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the connection to the cluster
            if (fileSystem != null) {
                try {
                    fileSystem.close();
                } catch (Exception ignored) {
                }
            }
            System.out.println("--------------end---------------");
        }
    }
}
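
mkdirs behaves much like mkdir -p: it also creates any missing parent directories and returns a boolean indicating success.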

6. Rename a file or folder

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

public class ChangeFileName {
    @Test
    public void renameFile03() {
        String HDFS_PATH = "hdfs://192.168.170.80:8020/";
        Configuration configuration = new Configuration();
        FileSystem fileSystem = null;
        try {
            fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
            Path oldPath = new Path("/linux/linux.txt");
            Path newPath = new Path("/linux/hadoop.txt");
            // rename returns true on success, false otherwise
            System.out.println(fileSystem.rename(oldPath, newPath));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the connection to the cluster
            if (fileSystem != null) {
                try {
                    fileSystem.close();
                } catch (Exception ignored) {
                }
            }
        }
    }
}
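
rename() is also how you move a file to a different directory on HDFS; it returns false rather than throwing an exception when the operation cannot be carried out, for example when the source path does not exist.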

7. Determine whether a file exists

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileExists {
    public static void main(String[] args) {
        try {
            String fileName = "/lyf";
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            FileSystem fs = FileSystem.get(conf);
            // exists() works for files and directories alike
            if (fs.exists(new Path(fileName))) {
                System.out.println("File exists");
            } else {
                System.out.println("File does not exist");
            }
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

8. Get the block information of a large file

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

/**
 * Use the Java API to operate the HDFS file system.
 *
 * Because the class lives under test/, it is best run as a unit test,
 * with jUnit declared in the pom.
 * Two hook methods wrap each test: (1) @Before runs before it; (2) @After runs after it.
 *
 * Key points:
 * 1) Create a Configuration
 * 2) Get a FileSystem
 * 3) The rest is ordinary HDFS API calls
 */
public class HDFSApp {

    public static final String HDFS_PATH = "hdfs://192.168.170.80:8020";
    Configuration configuration = null;
    FileSystem fileSystem = null;

    @Before
    public void setup() throws Exception {
        System.out.println("-----setup-----");
        configuration = new Configuration();
        configuration.set("dfs.replication", "1");
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
    }

    /*
     * View file block information
     */
    @Test
    public void getFileBlockLocations() throws Exception {
        FileStatus fileStatus = fileSystem.getFileStatus(new Path("/lyf/hadoop/hadoop-3.1.3.tar.gz"));
        BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());

        // One line per block replica: datanode, offset of the block in the file, block length
        for (BlockLocation block : blocks) {
            for (String name : block.getNames()) {
                System.out.println(name + " : " + block.getOffset() + " : " + block.getLength());
            }
        }
    }

    @After
    public void tearDown() throws Exception {
        System.out.println("-----tearDown-----");

        // Close the connection and clear the fields
        if (fileSystem != null) {
            fileSystem.close();
        }
        configuration = null;
        fileSystem = null;
    }
}
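
For this test to print more than one block, pick a file larger than the HDFS block size (128 MB by default in Hadoop 3.x); the hadoop-3.1.3.tar.gz installation archive used here is a few hundred MB, so it spans multiple blocks.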

9. Get all DataNodes in the cluster

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class GetNode {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        FileSystem fs = FileSystem.get(conf);
        // Cast to the HDFS implementation to reach DataNode-level information
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] infos = dfs.getDataNodeStats();

        for (int i = 0; i < infos.length; i++) {
            System.out.println("DataNode_" + i + "_Name:" + infos[i].getHostName());
        }
        fs.close();
    }
}
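
The cast to DistributedFileSystem only works because fs.defaultFS points at an hdfs:// URI; if the configuration fell back to the local file system, FileSystem.get(conf) would return a different implementation and the cast would throw a ClassCastException.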