HDFS Java API Programming

Level 1: File reading and writing

Knowledge points

1. HDFS file creation and operation steps

step1: Get the FileSystem object;

step2: Write through FSDataOutputStream;

step3: Output the file content through FSDataInputStream.
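
A condensed, hedged sketch of the three steps (the class name QuickWriteRead is illustrative only; the full exercise template follows below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QuickWriteRead {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration()); // step1: get the FileSystem object
        Path file = new Path("/user/hadoop/myfile");
        FSDataOutputStream out = fs.create(file);            // step2: write through FSDataOutputStream
        out.writeUTF("https://www.educoder.net");
        out.close();
        FSDataInputStream in = fs.open(file);                // step3: read back through FSDataInputStream
        System.out.println(in.readUTF());
        in.close();
        fs.close();
    }
}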

Programming requirements

  • Get the Hadoop system configuration and create an HDFS file at the path /user/hadoop/myfile;
  • Write the string https://www.educoder.net into the myfile file;
  • Read back the contents of the newly created myfile file and output them.
import java.io.*;
import java.sql.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class hdfs {

    public static void main(String[] args) throws IOException {
//Please add code between Begin-End to complete the task requirements.
        /********* Begin *********/
        //Get the FileSystem object
        Configuration conf=new Configuration(); //Load the Hadoop configuration
        FileSystem fs=FileSystem.get(conf); //Get the file system
        Path file=new Path("/user/hadoop/myfile"); //Path of the file to create
        //Write through FSDataOutputStream
        FSDataOutputStream outStream=fs.create(file); //Get the output stream
        outStream.writeUTF("https://www.educoder.net"); //Write the required string
        outStream.close(); //Remember to close the output stream
        //Output the file content through FSDataInputStream
        FSDataInputStream inStream=fs.open(file); //Get the input stream
        String data=inStream.readUTF(); //Read the file content
        System.out.println(data); //Output the content that was read
        inStream.close(); //Close the input stream
        fs.close(); //Close the file system handle

        /********* End *********/

    }
  }

Level 2: File upload

Programming requirements

Upload a local text file to HDFS. If the specified file already exists in HDFS, the user can choose whether to append the local content to the end of the existing file or to overwrite it.
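
The template below makes the append/overwrite decision automatically in main. If the choice should really come from the user, it can be read interactively, for example with the Scanner class that the template already imports. A minimal sketch of such a prompt (the variable names are illustrative; it would replace the automatic assignment of choice in main):

        Scanner scanner = new Scanner(System.in);
        System.out.println("The file already exists. Type append or overwrite:");
        String choice = scanner.nextLine().trim(); // the user decides how to handle the existing file
        if (!choice.equals("append") && !choice.equals("overwrite")) {
            choice = "overwrite"; // fall back to a default on unexpected input
        }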

import java.io.*;
import java.sql.Date;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class hdfs {

      /**
     * Determine whether the path exists
     */
    public static boolean test(Configuration conf, String path) throws IOException {
       /*****start*****/
        //Please write code here to determine whether the file exists
        FileSystem fs = FileSystem.get(conf); //Get the FileSystem object
        return fs.exists(new Path(path)); //Return true if the file at the given path exists
        
        /*****end*****/
    }

    /**
     * Copy the file to the specified path
     * If the path already exists, overwrite it
     */
    public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        /*****start*****/
        //Please write the code to copy the file to the specified path here
        FileSystem fs = FileSystem.get(conf);//Get the object
        Path localPath=new Path(localFilePath);
        Path remotePath=new Path(remoteFilePath);
         /* The first parameter of fs.copyFromLocalFile indicates whether to delete the source file, and the second parameter indicates whether to overwrite it */
        fs.copyFromLocalFile(false,true,localPath,remotePath);

        /*****end*****/
    }
 
    /**
     *Append file content
     */
    public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
       /*****start*****/
        //Please write the code to append the file content here
        FileSystem fs=FileSystem.get(conf);
        Path remotePath=new Path(remoteFilePath);
        //Create a file to read into the stream
        FileInputStream in=new FileInputStream(localFilePath);
        //Create a file output stream. The output content will be appended to the end of the file
        FSDataOutputStream out=fs.append(remotePath);
        //Read and write file content
        byte[] data=new byte[1024];
        int read=-1;
        while((read=in.read(data))>0){
            out.write(data,0,read);
        }
        out.close(); //Close the HDFS output stream so the appended data is flushed
        in.close(); //Close the local input stream
   
        /*****end*****/
    }
    
    /**
     * Main function
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        createHDFSFile(conf);

        String localFilePath = "./file/text.txt"; // local path
        String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path
        String choice = "";

        try {
            /* Determine whether the file exists */
            boolean fileExists = false;
            if (hdfs.test(conf, remoteFilePath)) {
                fileExists = true;
                System.out.println(remoteFilePath + " already exists.");
                choice = "append"; //If the file exists, append to the end of the file
            } else {
                System.out.println(remoteFilePath + " does not exist.");
                choice = "overwrite"; //overwrite
            }

            /*****start*****/
            //Upload if the file does not exist; overwrite if choice equals overwrite; append if choice equals append.
            if (!fileExists) { // The file does not exist, upload it
                copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " uploaded to " + remoteFilePath);
            } else if (choice.equals("overwrite")) { //Choose overwrite
                copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " overwrote " + remoteFilePath);
            } else if (choice.equals("append")) { //Choose to append
                appendToFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " appended to " + remoteFilePath);
            }
            /*****end*****/

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

//Create HDFS file
public static void createHDFSFile(Configuration conf)throws IOException{
        FileSystem fs = FileSystem.get(conf); //Get the file system
        Path file = new Path("/user/hadoop/text.txt"); //Create file
        FSDataOutputStream outStream = fs.create(file); //Get the output stream
        outStream.writeUTF("hello##Hdfs");
        outStream.close();
        fs.close();
    }
   

}

Level 3: File Download

Programming requirements

Complete the function that downloads a file from HDFS to the local file system.
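
A practical note: fs.copyToLocalFile(remotePath, localPath) goes through the checksumming local file system, so a hidden .crc file usually appears next to the downloaded copy. If that is unwanted, the four-argument overload can write through the raw local file system instead. A hedged sketch of the alternative call inside copyToLocal below:

        // delSrc = false keeps the HDFS copy; useRawLocalFileSystem = true skips the local .crc file
        fs.copyToLocalFile(false, remotePath, localPath, true);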

import java.io.*;
import java.sql.Date;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class hdfs {
/**
     * Download files to local
     * Determine whether the local path already exists. If it already exists, automatically rename it.
     */
    public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        File f = new File(localFilePath);
        /*****start*****/
        /* Add code here to determine whether the local file exists. If the name is already taken, rename automatically (append _0, _1 ... to the file name) */
        if (f.exists()) {
            System.out.println(localFilePath + " already exists.");
            int i = 0;
            // Try localFilePath_0, localFilePath_1, ... until an unused name is found
            while (new File(localFilePath + "_" + i).exists()) {
                i++;
            }
            localFilePath = localFilePath + "_" + i;
            System.out.println("will be renamed to: " + localFilePath);
        }

        /*****end*****/

        /*****start*****/
        //Add the code to download the file locally here
        Path localPath=new Path(localFilePath);
        fs.copyToLocalFile(remotePath,localPath);
        /*****end*****/
       fs.close();
    }
    
    /**
     * Main function
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        createHDFSFile(conf);
        String localFilePath = "/tmp/output/text.txt"; // local path
        String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path

        try {
            //Call the method to download the file locally
            hdfs.copyToLocal(conf, remoteFilePath, localFilePath);
            System.out.println("Download completed");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

//Create HDFS file
public static void createHDFSFile(Configuration conf)throws IOException{
        FileSystem fs = FileSystem.get(conf); //Get the file system
        Path file = new Path("/user/hadoop/text.txt"); //Create file
        FSDataOutputStream outStream = fs.create(file); //Get the output stream
        outStream.writeUTF("hello hadoop HDFS www.educoder.net");
        outStream.close();
        fs.close();
    }

}

Level 4: Reading data using character streams

Knowledge points

1. Reading data using character streams is divided into three steps:

step1: Obtain the FileSystem object through the Configuration object;

step2: Obtain the FSDataInputStream object through fs;

step3: Read the file data in a loop through the character stream and output it.
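
Condensed, the three steps look like the sketch below, which simply prints to standard output (the exercise below writes to a local file instead; the HDFS path is the one the exercise uses):

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);                               // step1: FileSystem from Configuration
        FSDataInputStream in = fs.open(new Path("/user/hadoop/text.txt")); // step2: FSDataInputStream from fs
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {                        // step3: loop over the character stream
            System.out.println(line);
        }
        reader.close();
        in.close();
        fs.close();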

Programming requirements

Output the contents of the specified HDFS file to the specified local file.

import java.io.*;
import java.sql.Date;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class hdfs {
 /**
     * Read file content
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {

        /*****start*****/
        //1. Read the data in the file
        FileSystem fs=FileSystem.get(conf);
        Path remotePath=new Path(remoteFilePath);
        FSDataInputStream in=fs.open(remotePath);
        BufferedReader d=new BufferedReader(new InputStreamReader(in));
        StringBuffer buffer=new StringBuffer();
        String line=null;
        while((line=d.readLine())!=null)
        {
            buffer.append(line);
        }
        String res = buffer.toString();
        d.close();
        in.close();
        fs.close();

        //2. Output the read data to the /tmp/output/text.txt file. Tip: You can use FileWriter
        FileWriter f1=new FileWriter("/tmp/output/text.txt");
        f1.write(res);
        f1.close();

        /*****end*****/
    }
    
    /**
     * Main function
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        createHDFSFile(conf);
        String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path

        try {
            System.out.println("Read file: " + remoteFilePath);
            hdfs.cat(conf, remoteFilePath);
            System.out.println("\nReading completed");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

//Create HDFS file
public static void createHDFSFile(Configuration conf)throws IOException{
        FileSystem fs = FileSystem.get(conf); //Get the file system
        Path file = new Path("/user/hadoop/text.txt"); //Create file
        FSDataOutputStream outStream = fs.create(file); //Get the output stream
        outStream.writeUTF("hello hadoop HDFS step4 www.educoder.net");
        outStream.close();
        fs.close();
    }


}

Level 5: Delete files

Knowledge points

1. public boolean delete(Path f, boolean recursive) permanently deletes the specified file or directory. If f is a file or an empty directory, recursive is ignored. A non-empty directory and its contents are deleted only when recursive is true (that is, everything under it is deleted recursively).
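
For example, the difference between the two cases looks like this (a short sketch; the paths are illustrative):

        FileSystem fs = FileSystem.get(new Configuration());
        fs.delete(new Path("/user/hadoop/text.txt"), false); // a single file: recursive is ignored
        fs.delete(new Path("/user/hadoop/dir"), true);       // a non-empty directory: recursive must be true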

Programming requirements

Delete the /user/hadoop/text.txt file in HDFS

import java.io.*;
import java.sql.Date;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class hdfs {

/**
     * Delete Files 
     */
    public static boolean rm(Configuration conf, String remoteFilePath) throws IOException {
        /*****start*****/
        //Please add the code to delete files here
        FileSystem fs=FileSystem.get(conf);
        Path remotePath=new Path(remoteFilePath);
        boolean result=fs.delete(remotePath,false);
        return result;
        /*****end*****/
    }
    
    /**
     * Main function
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        String remoteFilePath = "/user/hadoop/text.txt"; // HDFS file

        try {
            if (rm(conf, remoteFilePath)) {
                System.out.println("File deleted: " + remoteFilePath);
            } else {
                System.out.println("The operation failed (the file does not exist or the deletion failed)");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


}

Level 6: Delete folder

Knowledge points

1. Check whether a directory contains files: public RemoteIterator<LocatedFileStatus> listFiles(Path f, boolean recursive). If recursive is false, only the files directly under the directory are returned; if recursive is true, files in all subdirectories are also returned recursively.

2. Delete files and directories in HDFS: public boolean delete(Path f, boolean recursive)
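
For example, the emptiness check used in the exercise below amounts to asking the returned iterator whether it has any element (a sketch; the directory path is illustrative):

        FileSystem fs = FileSystem.get(new Configuration());
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/user/hadoop/dir"), true);
        boolean empty = !it.hasNext();               // true if no file exists anywhere under the directory
        while (it.hasNext()) {
            System.out.println(it.next().getPath()); // print every file found recursively
        }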

Programming requirements

Delete the /user/hadoop/tmp and /user/hadoop/dir directories in HDFS. Before deleting, determine whether each directory is empty: if it is not empty, do not delete it; otherwise delete it.

import java.io.*;
import java.sql.Date;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;


public class hdfs {

    /**
     * Determine whether the directory is empty
     * true: empty, false: not empty
     */
    public static boolean isDirEmpty(Configuration conf, String remoteDir) throws IOException {
        /*****start*****/
        //Please add code here to determine whether the directory is empty
        FileSystem fs=FileSystem.get(conf);
        Path dirPath=new Path(remoteDir);
        RemoteIterator<LocatedFileStatus> result=fs.listFiles(dirPath,true);
        return !result.hasNext();
        /*****end*****/
    }
    /**
     * Delete directory
     */
    public static boolean rmDir(Configuration conf, String remoteDir, boolean recursive) throws IOException {
        /*****start*****/
        //Please add the code to delete the directory here
        FileSystem fs=FileSystem.get(conf);
        Path dirPath=new Path(remoteDir);
        boolean result=fs.delete(dirPath,recursive); //Pass the recursive argument instead of hard-coding false
        return result;
        /*****end*****/

    }
    
    /**
     * Main function
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        String remoteDir = "/user/hadoop/dir/"; // HDFS directory
        String remoteDir1 = "/user/hadoop/tmp/"; // HDFS directory
        boolean forceDelete = false; // Whether to force delete

        try {
            if (!isDirEmpty(conf, remoteDir) && !forceDelete) {
                System.out.println("The directory is not empty and will not be deleted");
            } else {
                if (rmDir(conf, remoteDir, forceDelete)) {
                    System.out.println("Directory deleted: " + remoteDir);
                } else {
                    System.out.println("Operation failed");
                }
            }

            if (!isDirEmpty(conf, remoteDir1) && !forceDelete) {
                System.out.println("The directory is not empty and will not be deleted");
            } else {
                if (rmDir(conf, remoteDir1, forceDelete)) {
                    System.out.println("Directory deleted: " + remoteDir1);
                } else {
                    System.out.println("Operation failed");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


}

Level 7: Custom data input stream

Knowledge points

1. Related methods

public int read(char[] cbuf, int off, int len) throws IOException

(1) Characters are read repeatedly until one of the following holds: the specified number of characters has been read; the read method of the underlying stream returns -1, indicating end-of-file; or the ready method of the underlying stream returns false, indicating that further input requests would block.

(2) If the first call to read on the underlying stream returns -1 (indicating end-of-file), this method returns -1; otherwise it returns the number of characters actually read.
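
In other words, br.read(cbuf, off, 1) stores a single character in cbuf[off] and returns 1, or returns -1 at end-of-file; that is exactly the calling pattern the line-by-line reader below relies on. A sketch, assuming br is a BufferedReader over the HDFS input stream:

        char[] buf = new char[16];
        int n = br.read(buf, 0, 1); // n == 1: one character was stored in buf[0]
        if (n == -1) {
            // end of file: nothing was read
        }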

Programming requirements

Implement a readLine() method that reads the specified file in HDFS line by line. It returns null when the end of the file has been reached; otherwise it returns the text of one line of the file, i.e. it behaves similarly to the readLine() method of the BufferedReader class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.*;

public class MyFSDataInputStream extends FSDataInputStream {
    public MyFSDataInputStream(InputStream in) {
        super(in);
    }

    /**
     * Implement line-by-line reading
     * Read one character at a time; when "\n" is encountered, a line of content is returned.
     */
    public static String readline(BufferedReader br) throws IOException {
        /*****start*****/
        //Please fill in the code here to implement related functions
        char[] data = new char[1024];
        int read = -1;
        int off = 0; // br keeps its position between calls, so off restarts from 0 on every call
        while ((read = br.read(data, off, 1)) != -1) { // read one character at a time into data[off]
            if (String.valueOf(data[off]).equals("\n")) {
                off += 1;
                return String.valueOf(data, 0, off); // return the line, including the newline
            }
            off += 1;
        }
        if (off > 0) {
            return String.valueOf(data, 0, off); // last line without a trailing newline
        }
        return null; // end of file reached
        /*****end*****/
    }

/**
     * Read file content
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        FileWriter f = new FileWriter("/tmp/output/text.txt");
        String line = null;
        while ( (line = MyFSDataInputStream.readline(br)) != null ) {
            f.write(line);
        }
        f.close();
        br.close();
        in.close();
        fs.close();
    }
    /**
     * Main function
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path
        try {
            MyFSDataInputStream.cat(conf, remoteFilePath);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
