Level 1: File reading and writing
Knowledge points
1.HDFS file creation and operation steps
step1: Get the FileSystem object;
step2: Write through FSDataOutputStream;
step3: Output the file content through FSDataInputStream.
Programming requirements
- Get the Hadoop system configuration and create an HDFS file at the path /user/hadoop/myfile;
- Write the string https://www.educoder.net into the myfile file;
- Read the contents of the newly created myfile file and output them.
import java.io.*; import java.sql.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class hdfs { public static void main(String[] args) throws IOException { //Please add code between Begin-End to complete the task requirements. /********* Begin *********/ //Get the FileSystem object Configuration conf=new Configuration(); //Realize value transfer between hadoop modules FileSystem fs=FileSystem.get(conf); //Get the file system Path file=new Path("/user/hadoop/myfile"); //Create file //Write through FSDataOutputStream FSDataOutputStream outStream=fs.create(file); //Get the output stream outStream.writeUTF("https://www.educoder.net"); //Any character can be written outStream.close(); //Remember to close the output stream //Output the file content through FSDataInputStream FSDataInputStream inStream=fs.open(file); //Get the input stream String data=inStream.readUTF(); //Read file /********* End *********/ } }
Level 2: File upload
Programming requirements
The text file is uploaded to HDFS. If the specified file already exists in HDFS, the user can specify whether to append it to the end of the original file or overwrite the original file.
import java.io.*; import java.sql.Date; import java.util.Scanner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class hdfs { /** * Determine whether the path exists */ public static boolean test(Configuration conf, String path) throws IOException { /*****start*****/ //Please write code here to determine whether the file exists FileSystem fs = FileSystem.get(conf);//Get the object fs.exists(new Path(path)); //Determine whether the file in the path exists, if so, return true return fs.exists(new Path(path)); /*****end*****/ } /** * Copy the file to the specified path * If the path already exists, overwrite it */ public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException { /*****start*****/ //Please write the code to copy the file to the specified path here FileSystem fs = FileSystem.get(conf);//Get the object Path localPath=new Path(localFilePath); Path remotePath=new Path(remoteFilePath); /* The first parameter of fs.copyFromLocalFile indicates whether to delete the source file, and the second parameter indicates whether to overwrite it */ fs.copyFromLocalFile(false,true,localPath,remotePath); /*****end*****/ } /** *Append file content */ public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException { /*****start*****/ //Please write the code to append the file content here FileSystem fs=FileSystem.get(conf); Path remotePath=new Path(remoteFilePath); //Create a file to read into the stream FileInputStream in=new FileInputStream(localFilePath); //Create a file output stream. 
The output content will be appended to the end of the file FSDataOutputStream out=fs.append(remotePath); //Read and write file content byte[] data=new byte[1024]; int read=-1; while((read=in.read(data))>0){ out.write(data,0,read); } /*****end*****/ } /** * Main function */ public static void main(String[] args)throws IOException { Configuration conf = new Configuration(); createHDFSFile(conf); String localFilePath = "./file/text.txt"; // local path String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path String choice = ""; \t\t try { /* Determine whether the file exists */ Boolean fileExists = false; if (hdfs.test(conf, remoteFilePath)) { fileExists = true; System.out.println(remoteFilePath + " already exists."); choice = "append"; //If the file exists, append to the end of the file } else { System.out.println(remoteFilePath + " does not exist."); choice = "overwrite"; //overwrite } /*****start*****/ //Please write here the logic of uploading if the file does not exist. If the file choice is equal to overwrite, it will be overwritten. If choice is equal to append, it will be appended. if (fileExists!=true ) { // The file does not exist, upload it \t\t\t\t System.out.println(localFilePath + " Uploaded to " + remoteFilePath); } else if (choice=="overwrite" ) { //Choose overwrite \t\t\t\t System.out.println(localFilePath + " Overwritten " + remoteFilePath); } else if (choice=="append") { //Choose to append \t\t\t\t System.out.println(localFilePath + " appended to " + remoteFilePath); } /*****end*****/ \t\t\t } catch (Exception e) { e.printStackTrace(); } } //Create HDFS file public static void createHDFSFile(Configuration conf)throws IOException{ FileSystem fs = FileSystem.get(conf); //Get the file system Path file = new Path("/user/hadoop/text.txt"); //Create file FSDataOutputStream outStream = fs.create(file); //Get the output stream outStream.writeUTF("hello##Hdfs"); outStream.close(); fs.close(); } }
Level 3: File Download
Programming requirements
Complete the function of downloading files from HDFS
import java.io.*; import java.sql.Date; import java.util.Scanner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class hdfs { /** * Download files to local * Determine whether the local path already exists. If it already exists, automatically rename it. */ public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) throws IOException { FileSystem fs = FileSystem.get(conf); Path remotePath = new Path(remoteFilePath); File f = new File(localFilePath); /*****start*****/ /*Add code here to determine whether the file exists. If the file name exists, it will be automatically renamed (add _0, _1 ... after the file name) */ if (fs.exists(new Path(localFilePath))) { System.out.println(localFilePath + " already exists."); Integer i = 0; while ( f.exists() ) { f = new File( localFilePath ); if ( f.exists() ) { localFilePath = localFilePath + "_" + i; break; } } System.out.println("will be renamed to: " + localFilePath); } /*****end*****/ /*****start*****/ //Add the code to download the file locally here Path localPath=new Path(localFilePath); fs.copyToLocalFile(remotePath,localPath); /*****end*****/ fs.close(); } /** * Main function */ public static void main(String[] args)throws IOException { Configuration conf = new Configuration(); createHDFSFile(conf); String localFilePath = "/tmp/output/text.txt"; // local path String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path \t\t try { //Call the method to download the file locally hdfs.copyToLocal(conf, remoteFilePath, localFilePath); System.out.println("Download completed"); } catch (Exception e) { e.printStackTrace(); } } \t //Create HDFS file public static void createHDFSFile(Configuration conf)throws IOException{ FileSystem fs = FileSystem.get(conf); //Get the file system 
Path file = new Path("/user/hadoop/text.txt"); //Create file FSDataOutputStream outStream = fs.create(file); //Get the output stream outStream.writeUTF("hello hadoop HDFS www.educoder.net"); outStream.close(); fs.close(); } }
Level 4: Reading data using character streams
Knowledge points
1. Reading data using character streams is divided into three steps:
step1: Obtain the FileSystem object through the Configuration object;
step2: Obtain the FSDataInputStream object through fs;
step3: Read the data in the file through the character stream loop and output it.
Programming requirements
Complete output of the specified file in HDFS to the specified file
import java.io.*; import java.sql.Date; import java.util.Scanner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class hdfs { /** * Read file content */ public static void cat(Configuration conf, String remoteFilePath) throws IOException { \t\t /*****start*****/ //1. Read the data in the file FileSystem fs=FileSystem.get(conf); Path remotePath=new Path(remoteFilePath); FSDataInputStream in=fs.open(remotePath); BufferedReader d=new BufferedReader(new InputStreamReader(in)); StringBuffer buffer=new StringBuffer(); String line=null; while((line=d.readLine())!=null) { buffer.append(line); } String res = buffer.toString(); //2. Output the read data to the /tmp/output/text.txt file Tip: You can use FileWriter FileWriter f1=new FileWriter("/tmp/output/text.txt"); f1.write(res); f1.close(); \t /*****end*****/ } /** * Main function */ public static void main(String[] args)throws IOException { Configuration conf = new Configuration(); createHDFSFile(conf); String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path \t\t try { System.out.println("Read file: " + remoteFilePath); hdfs.cat(conf, remoteFilePath); System.out.println("\\ Reading completed"); } catch (Exception e) { e.printStackTrace(); } } //Create HDFS file public static void createHDFSFile(Configuration conf)throws IOException{ FileSystem fs = FileSystem.get(conf); //Get the file system Path file = new Path("/user/hadoop/text.txt"); //Create file FSDataOutputStream outStream = fs.create(file); //Get the output stream outStream.writeUTF("hello hadoop HDFS step4 www.educoder.net"); outStream.close(); fs.close(); } }
Level 5: Delete files
Knowledge points
1. public boolean delete(Path f, boolean recursive)
Permanently deletes the specified file or directory. If f is a file or an empty directory, the value of recursive is ignored. A non-empty directory and its contents are deleted only when recursive=true (that is, all files are deleted recursively).
Programming requirements
Delete the /user/hadoop/text.txt
file in HDFS
import java.io.*; import java.sql.Date; import java.util.Scanner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class hdfs { /** * Delete Files */ public static boolean rm(Configuration conf, String remoteFilePath) throws IOException { /*****start*****/ //Please add the code to delete files here FileSystem fs=FileSystem.get(conf); Path remotePath=new Path(remoteFilePath); boolean result=fs.delete(remotePath,false); return result ; /*****end*****/ } /** * Main function */ public static void main(String[] args) { Configuration conf = new Configuration(); String remoteFilePath = "/user/hadoop/text.txt"; // HDFS file \t\t try { if (rm(conf, remoteFilePath) ) { System.out.println("File deletion: " + remoteFilePath); } else { System.out.println("The operation failed (the file does not exist or the deletion failed)"); } } catch (Exception e) { e.printStackTrace(); } } }
Level 6: Delete folder
Knowledge points
1. Check whether a directory contains files: public RemoteIterator&lt;LocatedFileStatus&gt; listFiles(Path f, boolean recursive). If recursive is false, only the files directly in the directory are returned; if recursive is true, files are returned recursively from all subdirectories.
2. Delete files and directories in HDFS: public boolean delete(Path f, boolean recursive)
Programming requirements
Delete the /user/hadoop/tmp
directory and /user/hadoop/dir
directory in HDFS. Before deleting, you need to determine whether the two directories are empty. If they are not empty, Do not delete, otherwise delete.
import java.io.*; import java.sql.Date; import java.util.Scanner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; public class hdfs { \t /** * Determine whether the directory is empty * true: empty, false: not empty */ public static boolean isDirEmpty(Configuration conf, String remoteDir) throws IOException { /*****start*****/ //Please add code here to determine whether the directory is empty FileSystem fs=FileSystem.get(conf); Path dirPath=new Path(remoteDir); RemoteIterator<LocatedFileStatus> result=fs.listFiles(dirPath,true); return !result.hasNext() ; /*****end*****/ } \t /** * Delete directory */ public static boolean rmDir(Configuration conf, String remoteDir, boolean recursive) throws IOException { /*****start*****/ //Please add the code to delete the directory here FileSystem fs=FileSystem.get(conf); Path dirPath=new Path(remoteDir); boolean result=fs.delete(dirPath,false); return result ; /*****end*****/ } /** * Main function */ public static void main(String[] args) { Configuration conf = new Configuration(); String remoteDir = "/user/hadoop/dir/"; // HDFS directory String remoteDir1 = "/user/hadoop/tmp/"; // HDFS directory Boolean forceDelete = false; // Whether to force delete \t\t try { if ( !isDirEmpty(conf, remoteDir) & amp; & amp; !forceDelete ) { System.out.println("The directory is not empty and will not be deleted"); } else { if ( rmDir(conf, remoteDir, forceDelete) ) { System.out.println("Directory deleted: " + remoteDir); } else { System.out.println("Operation failed"); } } if ( !isDirEmpty(conf, remoteDir1) & amp; & amp; !forceDelete ) { System.out.println("The directory is not empty and will not be deleted"); } else { if ( rmDir(conf, remoteDir1, forceDelete) ) { System.out.println("Directory deleted: " + remoteDir1); } else { System.out.println("Operation failed"); } } } catch (Exception e) { e.printStackTrace(); } } }
Level 7: Customized data input flow
Knowledge points
1. Related methods
public int read(char[] cbuf,int off,int len)throws IOException
(1) This method blocks until some input is available: either the requested number of characters has been read, the underlying stream's read method returns -1 (indicating end-of-file), or the underlying stream's ready method returns false (indicating that further input requests would block).
(2) If the first call to read on the underlying stream returns -1 (indicating the end of the file), then this method returns -1; otherwise this method returns the number of characters actually read.
Programming requirements
Implement a readLine() method that reads the specified file in HDFS line by line. When the end of the file is reached it returns null; otherwise it returns one line of the file's text — that is, an effect similar to the readLine() method of the BufferedReader class.
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import java.io.*; public class MyFSDataInputStream extends FSDataInputStream { public MyFSDataInputStream(InputStream in) { super(in); } \t /** * Implement line-by-line reading * Each time a character is read in and ends with "\ ", a line of content is returned. */ public static String readline(BufferedReader br) throws IOException { /*****start*****/ //Please fill in the code here to implement related functions char[] data = new char[1024]; int read = -1; int off = 0; // When the loop is executed, br will continue to read from the end of the previous reading, so in this function, off starts from 0 every time while ( (read = br.read(data, off, 1)) != -1 ) { if (String.valueOf(data[off]).equals("\\ ") ) { off + = 1; return String.valueOf(data, 0, read); } off + = 1; return String.valueOf(data, 0, read); } return null ; /*****end*****/ } /** * Read file content */ public static void cat(Configuration conf, String remoteFilePath) throws IOException { FileSystem fs = FileSystem.get(conf); Path remotePath = new Path(remoteFilePath); FSDataInputStream in = fs.open(remotePath); BufferedReader br = new BufferedReader(new InputStreamReader(in)); FileWriter f = new FileWriter("/tmp/output/text.txt"); String line = null; while ( (line = MyFSDataInputStream.readline(br)) != null ) { f.write(line); } f.close(); br.close(); in.close(); fs.close(); } \t /** * Main function */ public static void main(String[] args) { Configuration conf = new Configuration(); String remoteFilePath = "/user/hadoop/text.txt"; // HDFS path try { MyFSDataInputStream.cat(conf, remoteFilePath); } catch (Exception e) { e.printStackTrace(); } } }
The knowledge points of the article match the official knowledge files, and you can further learn related knowledge. Java Skill TreeHomepageOverview 139393 people are learning the system