First
Need to set up a Hadoop environment and start Hadoop
If you haven’t built it yet, you can read this article:
Hadoop cluster construction and configuration-CSDN Blog
Here I use IntelliJ IDEA on Windows to connect to the Hadoop instance running in the virtual machine.
(1) Install hadoop under windows
The installed Hadoop needs to be the same as the Hadoop version of the virtual machine, and the jdk version must also be the same.
You can view it with the command
hadoop version (shows the Hadoop version) and java -version (shows the Java version)
(2) hdfs Java api
1. Implement data writing.
The reference code is as follows:
Create a new class:
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;
import java.nio.charset.StandardCharsets;

/**
 * Writes a small text file to HDFS through the Java API.
 */
public class writeFileOnHDFS {

    /**
     * Creates /lyf/hadoop/lyf0316.txt on the remote HDFS and writes a short
     * string into it.
     *
     * @throws Exception if the filesystem cannot be reached or the write fails
     */
    @Test
    public void write1() throws Exception {
        // Create configuration object; resolving datanodes by hostname lets a
        // client outside the cluster network (e.g. Windows) reach them.
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        Path path = new Path(uri + "/lyf/hadoop/lyf0316.txt");
        // try-with-resources guarantees both the output stream and the
        // FileSystem are closed even when the write throws (the original
        // leaked both on any exception before the explicit close() calls).
        try (FileSystem fs = FileSystem.get(new URI(uri), conf);
             FSDataOutputStream out = fs.create(path)) {
            // Explicit charset: getBytes() with no argument uses the platform
            // default encoding, which differs between Windows and Linux.
            out.write("habse".getBytes(StandardCharsets.UTF_8));
        }
        System.out.println("File [" + path + "] was written successfully!");
    }
}
2. Implement data reading,
The reference code is as follows
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;

/**
 * Reads a text file from HDFS line by line and prints it to stdout.
 */
public class ReadFileOnHDFS {

    /**
     * Opens /lyf/hadoop/lyf0316.txt on the remote HDFS as user "root" and
     * prints every line.
     *
     * @throws Exception if the filesystem cannot be reached or the read fails
     */
    @Test
    public void read1() throws Exception {
        Configuration conf = new Configuration();
        // Resolve datanodes by hostname so a client outside the cluster
        // network can reach them.
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        Path path = new Path(uri + "/lyf/hadoop/lyf0316.txt");
        // try-with-resources closes reader, stream, and filesystem in reverse
        // order even when readLine throws (the original leaked all three on
        // any exception). Explicit UTF-8: InputStreamReader without a charset
        // falls back to the platform default.
        try (FileSystem fs = FileSystem.get(new URI(uri), conf, "root");
             FSDataInputStream in = fs.open(path);
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String nextLine;
            while ((nextLine = br.readLine()) != null) {
                System.out.println(nextLine);
            }
        }
    }
}
3. Get all files under HDFS
The reference code is as follows
package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.io.IOException;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;

/**
 * Recursively lists metadata (path, permission, size, modification time) for
 * every file under an HDFS directory.
 */
public class HDFSOutputFile {

    // Cached, thread-safe formatter. The original built a SimpleDateFormat
    // per file inside the loop and its pattern "yyy-MM-dd" contained a typo.
    private static final DateTimeFormatter TIME_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
                    .withZone(ZoneId.systemDefault());

    /**
     * Prints information about every file under {@code remoteDir},
     * descending into subdirectories.
     *
     * @param conf      Hadoop configuration carrying fs.defaultFS
     * @param remoteDir HDFS directory to list recursively
     * @throws IOException if the filesystem cannot be reached or listed
     */
    public static void lsDir(Configuration conf, String remoteDir) throws IOException {
        // try-with-resources closes the FileSystem even when iteration throws
        // (the original leaked it on any exception before fs.close()).
        try (FileSystem fs = FileSystem.get(conf)) {
            // true => recursive listing.
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(remoteDir), true);
            while (files.hasNext()) {
                LocatedFileStatus s = files.next();
                System.out.println("Path:" + s.getPath().toString());
                System.out.println("Permission:" + s.getPermission().toString());
                System.out.println("size:" + s.getLen());
                // getModificationTime() is epoch millis.
                String date = TIME_FORMAT.format(Instant.ofEpochMilli(s.getModificationTime()));
                System.out.println("Time:" + date);
                System.out.println();
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // fs.defaultFS is the current key; fs.default.name (used by the
        // original) is deprecated and only kept as a legacy alias.
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        String remoteDir = "/"; // HDFS path
        try {
            System.out.println("(recursively) read information about all files in the directory: " + remoteDir);
            HDFSOutputFile.lsDir(conf, remoteDir);
            System.out.println("Reading completed");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4. Upload files to HDFS (files under windows to Linux)
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Uploads a local Windows file to an HDFS directory.
 */
public class CopyFromLocalFile {

    /**
     * Copies e:/test/test.txt from the local disk into /lyf on HDFS,
     * authenticating as user "root".
     *
     * @throws Exception if the filesystem cannot be reached or the copy fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        // Must be set before FileSystem.get so the connection runs as "root".
        System.setProperty("HADOOP_USER_NAME", "root");
        // try-with-resources closes the FileSystem even when the copy throws
        // (the original leaked it on any exception before fs.close()).
        try (FileSystem fs = FileSystem.get(conf)) {
            fs.copyFromLocalFile(new Path("e:/test/test.txt"), new Path("/lyf"));
        }
    }
}
5. Add new folder
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

/**
 * Creates a new directory on HDFS.
 */
public class CreateNewFolder {

    /**
     * Connects as user "root" and creates the directory /linux.
     * Prints an end marker whether or not the operation succeeded.
     */
    @Test
    public void createDir01() {
        String HDFS_PATH = "hdfs://192.168.170.80:8020";
        Configuration configuration = new Configuration();
        // try-with-resources closes the FileSystem; the original's finally
        // block only nulled the reference, leaking the open connection.
        try (FileSystem fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root")) {
            fileSystem.mkdirs(new Path("/linux"));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("--------------end---------------");
        }
    }
}
6. Renaming of folders
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

/**
 * Renames a file on HDFS.
 */
public class ChangeFileName {

    /**
     * Renames /linux/linux.txt to /linux/hadoop.txt and prints whether the
     * rename succeeded (FileSystem.rename returns a boolean, it does not
     * throw on failure).
     */
    @Test
    public void reameFile03() {
        String HDFS_PATH = "hdfs://192.168.170.80:8020/";
        Configuration configuration = new Configuration();
        // try-with-resources closes the FileSystem; the original's finally
        // block only nulled the reference, leaking the open connection.
        try (FileSystem fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root")) {
            Path oldPath = new Path("/linux/linux.txt");
            // Fixed the original target "hadoop.txtt" (double-t typo).
            Path newPath = new Path("/linux/hadoop.txt");
            System.out.println(fileSystem.rename(oldPath, newPath));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
7. Determine whether the file exists.
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Checks whether a path exists on HDFS and reports the result on stdout.
 */
public class FileExit {

    public static void main(String[] args) {
        String fileName = "/lyf";
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        // Pin the hdfs:// scheme to the HDFS client implementation.
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // try-with-resources closes the FileSystem; the original never
        // closed it at all.
        try (FileSystem fs = FileSystem.get(conf)) {
            if (fs.exists(new Path(fileName))) {
                System.out.println("File exists");
            } else {
                System.out.println("File does not exist");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
8. Obtain the block information of a large file that HDFS has split into blocks
package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;

/**
 * Uses the Java API to operate the HDFS file system.
 *
 * Because it is run under test, JUnit lifecycle hooks are used:
 * {@code @Before} builds the Configuration and FileSystem before each test;
 * {@code @After} releases them afterwards.
 *
 * Key points:
 * 1) Create Configuration
 * 2) Get FileSystem
 * 3) The rest is the operation of the HDFS API
 */
public class HDFSApp {

    public static final String HDFS_PATH = "hdfs://192.168.170.80:8020";

    Configuration configuration = null;
    FileSystem fileSystem = null;

    @Before
    public void setup() throws Exception {
        System.out.println("-----setup-----");
        configuration = new Configuration();
        configuration.set("dfs.replication", "1");
        // Reuse the constant instead of repeating the URI literal.
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
    }

    /**
     * Prints every block replica of a large file as "host : offset : length".
     */
    @Test
    public void getFileBlockLocations() throws Exception {
        FileStatus fileStatus = fileSystem.getFileStatus(new Path("/lyf/hadoop/hadoop-3.1.3.tar.gz"));
        BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        for (BlockLocation block : blocks) {
            for (String name : block.getNames()) {
                System.out.println(name + " : " + block.getOffset() + " : " + block.getLength());
            }
        }
    }

    @After
    public void tearDown() {
        System.out.println("-----tearDown-----");
        // Actually close the FileSystem; the original only nulled the
        // reference, leaking the open connection after every test.
        if (fileSystem != null) {
            try {
                fileSystem.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Blanking
        configuration = null;
        fileSystem = null;
    }
}
9. Get all nodes on the cluster
package net.army.hdfs; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; public class GetNode { public static void main(String[] args) throws Exception { Configuration conf=new Configuration(); conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020"); FileSystem fs=FileSystem.get(conf); DistributedFileSystem dfs = (DistributedFileSystem)fs; DatanodeInfo[] Infos = dfs.getDataNodeStats(); for(int i=0;i<Infos.length;i + + ){ System.out.println("DataNode_" + i + "_Name:" + Infos[i].getHostName()); } } }