Integrating Hadoop 3.x with IDEA on Windows

1. Download the Hadoop development package

1.1 On the Windows system, download the matching Hadoop development package, for example Hadoop 3.3.5.

1.2 Unzip the development package and rename the directory.

2. Patch files

2.1 Because the development system is Windows, a few patch files must be installed before Hadoop can run properly. They belong in Hadoop's bin directory.

Copy all three patch files into that bin directory.

3. Configure Hadoop environment variables

3.1 Prerequisite: the JDK is installed and its environment variables are configured.

3.2 Add two variables: set HADOOP_HOME to the Hadoop installation directory, and append %HADOOP_HOME%\bin to PATH.
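
As a quick reference, a sketch of the two values, assuming the development package was unzipped to C:\hadoop-3.3.5 (the path is an assumption; use your actual installation directory):

HADOOP_HOME = C:\hadoop-3.3.5
PATH        = <existing value>;%HADOOP_HOME%\bin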

4. Open IDEA to create a Maven project

4.1 Project creation

4.2 Add the corresponding dependencies to the pom.xml file

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.3.5</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.3.5</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.3.5</version>
</dependency>

4.3 Download the configuration files from the Hadoop server and place them in the project's src/resources directory.
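
For reference, a minimal core-site.xml sketch consistent with the address used in the connection code below; the host name hadoop and port 9000 match this tutorial's fs.defaultFS value and must be replaced with your own server's settings:

<configuration>
    <property>
        <!-- must match the NameNode address of the pseudo-distributed server -->
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop:9000</value>
    </property>
</configuration>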

4.4 Connect to pseudo-distributed HDFS with a hard-coded address (note: the server's firewall must be turned off)

This approach suits stand-alone mode and pseudo-distributed clusters, because the NameNode's address is fixed and known in advance.

Method one:

try{
            //-Applicable to stand-alone mode and pseudo-distributed clusters, because the server address is fixed
            //-The address must match fs.defaultFS configured in core-site.xml
            URI uri = new URI("hdfs://hadoop:9000");
            Configuration conf = new Configuration();
            String user = "root";
            //-Connect using the explicit address and user name
            FileSystem fs = FileSystem.get(uri, conf, user);
            System.out.println("Connection successful");
            //-Check whether a directory exists in HDFS
            System.out.println(fs.exists(new Path("/wordcount")));
            fs.close();
        }catch (Exception ex){
            ex.printStackTrace();
        }

Method two:

try{
            //-Applicable to stand-alone mode and pseudo-distributed clusters, because the server address is fixed
            //-Configuration object
            Configuration conf = new Configuration();
            //-The address must match fs.defaultFS configured in core-site.xml
            conf.set("fs.defaultFS","hdfs://hadoop:9000");
            //-Set the HDFS user name as a JVM system property
            System.setProperty("HADOOP_USER_NAME","root");
            FileSystem fs = FileSystem.get(conf);
            System.out.println("Connection successful");
            //-Check whether a directory exists in HDFS
            System.out.println(fs.exists(new Path("/user")));
            fs.close();
        }catch (Exception ex){
            ex.printStackTrace();
        }

4.5 Connect to a Hadoop high-availability cluster

4.5.1 Download the cluster's configuration files (typically core-site.xml and hdfs-site.xml) from the Hadoop cluster server to the local machine and copy them into the project's src/resources directory.
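
For orientation, a minimal hdfs-site.xml fragment showing the property that the connection code below reads; the nameservice ID mycluster is a placeholder assumption, and the remaining HA properties (namenode list, RPC addresses, failover proxy provider) should be copied verbatim from the cluster's own file:

<configuration>
    <property>
        <!-- logical nameservice ID read by conf.get("dfs.nameservices") -->
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
</configuration>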

4.5.2 Code to connect to the high-availability cluster

try{
            System.setProperty("HADOOP_USER_NAME","root");
            Configuration conf = new Configuration();
            DistributedFileSystem dfs = new DistributedFileSystem();
            //-Get the nameservice ID of the cluster, configured in hdfs-site.xml
            String nameService = conf.get("dfs.nameservices");
            String hdfsRPCUrl = "hdfs://" + nameService + ":" + 8020;
            dfs.initialize(URI.create(hdfsRPCUrl),conf);
            //-List all file paths in the HDFS root directory
            FileStatus[] list = dfs.listStatus(new Path("/"));
            for(FileStatus file:list){
                System.out.println(file.getPath());
            }
            dfs.close();
        }catch(Exception ex){
            ex.printStackTrace();
        }

4.5.3 Create a directory on the server

try{
            System.setProperty("HADOOP_USER_NAME","root");
            Configuration conf = new Configuration();
            DistributedFileSystem dfs = new DistributedFileSystem();
            //-Get the nameservice ID of the cluster, configured in hdfs-site.xml
            String nameService = conf.get("dfs.nameservices");
            String hdfsRPCUrl = "hdfs://" + nameService + ":" + 8020;
            dfs.initialize(URI.create(hdfsRPCUrl),conf);
            //-Create a directory if it does not already exist
            Path path = new Path("/ceshi");
            if(!dfs.exists(path)){
                System.out.println(dfs.mkdirs(path)?"Creation successful":"Creation failed");
            }else{
                System.out.println(path.getName() + " already exists, no need to create it");
            }
            dfs.close();
        }catch(Exception ex){
            ex.printStackTrace();
        }

4.5.4 Rename a file or directory

try{
            System.setProperty("HADOOP_USER_NAME","root");
            Configuration conf = new Configuration();
            DistributedFileSystem dfs = new DistributedFileSystem();
            //-Get the nameservice ID of the cluster, configured in hdfs-site.xml
            String nameService = conf.get("dfs.nameservices");
            String hdfsRPCUrl = "hdfs://" + nameService + ":" + 8020;
            dfs.initialize(URI.create(hdfsRPCUrl),conf);
            //-Rename the directory /ceshi to /test
            Path src = new Path("/ceshi");
            Path dst = new Path("/test");
            if(dfs.exists(src)){
                System.out.println(dfs.rename(src,dst)?"Rename successful":"Rename failed");
            }else{
                System.out.println(src.getName() + " does not exist");
            }
            dfs.close();
        }catch(Exception ex){
            ex.printStackTrace();
        }

4.5.5 Delete directories or files

try{
            System.setProperty("HADOOP_USER_NAME","root");
            Configuration conf = new Configuration();
            DistributedFileSystem dfs = new DistributedFileSystem();
            //-Get the nameservice ID of the cluster, configured in hdfs-site.xml
            String nameService = conf.get("dfs.nameservices");
            String hdfsRPCUrl = "hdfs://" + nameService + ":" + 8020;
            dfs.initialize(URI.create(hdfsRPCUrl),conf);
            //-Recursively delete the directory /test
            Path path = new Path("/test");
            if(dfs.exists(path)){
                System.out.println(dfs.delete(path,true)?"Deletion successful":"Deletion failed");
            }else{
                System.out.println(path.getName() + " does not exist");
            }
            dfs.close();
        }catch(Exception ex){
            ex.printStackTrace();
        }

4.5.6 Upload a file

try{
            System.setProperty("HADOOP_USER_NAME","root");
            Configuration conf = new Configuration();
            DistributedFileSystem dfs = new DistributedFileSystem();
            //-Get the nameservice ID of the cluster, configured in hdfs-site.xml
            String nameService = conf.get("dfs.nameservices");
            String hdfsRPCUrl = "hdfs://" + nameService + ":" + 8020;
            dfs.initialize(URI.create(hdfsRPCUrl),conf);
            //-Upload a local file into the /study directory
            Path src = new Path("hadoop tutorial.txt");
            Path dst = new Path("/study/hadoop tutorial.txt");
            Path path = new Path("/study");
            if(dfs.exists(path)){
                dfs.copyFromLocalFile(src,dst);
                System.out.println(dfs.exists(dst)?"Upload successful":"Upload failed");
            }else{
                System.out.println(path.getName() + " target directory does not exist; cannot upload");
            }
            dfs.close();
        }catch(Exception ex){
            ex.printStackTrace();
        }