Deploying a hadoop-3.3.6, hbase-2.5.6, and apache-zookeeper-3.8.1 cluster on Linux

1. Introduction to Hadoop

Hadoop is a distributed system infrastructure developed by the Apache Foundation. It lets users write distributed programs without understanding the low-level details of distribution, harnessing the power of a cluster for high-speed computing and storage. Hadoop provides a distributed file system, HDFS (Hadoop Distributed File System).

2. Introduction to HBase

HBase is a distributed, scalable NoSQL database that supports massive data storage. Data is physically stored in a key-value format, and all HBase data files are kept on the Hadoop HDFS file system.
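
As a quick illustration of the key-value model, here is a minimal HBase shell session you can run once the cluster described below is up (the table name 'demo' and column family 'cf' are just examples):

# start the shell ($HBASE_HOME/bin is on PATH after section 4)
hbase shell
# inside the shell: create table 'demo' with a single column family 'cf'
create 'demo', 'cf'
# write one cell: row key 'r1', column 'cf:name', value 'hadoop'
put 'demo', 'r1', 'cf:name', 'hadoop'
# read the row back; each cell is returned as column/timestamp/value
get 'demo', 'r1'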

3. Environment configuration

1. jdk1.8

2. hadoop-3.3.6

3. hbase-2.5.6

4. apache-zookeeper-3.8.1

4. Environment variable configuration

export JAVA_HOME=/usr/local/jdk1.8.0_391
export JRE_HOME=/usr/local/jdk1.8.0_391/jre
export HBASE_HOME=/usr/local/bigdata/hbase-2.5.6
#export HBASE_HOME=/usr/local/bigdata/hbase-2.5.6-hadoop3
export FLINK_HOME=/usr/local/bigdata/flink-1.18.0
export SCALA_HOME=/usr/local/bigdata/scala-2.13.12
export HADOOP_HOME=/usr/local/bigdata/hadoop-3.3.6
export SPARK_HOME=/usr/local/bigdata/spark-3.5.0-bin-hadoop3
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib
export PATH=.:$JAVA_HOME/bin:$JRE_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$FLINK_HOME/bin:$HBASE_HOME/bin:$PYTHON_HOME/bin:$PATH
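
After appending these lines to your shell profile (e.g. /etc/profile), reload it and verify the tools resolve; a quick sanity check, assuming the install paths above match your machines:

source /etc/profile
java -version      # expect 1.8.0_391
hadoop version     # expect Hadoop 3.3.6
hbase version      # expect HBase 2.5.6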

5. Hadoop configuration

The files below live under $HADOOP_HOME/etc/hadoop.

5.1 yarn-site.xml

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
</property>
<property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/bigdata/hadoop-3.3.6/etc/hadoop:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/common/lib/*:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/common/*:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/hdfs:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/hdfs/lib/*:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/hdfs/*:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/mapreduce/*:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/yarn:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/yarn/lib/*:/usr/local/bigdata/hadoop-3.3.6/share/hadoop/yarn/*</value>
</property>
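
The long yarn.application.classpath value does not have to be assembled by hand; on a node with HADOOP_HOME set as in section 4, Hadoop prints the correct value itself, which you can paste into the property:

hadoop classpath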

5.2 hdfs-site.xml

<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<property>
    <name>dfs.namenode.name.dir</name>
    <!-- adjust the path prefix to match your own installation -->
    <value>file:///usr/local/bigdata/hadoop-3.3.6/data/namenode</value>
</property>
<property>
    <name>dfs.datanode.data.dir</name>
    <!-- adjust the path prefix to match your own installation -->
    <value>file:///usr/local/bigdata/hadoop-3.3.6/data/datanode</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node2:9860</value>
</property>
<property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>false</value>
</property>
<property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
</property>
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
<property>
    <name>dfs.image.transfer.bandwidthPerSec</name>
    <value>1048576</value>
</property>
<property>
    <name>dfs.block.scanner.volume.bytes.per.second</name>
    <value>1048576</value>
</property>
<property>
    <name>dfs.datanode.balance.bandwidthPerSec</name>
    <value>20m</value>
</property>
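
Hadoop can create the NameNode and DataNode directories itself at format/startup time, but creating them explicitly on every node avoids permission surprises; assuming the paths above:

mkdir -p /usr/local/bigdata/hadoop-3.3.6/data/namenode
mkdir -p /usr/local/bigdata/hadoop-3.3.6/data/datanode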

5.3 core-site.xml

<property>
    <name>hadoop.tmp.dir</name>
    <!-- adjust the path prefix to match your own installation -->
    <value>/usr/local/bigdata/hadoop-3.3.6/data</value>
</property>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:9000</value>
</property>
<property>
    <name>hadoop.http.authentication.simple.anonymous.allowed</name>
    <value>true</value>
</property>
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
</property>
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
<property>
    <name>io.file.buffer.size</name>
    <value>1048576</value>
    <description>Size of the read/write buffer for SequenceFiles: 1 MB (1048576 bytes)</description>
</property>
<property>
    <name>ha.zookeeper.quorum</name>
    <value>node1:2181,node2:2181,node3:2181</value>
</property>
<property>
    <name>hadoop.zk.address</name>
    <value>node1:2181,node2:2181,node3:2181</value>
</property>
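
Both quorum settings assume a ZooKeeper ensemble is already running on node1, node2 and node3. A quick health check from any node (note that on ZooKeeper 3.5+ the ruok four-letter command must be whitelisted via 4lw.commands.whitelist in zoo.cfg):

echo ruok | nc node1 2181   # expect the reply "imok"
echo ruok | nc node2 2181
echo ruok | nc node3 2181
# or, on each ZooKeeper node:
zkServer.sh status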

5.4 mapred-site.xml

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
</property>
<property>
    <name>mapreduce.map.memory.mb</name>
    <value>1024</value>
</property>
<property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>2048</value>
</property>
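
Once the cluster is running (section 6), the bundled example job is a convenient smoke test for the YARN and MapReduce settings above:

hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar pi 2 10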

5.5 workers

node1
node2
node3
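
The workers file and the rest of the Hadoop configuration must be identical on all three nodes; one way to distribute them, assuming passwordless SSH between the nodes is already configured:

scp -r $HADOOP_HOME/etc/hadoop node2:$HADOOP_HOME/etc/
scp -r $HADOOP_HOME/etc/hadoop node3:$HADOOP_HOME/etc/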

6. Hadoop startup
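
On the first start, format the NameNode and then bring up HDFS and YARN from node1; a minimal sequence, assuming $HADOOP_HOME/bin and $HADOOP_HOME/sbin are on PATH as configured in section 4:

# one-time only: initialize the NameNode metadata directory
hdfs namenode -format
# start NameNode, DataNodes and the SecondaryNameNode
start-dfs.sh
# start the ResourceManager and NodeManagers
start-yarn.sh

The web UIs should then be reachable (replace 192.168.42.139 with your node1 address):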

YARN ResourceManager: http://192.168.42.139:8088/

HDFS NameNode: http://192.168.42.139:9870/

7. HBase configuration

7.1 Modify hbase-env.sh

# reuse the JDK from the environment configured in section 4
export JAVA_HOME=${JAVA_HOME}
export HBASE_LOG_DIR=${HBASE_HOME}/logs
# use the external ZooKeeper ensemble instead of the one bundled with HBase
export HBASE_MANAGES_ZK=false

7.2 Modify hbase-site.xml

<property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>
<property>
    <name>hbase.tmp.dir</name>
    <value>hdfs://node1:9000/hbase/tmp</value>
</property>
<property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
</property>
<property>
    <name>hbase.master.info.port</name>
    <value>16010</value>
</property>
<property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/usr/local/bigdata/hbase-2.5.6/data/zk</value>
</property>
<property>
    <name>hbase.rootdir</name>
    <value>hdfs://node1:9000/hbase</value>
</property>
<property>
    <name>hbase.master</name>
    <value>node1:16010</value>
</property>
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>node1:2181,node2:2181,node3:2181</value>
</property>
<property>
    <name>hbase.wal.provider</name>
    <value>filesystem</value>
</property>
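With HDFS running and this file distributed to every node, start HBase from node1; a minimal sequence, assuming the HMaster runs on node1 and the RegionServer hosts are listed in conf/regionservers:

# starts the local HMaster and the RegionServers on the listed hosts
start-hbase.sh
# confirm the daemons are up on each node
jps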

7.3 Access HBase

HBase Master web UI (the hbase.master.info.port configured above): http://192.168.42.139:16010/
