dockerhadoop高可用集群搭建 (使用docker搭建hadoop集群总结)

docker下安装hadoop集群,按照如下步骤,可快速安装并启动hadoop集群。

准备如下安装包

  • hadoop-3.2.1.tar.gz
  • jdk1.8.tar.gz

hadoop启动脚本

  • hadoop-master.sh
#!/bin/sh

HADOOP_HOME=/usr/local/hadoop-3.2.1
/usr/sbin/sshd

cd ${HADOOP_HOME}
# 首次运行需要执行初始化,之后不需要
${HADOOP_HOME}/bin/hdfs namenode -format
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh

tail -f /dev/null
  • hadoop-slave.sh
#!/bin/sh

HADOOP_HOME=/usr/local/hadoop-3.2.1
/usr/sbin/sshd

cd ${HADOOP_HOME}
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh

tail -f /dev/null
  • hadoop-history.sh
#!/bin/sh

HADOOP_HOME=/usr/local/hadoop-3.2.1
/usr/sbin/sshd

cd ${HADOOP_HOME}
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh
${HADOOP_HOME}/mr-jobhistory-daemon.sh start historyserver

tail -f /dev/null

docker文件

  • Dockerfile文件
FROM centos:latest

ARG tar_file
ARG tar_name

ADD ${tar_file}.tar.gz /usr/local/
ADD hadoop-master.sh /opt/
ADD hadoop-slave.sh /opt/
ADD hadoop-history.sh /opt/
ADD jdk1.8.tar.gz /usr/local/

RUN chmod a+x /opt/hadoop-master.sh /opt/hadoop-slave.sh /opt/hadoop-history.sh \
	&& yum install -y passwd openssl openssh-server openssh-clients lsof vim which sudo \
	&& /bin/cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' > /etc/timezone \
	&& ssh-keygen -t rsa -P '' -f /root/.ssh/id_rsa \
	&& cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys \
	&& ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key \
	&& ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key \
	&& ssh-keygen -t dsa -f /etc/ssh/ssh_host_ecdsa_key \
	&& ssh-keygen -t rsa -f /etc/ssh/ssh_host_ed25519_key \
	&& chmod 700 /root/.ssh/ \
	&& chmod 600 /root/.ssh/authorized_keys \
	&& cd /usr/local/${tar_name} \
 && mkdir -p tmp namenode datanode
	
ENV HDFS_DATANODE_USER root
ENV HDFS_NAMENODE_USER root
ENV HDFS_SECONDARYNAMENODE_USER root
ENV HDFS_DATANODE_SECURE_USER hdfs
ENV YARN_RESOURCEMANAGER_USER root
ENV HADOOP_SECURE_DN_USER yarn
ENV YARN_NODEMANAGER_USER root
	
ENV JAVA_HOME /usr/local/jdk8
ENV CLASSPATH .:$JAVA_HOME/lib
ENV PATH $PATH:$JAVA_HOME/bin
ENV HADOOP_HOME /usr/local/${tar_name}
ENV HADOOP_INSTALL $HADOOP_HOME
ENV HADOOP_MAPRED_HOME $HADOOP_HOME
ENV HADOOP_COMMON_HOME $HADOOP_HOME
ENV HADOOP_HDFS_HOME $HADOOP_HOME
ENV YARN_HOME $HADOOP_HOME
ENV HADOOP_LIBEXEC_DIR $HADOOP_HOME/libexec
ENV HADOOP_COMMON_LIB_NATIVE_DIR $HADOOP_HOME/lib/native
ENV PATH $HADOOP_HOME/sbin:$HADOOP_HOME/bin:$JAVA_HOME/bin:$PATH
ENV CLASSPATH $($HADOOP_HOME/bin/hadoop classpath):$CLASSPATH

# ENTRYPOINT ["/opt/hadoop-master.sh"]
# docker build -t kala/hadoop:2.0 .
  • Docker-Compose.yml文件
version: "3.7"
services:
 hadoop-master:
 build:
 context: .
 args:
 tar_file: hadoop-3.2.1
 tar_name: hadoop-3.2.1
 image: kala/hadoop:2.0
 container_name: kala-hadoop-master-container
 hostname: hadoopmaster
 command:
 - /bin/sh
 - -c
 - |
 /opt/hadoop-master.sh
 extra_hosts:
 - "hadoopslave1:172.16.0.16"
 - "hadoopslave2:172.16.0.17"
 ports:
 - "19888:19888"
 - "18088:18088"
 - "9870:9870"
 - "9000:9000"
 - "8088:8088"
 volumes: 
 - ../volumes/conf:/usr/local/hadoop-3.2.1/etc/hadoop
 networks:
 docker_net:
 ipv4_address: 172.16.0.15
 hadoop-slave1:
 image: kala/hadoop:2.0
 container_name: kala-hadoop-slave1-container
 hostname: hadoopslave1
 command:
 - /bin/sh
 - -c
 - |
 /opt/hadoop-slave.sh
 extra_hosts:
 - "hadoopmaster:172.16.0.15"
 - "hadoopslave2:172.16.0.17"
 volumes: 
 - ../volumes/conf:/usr/local/hadoop-3.2.1/etc/hadoop
 networks:
 docker_net:
 ipv4_address: 172.16.0.16
 hadoop-slave2:
 image: kala/hadoop:2.0
 container_name: kala-hadoop-slave2-container
 hostname: hadoopslave2
 command:
 - /bin/sh
 - -c
 - |
 /opt/hadoop-slave.sh
 extra_hosts:
 - "hadoopmaster:172.16.0.15"
 - "hadoopslave1:172.16.0.16"
 volumes: 
 - ../volumes/conf:/usr/local/hadoop-3.2.1/etc/hadoop
 networks:
 docker_net:
 ipv4_address: 172.16.0.17
 
networks:
 docker_net:
 ipam:
 driver: default
 config:
 - subnet: "172.16.0.0/24"
 external:
 name: docker-networks

hadoop配置文件

  • hadoop-env.sh
#替换jdk的路径
export JAVA_HOME=/usr/local/jdk8
  • yarn-env.sh
#替换jdk的路径
export JAVA_HOME=/usr/local/jdk8
  • core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
 <property>
 <name>hadoop.tmp.dir</name>
 <value>/usr/local/hadoop-3.2.1/tmp</value>
 <description>A base for other temporary directories.</description>
 </property>

 <property>
 <name>fs.defaultFS</name>
 <value>hdfs://hadoopmaster:18088</value>
 </property>
</configuration>
  • hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
 <property>
 <name>dfs.replication</name>
 <value>2</value>
 </property>
	
	<!--hdfs 监听namenode的web的地址,默认就是9870端口,如果不改端口也可以不设置 -->
	<property>
		<name>dfs.namenode.http-address</name>
		<value>hadoopmaster:9870</value>
	</property>

 <property>
 <name>dfs.namenode.name.dir</name>
 <value>file:/usr/local/hadoop-3.2.1/namenode</value>
 </property>

 <property>
 <name>dfs.datanode.data.dir</name>
 <value>file:/usr/local/hadoop-3.2.1/datanode</value>
 </property>
</configuration>
  • mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<!-- 必须设置,mapreduce程序使用的资源调度平台,默认值是local,若不改就只能单机运行,不会到集群上了 -->
	<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>
	<property>
			<name>mapreduce.jobhistory.webapp.address</name>
			<value>hadoopmaster:19888</value>
	</property>
	<property>
 <name>mapreduce.application.classpath</name>
 <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
 </property>
</configuration>
  • yarn-site.xml
<?xml version="1.0"?>

<configuration>
<!-- 必须配置 指定YARN的老大(ResourceManager)在哪一台主机 -->
	<property>
		<name>yarn.resourcemanager.hostname</name>
		<value>hadoopmaster</value>
	</property>
	 
	<!-- 必须配置 提供mapreduce程序获取数据的方式 默认为空 -->
	<property>
		<name>yarn.nodemanager.aux-services</name>
		<value>mapreduce_shuffle</value>
	</property>
	
	<property>
 <name>yarn.nodemanager.env-whitelist</name>
 <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
 </property>
</configuration>
  • workers
hadoopmaster
hadoopslave1
hadoopslave2

集群启动操作

  • 启动hadoop docker服务
docker-compose -f Docker-Compose.yml up
  • 停止hadoop docker服务
docker-compose -f Docker-Compose.yml down

访问hadoop集群

  • 访问NameNode
http://nn_host:9870/

docker安装hadoop分布式集群,dockerhadoop集群镜像

  • 访问ResourceManager
http://nn_host:8088/

docker安装hadoop分布式集群,dockerhadoop集群镜像

  • 访问MapReduce JobHistory Server
http://nn_host:19888/

后续更多技术分享,尽在今日IT,欢迎关注哦~