Apache Hadoop 2.6.5 Cluster Setup
created by migaloo, 2017.01.21
□ Hosts config (ALL Server)
- /etc/hosts 에 각서버에 대한 IP정보 및 hostname을 정의하여 구성해야할 노드에 대한 정보를 입력한다.
- hosts 파일 편집 후에는 리부팅 한다.
> sudo vim /etc/hosts
□ Disable IPv6 (ALL Server)
- Upstream employee Daniel Walsh recommends not disabling the ipv6 module,
as that can cause issues with SELinux and other components, but adding the following to /etc/sysctl.conf
> sudo vim /etc/sysctl.conf
# Disable IPv6
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
> sudo sysctl -p # 변경사항 즉시적용
□ SSH KEY 설정
- Master 와 slave간 자동 SSH접속을 위해 Key를 생성하여 cluster간 통신이 가능하도록 설정한다.
#SSH KEY 생성(All Server)
> sudo vim /etc/ssh/sshd_config
#PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
> rm -rf ~/.ssh #기존 정보 여부 확인 필요
> mkdir ~/.ssh
> ssh-keygen -t rsa -P "" #(엔터) (엔터)
> cat /home/migaloo/.ssh/id_rsa.pub >> /home/migaloo/.ssh/authorized_keys
> chmod 755 ~/.ssh
> chmod 644 ~/.ssh/authorized_keys
> sudo /etc/init.d/ssh restart
> ssh localhost
#SSH KEY slave에 복사(NameNode)
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo02:/home/migaloo/.ssh/id_rsa_migaloo01.pub
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo03:/home/migaloo/.ssh/id_rsa_migaloo01.pub
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo04:/home/migaloo/.ssh/id_rsa_migaloo01.pub
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo05:/home/migaloo/.ssh/id_rsa_migaloo01.pub
#SSH KEY slave에 적용(ResourceManager, slaves)
> cat /home/migaloo/.ssh/id_rsa_migaloo01.pub >> /home/migaloo/.ssh/authorized_keys
#SSH KEY slave에 복사(ResourceManager)
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo01:/home/migaloo/.ssh/id_rsa_migaloo02.pub
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo03:/home/migaloo/.ssh/id_rsa_migaloo02.pub
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo04:/home/migaloo/.ssh/id_rsa_migaloo02.pub
> scp /home/migaloo/.ssh/id_rsa.pub migaloo@migaloo05:/home/migaloo/.ssh/id_rsa_migaloo02.pub
#SSH KEY slave에 적용(NameNode, slaves)
> cat /home/migaloo/.ssh/id_rsa_migaloo02.pub >> /home/migaloo/.ssh/authorized_keys
# 연결 테스트 (NameNode, ResourceManager)
> ssh localhost
> ssh migaloo01
> ssh migaloo02
> ssh migaloo03
> ssh migaloo04
> ssh migaloo05
□ Hadoop 설치 ( Non-Secure Mode )
[주의]
.경로 생성과 etc/profile 수정은 전체 노드별로 각각 실행
.Hadoop config 파일은 NameNode에서 작성 후 scp로 ResourceManager, DataNode에 배포 후
각 노드의 역할에 맞게 설정 파일을 수정한다.
#설치 위치에 파일 배포 ( /platform/package/hadoop )
> cd /platform/package
> tar -zxvf /platform/temp/hadoop/hadoop-2.6.5.tar.gz
> ln -s hadoop-2.6.5/ hadoop
#JAVA_HOME SET PATH
> echo $JAVA_HOME
> javac -version
> which javac
> readlink -f /usr/bin/javac
> sudo vim /etc/profile
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
> source /etc/profile
> echo $JAVA_HOME
> $JAVA_HOME/bin/javac -version
#HADOOP MAKE PATH
> mkdir -p /platform/files/hadoop
> mkdir -p /platform/logs/hadoop/hdfs
> mkdir -p /platform/logs/hadoop/yarn
> mkdir -p /platform/data/hadoop/name
> mkdir -p /platform/data/hadoop/data
#HADOOP COMMON CONFIG
> sudo vim /etc/profile
HADOOP_HOME="/platform/package/hadoop"
HADOOP_YARN_HOME=$HADOOP_HOME
HADOOP_PREFIX=$HADOOP_HOME
HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
HADOOP_PID_DIR="/platform/files/hadoop"
HADOOP_SECURE_DN_PID_DIR=$HADOOP_PID_DIR
HADOOP_LOG_DIR="/platform/logs/hadoop/hdfs"
YARN_LOG_DIR="/platform/logs/hadoop/yarn"
LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:/lib:/lib64:/usr/lib:/usr/lib64:/usr/local/lib/
#MESOS_HADOOP_HOME=$HADOOP_HOME
export HADOOP_HOME HADOOP_YARN_HOME HADOOP_PREFIX HADOOP_CONF_DIR HADOOP_PID_DIR HADOOP_SECURE_DN_PID_DIR HADOOP_LOG_DIR YARN_LOG_DIR LD_LIBRARY_PATH
export PATH=$JAVA_HOME/bin:$PATH:$HOME/bin:/sbin:$HADOOP_HOME/bin
> source /etc/profile
> echo $HADOOP_HOME
> echo $HADOOP_YARN_HOME
> echo $HADOOP_PREFIX
> echo $HADOOP_CONF_DIR
> echo $HADOOP_PID_DIR
> echo $HADOOP_SECURE_DN_PID_DIR
> echo $HADOOP_LOG_DIR
> echo $YARN_LOG_DIR
> echo $LD_LIBRARY_PATH
> echo $PATH
==== 여기 까지 내용은 모든 노드에서 실행한 후 다음으로 진행 한다. ===============
#Configuring the Hadoop Daemons in Non-Secure Mode
This section deals with important parameters to be specified in the given configuration files:
> vim $HADOOP_PREFIX/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://migaloo01:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>14400</value>
<description>Number of minutes after which the checkpoint gets deleted. If zero, the trash feature is disabled.</description>
</property>
</configuration>
#scp -rp /platform/package/hadoop/etc/hadoop/hdfs-site.xml migaloo@migaloo02:/platform/package/hadoop/etc/hadoop/hdfs-site.xml
#scp -rp /platform/package/hadoop/etc/hadoop/hdfs-site.xml migaloo@migaloo04:/platform/package/hadoop/etc/hadoop/hdfs-site.xml
#scp -rp /platform/package/hadoop/etc/hadoop/hdfs-site.xml migaloo@migaloo05:/platform/package/hadoop/etc/hadoop/hdfs-site.xml
> vim $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.permissions.superusergroup</name>
<value>migaloo</value>
</property>
<!-- for NameNode -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///platform/data/hadoop/name</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>268435456</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>100</value>
</property>
<property>
<name>dfs.hosts</name>
<value>/platform/package/hadoop/etc/hadoop/slaves</value>
</property>
<!-- for DataNode -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///platform/data/hadoop/data</value>
</property>
</configuration>
> cp $HADOOP_PREFIX/etc/hadoop/mapred-site.xml.template $HADOOP_PREFIX/etc/hadoop/mapred-site.xml
> vim $HADOOP_PREFIX/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
> vim $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>migaloo02:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>migaloo02:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>migaloo02:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>migaloo02:8041</value>
</property>
</configuration>
#Slaves file
> vim /platform/package/hadoop/etc/hadoop/slaves
migaloo03
migaloo04
migaloo05
#config 복사
scp -rp /platform/package/hadoop/etc/hadoop/* migaloo@migaloo02:/platform/package/hadoop/etc/hadoop
scp -rp /platform/package/hadoop/etc/hadoop/* migaloo@migaloo03:/platform/package/hadoop/etc/hadoop
scp -rp /platform/package/hadoop/etc/hadoop/* migaloo@migaloo04:/platform/package/hadoop/etc/hadoop
scp -rp /platform/package/hadoop/etc/hadoop/* migaloo@migaloo05:/platform/package/hadoop/etc/hadoop
#Hadoop config 노드 역할에 맞게 수정
[ DataNode : migaloo03, migaloo04, migaloo05 ]
> vim $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml
<configuration>
<!-- for NameNode -->
<!-- property>
....
</property -->
<!-- for DataNode -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///platform/data/hadoop/data</value>
</property>
</configuration>
> vim $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
<configuration>
<!-- property>
....
</property -->
</configuration>
□ Operating the Hadoop Cluster
#Hadoop FileSystem Format
[ NameNode : migaloo01 ]
> $HADOOP_PREFIX/bin/hdfs namenode -format "migaloo_cluster"
#재포멧 시 기존 내용 모두 삭제(All Server)
rm -rf /platform/data/hadoop/name/*
rm -rf /platform/data/hadoop/data/*
#Hadoop Startup
[ NameNode : migaloo01 ]
> $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode
[ ResourceManager : migaloo02 ]
> $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager
> $HADOOP_YARN_HOME/sbin/yarn-daemon.sh start proxyserver --config $HADOOP_CONF_DIR
> $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR
[ DataNode : migaloo03, migaloo04, migaloo05 ]
> $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode
> $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager
#각 노드별 process 확인
> jps
#Web Interfaces
NameNode http://migaloo01:50070/ http://192.168.10.101:50070/
ResourceManager http://migaloo02:8088/ http://192.168.10.102:8088/
MapReduce JobHistory Server http://migaloo02:19888/ http://192.168.10.102:19888/
#테스트 파일 생성 (NameNode)
> hdfs dfs -mkdir /tmp
> vi test_hdfs.txt
> hdfs dfs -put test_hdfs.txt /tmp
> hdfs dfs -ls /tmp
#Hadoop Shutdown
[ NameNode : migaloo01 ]
> $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode
[ ResourceManager : migaloo02 ]
> $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager
> $HADOOP_YARN_HOME/sbin/yarn-daemon.sh stop proxyserver --config $HADOOP_CONF_DIR
> $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR
[ DataNode : migaloo03, migaloo04, migaloo05 ]
> $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode
> $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager
#.주의 사항
- host 파일에 127.0.1.1 migaloo01 이런 설정이 있으면 삭제한다.
데몬이 127.0.1.1로 실행되어 외부에서 서비스로 접속이 불가할 수 있다.
- 네임노드 포맷 후 데이터 노드 실행 안됨 오류
2017-01-22 00:40:05,818 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: Ending block pool service for: Block pool <registering> (Datanode Uuid unassigned) service to migaloo01/192.168.10.101:9000
2017-01-22 00:40:05,922 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Removed Block pool <registering> (Datanode Uuid unassigned)
2017-01-22 00:40:07,923 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: Exiting Datanode
I met the same problem and solved it by doing the following steps:
step 1. remove the hdfs directory (for me it was the default directory "/tmp/hadoop-root/")
> rm -rf /platform/data/hadoop/name/*
> rm -rf /platform/data/hadoop/data/*
step 2. run
> $HADOOP_PREFIX/bin/hdfs namenode -format "migaloo_cluster"
to format the directory
□ 참고 싸이트
http://hadoop.apache.org/docs/r2.6.5/hadoop-project-dist/hadoop-common/ClusterSetup.html