1. 集群环境规划
IP | 主机名 | NN1 | NN2 | DN | Resource Manager | NodeManager | ZK |
172.*.*.6 | master | Y | Y | N | Y | N | Y |
172.*.*.7 | slave1 | N | N | Y | N | Y | Y |
172.*.*.8 | slave2 | N | N | Y | N | Y | Y |
172.*.*.9 | slave3 | N | N | Y | N | Y | Y |
2. 新建用户及用户组
adduser hadoop passwd hadoop #添加到hadoop组 usermod -a -G hadoop hadoop #赋予root权限 vi /etc/sudoers hadoop ALL=(ALL) ALL
3. 修改master(172.*.*.6)主机名
vi /etc/sysconfig/network HOSTNAME=master #重启生效或者临时使用命令生效 hostname master #同理在slave1和slave2、slave3上分别执行 hostname slave1 hostname slave2 hostname slave3
4. 配置ip与主机名映射
vi /etc/hosts 172.*.*.6 master 172.*.*.7 slave1 172.*.*.8 slave2 172.*.*.9 slave3
5. 配置免密登录
#四台机器中分别生成密钥 ssh-keygen -t rsa #将各自公钥追加到master的authorized_keys中 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys #赋予authorized_keys 600权限 chmod 600 authorized_keys #最终authorized_keys文件内容如下(slave3的公钥同理追加) [root@localhost .ssh]# cat authorized_keys ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAtEvxRj/3xPCtnO38Gy4Y/Y4gj6XX5s+G2hwG5xx19PiDQEKeW3BYUDE616OVdecStBo3X+0Plr2ioirI/3WGlUkm0todr/irpksy0MTpvsjCNUnCWGUHGFMUmrcw1LSiNLhoOSS02AcIq+hw3QJO0w0Wo0EN8xcOhrYwuAByoVv3CvqWd/2Vce2rNOXxLNSmc9tR0Dl3ZqOAq+2a55GM7cETj+eiexDeF5zEVJ2vykQdH3+sZ2XLrQu4WXOMn70xFosk7E1lwJ14QLy6lpfRcWnB1JVKJx9mglze6v3U35g59Vu/LP7t3ebW+dJIOD3/Attb5HcvN8MNfQVOX3JD4w== root@master ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAuU9KJmlmYCx7D+vfMCl2Fj/kz1mfWBrChco0jmZtbygpYY8MUSjmfnsC/wefWKMnFtEruJb+RrgBLxVY6lNzvVKXh+iVPhrjubzj54FoZjepR+1EEznIvwkKa+Y4fkcSJjmcSq/Wvjvz34j3/wVoa1qZtbQing+GzC8Xt0y5rQ6fD1gzD4Oniu43fHAeQDxpo2cVNnTdO2HEe56ZfhIctVRP63rc2CoEuD7d0Ea2WhV0Uruqri/ZKFHVAQQqQ7z/jdCgzTdTXJ5t5hpyeaK8+mYhUKEyOF3xrACW1Is6grUjhbjUxTLt2y2Ytw1d5voFxCUJ6MQcy91KFE/9Lfefyw== root@slave1 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEArucVUJdZBYXJD0r9WiX6VnR5S3F7BhoR7hB8UTkXs+WRJGEX9E44yjH+BjIJAPn2v/XwOCdqzSZrGPzLL/BG+XRhGN5NGmdplv8xI3C93hC5kZewRHrHlcAG5Kv4mcHlU+ugcWiyQbIaQvLaFXaq48ZVQHYrzXrz3ZT6QDpsaZtSeW4Z4KWeFmL+AwNyAqxK0nxYXR1zNQJ1r0IdApKmP1WNvbcblB2UKx5G7VMxOs62WY0R9LGdJK6Mmmr5QPlWlpn/g5vXlBvgD80pM6iixFAyz8q19aMQjErTWuULNvX8tdcm+StJV52N8EsiuNMOs+xLVO7L00yxZRtwrXKGgQ== root@slave2 #将master的authorized_keys远程传输到slave1/slave2/slave3 scp ~/.ssh/authorized_keys root@slave1:~/.ssh/ scp ~/.ssh/authorized_keys root@slave2:~/.ssh/ scp ~/.ssh/authorized_keys root@slave3:~/.ssh/ #检查远程免密登录 ssh slave1 ssh slave2 ssh slave3
6. 解压并配置环境变量
tar -zxvf hadoop-2.7.7.tar.gz vi /etc/profile export HADOOP_HOME=/opt/middleware/hadoop-2.7.7 export PATH=$PATH:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin source /etc/profile
7. hadoop配置
#master节点创建文件夹 mkdir -p /opt/middleware/hadoop-2.7.7/dfs/{name,data} mkdir -p /opt/middleware/hadoop-2.7.7/temp #其他配置 vi slaves slave1 slave2 slave3
#修改hadoop-env.sh export JAVA_HOME=${JAVA_HOME} #修改为(根据jdk实际安装目录) export JAVA_HOME=/usr/local/jdk
#配置core-site.xml <configuration> <property> <name>fs.defaultFS</name> <value>hdfs://master:9000</value> </property> <property> <name>io.file.buffer.size</name> <value>131072</value> </property> <property> <name>hadoop.tmp.dir</name> <value>file:/opt/middleware/hadoop-2.7.7/temp</value> </property> <property> <name>hadoop.proxyuser.hduser.hosts</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.hduser.groups</name> <value>*</value> </property> </configuration>
#配置hdfs-site.xml <configuration> <property> <name>dfs.nameservices</name> <value>rsmshadoop</value> </property> <property> <name>dfs.namenode.secondary.http-address</name> <value>master:9001</value> </property> <property> <name>dfs.namenode.name.dir</name> <value>file:/opt/middleware/hadoop-2.7.7/dfs/name</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>file:/opt/middleware/hadoop-2.7.7/dfs/data</value> </property> <property> <name>dfs.replication</name> <value>1</value> </property> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property> </configuration>
#配置mapred-site.xml <configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>master:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>master:19888</value> </property> </configuration>
#配置yarn-site.xml <configuration> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.resourcemanager.address</name> <value>master:8032</value> </property> <property> <name>yarn.resourcemanager.scheduler.address</name> <value>master:8030</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address</name> <value>master:8031</value> </property> <property> <name>yarn.resourcemanager.admin.address</name> <value>master:8033</value> </property> <property> <name>yarn.resourcemanager.webapp.address</name> <value>master:8088</value> </property> </configuration>
8. 运行hadoop
#格式化 /opt/middleware/hadoop-2.7.7/bin/hdfs namenode -format
启动集群
/opt/middleware/hadoop-2.7.7/sbin/start-all.sh
[root@localhost sbin]# sh start-dfs.sh which: no start-dfs.sh in (/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/jdk/bin:/opt/middleware/mongodb/bin:/opt/middleware/hadoop-2.7.7/bin:/root/bin:/usr/local/jdk/bin:/opt/middleware/mongodb/bin:/opt/middleware/hadoop-2.7.7/bin) 19/01/17 18:38:06 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Starting namenodes on [master] The authenticity of host 'master (172.*.*.6)' can't be established. RSA key fingerprint is a0:47:1b:35:a9:f1:e7:0d:81:6d:8b:f4:47:95:f9:96. Are you sure you want to continue connecting (yes/no)? yes master: Warning: Permanently added 'master,172.*.*.6' (RSA) to the list of known hosts. master: starting namenode, logging to /opt/middleware/hadoop-2.7.7/logs/hadoop-root-namenode-master.out slave2: starting datanode, logging to /opt/middleware/hadoop-2.7.7/logs/hadoop-root-datanode-slave2.out slave1: starting datanode, logging to /opt/middleware/hadoop-2.7.7/logs/hadoop-root-datanode-slave1.out Starting secondary namenodes [master] master: starting secondarynamenode, logging to /opt/middleware/hadoop-2.7.7/logs/hadoop-root-secondarynamenode-master.out 19/01/17 18:38:41 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
验证集群
hadoop jar /opt/middleware/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar pi 10 10
监控页面:HDFS NameNode http://master:50070 ,YARN ResourceManager http://master:8088
碰到的问题
1.部分NameNode无法启动
原因是多次运行了格式化命令,导致集群ID不一致 /opt/middleware/hadoop-2.7.7/bin/hdfs namenode -format 解决方案: 将NameNode(master)的clusterID拷贝到DataNode中 #打开NameNode Version vi /opt/middleware/hadoop-2.7.7/dfs/name/current/VERSION ... clusterID=CID-45f7aaaf-424a-472c-9cb5-827a9d18906e #打开DataNode Version vi /opt/middleware/hadoop-2.7.7/dfs/data/current/VERSION #将其中的clusterID修改为与NameNode一致(clusterID=CID-45f7aaaf-424a-472c-9cb5-827a9d18906e),保存后重启DataNode即可
2.NameNode无法启动
There appears to be a gap in the edit log. We expected txid 1, but got txid 37309 原因:元数据文件被损坏 解决方案:hadoop namenode -recover