//安装SSH
[root@localhost /]# yum install openssh-server openssh-clients
//生成密钥
[root@localhost /]# ssh-keygen
(可以一路回车)
生成下面两个文件:
/root/.ssh/id_rsa
/root/.ssh/id_rsa.pub
[root@localhost .ssh]# cd /root/.ssh/
//单机环境下直接把公钥追加到本机的authorized_keys文件中(如下);多机环境下则需要把公钥复制到另外一台机器上,并追加到那台机器的authorized_keys文件中
[root@localhost .ssh]# cat ./id_rsa.pub>>./authorized_keys
[root@localhost .ssh]# cd /home
(另外一个更简单的复制方法是使用 ssh-copy-id -i ~/.ssh/id_rsa.pub root@192.168.1.201)
//配置JDK环境变量
[root@localhost opt]# vi /etc/profile
export JAVA_HOME=/opt/jdk1.6.0_31
export PATH=$JAVA_HOME/bin:$PATH:.
//使配置生效
[root@localhost opt]# source /etc/profile
//安装Hadoop 1.0.3
[root@localhost opt]# rpm -i hadoop-1.0.3-1.x86_64.rpm
//查看安装后的Hadoop版本号信息
[root@localhost opt]# hadoop version
(如果报错,请检查 hadoop-env.sh 中的java路径配置是否正确)
修改hadoop配置文件(/etc/hadoop)
[root@localhost hadoop]# cd /etc/hadoop
[root@localhost hadoop]# vi hadoop-env.sh
export JAVA_HOME=/opt/jdk1.6.0_31
[root@localhost hadoop]# vi core-site.xml
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://192.168.1.101:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/hadoop</value>
</property>
</configuration>
[root@localhost hadoop]# vi hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
[root@localhost hadoop]# vi mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>192.168.1.101:9001</value>
</property>
</configuration>
//格式化文件系统
[root@localhost opt]# hadoop namenode -format
//启动Hadoop相关的所有服务 (/usr/sbin)
[root@localhost sbin]# start-all.sh
或
[root@localhost opt]# /usr/sbin/start-all.sh
(如果没有执行权限,需要将/usr/sbin目录下的相关sh文件设置执行权限)
说明: /usr/sbin 目录下与 Hadoop 启动/停止相关的脚本有:
start-all.sh
stop-all.sh
start-dfs.sh
stop-dfs.sh
start-mapred.sh
stop-mapred.sh
slaves.sh
//jps查看已经启动的服务进程信息
[root@localhost hadoop]# jps
5131 NameNode
5242 DataNode
5361 SecondaryNameNode
5583 TaskTracker
5463 JobTracker
6714 Jps
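防火墙需要开放的端口:
9000 (NameNode,对应 fs.default.name)
9001 (JobTracker,对应 mapred.job.tracker)
50010 (DataNode 数据传输)
50070 (NameNode Web 界面)
50030 (JobTracker Web 界面)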
(通过浏览器访问 NameNode 界面 http://192.168.1.101:50070 和 JobTracker 界面 http://192.168.1.101:50030)
[root@localhost hadoop]# hadoop dfsadmin -report
为运行例子 wordcount 作准备
[root@localhost opt]# hadoop fs -mkdir input
[root@localhost opt]# echo "Hello World Bye World" > file01
[root@localhost opt]# echo "Hello Hadoop Goodbye Hadoop" > file02
[root@localhost opt]# hadoop fs -copyFromLocal ./file0* input
运行例子 wordcount
[root@localhost opt]# hadoop jar /usr/share/hadoop/hadoop-examples-1.0.3.jar wordcount input output
12/08/11 12:00:30 INFO input.FileInputFormat: Total input paths to process : 2
12/08/11 12:00:30 INFO util.NativeCodeLoader: Loaded the native-hadoop library
12/08/11 12:00:30 WARN snappy.LoadSnappy: Snappy native library not loaded
12/08/11 12:00:31 INFO mapred.JobClient: Running job: job_201208111137_0001
12/08/11 12:00:32 INFO mapred.JobClient: map 0% reduce 0%
12/08/11 12:01:05 INFO mapred.JobClient: map 100% reduce 0%
12/08/11 12:01:20 INFO mapred.JobClient: map 100% reduce 100%
12/08/11 12:01:25 INFO mapred.JobClient: Job complete: job_201208111137_0001
12/08/11 12:01:25 INFO mapred.JobClient: Counters: 29
12/08/11 12:01:25 INFO mapred.JobClient: Job Counters
12/08/11 12:01:25 INFO mapred.JobClient: Launched reduce tasks=1
12/08/11 12:01:25 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=49499
12/08/11 12:01:25 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
12/08/11 12:01:25 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
12/08/11 12:01:25 INFO mapred.JobClient: Launched map tasks=2
12/08/11 12:01:25 INFO mapred.JobClient: Data-local map tasks=2
12/08/11 12:01:25 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=12839
12/08/11 12:01:25 INFO mapred.JobClient: File Output Format Counters
12/08/11 12:01:25 INFO mapred.JobClient: Bytes Written=41
12/08/11 12:01:25 INFO mapred.JobClient: FileSystemCounters
12/08/11 12:01:25 INFO mapred.JobClient: FILE_BYTES_READ=79
12/08/11 12:01:25 INFO mapred.JobClient: HDFS_BYTES_READ=276
12/08/11 12:01:25 INFO mapred.JobClient: FILE_BYTES_WRITTEN=64705
12/08/11 12:01:25 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=41
12/08/11 12:01:25 INFO mapred.JobClient: File Input Format Counters
12/08/11 12:01:25 INFO mapred.JobClient: Bytes Read=50
12/08/11 12:01:25 INFO mapred.JobClient: Map-Reduce Framework
12/08/11 12:01:25 INFO mapred.JobClient: Map output materialized bytes=85
12/08/11 12:01:25 INFO mapred.JobClient: Map input records=2
12/08/11 12:01:25 INFO mapred.JobClient: Reduce shuffle bytes=85
12/08/11 12:01:25 INFO mapred.JobClient: Spilled Records=12
12/08/11 12:01:25 INFO mapred.JobClient: Map output bytes=82
12/08/11 12:01:25 INFO mapred.JobClient: CPU time spent (ms)=4770
12/08/11 12:01:25 INFO mapred.JobClient: Total committed heap usage (bytes)=246751232
12/08/11 12:01:25 INFO mapred.JobClient: Combine input records=8
12/08/11 12:01:25 INFO mapred.JobClient: SPLIT_RAW_BYTES=226
12/08/11 12:01:25 INFO mapred.JobClient: Reduce input records=6
12/08/11 12:01:25 INFO mapred.JobClient: Reduce input groups=5
12/08/11 12:01:25 INFO mapred.JobClient: Combine output records=6
12/08/11 12:01:25 INFO mapred.JobClient: Physical memory (bytes) snapshot=391634944
12/08/11 12:01:25 INFO mapred.JobClient: Reduce output records=5
12/08/11 12:01:25 INFO mapred.JobClient: Virtual memory (bytes) snapshot=3159781376
12/08/11 12:01:25 INFO mapred.JobClient: Map output records=8
//查看统计结果
[root@localhost opt]# hadoop fs -cat output/part-r-00000
Bye	1
Goodbye	1
Hadoop	2
Hello	2
World	2
//---------------------------------------
作业日志存放目录:
/var/log/hadoop/root/userlogs/
//---------------------------------------
安装 hadoop-1.0.3-1 后,相关文件所在的目录有:
/etc/hadoop
/var/run/hadoop
/var/log/hadoop
/usr/share/hadoop
/usr/share/doc/hadoop
/usr/etc/hadoop
/usr/bin/hadoop(文件)
/usr/include/hadoop