您当前的位置: 首页 > 学无止境 > 心得笔记 网站首页心得笔记
hadoop-分布式
发布时间:2019-07-30 16:44:26  编辑:雪饮  阅读()
环境:redhat6.4-i386
各个节点公共
主机名解析
[root@localhost ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.2.170 master
192.168.2.223 snn
192.168.2.212 slave
[root@localhost ~]# useradd hduser
[root@localhost ~]# passwd hduser
时间同步
*/5 * * * * /sbin/ntpdate ntp7.aliyun.com &> /dev/null
jdk安装及hadoop源码部署
[root@localhost src]# rpm -ivh jdk-7u5-linux-i586.rpm
[root@localhost src]# tar -zxvf hadoop-0.20.2-cdh3u5.tar.gz
[root@localhost src]# ln -sv /usr/local/src/hadoop-0.20.2-cdh3u5 /usr/local/hadoop
环境变量(直接获取上次单机时部署的环境变量)
[root@localhost ~]# scp root@192.168.2.175:/etc/profile.d/java.sh /etc/profile.d/
[root@localhost ~]# scp root@192.168.2.175:/etc/profile.d/hadoop.sh /etc/profile.d/
[root@localhost src]# chmod +x /etc/profile.d/java.sh
[root@localhost src]# chmod +x /etc/profile.d/hadoop.sh
[root@localhost src]# /etc/profile.d/java.sh
[root@localhost src]# /etc/profile.d/hadoop.sh
[root@localhost src]# source /etc/profile.d/java.sh
[root@localhost src]# source /etc/profile.d/hadoop.sh
(注:直接执行脚本是在子 shell 中运行,不会影响当前 shell 的环境变量;真正生效的是后面的 source 命令,前两行可省略。)
权限
[root@localhost conf]# mkdir /hadoop/temp -pv
如下两个命令,当master启动时最好再执行下
[root@localhost conf]# chown -R hduser.hduser /usr/local/hadoop
[root@localhost conf]# chown -R hduser.hduser /hadoop
master配置
双机互信
[root@localhost ~]# ssh-keygen -t rsa -P ''
[root@localhost ~]# ssh-copy-id -i .ssh/id_rsa.pub root@snn
[root@localhost ~]# ssh-copy-id -i .ssh/id_rsa.pub root@slave
(注意:后文 start-all.sh 是以 hduser 身份通过 ssh 启动各节点的,一般还需要以 hduser 用户同样配置到 snn、slave 的免密互信——请按实际环境确认。)
masters与slaves
[root@localhost conf]# cat /usr/local/hadoop/conf/masters
snn
[root@localhost conf]# cat /usr/local/hadoop/conf/slaves
slave
core-site、mapred-site、hdfs-site
[root@localhost conf]# cat /usr/local/hadoop/conf/core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/hadoop/temp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://master:8020</value>
</property>
</configuration>
[root@localhost conf]# cat /usr/local/hadoop/conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:8021</value>
</property>
</configuration>
[root@localhost conf]# cat /usr/local/hadoop/conf/hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>the replication</description>
</property>
<property>
<name>dfs.data.dir</name>
<value>/hadoop/data</value>
<final>true</final>
<description>the data.dir</description>
</property>
<property>
<name>dfs.name.dir</name>
<value>/hadoop/name</value>
<final>true</final>
<description>the name.dir</description>
</property>
<property>
<name>fs.checkpoint.dir</name>
<value>/hadoop/namesecondary</value>
<final>true</final>
<description>the checkpoint.dir</description>
</property>
</configuration>
同步
[root@localhost conf]# scp /usr/local/hadoop/conf/*-site.xml root@snn:/usr/local/hadoop/conf/
[root@localhost conf]# scp /usr/local/hadoop/conf/*-site.xml root@slave:/usr/local/hadoop/conf/
格式化
[root@localhost conf]# su - hduser
[hduser@localhost ~]$ hadoop namenode -format
启动
[hduser@localhost ~]$ start-all.sh
测试
master
[hduser@localhost ~]$ jps
11123 JobTracker
11165 Jps
10966 NameNode
snn
[hduser@localhost ~]$ jps
8911 Jps
8866 SecondaryNameNode
选择任意一个节点进行上传文件并进行数据分析的测试
建立目录
[hduser@localhost ~]$ hadoop fs -mkdir wc-in
上传
[hduser@localhost ~]$ hadoop fs -put /etc/rc.d/rc.sysinit wc-in
[hduser@localhost ~]$ hadoop fs -put /etc/rc.d/init.d/functions wc-in
查看文件列表
[hduser@localhost ~]$ hadoop fs -ls wc-in/
Found 2 items
-rw-r--r-- 1 hduser supergroup 18216 2019-07-31 00:34 /user/hduser/wc-in/functions
-rw-r--r-- 1 hduser supergroup 19216 2019-07-31 00:34 /user/hduser/wc-in/rc.sysinit
分析
[hduser@localhost ~]$ hadoop jar /usr/local/hadoop/hadoop-examples-0.20.2-cdh3u5.jar wordcount wc-in wc-out
19/07/31 00:38:08 INFO input.FileInputFormat: Total input paths to process : 2
19/07/31 00:38:08 INFO util.NativeCodeLoader: Loaded the native-hadoop library
19/07/31 00:38:08 WARN snappy.LoadSnappy: Snappy native library not loaded
19/07/31 00:38:09 INFO mapred.JobClient: Running job: job_201907310024_0001
19/07/31 00:38:10 INFO mapred.JobClient: map 0% reduce 0%
19/07/31 00:38:16 INFO mapred.JobClient: map 100% reduce 0%
19/07/31 00:38:24 INFO mapred.JobClient: map 100% reduce 33%
19/07/31 00:38:25 INFO mapred.JobClient: map 100% reduce 100%
19/07/31 00:38:25 INFO mapred.JobClient: Job complete: job_201907310024_0001
19/07/31 00:38:25 INFO mapred.JobClient: Counters: 26
19/07/31 00:38:25 INFO mapred.JobClient: Job Counters
19/07/31 00:38:25 INFO mapred.JobClient: Launched reduce tasks=1
19/07/31 00:38:25 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=9161
19/07/31 00:38:25 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
19/07/31 00:38:25 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
19/07/31 00:38:25 INFO mapred.JobClient: Launched map tasks=2
19/07/31 00:38:25 INFO mapred.JobClient: Data-local map tasks=2
19/07/31 00:38:25 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=8752
19/07/31 00:38:25 INFO mapred.JobClient: FileSystemCounters
19/07/31 00:38:25 INFO mapred.JobClient: FILE_BYTES_READ=27012
19/07/31 00:38:25 INFO mapred.JobClient: HDFS_BYTES_READ=37655
19/07/31 00:38:25 INFO mapred.JobClient: FILE_BYTES_WRITTEN=212539
19/07/31 00:38:25 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=18583
19/07/31 00:38:25 INFO mapred.JobClient: Map-Reduce Framework
19/07/31 00:38:25 INFO mapred.JobClient: Map input records=1445
19/07/31 00:38:25 INFO mapred.JobClient: Reduce shuffle bytes=27018
19/07/31 00:38:25 INFO mapred.JobClient: Spilled Records=3584
19/07/31 00:38:25 INFO mapred.JobClient: Map output bytes=56447
19/07/31 00:38:25 INFO mapred.JobClient: CPU time spent (ms)=2980
19/07/31 00:38:25 INFO mapred.JobClient: Total committed heap usage (bytes)=328138752
19/07/31 00:38:25 INFO mapred.JobClient: Combine input records=5508
19/07/31 00:38:25 INFO mapred.JobClient: SPLIT_RAW_BYTES=223
19/07/31 00:38:25 INFO mapred.JobClient: Reduce input records=1792
19/07/31 00:38:25 INFO mapred.JobClient: Reduce input groups=1610
19/07/31 00:38:25 INFO mapred.JobClient: Combine output records=1792
19/07/31 00:38:25 INFO mapred.JobClient: Physical memory (bytes) snapshot=398868480
19/07/31 00:38:25 INFO mapred.JobClient: Reduce output records=1610
19/07/31 00:38:25 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1185402880
19/07/31 00:38:25 INFO mapred.JobClient: Map output records=5508
关键字词:hadoop
上一篇:hadoop-安装与基础使用
下一篇:SELinux基础概念