Hadoop Installation Guide

Posted by wangq17 on 2016-10-15


Environment:

Ubuntu 16.04

Machines:

192.168.1.105 namenode

192.168.1.102 datanode1


0. Configure node information

sudo gedit /etc/hosts

# add the following entries

192.168.1.105 namenode

192.168.1.102 datanode1


sudo gedit /etc/hostname  # change the hostname (to namenode or datanode1 on the respective machine)

# the settings above take effect after a reboot
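As a quick optional check after the reboot, confirm that the hostname took effect and that the two machines can reach each other by name (this assumes the /etc/hosts entries above are in place on both machines):

hostname              # should print namenode (or datanode1 on the slave)
ping -c 3 datanode1   # run on the namenode
ping -c 3 namenode    # run on datanode1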


1. Create the hadoop group and hadoop user on Ubuntu

1.1 Create the hadoop group

If you are not logged in as root:

@ubuntu:~$ sudo addgroup hadoop


1.2 Create the hadoop user

@ubuntu:~$ sudo adduser --ingroup hadoop hadoop


1.3 Grant the hadoop user sudo privileges (the same as root)

sudo gedit /etc/sudoers

# Add under "User privilege specification":

root ALL=(ALL:ALL) ALL

hadoop ALL=(ALL:ALL) ALL


2. Log in as the newly created hadoop user

su hadoop


3. Install ssh

3.1 Install: sudo apt-get install openssh-server

After the installation finishes, start the service.

3.2 Start: sudo /etc/init.d/ssh start

Check that the service started correctly: ps -e | grep ssh


Set up passwordless ssh login

# su hadoop

$ ssh-keygen -t rsa

$ ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@namenode

$ ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@datanode1

$ chmod 0600 ~/.ssh/authorized_keys

$ exit

On the namenode:

mkdir -p $HOME/.ssh
chmod 700 $HOME/.ssh
ssh-keygen -t rsa -P '' -f $HOME/.ssh/id_rsa
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
chmod 644 $HOME/.ssh/authorized_keys

Copy the public key into the hadoop user's $HOME directory on the slave node:

scp $HOME/.ssh/id_rsa.pub hadoop@192.168.1.102:/home/hadoop/

On the datanode:

cd $HOME
mkdir -p $HOME/.ssh 
chmod 700 $HOME/.ssh
cat id_rsa.pub >>$HOME/.ssh/authorized_keys 
chmod 644 $HOME/.ssh/authorized_keys

The permissions must match the ones below exactly; if they do not, you will be prompted for a password every time!

chmod 755 /home                     ---> drwxr-xr-x
chmod 755 /home/hadoop              ---> drwxr-xr-x
chmod 700 ~/.ssh                    ---> drwx------
chmod 644 ~/.ssh/authorized_keys    ---> -rw-r--r--
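With the keys copied and the permissions set, passwordless login can be verified from the namenode; a quick check (datanode1 is the slave defined in /etc/hosts above):

ssh hadoop@datanode1 hostname   # should print datanode1 without asking for a password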

4. Install the JDK

$ su
Password:

Download the 64-bit or 32-bit JDK from the Oracle website (matching your OS architecture).

mkdir /usr/lib/jvm

tar -zxf jdk...

# mv jdk1.8.0_101 /usr/lib/jvm

# exit

Add the following (for example to /etc/profile or ~/.bashrc):

export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_101

export PATH=$PATH:$JAVA_HOME/bin
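To confirm the JDK is picked up, re-read the file you added the variables to and check the version (a quick sanity check; adjust the source line to wherever you put the exports):

source ~/.bashrc   # or the file you edited above
echo $JAVA_HOME    # should print /usr/lib/jvm/jdk1.8.0_101
java -version      # should report version 1.8.0_101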



5. Install Hadoop

mkdir /home/hadoop

sudo tar xzf hadoop-2.7.3.tar.gz

mv hadoop-2.7.3 /home/hadoop

# rename hadoop-2.7.3 to hadoop

cd /home/hadoop
mv hadoop-2.7.3 hadoop

chmod 777 /home/hadoop/hadoop



!!!!! Hadoop configuration files

Create the dfs directories under the hadoop directory:

cd /home/hadoop/hadoop

hadoop$ mkdir dfs

hadoop$ mkdir dfs/name

hadoop$ mkdir dfs/name/data
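The three mkdir calls above can also be collapsed into a single command with the same result:

hadoop$ mkdir -p dfs/name/data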


cd /home/hadoop/hadoop/etc/hadoop


sudo gedit core-site.xml

<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://namenode:9000</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
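fs.default.name still works in Hadoop 2.7.3 but is the deprecated 1.x key; the current name is fs.defaultFS, so the same setting can equivalently be written as:

<property>
  <name>fs.defaultFS</name>
  <value>hdfs://namenode:9000</value>
</property>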


sudo gedit hdfs-site.xml


<configuration>
  <property>
    <name>dfs.data.dir</name>
    <value>file:/home/hadoop/hadoop/dfs/name/data</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>file:/home/hadoop/hadoop/dfs/name</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>
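Similarly, dfs.name.dir and dfs.data.dir are the older 1.x names; 2.7.3 still accepts them, but the same directories can be declared with the current keys:

<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:/home/hadoop/hadoop/dfs/name</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>file:/home/hadoop/hadoop/dfs/name/data</value>
</property>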


cp mapred-site.xml.template mapred-site.xml
sudo gedit mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

<!--
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>hdfs://namenode:9001</value>
  </property>
</configuration>
-->

Configure the YARN file:

sudo gedit yarn-site.xml

<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>192.168.1.105:8040</value>
  </property>

  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>192.168.1.105:8030</value>
  </property>

  <property>
    <description>The address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>192.168.1.105:8088</value>
  </property>

  <property>
    <description>The address of the resource tracker interface.</description>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>192.168.1.105:8025</value>
  </property>
</configuration>

sudo gedit slaves

# list the slave node(s)

datanode1

sudo gedit masters

# list the namenode

namenode

!!!!! End of Hadoop configuration files



Configure the .bashrc file


sudo gedit ~/.bashrc

#HADOOP VARIABLES START


export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_101


export HADOOP_HOME=/home/hadoop/hadoop


export PATH=$PATH:$HADOOP_HOME/bin


export PATH=$PATH:$HADOOP_HOME/sbin


export HADOOP_MAPRED_HOME=$HADOOP_HOME


export HADOOP_COMMON_HOME=$HADOOP_HOME


export HADOOP_HDFS_HOME=$HADOOP_HOME


export YARN_HOME=$HADOOP_HOME


export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native


export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"


#HADOOP VARIABLES END


Apply the configuration: source ~/.bashrc
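After sourcing, the hadoop command should be on the PATH; a quick check:

hadoop version   # should report Hadoop 2.7.3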


5.6 Set the Java environment variable in hadoop-env.sh

sudo gedit /home/hadoop/hadoop/etc/hadoop/hadoop-env.sh

Find the JAVA_HOME line and change it as follows:

export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_101



7. Install Hadoop on the slave node

# su hadoop

$ cd /home/hadoop

$ scp -r hadoop datanode1:/home/hadoop
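Note that the slave node also needs the JDK at the same path, because the copied hadoop-env.sh points to /usr/lib/jvm/jdk1.8.0_101. A quick check from the namenode (install the JDK on datanode1 as in step 4 if the path is missing):

ssh datanode1 ls /usr/lib/jvm/jdk1.8.0_101/bin/java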



8. Start Hadoop


cd /home/hadoop/hadoop/bin

hadoop namenode -format  # Note: if you format again after the cluster has already been started, the VERSION file under the datanodes' dfs data directory becomes stale and the DataNode will fail to start on the slave node. Either edit that VERSION file so its layoutVersion matches the namenode's, or delete the VERSION file.
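If the DataNode still refuses to start after a re-format, the bluntest fix (safe only on a fresh cluster with no data worth keeping) is to clear the datanode's local storage before starting again; a sketch, assuming the dfs.data.dir configured above:

# run on datanode1
rm -rf /home/hadoop/hadoop/dfs/name/data/*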


cd /home/hadoop/hadoop/sbin

start-all.sh
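To check that everything came up, run jps (it ships with the JDK) on each machine; with the configuration above the daemons should look roughly as follows, and the ResourceManager web UI from yarn-site.xml is at http://192.168.1.105:8088 (the NameNode web UI defaults to port 50070):

jps
# on the namenode: NameNode, SecondaryNameNode, ResourceManager
# on datanode1:    DataNode, NodeManager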
