Hadoop實驗——熟悉常用的HDFS操作

藍星花發表於2017-11-12

一,程式設計實現以下指定功能,並利用Hadoop提供的Shell命令完成相同任務:


向HDFS中上傳任意文字檔案,如果指定的檔案在HDFS中已經存在,由使用者指定是追加到原有檔案末尾還是覆蓋原有的檔案

Shell命令實現:

1.先到Hadoop主資料夾

cd /usr/local/hadoop

2.啟動Hadoop服務

sbin/start-dfs.sh
sbin/start-yarn.sh

3.建立兩個任意文字檔案用於實驗

echo "hello world" > local.txt
echo "hello hadoop" >text.txt

4.建立使用者工作目錄(HDFS預設工作目錄格式為/user/當前使用者)

hadoop fs -mkdir -p /user/當前使用者名稱

5.檢查檔案是否存在

hadoop fs -test -e text.txt
echo $?

6.上傳本地檔案到HDFS系統
hadoop fs -put text.txt


7.追加到檔案末尾的指令
hadoop fs -appendToFile local.txt text.txt


8.檢視HDFS檔案的內容
hadoop fs -cat text.txt


9.覆蓋原有檔案的指令(覆蓋之後再執行一遍上一步)
hadoop fs -copyFromLocal -f local.txt text.txt


10.以上步驟也可以用如下命令實現

# Append local.txt to text.txt when text.txt already exists in HDFS;
# otherwise upload local.txt as text.txt.
# The commands are run directly: wrapping them in $(...) would make the
# shell try to execute whatever they print to stdout.
if hadoop fs -test -e text.txt; then
    hadoop fs -appendToFile local.txt text.txt
else
    hadoop fs -copyFromLocal -f local.txt text.txt
fi
package cn.edu.zucc.hdfs;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Uploads a local file to HDFS. When the remote file already exists, the
 * {@code choice} setting in {@link #main} decides whether it is overwritten
 * or appended to.
 */
public class CopyFromLocalFile {

    /**
     * Checks whether a path exists on the file system described by {@code conf}.
     *
     * @param conf Hadoop configuration used to open the file system
     * @param path path to look up
     * @return {@code true} if the path exists; {@code false} if it does not,
     *         or if the lookup failed (the stack trace is printed)
     */
    public static boolean test(Configuration conf, String path) {
        // newInstance rather than get: this handle is closed on exit, so it
        // must not be the cached instance that FileSystem.get hands out to
        // every caller using the same configuration.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            return fs.exists(new Path(path));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Copies a local file to the given remote path, overwriting the remote
     * file if it already exists. An {@link IOException} is printed, not
     * propagated.
     *
     * @param conf           Hadoop configuration used to open the file system
     * @param localFilePath  path of the local source file
     * @param remoteFilePath destination path
     */
    public static void copyFromLocalFile(Configuration conf,
            String localFilePath, String remoteFilePath) {
        try {
            uploadOrThrow(conf, localFilePath, remoteFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends the content of a local file to the end of a remote file. An
     * {@link IOException} is printed, not propagated.
     *
     * @param conf           Hadoop configuration used to open the file system
     * @param localFilePath  path of the local file whose bytes are appended
     * @param remoteFilePath remote file that receives the bytes
     */
    public static void appendToFile(Configuration conf, String localFilePath,
            String remoteFilePath) {
        try {
            appendOrThrow(conf, localFilePath, remoteFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Overwriting upload that reports failure to the caller instead of printing it. */
    private static void uploadOrThrow(Configuration conf, String localFilePath,
            String remoteFilePath) throws IOException {
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            // First flag: delete the source after copying. Second flag: overwrite.
            fs.copyFromLocalFile(false, true, localPath, remotePath);
        }
    }

    /** Append that reports failure to the caller instead of printing it. */
    private static void appendOrThrow(Configuration conf, String localFilePath,
            String remoteFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // The output stream is a managed resource too, so it is closed even
        // when a read or write fails part-way through; it is declared last so
        // that it is closed before the file system.
        try (FileSystem fs = FileSystem.newInstance(conf);
                FileInputStream in = new FileInputStream(localFilePath);
                FSDataOutputStream out = fs.append(remotePath)) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Entry point: uploads a fixed local file to a fixed HDFS path, applying
     * the configured {@code choice} when the remote file already exists.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String localFilePath = "/usr/local/hadoop/text.txt"; // local path
        String remoteFilePath = "/user/tiny/text.txt"; // HDFS path
        // Use "append" to add to the end of an existing file,
        // or "overwrite" to replace it.
        String choice = "overwrite";

        try {
            boolean fileExists = test(conf, remoteFilePath);
            System.out.println(remoteFilePath + (fileExists ? " 已存在." : " 不存在."));

            // The throwing helpers are used here so that a success message is
            // only printed when the operation actually succeeded.
            if (!fileExists) {
                uploadOrThrow(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已上傳至 " + remoteFilePath);
            } else if ("overwrite".equals(choice)) {
                uploadOrThrow(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已覆蓋 " + remoteFilePath);
            } else if ("append".equals(choice)) {
                appendOrThrow(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已追加至 " + remoteFilePath);
            } else {
                System.err.println("未知的選項: " + choice);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

從HDFS中下載指定檔案,如果本地檔案與要下載的檔名稱相同,則自動對下載的檔案重新命名。
Shell命令實現:

# Download text.txt from HDFS. If a local text.txt already exists, save the
# download as text2.txt so the existing file is not overwritten.
# The existence check is on the local file system, so the shell's own
# test is used here, not "hadoop fs -test" (which checks HDFS).
if [ -e ./text.txt ]; then
    hadoop fs -copyToLocal text.txt ./text2.txt
else
    hadoop fs -copyToLocal text.txt ./text.txt
fi
package cn.edu.zucc.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;

import java.io.*;
public class CopyToLocal {
    /**
     * 下載檔案到本地 判斷本地路徑是否已存在,若已存在,則自動進行重新命名
     */
    public static void copyToLocal(Configuration conf, String remoteFilePath,
            String localFilePath) {
        Path remotePath = new Path(remoteFilePath);
        try (FileSystem fs = FileSystem.get(conf)) {
            File f = new File(localFilePath);
            /* 如果檔名存在,自動重新命名(在檔名後面加上 _0, _1 ...) */
            if (f.exists()) {
                System.out.println(localFilePath + " 已存在.");
                Integer i = Integer.valueOf(0);
                while (true) {
                    f = new File(localFilePath + "_" + i.toString());
                    if (!f.exists()) {
                        localFilePath = localFilePath + "_" + i.toString();
                        break;
                    } else {
                        i++;
                        continue;
                    }
                }
                System.out.println("將重新命名為: " + localFilePath);
            }
            // 下載檔案到本地
    

向HDFS中上傳任意文字檔案,如果指定的檔案在HDFS中已經存在,由使用者指定是追加到原有檔案末尾還是覆蓋原有的檔案

Shell命令實現:1.先到Hadoop主資料夾cd /usr/local/hadoop

2.啟動Hadoop服務

sbin/start-dfs.sh
sbin/start-yarn.sh

3.建立兩個任意文字檔案用於實驗

echo "hello world" > local.txt
echo "hello hadoop" >text.txt

4.建立使用者工作目錄(HDFS預設工作目錄格式為/user/當前使用者)

hadoop fs -mkdir -p /user/當前使用者名稱

5.檢查檔案是否存在

hadoop fs -test -e text.txt
echo $?

6.上傳本地檔案到HDFS系統
hadoop fs -put text.txt


7.追加到檔案末尾的指令
hadoop fs -appendToFile local.txt text.txt


8.檢視HDFS檔案的內容
hadoop fs -cat text.txt


9.覆蓋原有檔案的指令(覆蓋之後再執行一遍上一步)
hadoop fs -copyFromLocal -f local.txt text.txt


10.以上步驟也可以用如下命令實現

# Append local.txt to text.txt when text.txt already exists in HDFS;
# otherwise upload local.txt as text.txt.
# The commands are run directly: wrapping them in $(...) would make the
# shell try to execute whatever they print to stdout.
if hadoop fs -test -e text.txt; then
    hadoop fs -appendToFile local.txt text.txt
else
    hadoop fs -copyFromLocal -f local.txt text.txt
fi


 package cn.edu.zucc.hdfs;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Uploads a local file to HDFS. When the remote file already exists, the
 * {@code choice} setting in {@link #main} decides whether it is overwritten
 * or appended to.
 */
public class CopyFromLocalFile {

    /**
     * Checks whether a path exists on the file system described by {@code conf}.
     *
     * @param conf Hadoop configuration used to open the file system
     * @param path path to look up
     * @return {@code true} if the path exists; {@code false} if it does not,
     *         or if the lookup failed (the stack trace is printed)
     */
    public static boolean test(Configuration conf, String path) {
        // newInstance rather than get: this handle is closed on exit, so it
        // must not be the cached instance that FileSystem.get hands out to
        // every caller using the same configuration.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            return fs.exists(new Path(path));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Copies a local file to the given remote path, overwriting the remote
     * file if it already exists. An {@link IOException} is printed, not
     * propagated.
     *
     * @param conf           Hadoop configuration used to open the file system
     * @param localFilePath  path of the local source file
     * @param remoteFilePath destination path
     */
    public static void copyFromLocalFile(Configuration conf,
            String localFilePath, String remoteFilePath) {
        try {
            uploadOrThrow(conf, localFilePath, remoteFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends the content of a local file to the end of a remote file. An
     * {@link IOException} is printed, not propagated.
     *
     * @param conf           Hadoop configuration used to open the file system
     * @param localFilePath  path of the local file whose bytes are appended
     * @param remoteFilePath remote file that receives the bytes
     */
    public static void appendToFile(Configuration conf, String localFilePath,
            String remoteFilePath) {
        try {
            appendOrThrow(conf, localFilePath, remoteFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Overwriting upload that reports failure to the caller instead of printing it. */
    private static void uploadOrThrow(Configuration conf, String localFilePath,
            String remoteFilePath) throws IOException {
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            // First flag: delete the source after copying. Second flag: overwrite.
            fs.copyFromLocalFile(false, true, localPath, remotePath);
        }
    }

    /** Append that reports failure to the caller instead of printing it. */
    private static void appendOrThrow(Configuration conf, String localFilePath,
            String remoteFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // The output stream is a managed resource too, so it is closed even
        // when a read or write fails part-way through; it is declared last so
        // that it is closed before the file system.
        try (FileSystem fs = FileSystem.newInstance(conf);
                FileInputStream in = new FileInputStream(localFilePath);
                FSDataOutputStream out = fs.append(remotePath)) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Entry point: uploads a fixed local file to a fixed HDFS path, applying
     * the configured {@code choice} when the remote file already exists.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String localFilePath = "/usr/local/hadoop/text.txt"; // local path
        String remoteFilePath = "/user/tiny/text.txt"; // HDFS path
        // Use "append" to add to the end of an existing file,
        // or "overwrite" to replace it.
        String choice = "overwrite";

        try {
            boolean fileExists = test(conf, remoteFilePath);
            System.out.println(remoteFilePath + (fileExists ? " 已存在." : " 不存在."));

            // The throwing helpers are used here so that a success message is
            // only printed when the operation actually succeeded.
            if (!fileExists) {
                uploadOrThrow(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已上傳至 " + remoteFilePath);
            } else if ("overwrite".equals(choice)) {
                uploadOrThrow(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已覆蓋 " + remoteFilePath);
            } else if ("append".equals(choice)) {
                appendOrThrow(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已追加至 " + remoteFilePath);
            } else {
                System.err.println("未知的選項: " + choice);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}


從HDFS中下載指定檔案,如果本地檔案與要下載的檔名稱相同,則自動對下載的檔案重新命名。
Shell命令實現:

# Download text.txt from HDFS. If a local text.txt already exists, save the
# download as text2.txt so the existing file is not overwritten.
# The existence check is on the local file system, so the shell's own
# test is used here, not "hadoop fs -test" (which checks HDFS).
if [ -e ./text.txt ]; then
    hadoop fs -copyToLocal text.txt ./text2.txt
else
    hadoop fs -copyToLocal text.txt ./text.txt
fi
package cn.edu.zucc.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;

import java.io.*;
/**
 * Downloads a file from HDFS to the local file system, choosing a new local
 * name when the requested one is already taken.
 */
public class CopyToLocal {

    /**
     * Downloads a remote file to a local path. If the local path already
     * exists, the download is written to the first unused name of the form
     * {@code localFilePath + "_0"}, {@code "_1"}, ... instead. An
     * {@link IOException} is printed, not propagated.
     *
     * @param conf           Hadoop configuration used to open the file system
     * @param remoteFilePath path of the file to download
     * @param localFilePath  requested local destination path
     */
    public static void copyToLocal(Configuration conf, String remoteFilePath,
            String localFilePath) {
        try {
            downloadOrThrow(conf, remoteFilePath, localFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Download that reports failure to the caller instead of printing it. */
    private static void downloadOrThrow(Configuration conf, String remoteFilePath,
            String localFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // newInstance rather than get: this handle is closed on exit, so it
        // must not be the cached instance that FileSystem.get hands out to
        // every caller using the same configuration.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            Path localPath = new Path(unusedLocalPath(localFilePath));
            fs.copyToLocalFile(remotePath, localPath);
        }
    }

    /** Returns the requested path if it is free, otherwise the first free "_N" variant. */
    private static String unusedLocalPath(String localFilePath) {
        if (!new File(localFilePath).exists()) {
            return localFilePath;
        }
        System.out.println(localFilePath + " 已存在.");
        int suffix = 0;
        while (new File(localFilePath + "_" + suffix).exists()) {
            suffix++;
        }
        String renamed = localFilePath + "_" + suffix;
        System.out.println("將重新命名為: " + renamed);
        return renamed;
    }

    /**
     * Entry point: downloads a fixed HDFS file to a fixed local path.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String localFilePath = "/usr/local/hadoop/text.txt"; // local path
        String remoteFilePath = "/user/tiny/text.txt"; // HDFS path

        try {
            // The throwing helper is used here so that the completion message
            // is only printed when the download actually succeeded.
            downloadOrThrow(conf, remoteFilePath, localFilePath);
            System.out.println("下載完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}


http://www.jianshu.com/p/0663d74b79b5


Path localPath = new Path(localFilePath); fs.copyToLocalFile(remotePath, localPath); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 主函式 */ public static void main(String[] args) { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://localhost:9000"); String localFilePath = "/usr/local/hadoop/text.txt"; // 本地路徑 String remoteFilePath = "/user/tiny/text.txt"; // HDFS路徑 try { CopyToLocal.copyToLocal(conf, remoteFilePath, localFilePath); System.out.println("下載完成"); } catch (Exception e) { e.printStackTrace(); } }}



      http://www.jianshu.com/p/0663d74b79b5
 

相關文章