利用hdfs的api,可以實現對hdfs上檔案、目錄的讀寫,利用這一套API可以設計一個簡易的山寨版雲盤,見下圖:
為了方便操作,將常用的檔案讀寫操作封裝了一個工具類:
1 import org.apache.hadoop.conf.Configuration; 2 import org.apache.hadoop.fs.*; 3 import org.apache.hadoop.io.IOUtils; 4 5 import java.io.ByteArrayOutputStream; 6 import java.io.IOException; 7 import java.io.InputStream; 8 import java.io.OutputStream; 9 10 /** 11 * HDFS工具類 12 * Author: 菩提樹下的楊過(http://yjmyzz.cnblogs.com) 13 * Since: 2015-05-21 14 */ 15 public class HDFSUtil { 16 17 18 private HDFSUtil() { 19 20 } 21 22 /** 23 * 判斷路徑是否存在 24 * 25 * @param conf 26 * @param path 27 * @return 28 * @throws IOException 29 */ 30 public static boolean exits(Configuration conf, String path) throws IOException { 31 FileSystem fs = FileSystem.get(conf); 32 return fs.exists(new Path(path)); 33 } 34 35 /** 36 * 建立檔案 37 * 38 * @param conf 39 * @param filePath 40 * @param contents 41 * @throws IOException 42 */ 43 public static void createFile(Configuration conf, String filePath, byte[] contents) throws IOException { 44 FileSystem fs = FileSystem.get(conf); 45 Path path = new Path(filePath); 46 FSDataOutputStream outputStream = fs.create(path); 47 outputStream.write(contents); 48 outputStream.close(); 49 fs.close(); 50 } 51 52 /** 53 * 建立檔案 54 * 55 * @param conf 56 * @param filePath 57 * @param fileContent 58 * @throws IOException 59 */ 60 public static void createFile(Configuration conf, String filePath, String fileContent) throws IOException { 61 createFile(conf, filePath, fileContent.getBytes()); 62 } 63 64 /** 65 * @param conf 66 * @param localFilePath 67 * @param remoteFilePath 68 * @throws IOException 69 */ 70 public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException { 71 FileSystem fs = FileSystem.get(conf); 72 Path localPath = new Path(localFilePath); 73 Path remotePath = new Path(remoteFilePath); 74 fs.copyFromLocalFile(true, true, localPath, remotePath); 75 fs.close(); 76 } 77 78 /** 79 * 刪除目錄或檔案 80 * 81 * @param conf 82 * @param remoteFilePath 83 * @param recursive 84 * @return 85 * @throws IOException 86 */ 
87 public static boolean deleteFile(Configuration conf, String remoteFilePath, boolean recursive) throws IOException { 88 FileSystem fs = FileSystem.get(conf); 89 boolean result = fs.delete(new Path(remoteFilePath), recursive); 90 fs.close(); 91 return result; 92 } 93 94 /** 95 * 刪除目錄或檔案(如果有子目錄,則級聯刪除) 96 * 97 * @param conf 98 * @param remoteFilePath 99 * @return 100 * @throws IOException 101 */ 102 public static boolean deleteFile(Configuration conf, String remoteFilePath) throws IOException { 103 return deleteFile(conf, remoteFilePath, true); 104 } 105 106 /** 107 * 檔案重新命名 108 * 109 * @param conf 110 * @param oldFileName 111 * @param newFileName 112 * @return 113 * @throws IOException 114 */ 115 public static boolean renameFile(Configuration conf, String oldFileName, String newFileName) throws IOException { 116 FileSystem fs = FileSystem.get(conf); 117 Path oldPath = new Path(oldFileName); 118 Path newPath = new Path(newFileName); 119 boolean result = fs.rename(oldPath, newPath); 120 fs.close(); 121 return result; 122 } 123 124 /** 125 * 建立目錄 126 * 127 * @param conf 128 * @param dirName 129 * @return 130 * @throws IOException 131 */ 132 public static boolean createDirectory(Configuration conf, String dirName) throws IOException { 133 FileSystem fs = FileSystem.get(conf); 134 Path dir = new Path(dirName); 135 boolean result = fs.mkdirs(dir); 136 fs.close(); 137 return result; 138 } 139 140 /** 141 * 列出指定路徑下的所有檔案(不包含目錄) 142 * 143 * @param conf 144 * @param basePath 145 * @param recursive 146 */ 147 public static RemoteIterator<LocatedFileStatus> listFiles(FileSystem fs, String basePath, boolean recursive) throws IOException { 148 149 RemoteIterator<LocatedFileStatus> fileStatusRemoteIterator = fs.listFiles(new Path(basePath), recursive); 150 151 return fileStatusRemoteIterator; 152 } 153 154 /** 155 * 列出指定路徑下的檔案(非遞迴) 156 * 157 * @param conf 158 * @param basePath 159 * @return 160 * @throws IOException 161 */ 162 public static RemoteIterator<LocatedFileStatus> 
listFiles(Configuration conf, String basePath) throws IOException { 163 FileSystem fs = FileSystem.get(conf); 164 RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(new Path(basePath), false); 165 fs.close(); 166 return remoteIterator; 167 } 168 169 /** 170 * 列出指定目錄下的檔案\子目錄資訊(非遞迴) 171 * 172 * @param conf 173 * @param dirPath 174 * @return 175 * @throws IOException 176 */ 177 public static FileStatus[] listStatus(Configuration conf, String dirPath) throws IOException { 178 FileSystem fs = FileSystem.get(conf); 179 FileStatus[] fileStatuses = fs.listStatus(new Path(dirPath)); 180 fs.close(); 181 return fileStatuses; 182 } 183 184 185 /** 186 * 讀取檔案內容 187 * 188 * @param conf 189 * @param filePath 190 * @return 191 * @throws IOException 192 */ 193 public static String readFile(Configuration conf, String filePath) throws IOException { 194 String fileContent = null; 195 FileSystem fs = FileSystem.get(conf); 196 Path path = new Path(filePath); 197 InputStream inputStream = null; 198 ByteArrayOutputStream outputStream = null; 199 try { 200 inputStream = fs.open(path); 201 outputStream = new ByteArrayOutputStream(inputStream.available()); 202 IOUtils.copyBytes(inputStream, outputStream, conf); 203 fileContent = outputStream.toString(); 204 } finally { 205 IOUtils.closeStream(inputStream); 206 IOUtils.closeStream(outputStream); 207 fs.close(); 208 } 209 return fileContent; 210 } 211 }
簡單地測試了一下:
1 @Test 2 public void test() throws IOException { 3 Configuration conf = new Configuration(); 4 String newDir = "/test"; 5 //01.檢測路徑是否存在 測試 6 if (HDFSUtil.exits(conf, newDir)) { 7 System.out.println(newDir + " 已存在!"); 8 } else { 9 //02.建立目錄測試 10 boolean result = HDFSUtil.createDirectory(conf, newDir); 11 if (result) { 12 System.out.println(newDir + " 建立成功!"); 13 } else { 14 System.out.println(newDir + " 建立失敗!"); 15 } 16 } 17 String fileContent = "Hi,hadoop. I love you"; 18 String newFileName = newDir + "/myfile.txt"; 19 20 //03.建立檔案測試 21 HDFSUtil.createFile(conf, newFileName, fileContent); 22 System.out.println(newFileName + " 建立成功"); 23 24 //04.讀取檔案內容 測試 25 System.out.println(newFileName + " 的內容為:\n" + HDFSUtil.readFile(conf, newFileName)); 26 27 //05. 測試獲取所有目錄資訊 28 FileStatus[] dirs = HDFSUtil.listStatus(conf, "/"); 29 System.out.println("--根目錄下的所有子目錄---"); 30 for (FileStatus s : dirs) { 31 System.out.println(s); 32 } 33 34 //06. 測試獲取所有檔案 35 FileSystem fs = FileSystem.get(conf); 36 RemoteIterator<LocatedFileStatus> files = HDFSUtil.listFiles(fs, "/", true); 37 System.out.println("--根目錄下的所有檔案---"); 38 while (files.hasNext()) { 39 System.out.println(files.next()); 40 } 41 fs.close(); 42 43 //刪除檔案測試 44 boolean isDeleted = HDFSUtil.deleteFile(conf, newDir); 45 System.out.println(newDir + " 已被刪除"); 46 47 }
注:測試時,不要忘記在resources目錄下放置core-site.xml檔案(其中需設定fs.defaultFS指向你的HDFS叢集),不然在IDE環境下,程式碼不知道要去連哪裡的HDFS
輸出結果:
/test 已存在!
/test/myfile.txt 建立成功
/test/myfile.txt 的內容為:
Hi,hadoop. I love you
--根目錄下的所有子目錄---
FileStatus{path=hdfs://172.28.20.102:9000/jimmy; isDirectory=true; modification_time=1432176691550; access_time=0; owner=hadoop; group=supergroup; permission=rwxrwxrwx; isSymlink=false}
FileStatus{path=hdfs://172.28.20.102:9000/test; isDirectory=true; modification_time=1432181331362; access_time=0; owner=jimmy; group=supergroup; permission=rwxr-xr-x; isSymlink=false}
FileStatus{path=hdfs://172.28.20.102:9000/user; isDirectory=true; modification_time=1431931797244; access_time=0; owner=hadoop; group=supergroup; permission=rwxr-xr-x; isSymlink=false}
--根目錄下的所有檔案---
LocatedFileStatus{path=hdfs://172.28.20.102:9000/jimmy/input/README.txt; isDirectory=false; length=1366; replication=1; blocksize=134217728; modification_time=1431922483851; access_time=1432174134018; owner=hadoop; group=supergroup; permission=rw-r--r--; isSymlink=false}
LocatedFileStatus{path=hdfs://172.28.20.102:9000/jimmy/output/_SUCCESS; isDirectory=false; length=0; replication=3; blocksize=134217728; modification_time=1432176692454; access_time=1432176692448; owner=jimmy; group=supergroup; permission=rw-r--r--; isSymlink=false}
LocatedFileStatus{path=hdfs://172.28.20.102:9000/jimmy/output/part-r-00000; isDirectory=false; length=1306; replication=3; blocksize=134217728; modification_time=1432176692338; access_time=1432176692182; owner=jimmy; group=supergroup; permission=rw-r--r--; isSymlink=false}
LocatedFileStatus{path=hdfs://172.28.20.102:9000/test/myfile.txt; isDirectory=false; length=21; replication=3; blocksize=134217728; modification_time=1432181331601; access_time=1432181331362; owner=jimmy; group=supergroup; permission=rw-r--r--; isSymlink=false}
/test 已被刪除
用spring-mvc結合hdfs api仿造hadoop的檔案瀏覽管理介面,做了一個山寨版:(只完成了檔案列表功能)
原始碼託管在taobao開源平臺上了,有需要的可以參考下: