我的讀取 Hadoop SequenceFile 格式檔案的程式碼

longerandlonger發表於2013-01-08
	/**
	 * Dumps the values of a Hadoop SequenceFile to standard output, one record per line.
	 *
	 * <p>The file to read is taken from {@code args[0]} when an argument is supplied;
	 * otherwise the built-in local sample path is used. Key and value objects are
	 * instantiated reflectively from the classes recorded in the file header, so the
	 * method works for any Writable key/value types available on the classpath.
	 *
	 * <p>Any failure is reported by printing the stack trace; the reader is always closed.
	 *
	 * @param args optional; {@code args[0]} is the URI of the SequenceFile to read
	 */
	public static void main(String[] args) {
		String uri = (args != null && args.length > 0)
				? args[0]
				: "file:///D:/attempt_201212181734_2923950_r_000000_0";
		org.apache.hadoop.io.SequenceFile.Reader reader = null;
		try {
			org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
			Path path = new Path(uri);
			// Resolve the file system from the path's own scheme rather than from the
			// configuration default: FileSystem.get(conf) would hand back the default
			// (e.g. HDFS) file system and reject a file:// path on a configured cluster client.
			FileSystem fs = path.getFileSystem(conf);
			reader = new org.apache.hadoop.io.SequenceFile.Reader(fs, path, conf);
			Writable key = (Writable) org.apache.hadoop.util.ReflectionUtils.newInstance(reader.getKeyClass(), conf);
			Writable value = (Writable) org.apache.hadoop.util.ReflectionUtils.newInstance(reader.getValueClass(), conf);

			// The same key/value instances are refilled on every call to next().
			while (reader.next(key, value)) {
				// If the printed text is garbled, the value was probably decoded with the
				// wrong charset; re-decode its bytes explicitly (for example from
				// ISO8859_1 to GB2312) before printing.
				System.out.println(value.toString());
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// closeStream tolerates null, so this is safe even if opening the reader failed.
			IOUtils.closeStream(reader);
		}
	}

相關文章