Java 讀取 1G 以上的大檔案

一般讀取大檔案時，通常會先想到使用緩衝（buffer）逐行讀取。

如：

package base;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;

public class ReadBigFile {

   /** Capacity, in chars, of the buffer given to the BufferedReader (5 * 1024 * 1024). */
   private static final int READ_BUFFER_CHARS = 5 * 1024 * 1024;

   /**
    * Counts the lines of a text file, decoding it as UTF-8, and prints the
    * count to standard output.
    *
    * <p>The file is read line by line through a large buffer, so the whole
    * file is never held in memory. Any I/O failure (missing file, read error)
    * is reported with a stack trace on standard error and nothing is printed
    * to standard output.
    *
    * @param path path of the file to read
    */
   public static void readBigFile(String path) {
       File file = new File(path);
       // try-with-resources closes both streams on every exit path; the
       // original version never closed them and leaked the file handle.
       try (BufferedInputStream in = new BufferedInputStream(
                    new FileInputStream(file));
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(in, "utf-8"), READ_BUFFER_CHARS)) {

           // A long is ample for a line count and avoids allocating a new
           // BigInteger for every line of a very large file.
           long lineCount = 0;
           while (reader.readLine() != null) {
               lineCount++;
           }

           System.out.println(lineCount);
       } catch (IOException e) {
           // FileNotFoundException and UnsupportedEncodingException are both
           // IOExceptions and were handled identically, so one catch suffices.
           e.printStackTrace();
       }
   }

   /**
    * Runs the line count on the file named by the first command-line
    * argument, or on a default sample path when no argument is given.
    *
    * @param args optional; {@code args[0]} is the path of the file to read
    */
   public static void main(String[] args) {
       String path = (args != null && args.length > 0)
               ? args[0]
               : "H:/TDDOWNLOAD/TEST.txt";
       readBigFile(path);
   }

}

但若是為了統計字元數等用途，想要更有效率，可以改用串流標記器（StreamTokenizer）的方式：

public class ReadBigFile2 {

   /**
    * Counts the characters of the tokens in a text file and prints a
    * per-category breakdown (numbers, words, punctuation) to standard output.
    *
    * <p>The file is split with a {@link StreamTokenizer}; whitespace between
    * tokens is not counted. Characters in word tokens and single punctuation
    * characters are counted exactly. Number tokens are only available as a
    * parsed {@code double}, so their length is taken from the shortest
    * decimal rendering of that value (see {@link #numberLength(double)});
    * leading zeros or trailing fractional zeros in the file are not counted.
    *
    * @param fileName full path of the file to analyse
    * @return the total number of counted characters, or {@code -1} if the
    *         file cannot be opened or read (the error is printed to standard
    *         error)
    */
   public static long xermote(String fileName) {
       // try-with-resources closes the reader on every exit path.
       try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
           StreamTokenizer stn = new StreamTokenizer(reader);
           // Make the quote characters and '/' ordinary characters so they
           // come back as single-character tokens instead of starting a
           // quoted string or a comment.
           stn.ordinaryChar('\'');
           stn.ordinaryChar('\"');
           stn.ordinaryChar('/');

           // long counters: int would overflow on very large files, and the
           // method already returns long.
           long numSum = 0;  // characters belonging to number tokens
           long wordSum = 0; // characters belonging to word tokens (letters, CJK, ...)
           long symSum = 0;  // punctuation and other single-character tokens

           while (stn.nextToken() != StreamTokenizer.TT_EOF) {
               switch (stn.ttype) {
               case StreamTokenizer.TT_EOL:
                   // End-of-line tokens contribute nothing to the count.
                   break;
               case StreamTokenizer.TT_NUMBER:
                   numSum += numberLength(stn.nval);
                   break;
               case StreamTokenizer.TT_WORD:
                   // Count the real token text; the previous code measured a
                   // fixed placeholder string instead of stn.sval.
                   wordSum += stn.sval.length();
                   break;
               default:
                   // Any other token type is a single ordinary character.
                   symSum += 1;
                   break;
               }
           }

           long sum = symSum + numSum + wordSum;
           System.out.println("數字標記的和為：" + numSum);
           System.out.println("文字標記的和為： " + wordSum);
           System.out.println("標點符號標記的和為： " + symSum);
           System.out.println("此檔案中的字元總數為： " + sum);

           return sum;
       } catch (IOException | RuntimeException e) {
           // Same contract as before: every failure is reported and mapped to -1.
           e.printStackTrace();
           return -1;
       }
   }

   /**
    * Returns the number of characters needed to write a parsed number token.
    *
    * <p>Integral values are rendered without the {@code ".0"} suffix that
    * {@code String.valueOf(double)} would append, so {@code 123} counts as 3
    * characters rather than 5.
    */
   private static int numberLength(double value) {
       // Below 1e15 every integral double converts to long exactly.
       if (value == Math.rint(value) && Math.abs(value) < 1e15) {
           return Long.toString((long) value).length();
       }
       return String.valueOf(value).length();
   }

   /**
    * Runs the character count on the file named by the first command-line
    * argument, or on a default sample path when no argument is given.
    *
    * @param args optional; {@code args[0]} is the path of the file to analyse
    */
   public static void main(String[] args) {
       String fileName = (args != null && args.length > 0)
               ? args[0]
               : "H:/TDDOWNLOAD/迅雷相關/迅雷網鄰/P2P Seacher/movie/test.txt";
       System.out.println("統計檔案" + fileName + "中的字元數的結果如下：");
       ReadBigFile2.xermote(fileName);
   }
}

java讀取大檔案1G+

相關文章