oracle外部表實現大資料量的Excel匯入

livedba發表於2012-06-08

現在專案裡有一個需求:

大資料量的excel匯入到資料庫,利用oracle的外部表實現,實現思路:

1.建立一個模板,把excel裡的列於業務資料表做一個對映關係

2.業務員上傳excel,透過java把excel轉換為cvs格式的,然後跨伺服器上傳到oracle資料庫所在的伺服器下面

3.建立oracle外部表,匯入資料,再從外部表把資料匯入到業務表中

以上是思路,待實現完成,再完善此文。

[@more@]

一、java實現excel轉換為cvs

參考:http://zhangruihong154.blog.163.com/blog/static/14014292220118505423704/

http://www.blogjava.net/jayslong/archive/2011/04/21/convert_xls_and_xlsx_to_csv.html

/**
* Name: net.uni.util.JavaExcelToCvs.java
* Version: 1.0
* Date: 2012-6-8
* Author: 孫偉
*/
package net.uni.util;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;

import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
* 處理內容: excel轉換為cvs格式
*
* @version: 1.0
* @see:net.uni.util.JavaExcelToCvs.java
* @date:2012-6-8
* @author:孫偉
*/
public class JavaExcelToCvs implements HSSFListener{

private int minColumns;
private POIFSFileSystem fs;
private PrintStream output;

private int lastRowNumber;
private int lastColumnNumber;

/** */
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;

/** */
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;

// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;

/** */
/** So we known which sheet we're on */
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();

// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;

private final String OUTPUT_CHARSET = "GBK";

/** */
/**
* Creates a new XLS -> CSV converter
*
* @param fs
* The POIFSFileSystem to process
* @param output
* The PrintStream to output the CSV to
* @param minColumns
* The minimum number of columns to output, or -1 for no minimum
*/
public JavaExcelToCvs(POIFSFileSystem fs, PrintStream output, int minColumns) {
this.fs = fs;
this.output = output;
this.minColumns = minColumns;
}

public JavaExcelToCvs(String inputFilePath, String outputFilePath) throws Exception {
fs = new POIFSFileSystem(new FileInputStream(inputFilePath));
output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
minColumns = -1;
}

/** */
/**
* Creates a new XLS -> CSV converter
*
* @param filename
* The file to process
* @param minColumns
* The minimum number of columns to output, or -1 for no minimum
* @throws IOException
* @throws FileNotFoundException
*/
public JavaExcelToCvs(String filename, int minColumns) throws IOException, FileNotFoundException {
this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns);
}

/** */
/**
* Initiates the processing of the XLS file to CSV
*/
public void process() throws IOException {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);

HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();

if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}

factory.processWorkbookEvents(request, fs);
}

/** */
/**
* Main HSSFListener method, processes events, and outputs the CSV as the
* file is processed.
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;

switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// Create sub workbook if required
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}

// Output the worksheet name
// Works by ordering the BSRs by the location of
// their BOFRecords, and then knowing that we
// process BOFRecords in byte offset order
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
output.println();
output.println(orderedBSRs[sheetIndex].getSheetname() + " [" + (sheetIndex + 1) + "]:");
}
break;

case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;

case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;

thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;

thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = "";
break;

case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;

thisRow = frec.getRow();
thisColumn = frec.getColumn();

if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
break;
case StringRecord.sid:
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;

case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;

thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
thisStr = '"' + lrec.getValue() + '"';
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;

thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
thisStr = '"' + "(No SST Record, can't identify string)" + '"';
} else {
thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;

thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
// TODO: Find object to match nrec.getShapeId()
thisStr = '"' + "(TODO)" + '"';
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;

thisRow = numrec.getRow();
thisColumn = numrec.getColumn();

// Format
thisStr = formatListener.formatNumberDateCell(numrec);
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;

thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
default:
break;
}

// Handle new row
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}

// Handle missing column
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
thisRow = mc.getRow();
thisColumn = mc.getColumn();
thisStr = "";
}

// If we got something to print out, do so
if (thisStr != null) {
if (thisColumn > 0) {
output.print(',');
}
output.print(thisStr);
}

// Update column and row count
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;

// Handle end of row
if (record instanceof LastCellOfRowDummyRecord) {
// Print out any missing commas if needed
if (minColumns > 0) {
// Columns are 0 based
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
for (int i = lastColumnNumber; i < (minColumns); i++) {
output.print(',');
}
}

// We're onto a new row
lastColumnNumber = -1;
// End the row
output.println();
}
}

public static void main(String[] args) throws Exception {
JavaExcelToCvs javaExcelToCvs = new JavaExcelToCvs("E:testExtent.xls", "E:a.csv");
javaExcelToCvs.process();
}
}

二、java透過ftp將檔案上傳到伺服器上

/**
* Name: net.uni.util.JavaFtpUpload.java
* Version: 1.0
* Date: 2012-6-8
* Author: 孫偉
*/
package net.uni.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.StringTokenizer;
import sun.net.TelnetInputStream;
import sun.net.ftp.FtpClient;

/**
* 處理內容:透過java-ftp將檔案上傳到資料庫伺服器上 此前提條件是oracle伺服器的ftp服務開啟
* /var/ftp/oracle_external這個目錄是屬於oracle使用者的,並且有操作許可權
* /var/ftp的許可權是775 oracle_external這個目錄的許可權是777
* @version: 1.0
* @see:net.uni.util.JavaFtpUpload.java
* @date:2012-6-8
* @author:孫偉
*/
public class JavaFtpUpload {
private String ip = "192.168.0.57";// ftp伺服器的IP地址
private String username = "anonymous";// 使用者名稱
private String password = "331";// 密碼
private int port = 21;// 預設埠
private String localfilefullname = "";// 需要上傳的目錄,帶絕對路徑
FtpClient ftpclient = null;
OutputStream os = null;
FileInputStream is = null;

/**
* 建立資料夾
*
* @param dir
* @param ftpclient
* @throws exception
*/
private void createdir(String dir, FtpClient ftpclient) throws Exception {
ftpclient.ascii();
StringTokenizer s = new StringTokenizer(dir, "/"); //
s.countTokens();
String pathname = "";
while (s.hasMoreElements()) {
pathname = pathname + "/" + (String) s.nextElement();
try {
ftpclient.sendServer("mkd " + pathname + "rn");// 如果伺服器上有該目錄,不會被建立
} catch (Exception e) {
e = null;
}
ftpclient.readServerResponse();
}
ftpclient.binary();
}

/**
* 檢查資料夾是否存在
*
* @param dir
* @param ftpclient
* @return
*/
private boolean isdirexist(String dir, FtpClient ftpclient) throws Exception {
try {
ftpclient.cd(dir);
} catch (Exception e) {
// todo 自動生成 catch 塊
return false;
}
return true;
}

/**
* ftp上傳
*
* @param localfilefullname
* 上傳的原始檔夾
* @return
*/
public boolean upload(String prefix, String localfilefullname) throws Exception {
this.localfilefullname = localfilefullname;
try {
String savefilename = localfilefullname;
// 新建一個ftp客戶端連
ftpclient = new FtpClient();
ftpclient.openServer(this.ip, this.port);
ftpclient.login(this.username, this.password);
// 開啟本地待長傳的檔案
File file_in = new File(savefilename);
processfile(prefix, file_in, ftpclient);
if (is != null) {
is.close();
}
if (os != null) {
os.close();
}
if (ftpclient != null) {
ftpclient.closeServer();
}
return true;
} catch (Exception e) {
e.printStackTrace();
System.err.println("exception e in ftp upload(): " + e.toString());
return false;
} finally {
if (is != null) {
is.close();
}
if (os != null) {
os.close();
}
if (ftpclient != null) {
ftpclient.closeServer();
}
}
}

/**
* 上傳檔案,遞迴演算法,如果是目錄且伺服器上不存在該目錄則在伺服器上建立該目錄,一級一級往下找,找到目錄下的檔案則讀取檔案內容,上傳到伺服器上
*
* @param source
* @param ftpclient
* @throws exception
*/
private void processfile(String prefix, File source, FtpClient ftpclient) throws Exception {
if (source.exists()) {
if (source.isDirectory()) {
String path = prefix + source.getPath().substring(localfilefullname.length()).replace("", "/");
if (!isdirexist(path, ftpclient)) {
createdir(path, ftpclient);
}
File sourcefile[] = source.listFiles();
for (int i = 0; i < sourcefile.length; i++) {
if (sourcefile[i].exists()) {
if (sourcefile[i].isDirectory()) {
this.processfile(prefix, sourcefile[i], ftpclient);
} else {
ftpclient.cd(cheangpath(prefix, sourcefile[i].getPath()));
ftpclient.binary();
os = ftpclient.put(sourcefile[i].getName());
byte[] bytes = new byte[1024];
is = new FileInputStream(sourcefile[i]);
// 開始複製
int c;
// 暫未考慮中途終止的情況
while ((c = is.read(bytes)) != -1) {
os.write(bytes, 0, c);
}
is.close();
os.close();
}
}
}
} else {
ftpclient.cd(cheangpath(prefix, source.getPath()));
ftpclient.binary();
os = ftpclient.put(source.getName());
byte[] bytes = new byte[1024];
is = new FileInputStream(source);
// 開始複製
int c;
// 暫未考慮中途終止的情況
while ((c = is.read(bytes)) != -1) {
os.write(bytes, 0, c);
}
is.close();
os.close();
}
} else {
throw new Exception("此檔案或資料夾[" + source.getName() + "]有誤或不存在!");
}
}

/**
* 獲取當前的ftp路徑
*
* @param path
* @return
*/
private String cheangpath(String prefix, String path) throws Exception {
path = path.substring(localfilefullname.length()).replace("", "/");
if ("".equals(path)) {
path = "/";
} else {
path = path.substring(0, path.lastIndexOf("/") + 1);
}
path = prefix + path;
return path;
}

/**
* 遞迴演算法,取得檔案的位元組大小
*
* @param strName
* 檔名帶絕對路徑
* @return
*/
public Long getSize(String strName) {
Long TotalSize = 0L;
File f = new File(strName);
if (f.isFile())
return f.length();
else {
if (f.isDirectory()) {
File[] contents = f.listFiles();
for (int i = 0; i < contents.length; i++) {
if (contents[i].isFile())
TotalSize += contents[i].length();
else {
if (contents[i].isDirectory())
TotalSize += getSize(contents[i].getPath());
}
}
}
}
return TotalSize;
}

/**
* 遞迴下載檔案
*
* @param localPath
* @param remotePath
*/
public void processdownload(String localPath, String remotePath) {
FileOutputStream outStream = null;
ArrayList list = null;
try {
list = getFileList(remotePath);
ftpclient.binary();
File temp = null;
for (int i = 0; i < list.size(); i++) {
// 如果是檔案,則直接執行下載
if (isFile(list.get(i).toString())) {
ftpclient.cd(remotePath);
ArrayList listfileName = getNameList(remotePath);
for (int j = 0; j < listfileName.size(); j++) {
temp = new File(localPath + File.separator + listfileName.get(j).toString());
outStream = new FileOutputStream(temp);
TelnetInputStream is = ftpclient.get(listfileName.get(j).toString());
byte[] bytes = new byte[1024];
int c;
// 暫未考慮中途終止的情況
while ((c = is.read(bytes)) != -1) {
outStream.write(bytes, 0, c);
}
is.close();
outStream.close();
System.out.println("成功下載檔案:" + remotePath + File.separator + listfileName.get(j).toString());
}
} else if (isDir(list.get(i).toString()))// 是目錄
{
temp = new File(localPath + File.separator + getFileName(list.get(i).toString()));
temp.mkdirs();
String newRemote = remotePath + File.separator + getFileName(list.get(i).toString());
processdownload(localPath + File.separator + getFileName(list.get(i).toString()), newRemote);
}
}
} catch (Exception ex) {
ex.printStackTrace();
} finally {
try {
outStream.close();
} catch (Exception ex) {
ex.printStackTrace();
}
}
}

/**
* 返回資料夾或者檔案的名稱
*
* @param line
* @return
*/
public String getFileName(String line) {
String filename = (String) parseLine(line).get(8);
return filename;
}

/**
* 返回當前目錄的檔名稱
*
* @return
* @throws IOException
*/
public ArrayList getNameList(String remotePath) throws IOException {
BufferedReader dr = new BufferedReader(new InputStreamReader(ftpclient.nameList(remotePath)));
ArrayList al = new ArrayList();
String s = "";
while ((s = dr.readLine()) != null) {
System.out.println("filename:" + s);
al.add(s);
}
return al;
}

/**
* 返回當前目錄的所有檔案及資料夾
*
* @return
* @throws IOException
*/
public ArrayList getFileList(String remotePath) throws IOException {
ftpclient.cd(remotePath);
BufferedReader dr = new BufferedReader(new InputStreamReader(ftpclient.list()));
ArrayList al = new ArrayList();
String s = "";
while ((s = dr.readLine()) != null) {
System.out.println("readLine:" + s);
if ((!((String) parseLine(s).get(8)).equals(".")) && (!((String) parseLine(s).get(8)).equals(".."))) {
al.add(s);
System.out.println("s:" + s);
}
}
return al;
}

/**
* 判斷一行檔案資訊是否為目錄
*
* @param line
* @return
*/
public boolean isDir(String line) {
return ((String) parseLine(line).get(0)).indexOf("d") != -1;
}

public boolean isFile(String line) {
return !isDir(line);
}

/**
* 處理getFileList取得的行資訊
*
* @param line
* @return
*/
private ArrayList parseLine(String line) {
ArrayList s1 = new ArrayList();
StringTokenizer st = new StringTokenizer(line, " ");
while (st.hasMoreTokens()) {
s1.add(st.nextToken());
}
return s1;
}

/**
* ftp下載檔案
*
* @param localPath
* @param remotePath
* @return
* @throws Exception
*/
public boolean download(String localPath, String remotePath) throws Exception {
this.localfilefullname = localfilefullname;
try {
String savefilename = localfilefullname;
// 新建一個ftp客戶端連
ftpclient = new FtpClient();
ftpclient.openServer(this.ip, this.port);
ftpclient.login(this.username, this.password);
ftpclient.cd(remotePath);
processdownload(localPath, remotePath);
if (is != null) {
is.close();
}
if (os != null) {
os.close();
}
if (ftpclient != null) {
ftpclient.closeServer();
}
return true;
} catch (Exception e) {
e.printStackTrace();
System.err.println("exception e in ftp upload(): " + e.toString());
return false;
} finally {
if (is != null) {
is.close();
}
if (os != null) {
os.close();
}
if (ftpclient != null) {
ftpclient.closeServer();
}
}
}

public static void main(String args[]) throws Exception {
JavaFtpUpload ftpup = new JavaFtpUpload();
String path = "E:/a.csv";
if (ftpup.getSize(path) > 204800) {
System.out.println("您上傳的檔案過大,檔案大小不能大於200k!");
return;
}
ftpup.upload("/oracle_external", path);
// ftpup.download("c:test", "/52601/20090612/bonc20090612095616");
}
}

三、寫一個公共方法,建立外部表,匯入資料


//根據模板ID查詢對應匯入的excel模板
StringBuffer excelSbf = new StringBuffer();
excelSbf.append("SELECT d.base_excel_mod_id AS excelModId ,d.base_excel_mod_name AS excelModName,d.table_name AS excelTableName,");
excelSbf.append("m.excel_col_index AS excelColIndex,m.table_column_name AS excelColName,m.table_column_type AS excelColType,m.table_column_length AS excelColLen");
excelSbf.append("FROM base_excel_mapping m,base_excel_mod d");
excelSbf.append("WHERE d.base_excel_mod_id = m.base_excel_mod_id AND d.base_excel_mod_id=? ORDER BY m.excel_col_index");
List excelMapList = this.getJdbcTemplate().query(excelSbf.toString(), new Object[]{excelModelId},new BeanPropertyRowMapper(BaseExcelModMap.class));

String busiTableName = "";
//建立外部表的sql語句
StringBuffer createExtSql = new StringBuffer();
StringBuffer extSql = new StringBuffer();

for(int i=0;i BaseExcelModMap exc = excelMapList.get(i);
if(i==0){
busiTableName = exc.getExcelTableName();
}
extSql.append(exc.getExcelColName());
extSql.append(" ");
extSql.append(exc.getExcelColType());
if(exc.getExcelColType().equals("VARCHAR2")||exc.getExcelColType().equals("NUMBER")){
extSql.append("(").append(exc.getExcelColLen()).append(")");
}

if(i extSql.append(",");
}
}

//外部表表名
String exterNalTable = busiTableName + System.currentTimeMillis();

createExtSql.append("create table ");
createExtSql.append(exterNalTable);
createExtSql.append(extSql);
createExtSql.append(" organization external (type oracle_loader default directory data_dir access parameters ( records delimited by newline");
createExtSql.append(" badfile 'data_dir':'").append(exterNalTable).append(".bad' ");
createExtSql.append(" fields terminated by ',' ldrtrim reject rows with all null fileds ) localtion (").append(fileName).append(")) reject limit unlimited parallel");

//StringBuffer sbf = new StringBuffer();
//sbf.append("create table tt(id_ varchar2(16),name_ varchar2(30),name1 varchar2(30),name2 varchar2(30))");
//sbf.append(" organization external (type oracle_loader default directory data_dir access parameters ( records delimited by newline");
//sbf.append(" fields terminated by ',' ) location('").append(fileName).append("'))");
this.getJdbcTemplate().execute(createExtSql.toString());
StringBuffer busiSbf = new StringBuffer();
busiSbf.append("insert into ").append(busiTableName).append(" select * from ").append(exterNalTable);
this.getJdbcTemplate().execute(busiSbf.toString());

StringBuffer sbf = new StringBuffer();
sbf.append("drop table ").append(exterNalTable);
this.getJdbcTemplate().execute(sbf.toString());

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/25261409/viewspace-1058463/,如需轉載,請註明出處,否則將追究法律責任。

相關文章