VC6 UTF-8檔案讀寫

查志強發表於2014-11-27

【原文:http://biancheng.dnbcw.info/c/277486.html】

帶BOM的UTF-8格式檔案的前三位元組為0xef,0xbb,0xbf(即BOM,並非所有UTF-8檔案都有);讀取的時候要跳過這三位元組再解碼;寫入的時候要先寫這三位元組到檔案頭。

    

/* utf8wr.h */
#ifndef UTF8WR_H
#define UTF8WR_H
#include <stdio.h>
#include <malloc.h>
#include <tchar.h>
#include <windows.h>
/*
 * Helpers for reading and writing UTF-8 text files from a program whose
 * in-memory strings use Windows code page 936.
 *
 * NOTE(review): the implementations hand the TCHAR pointers straight to
 * MultiByteToWideChar/WideCharToMultiByte, so this presumably only builds
 * when TCHAR is char (non-UNICODE build) -- confirm before enabling UNICODE.
 *
 * None of the functions takes a destination capacity.  The intended usage
 * is two calls: first with a NULL destination to learn the required size,
 * then again with a buffer of at least that size.
 */

/* Convert the NUL-terminated code page 936 string lpszBuffer to UTF-8.
 * lpszContext receives the result, or may be NULL to only query the size.
 * Returns the size reported by WideCharToMultiByte (the conversion uses
 * length -1, so the terminating NUL is counted); 0 if lpszBuffer is NULL. */
size_t utf8_encode(const TCHAR *lpszBuffer,TCHAR *lpszContext);
/* Convert the NUL-terminated UTF-8 string lpszBuffer to code page 936.
 * lpszContext receives the result, or may be NULL to only query the size.
 * Returns the size reported by WideCharToMultiByte (terminating NUL
 * counted); 0 if lpszBuffer is NULL. */
size_t utf8_decode(const TCHAR *lpszBuffer,TCHAR *lpszContext);
/* Create/overwrite lpszFile, write the bytes EF BB BF followed by the
 * UTF-8 encoding of lpszBuffer.  Returns the count reported by fwrite for
 * the encoded text (the three leading bytes are not counted); 0 if the
 * file cannot be opened. */
size_t utf8_write(const char *lpszFile,const TCHAR *lpszBuffer);
/* Read lpszFile and convert its UTF-8 content to code page 936.
 * lpszBuffer receives the result, or may be NULL to only query the size.
 * Returns 0 if the file cannot be opened or is empty. */
size_t utf8_read(const char *lpszFile,TCHAR *lpszBuffer);
#endif

/* utf8wr.c */
#include "utf8wr.h"
/*
 * utf8_encode - convert a NUL-terminated code page 936 string to UTF-8.
 *
 * lpszBuffer  : source text in code page 936; NULL yields 0.
 * lpszContext : destination for the UTF-8 bytes, or NULL to only query the
 *               required size.  No capacity is passed in, so the buffer
 *               must hold at least the value returned by a previous call
 *               made with lpszContext == NULL.
 *
 * Returns the number of bytes in the UTF-8 form INCLUDING the terminating
 * NUL (both conversions use length -1), or 0 on any failure: NULL input,
 * conversion error, or out of memory.
 */
size_t utf8_encode(const TCHAR *lpszBuffer,TCHAR *lpszContext)
{
    const UINT srcCodePage=936;   /* code page of the in-memory text */
    wchar_t *pUnicode=NULL;
    int cchWide=0;                /* UTF-16 length, terminator included */
    int cbUtf8=0;                 /* UTF-8 length, terminator included  */

    if(lpszBuffer==NULL){
        return(0);
    }
    /* Pass 1: code page 936 -> UTF-16.  Query the length first. */
    cchWide=MultiByteToWideChar(srcCodePage,0,lpszBuffer,-1,NULL,0);
    if(cchWide<=0){
        return(0);
    }
    pUnicode=(wchar_t *)malloc((size_t)cchWide*sizeof(wchar_t));
    if(pUnicode==NULL){
        /* Do not leak the intermediate UTF-16 length to the caller as if
         * it were a UTF-8 byte count. */
        return(0);
    }
    if(MultiByteToWideChar(srcCodePage,0,lpszBuffer,-1,(LPWSTR)pUnicode,cchWide)>0){
        /* Pass 2: UTF-16 -> UTF-8.  Query, then fill if a buffer was given. */
        cbUtf8=WideCharToMultiByte(CP_UTF8,0,(LPWSTR)pUnicode,-1,NULL,0,NULL,NULL);
        if((cbUtf8>0)&&(lpszContext!=NULL)){
            cbUtf8=WideCharToMultiByte(CP_UTF8,0,(LPWSTR)pUnicode,-1,lpszContext,cbUtf8,NULL,NULL);
        }
    }
    free(pUnicode);
    return(cbUtf8>0 ? (size_t)cbUtf8 : 0);
}
/*
 * utf8_decode - convert a NUL-terminated UTF-8 string to code page 936.
 *
 * lpszBuffer  : source text in UTF-8 (without a byte-order mark); NULL
 *               yields 0.
 * lpszContext : destination for the code page 936 bytes, or NULL to only
 *               query the required size.  No capacity is passed in, so the
 *               buffer must hold at least the value returned by a previous
 *               call made with lpszContext == NULL.
 *
 * Returns the number of bytes in the converted text INCLUDING the
 * terminating NUL (both conversions use length -1), or 0 on any failure:
 * NULL input, conversion error, or out of memory.
 */
size_t utf8_decode(const TCHAR *lpszBuffer,TCHAR *lpszContext)
{
    const UINT dstCodePage=936;   /* code page of the in-memory text */
    wchar_t *pUnicode=NULL;
    int cchWide=0;                /* UTF-16 length, terminator included */
    int cbAnsi=0;                 /* result length, terminator included */

    if(lpszBuffer==NULL){
        return(0);
    }
    /* Pass 1: UTF-8 -> UTF-16.  Query the length first. */
    cchWide=MultiByteToWideChar(CP_UTF8,0,lpszBuffer,-1,NULL,0);
    if(cchWide<=0){
        return(0);
    }
    /* cchWide already counts the terminator, no extra element is needed. */
    pUnicode=(wchar_t *)malloc((size_t)cchWide*sizeof(wchar_t));
    if(pUnicode==NULL){
        /* Do not hand the UTF-16 length back as if it were a byte count. */
        return(0);
    }
    if(MultiByteToWideChar(CP_UTF8,0,lpszBuffer,-1,(LPWSTR)pUnicode,cchWide)>0){
        /* Pass 2: UTF-16 -> code page 936.  Query, then fill on request.
         * The fill is skipped when the query failed: calling with a size
         * of 0 would be treated as another size query, not as a write. */
        cbAnsi=WideCharToMultiByte(dstCodePage,0,(LPWSTR)pUnicode,-1,NULL,0,NULL,NULL);
        if((cbAnsi>0)&&(lpszContext!=NULL)){
            cbAnsi=WideCharToMultiByte(dstCodePage,0,(LPWSTR)pUnicode,-1,lpszContext,cbAnsi,NULL,NULL);
        }
    }
    free(pUnicode);

    return(cbAnsi>0 ? (size_t)cbAnsi : 0);
}
size_t utf8_write(const char *lpszFile,const TCHAR *lpszBuffer)
{
    size_t size1=0;
    TCHAR utf8header[3];
    TCHAR *psz=NULL;
    FILE *fp=NULL;
    utf8header[0]=(TCHAR)0xef;
    utf8header[1]=(TCHAR)0xbb;
    utf8header[2]=(TCHAR)0xbf;
    fp=fopen(lpszFile,"wb");
    if(fp){
        fwrite(utf8header,sizeof(TCHAR),3,fp);
        size1=utf8_encode(lpszBuffer,NULL);
        if(size1>0){
            psz=(TCHAR *)malloc(size1);
            if(psz){
                size1=utf8_encode(lpszBuffer,psz);
                size1=fwrite(psz,sizeof(TCHAR),size1,fp);
                free(psz);
                psz=NULL;
            }
        }        
        fclose(fp);
    }
    return(size1);
}
/*
 * utf8_read - read a UTF-8 text file and convert it to code page 936.
 *
 * lpszFile   : path of the file to read.
 * lpszBuffer : destination for the converted text, or NULL to only query
 *              the required size.  No capacity is passed in, so the buffer
 *              must hold at least the value returned by a previous call
 *              made with lpszBuffer == NULL.
 *
 * A leading byte-order mark (EF BB BF) is skipped when present; a file
 * without one is decoded from its first byte, since the mark is optional
 * in UTF-8.  Decoding stops at the first NUL byte in the file, if any.
 *
 * Returns the size reported by utf8_decode (terminating NUL counted), or
 * 0 when the file cannot be opened, is empty, cannot be read, or memory
 * runs out.
 */
size_t utf8_read(const char *lpszFile,TCHAR *lpszBuffer)
{
    size_t cbResult=0;
    size_t cbRead=0;
    long cbFile=0;
    TCHAR *pszRaw=NULL;
    const TCHAR *pszText=NULL;
    FILE *fp=NULL;

    if(lpszFile==NULL){
        return(0);
    }
    fp=fopen(lpszFile,"rb");
    if(fp==NULL){
        return(0);
    }
    if(fseek(fp,0L,SEEK_END)==0){
        cbFile=ftell(fp);       /* -1 on error; rejected by the test below */
    }
    rewind(fp);
    if(cbFile>0){
        /* One extra byte so the data can be NUL-terminated: utf8_decode
         * expects a C string and the file is not required to contain a
         * terminator of its own. */
        pszRaw=(TCHAR *)malloc((size_t)cbFile+1);
        if(pszRaw){
            cbRead=fread(pszRaw,1,(size_t)cbFile,fp);
            if(cbRead>0){
                pszRaw[cbRead]='\0';
                pszText=pszRaw;
                /* Compare as unsigned bytes so the test does not depend on
                 * whether plain char is signed. */
                if((cbRead>=3)
                    &&((unsigned char)pszRaw[0]==0xef)
                    &&((unsigned char)pszRaw[1]==0xbb)
                    &&((unsigned char)pszRaw[2]==0xbf)){
                    pszText=pszRaw+3;
                }
                cbResult=utf8_decode(pszText,NULL);
                if((cbResult>0)&&(lpszBuffer!=NULL)){
                    cbResult=utf8_decode(pszText,lpszBuffer);
                }
            }
            free(pszRaw);
        }
    }
    fclose(fp);

    return(cbResult);
}
/*
 * Demo: write a sample string to utf8.txt as UTF-8, read it back and print
 * both byte counts and the recovered text.
 */
int main(void)
{
    const TCHAR *pszBuffer=_T("中國人民萬歲^_^");
    const char *pszFile="utf8.txt";   /* string literal: keep it const */
    TCHAR *psz=NULL;
    size_t size=0;

    size=utf8_write(pszFile,pszBuffer);
    /* size_t is printed through unsigned long: "%d" does not match size_t,
     * and "%zu" is not available in the VC6 runtime this sample targets. */
    printf("write %lu bytes\n",(unsigned long)size);
    /* First call only asks how large the decoded text is. */
    size=utf8_read(pszFile,NULL);
    if(size>0){
        psz=(TCHAR *)malloc(size);
        if(psz){
            /* Start as an empty string so "%s" stays safe even if the
             * second read does not fill the buffer. */
            psz[0]='\0';
            size=utf8_read(pszFile,psz);
            printf("read %lu bytes:\n%s\n",(unsigned long)size,psz);
            free(psz);
        }
    }

    return(0);
}

執行結果:
write 22 bytes
read 16 bytes:
中國人民萬歲^_^