C# 壓縮的缺陷

iDotNetSpace發表於2009-07-01
用到壓縮,以前1.1的時候都用的第3方庫,這次想嘗試一下2.0帶的 Compression庫。
一用發現這個庫壓的東西偏大,而且沒檔名沒時間。

那麼到底是為什麼會這樣呢,難道是我的使用有問題?

決定要搞搞明白,於是有了下面這段程式碼,用SharpZipLib和2.0自帶的庫做下對比,大家都採用Gzip壓縮

使用2.0的庫做壓縮
        static MemoryStream Deflate(byte[] data)
        
{
            MemoryStream memoryStream 
= new MemoryStream();

            
using (GZipStream gzip = new GZipStream(memoryStream, CompressionMode.Compress, true))
            
{
                gzip.Write(data, 
0, data.Length);
                gzip.Flush();
                gzip.Close();
            }


            
return memoryStream;
        }

使用SharpZipLib壓縮
        static MemoryStream DeflateUseSharpZipLib(byte[] data)
        
{
            MemoryStream memoryStream 
= new MemoryStream();

            
using (GZipOutputStream outStream = new GZipOutputStream(memoryStream))
            
{
                outStream.IsStreamOwner 
= false;
                outStream.Write(data, 
0, data.Length);
                outStream.Flush();
                outStream.Finish();
            }

            
return memoryStream;
        }

ok,之後看看我們的測試檔案   1.pdf 3358kb

使用System.IO.Compression
testzip 1.pdf 0.zip 0 
使用ICSharpCode.SharpZipLib.GZip
testzip 1.pdf 1.zip 1  

壓縮後大小:
0.zip 2499kb(System)
1.zip 1612kb(SharpZipLib)

可以看到 ICSharpCode.SharpZipLib.GZip 壓縮後1612kb,C# System.IO.Compression.GZipStream 壓縮後大小2499kb

而且System.IO.Compression.GZipStream壓縮的檔案沒檔名,時間也是不對的

那麼是什麼原因呢?
那就讓我們跟蹤看看

System.IO.Compression.GZipStream的實現

GZipStream類
        public override void Write(byte[] array, int offset, int count)
        
{
            
if (this.deflateStream == null)
            
{
                
throw new ObjectDisposedException(null, SR.GetString("ObjectDisposed_StreamClosed"));
            }

            
this.deflateStream.Write(array, offset, count);
        }

DeflateStream類
        public override void Write(byte[] array, int offset, int count)
        
{
            
this.EnsureCompressionMode();
            
this.ValidateParameters(array, offset, count);
            
this.InternalWrite(array, offset, count, false);
        }


        
/// <summary>
/// Feeds a block of input to the deflater and writes all compressed output it
/// produces to the underlying stream.
/// </summary>
/// <param name="array">Buffer holding the bytes to compress.</param>
/// <param name="offset">Index in <paramref name="array"/> of the first byte to compress.</param>
/// <param name="count">Number of bytes to compress.</param>
/// <param name="isAsync">
/// When true the compressed bytes are written with BeginWrite/EndWrite
/// (EndWrite is called immediately afterwards); otherwise with Write.
/// </param>
internal void InternalWrite(byte[] array, int offset, int count, bool isAsync)
{
    // First flush whatever the deflater can still produce from input it
    // already holds, so it is ready to accept the new block.
    this.DrainDeflater(isAsync);

    this.deflater.SetInput(array, offset, count);

    // Then compress and write out the block that was just supplied.
    this.DrainDeflater(isAsync);
}

/// <summary>
/// Pulls compressed output from the deflater into the scratch buffer and writes
/// it to the underlying stream until the deflater reports that it needs input.
/// </summary>
private void DrainDeflater(bool isAsync)
{
    while (!this.deflater.NeedsInput())
    {
        int deflateOutput = this.deflater.GetDeflateOutput(this.buffer);
        if (deflateOutput == 0)
        {
            // Nothing was produced this round; re-check NeedsInput.
            continue;
        }

        if (isAsync)
        {
            // No completion callback and no state object are supplied.
            IAsyncResult asyncResult = this._stream.BeginWrite(this.buffer, 0, deflateOutput, null, null);
            this._stream.EndWrite(asyncResult);
        }
        else
        {
            this._stream.Write(this.buffer, 0, deflateOutput);
        }
    }
}


        
/// <summary>
/// Asks the encoder to place compressed data into <paramref name="output"/>.
/// </summary>
/// <param name="output">Buffer that receives the compressed bytes.</param>
/// <returns>The value returned by the encoder's GetCompressedOutput call.</returns>
public int GetDeflateOutput(byte[] output)
{
    int produced = encoder.GetCompressedOutput(output);
    return produced;
}
DeflateStream.Write 呼叫 InternalWrite,而 InternalWrite 透過 this.deflater 呼叫 GetDeflateOutput(這個方法屬於 Deflater 類,而不是 DeflateStream)
encoder 是 FastEncoder 物件,在 Deflater 的建構函式中初始化

FastEncoder類
            public int GetCompressedOutput(byte[] outputBuffer)
            
{
                
this.output.UpdateBuffer(outputBuffer);
                
if (this.usingGzip && !this.hasGzipHeader)
                
{
                    
this.output.WriteGzipHeader(3);   
                    
this.hasGzipHeader = true;
                }

                
if (!this.hasBlockHeader)
                
{
                    
this.hasBlockHeader = true;
                    
this.output.WritePreamble();
                }

                
do
                
{
                    
int count = (this.inputBuffer.Count < this.inputWindow.FreeWindowSpace) ? this.inputBuffer.Count : this.inputWindow.FreeWindowSpace;
                    
if (count > 0)
                    
{
                        
this.inputWindow.CopyBytes(this.inputBuffer.Buffer, this.inputBuffer.StartIndex, count);
                        
if (this.usingGzip)
                        
{
                            
this.gzipCrc32 = DecodeHelper.UpdateCrc32(this.gzipCrc32, this.inputBuffer.Buffer, this.inputBuffer.StartIndex, count);
                            
uint num2 = this.inputStreamSize + ((uint) count);
                            
if (num2 < this.inputStreamSize)
                            
{
                                
throw new InvalidDataException(SR.GetString("StreamSizeOverflow"));
                            }

                            
this.inputStreamSize = num2;
                        }

                        
this.inputBuffer.ConsumeBytes(count);
                    }

                    
while ((this.inputWindow.BytesAvailable > 0&& this.output.SafeToWriteTo())
                    
{
                        
this.inputWindow.GetNextSymbolOrMatch(this.currentMatch);
                        
if (this.currentMatch.State == MatchState.HasSymbol)
                        
{
                            
this.output.WriteChar(this.currentMatch.Symbol);
                        }

                        
else
                        
{
                            
if (this.currentMatch.State == MatchState.HasMatch)
                            
{
                                
this.output.WriteMatch(this.currentMatch.Length, this.currentMatch.Position);
                                
continue;
                            }

                            
this.output.WriteChar(this.currentMatch.Symbol);
                            
this.output.WriteMatch(this.currentMatch.Length, this.currentMatch.Position);
                        }

                    }

                }

                
while (this.output.SafeToWriteTo() && !this.NeedsInput());
                
this.needsEOB = true;
                
return this.output.BytesWritten;
            }


注意到
output.WriteGzipHeader(3) 這個呼叫,汗一下,壓縮等級(compression level)直接寫死成 3 了。。。

看看WriteGzipHeader是怎麼寫的
/// <summary>
/// Appends a fixed 10-byte gzip member header to the output buffer, advancing
/// outputPos by one for every byte written. Field names in the comments follow
/// the gzip format specification (RFC 1952).
/// </summary>
/// <param name="compression_level">
/// Only affects the XFL byte: the value 10 writes 2, any other value writes 4.
/// </param>
internal void WriteGzipHeader(int compression_level)
{
    this.outputBuf[this.outputPos++] = 0x1f; // ID1: gzip magic, first byte
    this.outputBuf[this.outputPos++] = 0x8b; // ID2: gzip magic, second byte
    this.outputBuf[this.outputPos++] = 8;    // CM: compression method 8 = deflate
    this.outputBuf[this.outputPos++] = 0;    // FLG: no optional fields (no extra field, file name or comment)

    // MTIME: four zero bytes, i.e. no modification time is recorded.
    this.outputBuf[this.outputPos++] = 0;
    this.outputBuf[this.outputPos++] = 0;
    this.outputBuf[this.outputPos++] = 0;
    this.outputBuf[this.outputPos++] = 0;

    // XFL: 2 = maximum compression, 4 = fastest algorithm.
    if (compression_level == 10)
    {
        this.outputBuf[this.outputPos++] = 2;
    }
    else
    {
        this.outputBuf[this.outputPos++] = 4;
    }

    this.outputBuf[this.outputPos++] = 0;    // OS: 0 = FAT filesystem
}

再次暈倒:沒有擴充標頭資訊(額外欄位、檔名等),沒有時間戳記,所有欄位全部寫死。如果看不明白,對照一下gzip的格式就知道了

gzip格式 http://www.gzip.org/zlib/rfc-gzip.html

至此,已經知道為什麼2.0的Gzip壓縮,沒有檔名資訊,也沒有時間了,真的是很陽春的壓縮。

還有另外一個問題,為什麼2.0自帶的庫壓縮率低?

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/12639172/viewspace-608035/,如需轉載,請註明出處,否則將追究法律責任。

相關文章