// Copyright (c) 2010, Andrei Vieru. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // The lzma package implements reading and writing of LZMA format compressed data. // Reference implementation is LZMA SDK version 4.65 originaly developed by Igor // Pavlov, available online at: // // http://www.7-zip.org/sdk.html // // // // Usage examples. Write compressed data to a buffer: // // var b bytes.Buffer // w := lzma.NewWriter(&b) // w.Write([]byte("hello, world\n")) // w.Close() // // read that data back: // // r := lzma.NewReader(&b) // io.Copy(os.Stdout, r) // r.Close() // // // // If the data is bigger than you'd like to hold into memory, use pipes. Write // compressed data to an io.PipeWriter: // // pr, pw := io.Pipe() // go func() { // defer pw.Close() // w := lzma.NewWriter(pw) // defer w.Close() // // the bytes.Buffer would be an io.Reader used to read uncompressed data from // io.Copy(w, bytes.NewBuffer([]byte("hello, world\n"))) // }() // // and read it back: // // defer pr.Close() // r := lzma.NewReader(pr) // defer r.Close() // // the os.Stdout would be an io.Writer used to write uncompressed data to // io.Copy(os.Stdout, r) // // // package lzma import ( "errors" "io" ) const ( inBufSize = 1 << 16 outBufSize = 1 << 16 lzmaPropSize = 5 lzmaHeaderSize = lzmaPropSize + 8 lzmaMaxReqInputSize = 20 kNumRepDistances = 4 kNumStates = 12 kNumPosSlotBits = 6 kDicLogSizeMin = 0 kNumLenToPosStatesBits = 2 kNumLenToPosStates = 1 << kNumLenToPosStatesBits kMatchMinLen = 2 kNumAlignBits = 4 kAlignTableSize = 1 << kNumAlignBits kAlignMask = kAlignTableSize - 1 kStartPosModelIndex = 4 kEndPosModelIndex = 14 kNumPosModels = kEndPosModelIndex - kStartPosModelIndex kNumFullDistances = 1 << (kEndPosModelIndex / 2) kNumLitPosStatesBitsEncodingMax = 4 kNumLitContextBitsMax = 8 kNumPosStatesBitsMax = 4 kNumPosStatesMax = 1 << kNumPosStatesBitsMax kNumLowLenBits = 3 kNumMidLenBits = 3 kNumHighLenBits = 8 kNumLowLenSymbols = 1 << kNumLowLenBits kNumMidLenSymbols = 1 << kNumMidLenBits kNumLenSymbols = kNumLowLenSymbols + kNumMidLenSymbols + (1 << kNumHighLenBits) kMatchMaxLen = kMatchMinLen + kNumLenSymbols - 1 ) // A streamError reports the presence of corrupt input stream. var streamError = errors.New("error in lzma encoded data stream") // A headerError reports an error in the header of the lzma encoder file. var headerError = errors.New("error in lzma header") // A nReadError reports what its message reads var nReadError = errors.New("number of bytes returned by Reader.Read() didn't meet expectances") // A nWriteError reports what its message reads var nWriteError = errors.New("number of bytes returned by Writer.Write() didn't meet expectances") // TODO: implement this err // A dataIntegrityError reports an error encountered while cheching data integrity. // -- from lzma.txt: // You can use multiple checks to test data integrity after full decompression: // 1) Check Result and "status" variable. // 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. // 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. // You must use correct finish mode in that case. // //type dataIntegrityError struct { // msg string // // hz //} func stateUpdateChar(index uint32) uint32 { if index < 4 { return 0 } if index < 10 { return index - 3 } return index - 6 } func stateUpdateMatch(index uint32) uint32 { if index < 7 { return 7 } return 10 } func stateUpdateRep(index uint32) uint32 { if index < 7 { return 8 } return 11 } func stateUpdateShortRep(index uint32) uint32 { if index < 7 { return 9 } return 11 } func stateIsCharState(index uint32) bool { if index < 7 { return true } return false } func getLenToPosState(length uint32) uint32 { length -= kMatchMinLen if length < kNumLenToPosStates { return length } return kNumLenToPosStates - 1 } // LZMA compressed file format // --------------------------- // Offset Size Description // 0 1 Special LZMA properties (lc,lp, pb in encoded form) // 1 4 Dictionary size (little endian) // 5 8 Uncompressed size (little endian). Size -1 stands for unknown size // lzma properties type props struct { litContextBits, // lc litPosStateBits, // lp posStateBits uint8 // pb dictSize uint32 } func (p *props) decodeProps(buf []byte) { d := buf[0] if d > (9 * 5 * 5) { throw(headerError) } p.litContextBits = d % 9 d /= 9 p.posStateBits = d / 5 p.litPosStateBits = d % 5 if p.litContextBits > kNumLitContextBitsMax || p.litPosStateBits > 4 || p.posStateBits > kNumPosStatesBitsMax { throw(headerError) } for i := 0; i < 4; i++ { p.dictSize += uint32(buf[i+1]) << uint32(i*8) } } type decoder struct { // i/o rd *rangeDecoder // r outWin *lzOutWindow // w // lzma header prop *props unpackSize int64 // hz matchDecoders []uint16 repDecoders []uint16 repG0Decoders []uint16 repG1Decoders []uint16 repG2Decoders []uint16 rep0LongDecoders []uint16 posSlotCoders []*rangeBitTreeCoder posDecoders []uint16 posAlignCoder *rangeBitTreeCoder lenCoder *lenCoder repLenCoder *lenCoder litCoder *litCoder dictSizeCheck uint32 posStateMask uint32 } func (z *decoder) doDecode() { var state uint32 = 0 var rep0 uint32 = 0 var rep1 uint32 = 0 var rep2 uint32 = 0 var rep3 uint32 = 0 var nowPos uint64 = 0 var prevByte byte = 0 for z.unpackSize < 0 || int64(nowPos) < z.unpackSize { posState := uint32(nowPos) & z.posStateMask if z.rd.decodeBit(z.matchDecoders, state<= kStartPosModelIndex { numDirectBits := posSlot>>1 - 1 rep0 = (2 | posSlot&1) << numDirectBits if posSlot < kEndPosModelIndex { rep0 += reverseDecodeIndex(z.rd, z.posDecoders, rep0-posSlot-1, numDirectBits) } else { rep0 += z.rd.decodeDirectBits(numDirectBits-kNumAlignBits) << kNumAlignBits rep0 += z.posAlignCoder.reverseDecode(z.rd) if int32(rep0) < 0 { if rep0 == 0xFFFFFFFF { break } throw(streamError) } } } else { rep0 = posSlot } } if uint64(rep0) >= nowPos || rep0 >= z.dictSizeCheck { throw(streamError) } z.outWin.copyBlock(rep0, length) nowPos += uint64(length) prevByte = z.outWin.getByte(0) } } z.outWin.flush() //if z.unpackSize != -1 { // if z.outWin.unpacked != z.unpackSize { // throw(&dataIntegrityError{}) // } //} } func (z *decoder) decoder(r io.Reader, w io.Writer) (err error) { defer handlePanics(&err) // read 13 bytes (lzma header) header := make([]byte, lzmaHeaderSize) n, err := r.Read(header) if err != nil { return } if n != lzmaHeaderSize { return nReadError } z.prop = &props{} z.prop.decodeProps(header) z.unpackSize = 0 for i := 0; i < 8; i++ { b := header[lzmaPropSize+i] z.unpackSize = z.unpackSize | int64(b)<