zstd: Update to upstream version 1.4.1

Rémi Verschelde 2019-07-20 20:47:07 +02:00
parent 4379395892
commit b81fa47f38
15 changed files with 581 additions and 301 deletions

thirdparty/README.md (vendored)

@@ -200,6 +200,7 @@ Important: Some files have Godot-made changes.
 They are marked with `// -- GODOT start --` and `// -- GODOT end --`
 comments.
 
 ## libtheora
 - Upstream: https://www.theora.org
@@ -262,18 +263,6 @@ changes to ensure they build for Javascript/HTML5. Those
 changes are marked with `// -- GODOT --` comments.
 
-## wslay
-- Upstream: https://github.com/tatsuhiro-t/wslay
-- Version: 1.1.0
-- License: MIT
-
-File extracted from upstream release tarball:
-- All `*.c` and `*.h` in `lib/` and `lib/includes/`
-- `wslay.h` has a small Godot addition to fix MSVC build.
-  See `thirdparty/wslay/msvcfix.diff`
-
 ## mbedtls
 - Upstream: https://tls.mbed.org/
@@ -508,6 +497,19 @@ They can be reapplied using the patches included in the `vhacd`
 folder.
 
+## wslay
+- Upstream: https://github.com/tatsuhiro-t/wslay
+- Version: 1.1.0
+- License: MIT
+
+File extracted from upstream release tarball:
+- All `*.c` and `*.h` in `lib/` and `lib/includes/`
+- `wslay.h` has a small Godot addition to fix MSVC build.
+  See `thirdparty/wslay/msvcfix.diff`
+
 ## xatlas
 - Upstream: https://github.com/jpcy/xatlas
@@ -536,7 +538,7 @@ Files extracted from upstream source:
 ## zstd
 - Upstream: https://github.com/facebook/zstd
-- Version: 1.4.0
+- Version: 1.4.1
 - License: BSD-3-Clause
 
 Files extracted from upstream source:

thirdparty/zstd/common/compiler.h

@@ -127,6 +127,13 @@
   }   \
 }
 
+/* vectorization */
+#if !defined(__clang__) && defined(__GNUC__)
+# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#else
+# define DONT_VECTORIZE
+#endif
+
 /* disable warnings */
 #ifdef _MSC_VER    /* Visual Studio */
 #  include <intrin.h>                    /* For Visual 2005 */
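For context: `DONT_VECTORIZE` expands to GCC's `optimize("no-tree-vectorize")` function attribute and to nothing on other compilers; upstream applies it to the wildcopy helpers further down, whose 8/16-byte loops are already shaped by hand. A minimal standalone sketch of the intended usage; `copy8loop` and the buffer sizes are illustrative, not from the diff:

```c
#include <string.h>

#if !defined(__clang__) && defined(__GNUC__)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
# define DONT_VECTORIZE
#endif

/* wildcopy-style loop: copies in fixed 8-byte steps and may overcopy,
 * so both buffers carry slack; the attribute keeps GCC's tree
 * vectorizer from rewriting a loop that is deliberately shaped */
DONT_VECTORIZE
static void copy8loop(char* dst, const char* src, size_t n) {
    size_t i = 0;
    do { memcpy(dst + i, src + i, 8); i += 8; } while (i < n);
}

int main(void) {
    char src[24] = "overlength-padded";
    char dst[24];
    copy8loop(dst, src, 17);   /* rounds up: writes 24 bytes into 24-byte buffers */
    return dst[0] == 'o' ? 0 : 1;
}
```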

thirdparty/zstd/common/zstd_internal.h

@@ -34,7 +34,6 @@
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 8
+#define VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst,
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
 
 /*! ZSTD_wildcopy() :
  *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
 {
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;
-    do
-        COPY8(op, ip)
-    while (op < oend);
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
+    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
+      do
+          COPY8(op, ip)
+      while (op < oend);
+    }
+    else {
+      if ((length & 8) == 0)
+          COPY8(op, ip);
+      do {
+          COPY16(op, ip);
+      }
+      while (op < oend);
+    }
+}
+
+/*! ZSTD_wildcopy_16min() :
+ *  same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+{
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+
+    assert(length >= 8);
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
+      do
+          COPY8(op, ip)
+      while (op < oend);
+    }
+    else {
+      if ((length & 8) == 0)
+          COPY8(op, ip);
+      do {
+          COPY16(op, ip);
+      }
+      while (op < oend);
+    }
 }
 
 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
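The `WILDCOPY_OVERLENGTH` contract is what makes these loops safe: a wildcopy of `length` bytes may write up to the next multiple of the step size past `length`, so every destination buffer must reserve that slack. A minimal sketch of the assumed caller pattern; `copy_literals` and the buffer sizes are hypothetical, not from the diff:

```c
#include <stdlib.h>
#include <string.h>

#define WILDCOPY_OVERLENGTH 8

/* wildcopy-style loop: rounds the copy length up to a multiple of 8 */
static void copy_literals(char* dst, const char* src, size_t length) {
    size_t i = 0;
    do {
        memcpy(dst + i, src + i, 8);
        i += 8;
    } while (i < length);
}

int main(void) {
    const char src[32] = "0123456789abcdefghij";
    size_t const length = 20;
    /* over-allocate: the loop may write 24 bytes for length==20 */
    char* dst = malloc(length + WILDCOPY_OVERLENGTH);
    if (dst == NULL) return 1;
    copy_literals(dst, src, length);
    free(dst);
    return 0;
}
```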

thirdparty/zstd/compress/zstd_compress.c

@@ -385,6 +385,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.upperBound = ZSTD_lcm_uncompressed;
         return bounds;
 
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
     default:
         {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
             return boundError;
@@ -452,6 +457,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_ldmHashRateLog:
     case ZSTD_c_forceAttachDict:
     case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
     default:
         return 0;
     }
@@ -497,6 +503,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_ldmHashLog:
     case ZSTD_c_ldmMinMatch:
     case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_targetCBlockSize:
         break;
 
     default: RETURN_ERROR(parameter_unsupported);
@@ -671,6 +678,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         CCtxParams->ldmParams.hashRateLog = value;
         return CCtxParams->ldmParams.hashRateLog;
 
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
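`ZSTD_c_targetCBlockSize` is the user-visible switch for this new plumbing: it asks the compressor to aim for compressed blocks of roughly the requested size, with 0 meaning no target and no hard guarantee either way. A hedged usage sketch against the public API; the 1300-byte target is an arbitrary example:

```c
#include <stdio.h>
#include <zstd.h>

int main(void) {
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) return 1;
    /* e.g. aim for blocks that fit one network MTU */
    size_t const err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1300);
    if (ZSTD_isError(err))
        printf("not supported by this libzstd: %s\n", ZSTD_getErrorName(err));
    ZSTD_freeCCtx(cctx);
    return 0;
}
```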
@@ -692,13 +705,13 @@ size_t ZSTD_CCtxParams_getParameter(
         *value = CCtxParams->compressionLevel;
         break;
     case ZSTD_c_windowLog :
-        *value = CCtxParams->cParams.windowLog;
+        *value = (int)CCtxParams->cParams.windowLog;
         break;
     case ZSTD_c_hashLog :
-        *value = CCtxParams->cParams.hashLog;
+        *value = (int)CCtxParams->cParams.hashLog;
         break;
     case ZSTD_c_chainLog :
-        *value = CCtxParams->cParams.chainLog;
+        *value = (int)CCtxParams->cParams.chainLog;
         break;
     case ZSTD_c_searchLog :
         *value = CCtxParams->cParams.searchLog;
@@ -773,6 +786,9 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_ldmHashRateLog :
         *value = CCtxParams->ldmParams.hashRateLog;
         break;
+    case ZSTD_c_targetCBlockSize :
+        *value = (int)CCtxParams->targetCBlockSize;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -930,12 +946,12 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
     @return : 0, or an error code if one value is beyond authorized range */
 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 {
-    BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
-    BOUNDCHECK(ZSTD_c_chainLog,  cParams.chainLog);
-    BOUNDCHECK(ZSTD_c_hashLog,   cParams.hashLog);
-    BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
-    BOUNDCHECK(ZSTD_c_minMatch,  cParams.minMatch);
-    BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
     BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
     return 0;
 }
@@ -951,7 +967,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
         if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
         else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
     }
-#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
     CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
@@ -1282,15 +1298,14 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
 }
 
 /*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
 {
     ZSTD_window_clear(&ms->window);
 
     ms->nextToUpdate = ms->window.dictLimit;
-    ms->nextToUpdate3 = ms->window.dictLimit;
     ms->loadedDictEnd = 0;
     ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
     ms->dictMatchState = NULL;
@@ -1327,15 +1342,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
 
 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
 
+typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
+
 static void*
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       void* ptr,
                 const ZSTD_compressionParameters* cParams,
-                      ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+                      ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
 {
     size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
     size_t const hSize = ((size_t)1) << cParams->hashLog;
-    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = ((size_t)1) << hashLog3;
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
@@ -1349,7 +1366,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
     ZSTD_invalidateMatchState(ms);
 
     /* opt parser space */
-    if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
         DEBUGLOG(4, "reserving optimal parser space");
         ms->opt.litFreq = (unsigned*)ptr;
         ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
@@ -1377,6 +1394,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
     return ptr;
 }
 
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
                                                  * during at least this number of times,
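The margin arithmetic behind `ZSTD_indexTooCloseToMax()`: indexes are U32, `ZSTD_CURRENT_MAX` is about 3.5 GB, and an input no larger than the 16 MB margin can never jump from below the threshold straight past the overflow point, so the cheap memset-style reset always fires first. A small standalone illustration; constants mirror the diff, and `ZSTD_WINDOWLOG_MAX` is assumed to be 31 as on 64-bit builds:

```c
#include <stdio.h>

#define MB *(1u<<20)
#define ZSTD_WINDOWLOG_MAX 31
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))  /* ~3.5 G indexes */
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)

int main(void) {
    unsigned const threshold = ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN;
    unsigned long long current = 0;
    /* re-using one context for many 128 KB inputs: the index grows by at
     * most one input per job, so it crosses `threshold` (triggering the
     * cheap reset) before it could ever reach the overflow point */
    while (current <= threshold) current += 128 * 1024;
    printf("reset triggered at index %llu (threshold %u)\n", current, threshold);
    return 0;
}
```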
@@ -1388,7 +1418,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
     note : `params` are assumed fully validated at this stage */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                       ZSTD_CCtx_params params,
-                                      U64 pledgedSrcSize,
+                                      U64 const pledgedSrcSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
@@ -1400,13 +1430,21 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         if (ZSTD_equivalentParams(zc->appliedParams, params,
                                   zc->inBuffSize,
                                   zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
-                                  zbuff, pledgedSrcSize)) {
-            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
-                        zc->appliedParams.cParams.windowLog, zc->blockSize);
+                                  zbuff, pledgedSrcSize) ) {
+            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
             zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0);   /* if it was too large, it still is */
-            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
+            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
+                DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
+                            zc->appliedParams.cParams.windowLog, zc->blockSize);
+                if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
+                    /* prefer a reset, faster than a rescale */
+                    ZSTD_reset_matchState(&zc->blockState.matchState,
+                                          zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
+                                          &params.cParams,
+                                          crp, ZSTD_resetTarget_CCtx);
+                }
                 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
-    }   }
+    }   }   }
 
     DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
 
     if (params.ldmParams.enableLdm) {
@@ -1449,7 +1487,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
         if (workSpaceTooSmall || workSpaceWasteful) {
-            DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
+            DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
                         zc->workSpaceSize >> 10,
                         neededSpace >> 10);
@@ -1491,7 +1529,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
 
-        ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
+        ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
+                                    zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
+                                    &params.cParams,
+                                    crp, ZSTD_resetTarget_CCtx);
 
         /* ldm hash table */
         /* initialize bucketOffsets table later for pointer alignment */
@@ -1509,8 +1550,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         }
         assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
 
-        ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
-
         /* sequences storage */
         zc->seqStore.maxNbSeq = maxNbSeq;
         zc->seqStore.sequencesStart = (seqDef*)ptr;
@@ -1587,15 +1626,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
      * handled in _enforceMaxDist */
 }
 
-static size_t ZSTD_resetCCtx_byAttachingCDict(
-    ZSTD_CCtx* cctx,
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                         const ZSTD_CDict* cdict,
                         ZSTD_CCtx_params params,
                         U64 pledgedSrcSize,
                         ZSTD_buffered_policy_e zbuff)
 {
-    {
-        const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Resize working context table params for input only, since the dict
@@ -1607,8 +1645,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
     }
 
-    {
-        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                   - cdict->matchState.window.base);
         const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
         if (cdictLen == 0) {
@@ -1625,9 +1662,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
                     cctx->blockState.matchState.window.base + cdictEnd;
             ZSTD_window_clear(&cctx->blockState.matchState.window);
         }
+        /* loadedDictEnd is expressed within the referential of the active context */
         cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
-        }
-    }
+    }   }
 
     cctx->dictID = cdict->dictID;
@@ -1681,7 +1718,6 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
         ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
@@ -1761,7 +1797,6 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
         ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
     dstCCtx->dictID = srcCCtx->dictID;
@@ -1831,16 +1866,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const
 
 /*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
 {
-    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    {   U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
-    if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
-        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
         else
             ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
@@ -2524,6 +2558,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else
         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    assert(op <= oend);
     if (nbSeq==0) {
         /* Copy the old tables over as if we repeated them */
         memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
@@ -2532,6 +2567,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
 
     /* seqHead : flags for FSE encoding type */
     seqHead = op++;
+    assert(op <= oend);
 
     /* convert length/distances into codes */
     ZSTD_seqToCodes(seqStorePtr);
@@ -2555,6 +2591,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
             if (LLtype == set_compressed)
                 lastNCount = op;
             op += countSize;
+            assert(op <= oend);
     }   }
     /* build CTable for Offsets */
     {   unsigned max = MaxOff;
@@ -2577,6 +2614,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
             if (Offtype == set_compressed)
                 lastNCount = op;
             op += countSize;
+            assert(op <= oend);
     }   }
     /* build CTable for MatchLengths */
     {   unsigned max = MaxML;
@@ -2597,6 +2635,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
             if (MLtype == set_compressed)
                 lastNCount = op;
             op += countSize;
+            assert(op <= oend);
     }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
@@ -2610,6 +2649,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                                         longOffsets, bmi2);
         FORWARD_IF_ERROR(bitstreamSize);
         op += bitstreamSize;
+        assert(op <= oend);
         /* zstd versions <= 1.3.4 mistakenly report corruption when
          * FSE_readNCount() receives a buffer < 4 bytes.
          * Fixed by https://github.com/facebook/zstd/pull/1146.
@@ -2721,30 +2761,24 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
     ssPtr->longLengthID = 0;
 }
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                                        void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize)
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    size_t cSize;
-    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate);
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
     /* Assert that we have correctly flushed the ctx params into the ms's copy */
     ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
         ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
-        cSize = 0;
-        goto out;  /* don't even attempt compression below a certain srcSize */
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
     }
     ZSTD_resetSeqStore(&(zc->seqStore));
     /* required for optimal parser to read stats from dictionary */
     ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
     /* tell the optimal parser how we expect to compress literals */
     ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
 
     /* a gap between an attached dict and the current window is not safe,
      * they must remain adjacent,
      * and when that stops being the case, the dict must be unset */
@@ -2798,6 +2832,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
         ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
     }   }
+    return ZSTDbss_compress;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize)
+{
+    size_t cSize;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss);
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
 
     /* encode sequences and literals */
     cSize = ZSTD_compressSequences(&zc->seqStore,
@@ -2826,6 +2875,25 @@ out:
 }
 
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
+{
+    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+        U32 const maxDist = (U32)1 << params->cParams.windowLog;
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+        ZSTD_reduceIndex(ms, params, correction);
+        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+        else ms->nextToUpdate -= correction;
+        /* invalidate dictionaries on overflow correction */
+        ms->loadedDictEnd = 0;
+        ms->dictMatchState = NULL;
+    }
+}
+
 /*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
 *   All blocks will be terminated, all input will be consumed.
@@ -2844,7 +2912,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
-    assert(cctx->appliedParams.cParams.windowLog <= 31);
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
 
     DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
     if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2859,19 +2927,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                         "not enough space to store compressed block");
         if (remaining < blockSize) blockSize = remaining;
 
-        if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
-            U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
-            U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
-            ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-            ZSTD_reduceIndex(cctx, correction);
-            if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-            else ms->nextToUpdate -= correction;
-            ms->loadedDictEnd = 0;
-            ms->dictMatchState = NULL;
-        }
-        ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
+        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
         if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
 
         {   size_t cSize = ZSTD_compressBlock_internal(cctx,
@@ -2899,7 +2958,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
     }   }
 
     if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
@@ -2991,6 +3050,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
         fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                        cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
         FORWARD_IF_ERROR(fhSize);
+        assert(fhSize <= dstCapacity);
         dstCapacity -= fhSize;
         dst = (char*)dst + fhSize;
         cctx->stage = ZSTDcs_ongoing;
@@ -3007,18 +3067,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 
     if (!frame) {
         /* overflow check and correction for block mode */
-        if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
-            U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
-            U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
-            ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-            ZSTD_reduceIndex(cctx, correction);
-            if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-            else ms->nextToUpdate -= correction;
-            ms->loadedDictEnd = 0;
-            ms->dictMatchState = NULL;
-        }
+        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
     }
 
     DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
@@ -3074,7 +3123,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                          const void* src, size_t srcSize,
                                          ZSTD_dictTableLoadMethod_e dtlm)
 {
-    const BYTE* const ip = (const BYTE*) src;
+    const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
 
     ZSTD_window_update(&ms->window, src, srcSize);
@@ -3085,32 +3134,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
 
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    switch(params->cParams.strategy)
-    {
-    case ZSTD_fast:
-        ZSTD_fillHashTable(ms, iend, dtlm);
-        break;
-    case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
-        break;
-
-    case ZSTD_greedy:
-    case ZSTD_lazy:
-    case ZSTD_lazy2:
-        if (srcSize >= HASH_READ_SIZE)
-            ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
-        break;
-
-    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-    case ZSTD_btopt:
-    case ZSTD_btultra:
-    case ZSTD_btultra2:
-        if (srcSize >= HASH_READ_SIZE)
-            ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
-        break;
-
-    default:
-        assert(0);  /* not possible : not a valid strategy id */
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = iend - ip;
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
+
+        ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);
+
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
+
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
     }
 
     ms->nextToUpdate = (U32)(iend - ms->window.base);
@@ -3297,12 +3356,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
     FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                      ZSTDcrp_continue, zbuff) );
-    {
-        size_t const dictID = ZSTD_compress_insertDictionary(
+    {   size_t const dictID = ZSTD_compress_insertDictionary(
                 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                 &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
         FORWARD_IF_ERROR(dictID);
-        assert(dictID <= (size_t)(U32)-1);
+        assert(dictID <= UINT_MAX);
         cctx->dictID = (U32)dictID;
     }
     return 0;
@@ -3555,10 +3613,10 @@ static size_t ZSTD_initCDict_internal(
 
     /* Reset the state to no dictionary */
     ZSTD_reset_compressedBlockState(&cdict->cBlockState);
-    {   void* const end = ZSTD_reset_matchState(
-                &cdict->matchState,
-                (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
-                &cParams, ZSTDcrp_continue, /* forCCtx */ 0);
+    {   void* const end = ZSTD_reset_matchState(&cdict->matchState,
+                            (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
+                            &cParams,
+                             ZSTDcrp_continue, ZSTD_resetTarget_CDict);
         assert(end == (char*)cdict->workspace + cdict->workspaceSize);
         (void)end;
     }
@@ -4068,7 +4126,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
         case zcss_flush:
             DEBUGLOG(5, "flush stage");
             {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                size_t const flushed = ZSTD_limitCopy(op, oend-op,
+                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                             zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                             (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
@@ -4262,7 +4320,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
     if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
     /* single thread mode : attempt to calculate remaining to flush more precisely */
     {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
-        size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
+        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
         size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
         DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
         return toFlush;

thirdparty/zstd/compress/zstd_compress_internal.h

@@ -33,13 +33,13 @@ extern "C" {
 ***************************************/
 #define kSearchStrength      8
 #define HASH_READ_SIZE       8
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                        It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                        It's not a big deal though : candidate will just be sorted again.
                                        Additionally, candidate position 1 will be lost.
                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
-                                       Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
 
 /*-*************************************
@@ -128,21 +128,20 @@ typedef struct {
     BYTE const* base;       /* All regular indexes relative to this position */
     BYTE const* dictBase;   /* extDict indexes relative to this position */
     U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more data */
+    U32 lowLimit;           /* below that point, no more valid data */
 } ZSTD_window_t;
 
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
-    U32 loadedDictEnd;      /* index of end of dictionary */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
     U32 nextToUpdate;       /* index from which to continue table update */
-    U32 nextToUpdate3;      /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table : larger == faster, more memory */
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
     optState_t opt;         /* optimal parser state */
-    const ZSTD_matchState_t * dictMatchState;
+    const ZSTD_matchState_t* dictMatchState;
     ZSTD_compressionParameters cParams;
 };
@@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s {
     int compressionLevel;
     int forceWindow;           /* force back-references to respect limit of
                                 * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
 
     ZSTD_dictAttachPref_e attachDictPref;
     ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -324,7 +326,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
     /* copy Literals */
     assert(seqStorePtr->maxNbLit <= 128 KB);
     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
     seqStorePtr->lit += litLength;
 
     /* literal Length */
@@ -564,6 +566,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
 /*-*************************************
 *  Round buffer management
 ***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
 /* Max current allowed */
 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
 /* Maximum chunk size before overflow correction needs to be called again */
@@ -675,31 +680,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
- * This allows a simple check that index >= lowLimit to see if index is valid.
- * This must be called before a block compression call, with srcEnd as the block
- * source end.
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
 *
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
- * This is because dictionaries are allowed to be referenced as long as the last
- * byte of the dictionary is in the window, but once they are out of range,
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
- * loadedDictEnd == 0.
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
- * is below them. forceWindow and dictMatchState are therefore incompatible.
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
 */
 MEM_STATIC void
 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
-                           void const* srcEnd,
+                           const void* blockEnd,
                            U32   maxDist,
                            U32*  loadedDictEndPtr,
                      const ZSTD_matchState_t** dictMatchStatePtr)
 {
-    U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
-    U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
-    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
-                (unsigned)blockEndIdx, (unsigned)maxDist);
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
     if (blockEndIdx > maxDist + loadedDictEnd) {
         U32 const newLowLimit = blockEndIdx - maxDist;
         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,10 +731,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                         (unsigned)window->dictLimit, (unsigned)window->lowLimit);
             window->dictLimit = window->lowLimit;
         }
-    if (loadedDictEndPtr)
-        *loadedDictEndPtr = 0;
-    if (dictMatchStatePtr)
-        *dictMatchStatePtr = NULL;
+    /* On reaching window size, dictionaries are invalidated */
+    if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+    if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
 }
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size. */
+MEM_STATIC void
+ZSTD_checkDictValidity(ZSTD_window_t* window,
+                       const void* blockEnd,
+                       U32   maxDist,
+                       U32*  loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
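The comment block above distinguishes the no-dictionary, copied-dictionary, and attached-dictionary referentials; the practical difference between the two helpers is that `ZSTD_window_enforceMaxDist()` slides `lowLimit` and always drops the dictionary, while the new `ZSTD_checkDictValidity()` leaves the window alone and only invalidates the dictionary once the block end outruns `maxDist + loadedDictEnd`. A simplified numeric sketch of that trigger condition, in plain C with hypothetical numbers and no zstd types:

```c
#include <stdio.h>

typedef unsigned U32;

static void check_dict_validity(U32 blockEndIdx, U32 maxDist, U32* loadedDictEnd) {
    if (*loadedDictEnd && blockEndIdx > maxDist + *loadedDictEnd)
        *loadedDictEnd = 0;   /* dictionary fell out of the window */
}

int main(void) {
    U32 loadedDictEnd = 4096;       /* dict occupies indexes [0, 4096) */
    U32 const maxDist = 1u << 17;   /* 128 KB window */
    check_dict_validity(100000, maxDist, &loadedDictEnd);
    printf("after 100000 bytes: dict %s\n", loadedDictEnd ? "valid" : "dropped");  /* valid */
    check_dict_validity(200000, maxDist, &loadedDictEnd);
    printf("after 200000 bytes: dict %s\n", loadedDictEnd ? "valid" : "dropped");  /* dropped */
    return 0;
}
```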

thirdparty/zstd/compress/zstd_double_fast.c

@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */ /* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast) if (dtlm == ZSTD_dtlm_fast)
break; break;
} } }
}
} }
@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src; const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart; const BYTE* ip = istart;
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 prefixLowestIndex = ms->window.dictLimit; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->chainLog : hBitsS; dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
}
/* init */ /* init */
ip += (dictAndPrefixLength == 0); ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) { if (dictMode == ZSTD_noDict) {
@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored; goto _match_stored;
} }
@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored; goto _match_stored;
} }
@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current - dictMatchIndexL - dictIndexDelta); offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} } }
}
if (matchIndexS > prefixLowestIndex) { if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */ /* check prefix short match */
@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long; goto _search_next_long;
} } }
}
ip += ((ip-anchor) >> kSearchStrength) + 1; ip += ((ip-anchor) >> kSearchStrength) + 1;
continue; continue;
_search_next_long: _search_next_long:
{ { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3]; U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3; const BYTE* matchL3 = base + matchIndexL3;
@ -221,9 +227,7 @@ _search_next_long:
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} } } }
}
}
/* if no long +1 match, explore the short match we found */ /* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@ -242,7 +246,7 @@ _match_found:
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored: _match_stored:
/* match found */ /* match found */
@ -250,11 +254,14 @@ _match_stored:
anchor = ip; anchor = ip;
if (ip <= ilimit) { if (ip <= ilimit) {
/* Fill Table */ /* Complementary insertion */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = /* done after iLimit test, as candidates could be > iend-8 */
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ { U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */ /* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) { if (dictMode == ZSTD_dictMatchState) {
@ -278,8 +285,7 @@ _match_stored:
continue; continue;
} }
break; break;
} } }
}
if (dictMode == ZSTD_noDict) { if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit) while ( (ip <= ilimit)
@ -294,14 +300,15 @@ _match_stored:
ip += rLength; ip += rLength;
anchor = ip; anchor = ip;
continue; /* faster when present ... (?) */ continue; /* faster when present ... (?) */
} } } } } } }
} /* while (ip < ilimit) */
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */ /* Return the last literals size */
return iend - anchor; return (size_t)(iend - anchor);
} }
@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* anchor = istart; const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8; const BYTE* const ilimit = iend - 8;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const prefixStart = base + prefixStartIndex;
const U32 dictStartIndex = ms->window.lowLimit;
const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex; const BYTE* const dictEnd = dictBase + prefixStartIndex;
@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
/* Search Loop */ /* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
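The recurring change in these hunks clamps the window's lower bound so that no match reaches farther back than 1 << windowLog, and when the clamp swallows the entire extDict segment (prefixStartIndex == dictStartIndex) the block falls back to the regular variant. The same clamp reappears below in the fast, btlazy2/lazy and opt match finders. A minimal sketch of the clamp, using a hypothetical helper name (clampWindowLow is not an upstream function):

/* Hypothetical helper mirroring the clamp: the lowest reachable index is
 * either the first valid one, or endIndex minus the maximum match
 * distance, whichever is larger. */
static unsigned clampWindowLow(unsigned endIndex, unsigned lowestValid, unsigned windowLog)
{
    unsigned const maxDistance = 1u << windowLog;
    return (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance
                                                  : lowestValid;
}
/* e.g. endIndex=5000000, lowestValid=0, windowLog=22 (4 MiB window)
 *      -> windowLow = 5000000 - 4194304 = 805696 */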
@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else { } else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
} }
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else { } else {
ip += ((ip-anchor) >> kSearchStrength) + 1; ip += ((ip-anchor) >> kSearchStrength) + 1;
continue; continue;
} } } }
/* found a match : store it */ /* move to next sequence start */
ip += mLength; ip += mLength;
anchor = ip; anchor = ip;
if (ip <= ilimit) { if (ip <= ilimit) {
/* Fill Table */ /* Complementary insertion */
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* done after iLimit test, as candidates could be > iend-8 */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; { U32 const indexToInsert = current+2;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */ /* check immediate repcode */
while (ip <= ilimit) { while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base); U32 const current2 = (U32)(ip-base);
@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2; rep[1] = offset_2;
/* Return the last literals size */ /* Return the last literals size */
return iend - anchor; return (size_t)(iend - anchor);
} }


@ -13,7 +13,8 @@
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm) const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } } } } } }
} }
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic( size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart; const BYTE* ip0 = istart;
const BYTE* ip1; const BYTE* ip1;
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */ /* Return the last literals size */
return iend - anchor; return (size_t)(iend - anchor);
} }
@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog; const U32 dictHLog = dictCParams->hashLog;
/* otherwise, we would get index underflow when translating a dict index /* if a dictionary is still attached, it necessarily means that
* into a local index */ * it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
/* ensure there will be no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */ /* init */
@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) { } else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash]; U32 const dictMatchIndex = dictHashTable[dictHash];
@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */ } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
} else if (MEM_read32(match) != MEM_read32(ip)) { } else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */ /* it's not a match, and we're not going to check the dictionary */
@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* match found */ /* match found */
@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */ /* Return the last literals size */
return iend - anchor; return (size_t)(iend - anchor);
} }
size_t ZSTD_compressBlock_fast_dictMatchState( size_t ZSTD_compressBlock_fast_dictMatchState(
@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src; const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart; const BYTE* ip = istart;
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 dictStartIndex = ms->window.lowLimit; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 prefixStartIndex = ms->window.dictLimit; const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex; const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8; const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
/* Search Loop */ /* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls); const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else { } else {
if ( (matchIndex < dictStartIndex) || if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) { (MEM_read32(match) != MEM_read32(ip)) ) {
@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
offset = current - matchIndex; offset = current - matchIndex;
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} } } }
/* found a match : store it */ /* found a match : store it */
@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2; rep[1] = offset_2;
/* Return the last literals size */ /* Return the last literals size */
return iend - anchor; return (size_t)(iend - anchor);
} }


@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1; U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */ U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit; U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow); current, dictLimit, windowLow);
@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base); U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit; U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
U32* const bt = ms->chainTable; U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1; U32 const btLog = cParams->chainLog - 1;
@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit; const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit; const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base); const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowValid = ms->window.lowLimit;
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
const U32 minChain = current > chainSize ? current - chainSize : 0; const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog; U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1; size_t ml=4-1;
@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
/* init */ /* init */
ip += (dictAndPrefixLength == 0); ip += (dictAndPrefixLength == 0);
ms->nextToUpdate3 = ms->nextToUpdate;
if (dictMode == ZSTD_noDict) { if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest); U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 offset_1 = rep[0], offset_2 = rep[1]; U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */ /* init */
ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart); ip += (ip == prefixStart);
/* Match Loop */ /* Match Loop */


@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog; U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow( U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src); &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
} }
/* 2. We enforce the maximum offset allowed. /* 2. We enforce the maximum offset allowed.


@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
* to provide a cost which is directly comparable to a match ending at same position */ * to provide a cost which is directly comparable to a match ending at same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
{ {
if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
/* dynamic statistics */ /* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength); { U32 const llCode = ZSTD_LLcode(litLength);
int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel); - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
#if 1 #if 1
return contribution; return contribution;
#else #else
@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
const optState_t* const optPtr, const optState_t* const optPtr,
int optLevel) int optLevel)
{ {
int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel); + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
return contribution; return contribution;
} }
@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded) /* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */ Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip) static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{ {
U32* const hashTable3 = ms->hashTable3; U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3; U32 const hashLog3 = ms->hashLog3;
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
U32 idx = ms->nextToUpdate3; U32 idx = *nextToUpdate3;
U32 const target = ms->nextToUpdate3 = (U32)(ip - base); U32 const target = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
assert(hashLog3 > 0); assert(hashLog3 > 0);
@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
idx++; idx++;
} }
*nextToUpdate3 = target;
return hashTable3[hash3]; return hashTable3[hash3];
} }
@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
} } } }
*smallerPtr = *largerPtr = 0; *smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ { U32 positions = 0;
assert(matchEndIdx > current + 8); if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
return matchEndIdx - (current + 8); assert(matchEndIdx > current + 8);
return MAX(positions, matchEndIdx - (current + 8));
}
} }
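The restructured return value guarantees that the tree-update loop below always advances: the function now skips at least matchEndIdx - (current + 8) positions even when the bestLength speed optimization applies. A worked check, with values assumed purely for illustration:

#include <assert.h>

/* positions = MIN(192, bestLength - 384) when bestLength > 384,
 * and the function now returns MAX(positions, matchEndIdx - (current + 8)). */
int main(void)
{
    unsigned const bestLength = 500, current = 1000, matchEndIdx = 1308;
    unsigned positions = 0;
    if (bestLength > 384)
        positions = (192 < bestLength - 384) ? 192 : bestLength - 384;  /* MIN */
    assert(positions == 116);
    {   unsigned const fwd = matchEndIdx - (current + 8);               /* 300 */
        unsigned const ret = (positions > fwd) ? positions : fwd;       /* MAX */
        assert(ret == 300);  /* idx += ret always moves the loop forward */
    }
    return 0;
}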
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode); idx, target, dictMode);
while(idx < target) while(idx < target) {
idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
ms->nextToUpdate = target; ms->nextToUpdate = target;
} }
@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches ( U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM], const U32 rep[ZSTD_REP_NUM],
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
ZSTD_match_t* matches,
const U32 lengthToBeat, const U32 lengthToBeat,
U32 const mls /* template */) U32 const mls /* template */)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const maxDistance = 1U << cParams->windowLog;
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base); U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog; U32 const hashLog = cParams->hashLog;
@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit; U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit; const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask; U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowLow = ms->window.lowLimit; U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
U32 const matchLow = windowLow ? windowLow : 1; U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask); U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1; U32* largerPtr = bt + 2*(current&btMask) + 1;
@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* HC3 match finder */ /* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) { if ((mls == 3) /*static*/ && (bestLength < mls)) {
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
if ((matchIndex3 >= matchLow) if ((matchIndex3 >= matchLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen; size_t mlen;
@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
(ip+mlen == iLimit) ) { /* best possible length */ (ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */ ms->nextToUpdate = current+1; /* skip insertion */
return 1; return 1;
} } } }
}
}
/* no dictMatchState lookup: dicts don't have a populated HC3 table */ /* no dictMatchState lookup: dicts don't have a populated HC3 table */
} }
@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM], U32 const ll0, const U32 rep[ZSTD_REP_NUM],
ZSTD_match_t* matches, U32 const lengthToBeat) U32 const ll0,
U32 const lengthToBeat)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const matchLengthSearch = cParams->minMatch; U32 const matchLengthSearch = cParams->minMatch;
@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch) switch(matchLengthSearch)
{ {
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3); case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
default : default :
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4); case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5); case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
case 7 : case 7 :
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6); case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
} }
} }
@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
U32 nextToUpdate3 = ms->nextToUpdate;
ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_match_t* const matches = optStatePtr->matchTable;
@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
assert(optLevel <= 2); assert(optLevel <= 2);
ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
ip += (ip==prefixStart); ip += (ip==prefixStart);
@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */ /* find first match */
{ U32 const litlen = (U32)(ip - anchor); { U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen; U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch); U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
if (!nbMatches) { ip++; continue; } if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */ /* initialize opt[0] */
@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price; U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch); U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb; U32 matchNb;
if (!nbMatches) { if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur); DEBUGLOG(7, "rPos:%u : no match found", cur);
@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
} /* while (ip < ilimit) */ } /* while (ip < ilimit) */
/* Return the last literals size */ /* Return the last literals size */
return iend - anchor; return (size_t)(iend - anchor);
} }
@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ms->window.dictLimit += (U32)srcSize; ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit; ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit; ms->nextToUpdate = ms->window.dictLimit;
ms->nextToUpdate3 = ms->window.dictLimit;
/* re-inforce weight of collected statistics */ /* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt); ZSTD_upscaleStats(&ms->opt);


@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced); assert(flushed <= produced);
assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed; toFlush = produced - flushed;
if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { /* if toFlush==0, nothing is available to flush.
/* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ * However, jobID is expected to still be active:
* if jobID was already completed and fully flushed,
* ZSTDMT_flushProduced() should have already moved onto next job.
* Therefore, some input has not yet been consumed. */
if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size); assert(jobPtr->consumed < jobPtr->src.size);
} }
} }
@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{ {
if (params.ldmParams.enableLdm) unsigned jobLog;
if (params.ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized. /* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize * In which case, it's preferable to determine the jobSize
* based on chainLog instead. */ * based on chainLog instead. */
return MAX(21, params.cParams.chainLog + 4); jobLog = MAX(21, params.cParams.chainLog + 4);
return MAX(20, params.cParams.windowLog + 2); } else {
jobLog = MAX(20, params.cParams.windowLog + 2);
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
} }
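ZSTDMT_JOBLOG_MAX (added below in this commit) now bounds the job log in every branch. A sketch of the computation with MAX/MIN expanded by hand, taking the 64-bit bound of 30:

static unsigned targetJobLog(int enableLdm, unsigned chainLog, unsigned windowLog)
{
    unsigned const jobLog = enableLdm
        ? (chainLog + 4 > 21 ? chainLog + 4 : 21)     /* MAX(21, chainLog+4)  */
        : (windowLog + 2 > 20 ? windowLog + 2 : 20);  /* MAX(20, windowLog+2) */
    return jobLog < 30 ? jobLog : 30;                  /* MIN(jobLog, 30)      */
}
/* e.g. no LDM, windowLog=27 -> 29 (512 MiB jobs);
 *      LDM, chainLog=28 -> 32, clamped to 30 (1 GiB jobs). */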
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog; - overlapRLog;
} }
assert(0 <= ovLog && ovLog <= 30); assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog); DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog; return (ovLog==0) ? 0 : (size_t)1 << ovLog;
@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) { if (mtctx->singleBlockingThread) {
@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
if (mtctx->targetSectionSize == 0) { if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
} }
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
if (params.rsyncable) { if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */ /* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);


@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN #ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB) # define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif #endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))


@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported); frameParameter_unsupported);
{
return skippableHeaderSize + sizeU32; size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
return skippableSize;
}
} }
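The new RETURN_ERROR_IF rejects skippable frames whose declared content would overrun the input, which is what lets the callers below assert skippableSize <= srcSize. A standalone sketch of the same validation, assuming a little-endian host (upstream reads through MEM_readLE32):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* A skippable frame is a 4-byte LE magic in 0x184D2A50..0x184D2A5F,
 * a 4-byte LE content size, then that many opaque bytes.
 * Returns 0 on any bounds problem. */
static size_t skippableFrameSize(const uint8_t* src, size_t srcSize)
{
    uint32_t magic, contentSize;
    if (srcSize < 8) return 0;
    memcpy(&magic, src, 4);
    if ((magic & 0xFFFFFFF0u) != 0x184D2A50u) return 0;
    memcpy(&contentSize, src + 4, 4);
    if ((uint32_t)(contentSize + 8) < contentSize) return 0;  /* U32 overflow */
    if ((size_t)contentSize + 8 > srcSize) return 0;          /* the new check */
    return (size_t)contentSize + 8;
}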
/** ZSTD_findDecompressedSize() : /** ZSTD_findDecompressedSize() :
@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize); size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize)) if (ZSTD_isError(skippableSize)) {
return skippableSize;
if (srcSize < skippableSize) {
return ZSTD_CONTENTSIZE_ERROR; return ZSTD_CONTENTSIZE_ERROR;
} }
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize; src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize; srcSize -= skippableSize;
@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
frameSizeInfo.compressedSize <= srcSize);
return frameSizeInfo; return frameSizeInfo;
} else { } else {
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
return frameSizeInfo.compressedSize; return frameSizeInfo.compressedSize;
} }
/** ZSTD_decompressBound() : /** ZSTD_decompressBound() :
* compatible with legacy mode * compatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippable frame * `src` must point to the start of a ZSTD frame or a skippable frame
@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ZSTD_CONTENTSIZE_ERROR; return ZSTD_CONTENTSIZE_ERROR;
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize; src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize; srcSize -= compressedSize;
bound += decompressedBound; bound += decompressedBound;
@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
(unsigned)magicNumber, ZSTD_MAGICNUMBER); (unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize); size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize)) FORWARD_IF_ERROR(skippableSize);
return skippableSize; assert(skippableSize <= srcSize);
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
src = (const BYTE *)src + skippableSize; src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize; srcSize -= skippableSize;


@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
*nbSeqPtr = nbSeq; *nbSeqPtr = nbSeq;
/* FSE table descriptors */ /* FSE table descriptors */
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
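Only one byte is mandatory at this point because it packs all three table modes: literal lengths in bits 7-6, offsets in bits 5-4, match lengths in bits 3-2, bits 1-0 reserved (values follow upstream's symbolEncodingType_e ordering: set_basic=0, set_rle=1, set_compressed=2, set_repeat=3). A small decoding sketch:

/* E.g. 0x64 == 0b01100100 gives LLtype=1 (set_rle),
 * OFtype=2 (set_compressed), MLtype=1 (set_rle). */
static void decodeSeqModes(unsigned char b,
                           unsigned* LLtype, unsigned* OFtype, unsigned* MLtype)
{
    *LLtype = b >> 6;        /* bits 7-6 */
    *OFtype = (b >> 4) & 3;  /* bits 5-4 */
    *MLtype = (b >> 2) & 3;  /* bits 3-2 ; bits 1-0 are reserved */
}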
@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* copy Literals */ /* copy Literals */
ZSTD_copy8(op, *litPtr);
if (sequence.litLength > 8) if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
op = oLitEnd; op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */ *litPtr = iLitEnd; /* update for next sequence */
@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) { if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) { if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op); ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op; match += oend_w - op;
op = oend_w; op = oend_w;
} }
while (op < oMatchEnd) *op++ = *match++; while (op < oMatchEnd) *op++ = *match++;
} else { } else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
} }
return sequenceLength; return sequenceLength;
} }
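The extra ZSTD_overlap_e argument distinguishes literal copies (never overlapping) from match copies whose source may precede the destination by only a few bytes: the former can use 16-byte strides, the latter stay on 8-byte strides. A sketch of the idea, not the exact upstream implementation; it assumes the caller has already widened any offset below 8, as ZSTD_execSequence does before taking this path:

#include <stddef.h>
#include <string.h>

typedef enum { no_overlap, overlap_src_before_dst } overlap_e;

/* Copy in fixed strides, allowed to overwrite up to stride-1 bytes past
 * 'length'; callers reserve WILDCOPY_OVERLENGTH bytes of slack for this.
 * When src precedes dst by less than 16 bytes, 16-byte strides would read
 * bytes written by the previous iteration, so that case keeps 8-byte
 * steps (safe once dst - src >= 8). */
static void wildcopy_sketch(unsigned char* dst, const unsigned char* src,
                            ptrdiff_t length, overlap_e ovtype)
{
    size_t const stride = (ovtype == no_overlap || dst - src >= 16) ? 16 : 8;
    unsigned char* const oend = dst + length;
    do {
        memcpy(dst, src, stride);
        dst += stride; src += stride;
    } while (dst < oend);
}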
@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
/* copy Literals */ /* copy Literals */
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
if (sequence.litLength > 8) if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
op = oLitEnd; op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */ *litPtr = iLitEnd; /* update for next sequence */
@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) { if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) { if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op); ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op; match += oend_w - op;
op = oend_w; op = oend_w;
} }
while (op < oMatchEnd) *op++ = *match++; while (op < oMatchEnd) *op++ = *match++;
} else { } else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
} }
return sequenceLength; return sequenceLength;
} }
@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
} }
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
ZSTD_STATIC_ASSERT(
BIT_DStream_unfinished < BIT_DStream_completed &&
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
BIT_DStream_completed < BIT_DStream_overflow);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--; nbSeq--;
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* check if reached exact end */ /* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
RETURN_ERROR_IF(nbSeq, corruption_detected); RETURN_ERROR_IF(nbSeq, corruption_detected);
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
/* save reps for next block */ /* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
} }
@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t static TARGET_ATTRIBUTE("bmi2") size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,


@ -71,7 +71,7 @@ extern "C" {
/*------ Version ------*/ /*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4 #define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 0 #define ZSTD_VERSION_RELEASE 1
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
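For reference, the packed value for 1.4.1 is 1*100*100 + 4*100 + 1 = 10401, so a compile-time minimum-version guard can be written as:

#include <zstd.h>
#if ZSTD_VERSION_NUMBER < 10401   /* 1*100*100 + 4*100 + 1 */
# error "this code expects zstd >= 1.4.1"
#endif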
@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/*************************************** /* *************************************
* Default constant * Default constant
***************************************/ ***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT #ifndef ZSTD_CLEVEL_DEFAULT
# define ZSTD_CLEVEL_DEFAULT 3 # define ZSTD_CLEVEL_DEFAULT 3
#endif #endif
/*************************************** /* *************************************
* Constants * Constants
***************************************/ ***************************************/
/* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */ /* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
***************************************/ ***************************************/
/*= Compression context /*= Compression context
* When compressing many times, * When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation. * it is recommended to allocate a context just once,
* and re-use it for each successive compression operation.
* This will make the workload friendlier for the system's memory. * This will make the workload friendlier for the system's memory.
* Use one context per thread for parallel execution in multi-threaded environments. */ * Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
* use a different context per thread for parallel execution.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx; typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
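A minimal sketch of that reuse pattern (error handling trimmed; the 64 KiB dst capacity is an arbitrary illustration, real code would size it with ZSTD_compressBound()):

#include <stdio.h>
#include <zstd.h>

/* One context, many compressions, a single free at the end. */
static void compressMany(const void* const srcs[], const size_t srcSizes[], size_t n)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    char dst[1 << 16];
    size_t i;
    if (cctx == NULL) return;
    for (i = 0; i < n; i++) {
        size_t const r = ZSTD_compressCCtx(cctx, dst, sizeof(dst),
                                           srcs[i], srcSizes[i], ZSTD_CLEVEL_DEFAULT);
        if (ZSTD_isError(r)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(r)); break; }
    }
    ZSTD_freeCCtx(cctx);
}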
@ -380,6 +385,7 @@ typedef enum {
* ZSTD_c_forceMaxWindow * ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict * ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode * ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly; * note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change. * also, the enums values themselves are unstable and can still change.
@ -389,6 +395,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000, ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter; } ZSTD_cParameter;
typedef struct { typedef struct {
@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_inBuffer* input, ZSTD_inBuffer* input,
ZSTD_EndDirective endOp); ZSTD_EndDirective endOp);
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
/******************************************************************************* /* These buffer sizes are softly recommended.
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
* ZSTD_compressStream2(). It is redundant, but is still fully supported. * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
*
* However, note that these recommendations are from the perspective of a C caller program.
* If the streaming interface is invoked from some other language,
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
* a major performance rule is to reduce crossings of such an interface to an absolute minimum.
* It's not rare that more time ends up being spent crossing the interface than on compression itself.
* In such cases, prefer using large buffers, as large as practical,
* for both input and output, to reduce the nb of roundtrips.
*/
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guaranteed to successfully flush at least one complete compressed block. */
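For illustration, a sketch of a C streaming loop sized by these two hints, following ZSTD_compressStream2()'s contract that a zero return after ZSTD_e_end means the frame is fully flushed (buffer allocation and error reporting trimmed):

#include <stdio.h>
#include <zstd.h>

/* inBuf/outBuf are assumed allocated at ZSTD_CStreamInSize() /
 * ZSTD_CStreamOutSize() by the caller. Returns 0 on success. */
static int streamFile(ZSTD_CCtx* cctx, FILE* fin, FILE* fout,
                      void* inBuf, size_t inCap, void* outBuf, size_t outCap)
{
    for (;;) {
        size_t const readSz = fread(inBuf, 1, inCap, fin);
        int const lastChunk = (readSz < inCap);
        ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
        ZSTD_inBuffer input = { inBuf, readSz, 0 };
        int finished = 0;
        while (!finished) {
            ZSTD_outBuffer output = { outBuf, outCap, 0 };
            size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
            if (ZSTD_isError(remaining)) return 1;
            fwrite(outBuf, 1, output.pos, fout);
            /* flush fully on the last chunk; otherwise stop once input is consumed */
            finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
        }
        if (lastChunk) return 0;
    }
}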
/* *****************************************************************************
* The following is a legacy streaming API.
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the * Advanced parameters and dictionary compression can only be used through the
* new API. * new API.
******************************************************************************/ ******************************************************************************/
/** /*!
* Equivalent to: * Equivalent to:
* *
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/ */
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
/** /*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2() * the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the number of bytes left to flush (if non-zero and not an error). * returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/ */
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif /* ZSTD_H_235446 */ #endif /* ZSTD_H_235446 */
/**************************************************************************************** /* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS * ADVANCED AND EXPERIMENTAL FUNCTIONS
**************************************************************************************** ****************************************************************************************
* The definitions in the following section are considered experimental. * The definitions in the following section are considered experimental.
@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MIN 0 #define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
/* internal */ /* internal */
#define ZSTD_HASHLOG3_MAX 17 #define ZSTD_HASHLOG3_MAX 17
@ -1162,7 +1189,7 @@ typedef enum {
* however it does mean that all frame data must be present and valid. */ * however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
/** ZSTD_decompressBound() : /*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series * `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`) * (i.e. there should be a frame boundary at `src + srcSize`)
@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/ */
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
/* Tries to keep the compressed block size around targetCBlockSize.
 * No target when targetCBlockSize == 0.
 * There is no guarantee on the compressed block size (default: 0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
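A usage sketch for the new parameter; it is experimental, so ZSTD_STATIC_LINKING_ONLY must be defined before including zstd.h, and the 1024-byte target here is an arbitrary value within [ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX]:

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static size_t setTargetBlockSize(ZSTD_CCtx* cctx)
{
    /* aim for roughly 1 KB compressed blocks; 0 disables the target */
    return ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1024);
}
/* check the result with ZSTD_isError(), as with any other parameter */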
/*! ZSTD_CCtx_getParameter() : /*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter, * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value. * and store it into int* value.
@ -1843,7 +1875,7 @@ typedef struct {
unsigned checksumFlag; unsigned checksumFlag;
} ZSTD_frameHeader; } ZSTD_frameHeader;
/** ZSTD_getFrameHeader() : /*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`. * decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled, * @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount, * >0, `srcSize` is too small, value is wanted `srcSize` amount,
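A sketch of the three-way return contract described above (probeFrame is a hypothetical wrapper; ZSTD_frameHeader lives in the static-linking section):

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static int probeFrame(const void* src, size_t srcSize, ZSTD_frameHeader* zfh)
{
    size_t const r = ZSTD_getFrameHeader(zfh, src, srcSize);
    if (ZSTD_isError(r)) return -1;  /* not a valid frame prefix */
    if (r > 0) return (int)r;        /* need at least r bytes; retry with more input */
    return 0;                        /* zfh filled (frameContentSize, windowSize, ...) */
}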