/* Copyright 2013 Google Inc. All Rights Reserved. | |
Distributed under MIT license. | |
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
*/ | |
/* Block split point selection utilities. */ | |
#include "./block_splitter.h" | |
#include <assert.h> | |
#include <string.h> /* memcpy, memset */ | |
#include "./bit_cost.h" | |
#include "./cluster.h" | |
#include "./command.h" | |
#include "./fast_log.h" | |
#include "./histogram.h" | |
#include "./memory.h" | |
#include "./port.h" | |
#include "./quality.h" | |
#if defined(__cplusplus) || defined(c_plusplus) | |
extern "C" { | |
#endif | |
static const size_t kMaxLiteralHistograms = 100; | |
static const size_t kMaxCommandHistograms = 50; | |
static const double kLiteralBlockSwitchCost = 28.1; | |
static const double kCommandBlockSwitchCost = 13.5; | |
static const double kDistanceBlockSwitchCost = 14.6; | |
static const size_t kLiteralStrideLength = 70; | |
static const size_t kCommandStrideLength = 40; | |
static const size_t kSymbolsPerLiteralHistogram = 544; | |
static const size_t kSymbolsPerCommandHistogram = 530; | |
static const size_t kSymbolsPerDistanceHistogram = 544; | |
static const size_t kMinLengthForBlockSplitting = 128; | |
static const size_t kIterMulForRefining = 2; | |
static const size_t kMinItersForRefining = 100; | |
static size_t CountLiterals(const Command* cmds, const size_t num_commands) { | |
/* Count how many we have. */ | |
size_t total_length = 0; | |
size_t i; | |
for (i = 0; i < num_commands; ++i) { | |
total_length += cmds[i].insert_len_; | |
} | |
return total_length; | |
} | |
static void CopyLiteralsToByteArray(const Command* cmds, | |
const size_t num_commands, | |
const uint8_t* data, | |
const size_t offset, | |
const size_t mask, | |
uint8_t* literals) { | |
size_t pos = 0; | |
size_t from_pos = offset & mask; | |
size_t i; | |
for (i = 0; i < num_commands; ++i) { | |
size_t insert_len = cmds[i].insert_len_; | |
if (from_pos + insert_len > mask) { | |
size_t head_size = mask + 1 - from_pos; | |
memcpy(literals + pos, data + from_pos, head_size); | |
from_pos = 0; | |
pos += head_size; | |
insert_len -= head_size; | |
} | |
if (insert_len > 0) { | |
memcpy(literals + pos, data + from_pos, insert_len); | |
pos += insert_len; | |
} | |
from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask; | |
} | |
} | |
static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) { | |
*seed *= 16807U; | |
if (*seed == 0) { | |
*seed = 1; | |
} | |
return *seed; | |
} | |
static BROTLI_INLINE double BitCost(size_t count) { | |
return count == 0 ? -2.0 : FastLog2(count); | |
} | |
#define HISTOGRAMS_PER_BATCH 64 | |
#define CLUSTERS_PER_BATCH 16 | |
#define FN(X) X ## Literal | |
#define DataType uint8_t | |
/* NOLINTNEXTLINE(build/include) */ | |
#include "./block_splitter_inc.h" | |
#undef DataType | |
#undef FN | |
#define FN(X) X ## Command | |
#define DataType uint16_t | |
/* NOLINTNEXTLINE(build/include) */ | |
#include "./block_splitter_inc.h" | |
#undef FN | |
#define FN(X) X ## Distance | |
/* NOLINTNEXTLINE(build/include) */ | |
#include "./block_splitter_inc.h" | |
#undef DataType | |
#undef FN | |
void BrotliInitBlockSplit(BlockSplit* self) { | |
self->num_types = 0; | |
self->num_blocks = 0; | |
self->types = 0; | |
self->lengths = 0; | |
self->types_alloc_size = 0; | |
self->lengths_alloc_size = 0; | |
} | |
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) { | |
BROTLI_FREE(m, self->types); | |
BROTLI_FREE(m, self->lengths); | |
} | |
void BrotliSplitBlock(MemoryManager* m, | |
const Command* cmds, | |
const size_t num_commands, | |
const uint8_t* data, | |
const size_t pos, | |
const size_t mask, | |
const BrotliEncoderParams* params, | |
BlockSplit* literal_split, | |
BlockSplit* insert_and_copy_split, | |
BlockSplit* dist_split) { | |
{ | |
size_t literals_count = CountLiterals(cmds, num_commands); | |
uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count); | |
if (BROTLI_IS_OOM(m)) return; | |
/* Create a continuous array of literals. */ | |
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals); | |
/* Create the block split on the array of literals. | |
Literal histograms have alphabet size 256. */ | |
SplitByteVectorLiteral( | |
m, literals, literals_count, | |
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, | |
kLiteralStrideLength, kLiteralBlockSwitchCost, params, | |
literal_split); | |
if (BROTLI_IS_OOM(m)) return; | |
BROTLI_FREE(m, literals); | |
} | |
{ | |
/* Compute prefix codes for commands. */ | |
uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands); | |
size_t i; | |
if (BROTLI_IS_OOM(m)) return; | |
for (i = 0; i < num_commands; ++i) { | |
insert_and_copy_codes[i] = cmds[i].cmd_prefix_; | |
} | |
/* Create the block split on the array of command prefixes. */ | |
SplitByteVectorCommand( | |
m, insert_and_copy_codes, num_commands, | |
kSymbolsPerCommandHistogram, kMaxCommandHistograms, | |
kCommandStrideLength, kCommandBlockSwitchCost, params, | |
insert_and_copy_split); | |
if (BROTLI_IS_OOM(m)) return; | |
/* TODO: reuse for distances? */ | |
BROTLI_FREE(m, insert_and_copy_codes); | |
} | |
{ | |
/* Create a continuous array of distance prefixes. */ | |
uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands); | |
size_t j = 0; | |
size_t i; | |
if (BROTLI_IS_OOM(m)) return; | |
for (i = 0; i < num_commands; ++i) { | |
const Command* cmd = &cmds[i]; | |
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { | |
distance_prefixes[j++] = cmd->dist_prefix_; | |
} | |
} | |
/* Create the block split on the array of distance prefixes. */ | |
SplitByteVectorDistance( | |
m, distance_prefixes, j, | |
kSymbolsPerDistanceHistogram, kMaxCommandHistograms, | |
kCommandStrideLength, kDistanceBlockSwitchCost, params, | |
dist_split); | |
if (BROTLI_IS_OOM(m)) return; | |
BROTLI_FREE(m, distance_prefixes); | |
} | |
} | |
#if defined(__cplusplus) || defined(c_plusplus) | |
} /* extern "C" */ | |
#endif |