4a711bea63
Change-Id: I6968b0f0401f3f42dc55a0f4938a7e12a3a55ae7
1096 lines
30 KiB
C
1096 lines
30 KiB
C
/* $NetBSD: blocksort.c,v 1.1.1.2 2012/05/07 00:41:45 wiz Exp $ */
|
|
|
|
|
|
/*-------------------------------------------------------------*/
|
|
/*--- Block sorting machinery ---*/
|
|
/*--- blocksort.c ---*/
|
|
/*-------------------------------------------------------------*/
|
|
|
|
/* ------------------------------------------------------------------
|
|
This file is part of bzip2/libbzip2, a program and library for
|
|
lossless, block-sorting data compression.
|
|
|
|
bzip2/libbzip2 version 1.0.6 of 6 September 2010
|
|
Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
|
|
|
|
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
|
README file.
|
|
|
|
This program is released under the terms of the license contained
|
|
in the file LICENSE.
|
|
------------------------------------------------------------------ */
|
|
|
|
|
|
#include "bzlib_private.h"
|
|
|
|
/*---------------------------------------------*/
|
|
/*--- Fallback O(N log(N)^2) sorting ---*/
|
|
/*--- algorithm, for repetitive blocks ---*/
|
|
/*---------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
static
|
|
__inline__
|
|
void fallbackSimpleSort ( UInt32* fmap,
|
|
UInt32* eclass,
|
|
Int32 lo,
|
|
Int32 hi )
|
|
{
|
|
Int32 i, j, tmp;
|
|
UInt32 ec_tmp;
|
|
|
|
if (lo == hi) return;
|
|
|
|
if (hi - lo > 3) {
|
|
for ( i = hi-4; i >= lo; i-- ) {
|
|
tmp = fmap[i];
|
|
ec_tmp = eclass[tmp];
|
|
for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
|
|
fmap[j-4] = fmap[j];
|
|
fmap[j-4] = tmp;
|
|
}
|
|
}
|
|
|
|
for ( i = hi-1; i >= lo; i-- ) {
|
|
tmp = fmap[i];
|
|
ec_tmp = eclass[tmp];
|
|
for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
|
|
fmap[j-1] = fmap[j];
|
|
fmap[j-1] = tmp;
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
#define fswap(zz1, zz2) \
|
|
{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
|
|
|
|
#define fvswap(zzp1, zzp2, zzn) \
|
|
{ \
|
|
Int32 yyp1 = (zzp1); \
|
|
Int32 yyp2 = (zzp2); \
|
|
Int32 yyn = (zzn); \
|
|
while (yyn > 0) { \
|
|
fswap(fmap[yyp1], fmap[yyp2]); \
|
|
yyp1++; yyp2++; yyn--; \
|
|
} \
|
|
}
|
|
|
|
|
|
#define fmin(a,b) ((a) < (b)) ? (a) : (b)
|
|
|
|
#define fpush(lz,hz) { stackLo[sp] = lz; \
|
|
stackHi[sp] = hz; \
|
|
sp++; }
|
|
|
|
#define fpop(lz,hz) { sp--; \
|
|
lz = stackLo[sp]; \
|
|
hz = stackHi[sp]; }
|
|
|
|
#define FALLBACK_QSORT_SMALL_THRESH 10
|
|
#define FALLBACK_QSORT_STACK_SIZE 100
|
|
|
|
|
|
static
|
|
void fallbackQSort3 ( UInt32* fmap,
|
|
UInt32* eclass,
|
|
Int32 loSt,
|
|
Int32 hiSt )
|
|
{
|
|
Int32 unLo, unHi, ltLo, gtHi, n, m;
|
|
Int32 sp, lo, hi;
|
|
UInt32 med, r, r3;
|
|
Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
|
|
Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
|
|
|
|
r = 0;
|
|
|
|
sp = 0;
|
|
fpush ( loSt, hiSt );
|
|
|
|
while (sp > 0) {
|
|
|
|
AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
|
|
|
|
fpop ( lo, hi );
|
|
if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
|
|
fallbackSimpleSort ( fmap, eclass, lo, hi );
|
|
continue;
|
|
}
|
|
|
|
/* Random partitioning. Median of 3 sometimes fails to
|
|
avoid bad cases. Median of 9 seems to help but
|
|
looks rather expensive. This too seems to work but
|
|
is cheaper. Guidance for the magic constants
|
|
7621 and 32768 is taken from Sedgewick's algorithms
|
|
book, chapter 35.
|
|
*/
|
|
r = ((r * 7621) + 1) % 32768;
|
|
r3 = r % 3;
|
|
if (r3 == 0) med = eclass[fmap[lo]]; else
|
|
if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
|
|
med = eclass[fmap[hi]];
|
|
|
|
unLo = ltLo = lo;
|
|
unHi = gtHi = hi;
|
|
|
|
while (1) {
|
|
while (1) {
|
|
if (unLo > unHi) break;
|
|
n = (Int32)eclass[fmap[unLo]] - (Int32)med;
|
|
if (n == 0) {
|
|
fswap(fmap[unLo], fmap[ltLo]);
|
|
ltLo++; unLo++;
|
|
continue;
|
|
};
|
|
if (n > 0) break;
|
|
unLo++;
|
|
}
|
|
while (1) {
|
|
if (unLo > unHi) break;
|
|
n = (Int32)eclass[fmap[unHi]] - (Int32)med;
|
|
if (n == 0) {
|
|
fswap(fmap[unHi], fmap[gtHi]);
|
|
gtHi--; unHi--;
|
|
continue;
|
|
};
|
|
if (n < 0) break;
|
|
unHi--;
|
|
}
|
|
if (unLo > unHi) break;
|
|
fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
|
|
}
|
|
|
|
AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
|
|
|
|
if (gtHi < ltLo) continue;
|
|
|
|
n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
|
|
m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
|
|
|
|
n = lo + unLo - ltLo - 1;
|
|
m = hi - (gtHi - unHi) + 1;
|
|
|
|
if (n - lo > hi - m) {
|
|
fpush ( lo, n );
|
|
fpush ( m, hi );
|
|
} else {
|
|
fpush ( m, hi );
|
|
fpush ( lo, n );
|
|
}
|
|
}
|
|
}
|
|
|
|
#undef fmin
|
|
#undef fpush
|
|
#undef fpop
|
|
#undef fswap
|
|
#undef fvswap
|
|
#undef FALLBACK_QSORT_SMALL_THRESH
|
|
#undef FALLBACK_QSORT_STACK_SIZE
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/* Pre:
|
|
nblock > 0
|
|
eclass exists for [0 .. nblock-1]
|
|
((UChar*)eclass) [0 .. nblock-1] holds block
|
|
ptr exists for [0 .. nblock-1]
|
|
|
|
Post:
|
|
((UChar*)eclass) [0 .. nblock-1] holds block
|
|
All other areas of eclass destroyed
|
|
fmap [0 .. nblock-1] holds sorted order
|
|
bhtab [ 0 .. 2+(nblock/32) ] destroyed
|
|
*/
|
|
|
|
#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
|
|
#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
|
|
#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
|
|
#define WORD_BH(zz) bhtab[(zz) >> 5]
|
|
#define UNALIGNED_BH(zz) ((zz) & 0x01f)
|
|
|
|
static
|
|
void fallbackSort ( UInt32* fmap,
|
|
UInt32* eclass,
|
|
UInt32* bhtab,
|
|
Int32 nblock,
|
|
Int32 verb )
|
|
{
|
|
Int32 ftab[257];
|
|
Int32 ftabCopy[256];
|
|
Int32 H, i, j, k, l, r, cc, cc1;
|
|
Int32 nNotDone;
|
|
Int32 nBhtab;
|
|
UChar* eclass8 = (UChar*)eclass;
|
|
|
|
/*--
|
|
Initial 1-char radix sort to generate
|
|
initial fmap and initial BH bits.
|
|
--*/
|
|
if (verb >= 4)
|
|
VPrintf0 ( " bucket sorting ...\n" );
|
|
for (i = 0; i < 257; i++) ftab[i] = 0;
|
|
for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
|
|
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
|
|
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
|
|
|
|
for (i = 0; i < nblock; i++) {
|
|
j = eclass8[i];
|
|
k = ftab[j] - 1;
|
|
ftab[j] = k;
|
|
fmap[k] = i;
|
|
}
|
|
|
|
nBhtab = 2 + (nblock / 32);
|
|
for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
|
|
for (i = 0; i < 256; i++) SET_BH(ftab[i]);
|
|
|
|
/*--
|
|
Inductively refine the buckets. Kind-of an
|
|
"exponential radix sort" (!), inspired by the
|
|
Manber-Myers suffix array construction algorithm.
|
|
--*/
|
|
|
|
/*-- set sentinel bits for block-end detection --*/
|
|
for (i = 0; i < 32; i++) {
|
|
SET_BH(nblock + 2*i);
|
|
CLEAR_BH(nblock + 2*i + 1);
|
|
}
|
|
|
|
/*-- the log(N) loop --*/
|
|
H = 1;
|
|
while (1) {
|
|
|
|
if (verb >= 4)
|
|
VPrintf1 ( " depth %6d has ", H );
|
|
|
|
j = 0;
|
|
for (i = 0; i < nblock; i++) {
|
|
if (ISSET_BH(i)) j = i;
|
|
k = fmap[i] - H; if (k < 0) k += nblock;
|
|
eclass[k] = j;
|
|
}
|
|
|
|
nNotDone = 0;
|
|
r = -1;
|
|
while (1) {
|
|
|
|
/*-- find the next non-singleton bucket --*/
|
|
k = r + 1;
|
|
while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
|
|
if (ISSET_BH(k)) {
|
|
while (WORD_BH(k) == 0xffffffff) k += 32;
|
|
while (ISSET_BH(k)) k++;
|
|
}
|
|
l = k - 1;
|
|
if (l >= nblock) break;
|
|
while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
|
|
if (!ISSET_BH(k)) {
|
|
while (WORD_BH(k) == 0x00000000) k += 32;
|
|
while (!ISSET_BH(k)) k++;
|
|
}
|
|
r = k - 1;
|
|
if (r >= nblock) break;
|
|
|
|
/*-- now [l, r] bracket current bucket --*/
|
|
if (r > l) {
|
|
nNotDone += (r - l + 1);
|
|
fallbackQSort3 ( fmap, eclass, l, r );
|
|
|
|
/*-- scan bucket and generate header bits-- */
|
|
cc = -1;
|
|
for (i = l; i <= r; i++) {
|
|
cc1 = eclass[fmap[i]];
|
|
if (cc != cc1) { SET_BH(i); cc = cc1; };
|
|
}
|
|
}
|
|
}
|
|
|
|
if (verb >= 4)
|
|
VPrintf1 ( "%6d unresolved strings\n", nNotDone );
|
|
|
|
H *= 2;
|
|
if (H > nblock || nNotDone == 0) break;
|
|
}
|
|
|
|
/*--
|
|
Reconstruct the original block in
|
|
eclass8 [0 .. nblock-1], since the
|
|
previous phase destroyed it.
|
|
--*/
|
|
if (verb >= 4)
|
|
VPrintf0 ( " reconstructing block ...\n" );
|
|
j = 0;
|
|
for (i = 0; i < nblock; i++) {
|
|
while (ftabCopy[j] == 0) j++;
|
|
ftabCopy[j]--;
|
|
eclass8[fmap[i]] = (UChar)j;
|
|
}
|
|
AssertH ( j < 256, 1005 );
|
|
}
|
|
|
|
#undef SET_BH
|
|
#undef CLEAR_BH
|
|
#undef ISSET_BH
|
|
#undef WORD_BH
|
|
#undef UNALIGNED_BH
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--- The main, O(N^2 log(N)) sorting ---*/
|
|
/*--- algorithm. Faster for "normal" ---*/
|
|
/*--- non-repetitive blocks. ---*/
|
|
/*---------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
static
|
|
__inline__
|
|
Bool mainGtU ( UInt32 i1,
|
|
UInt32 i2,
|
|
UChar* block,
|
|
UInt16* quadrant,
|
|
UInt32 nblock,
|
|
Int32* budget )
|
|
{
|
|
Int32 k;
|
|
UChar c1, c2;
|
|
UInt16 s1, s2;
|
|
|
|
AssertD ( i1 != i2, "mainGtU" );
|
|
/* 1 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 2 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 3 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 4 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 5 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 6 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 7 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 8 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 9 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 10 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 11 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
/* 12 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
i1++; i2++;
|
|
|
|
k = nblock + 8;
|
|
|
|
do {
|
|
/* 1 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 2 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 3 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 4 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 5 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 6 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 7 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
/* 8 */
|
|
c1 = block[i1]; c2 = block[i2];
|
|
if (c1 != c2) return (c1 > c2);
|
|
s1 = quadrant[i1]; s2 = quadrant[i2];
|
|
if (s1 != s2) return (s1 > s2);
|
|
i1++; i2++;
|
|
|
|
if (i1 >= nblock) i1 -= nblock;
|
|
if (i2 >= nblock) i2 -= nblock;
|
|
|
|
k -= 8;
|
|
(*budget)--;
|
|
}
|
|
while (k >= 0);
|
|
|
|
return False;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Knuth's increments seem to work better
|
|
than Incerpi-Sedgewick here. Possibly
|
|
because the number of elems to sort is
|
|
usually small, typically <= 20.
|
|
--*/
|
|
static
|
|
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
|
|
9841, 29524, 88573, 265720,
|
|
797161, 2391484 };
|
|
|
|
static
|
|
void mainSimpleSort ( UInt32* ptr,
|
|
UChar* block,
|
|
UInt16* quadrant,
|
|
Int32 nblock,
|
|
Int32 lo,
|
|
Int32 hi,
|
|
Int32 d,
|
|
Int32* budget )
|
|
{
|
|
Int32 i, j, h, bigN, hp;
|
|
UInt32 v;
|
|
|
|
bigN = hi - lo + 1;
|
|
if (bigN < 2) return;
|
|
|
|
hp = 0;
|
|
while (incs[hp] < bigN) hp++;
|
|
hp--;
|
|
|
|
for (; hp >= 0; hp--) {
|
|
h = incs[hp];
|
|
|
|
i = lo + h;
|
|
while (True) {
|
|
|
|
/*-- copy 1 --*/
|
|
if (i > hi) break;
|
|
v = ptr[i];
|
|
j = i;
|
|
while ( mainGtU (
|
|
ptr[j-h]+d, v+d, block, quadrant, nblock, budget
|
|
) ) {
|
|
ptr[j] = ptr[j-h];
|
|
j = j - h;
|
|
if (j <= (lo + h - 1)) break;
|
|
}
|
|
ptr[j] = v;
|
|
i++;
|
|
|
|
/*-- copy 2 --*/
|
|
if (i > hi) break;
|
|
v = ptr[i];
|
|
j = i;
|
|
while ( mainGtU (
|
|
ptr[j-h]+d, v+d, block, quadrant, nblock, budget
|
|
) ) {
|
|
ptr[j] = ptr[j-h];
|
|
j = j - h;
|
|
if (j <= (lo + h - 1)) break;
|
|
}
|
|
ptr[j] = v;
|
|
i++;
|
|
|
|
/*-- copy 3 --*/
|
|
if (i > hi) break;
|
|
v = ptr[i];
|
|
j = i;
|
|
while ( mainGtU (
|
|
ptr[j-h]+d, v+d, block, quadrant, nblock, budget
|
|
) ) {
|
|
ptr[j] = ptr[j-h];
|
|
j = j - h;
|
|
if (j <= (lo + h - 1)) break;
|
|
}
|
|
ptr[j] = v;
|
|
i++;
|
|
|
|
if (*budget < 0) return;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
The following is an implementation of
|
|
an elegant 3-way quicksort for strings,
|
|
described in a paper "Fast Algorithms for
|
|
Sorting and Searching Strings", by Robert
|
|
Sedgewick and Jon L. Bentley.
|
|
--*/
|
|
|
|
#define mswap(zz1, zz2) \
|
|
{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
|
|
|
|
#define mvswap(zzp1, zzp2, zzn) \
|
|
{ \
|
|
Int32 yyp1 = (zzp1); \
|
|
Int32 yyp2 = (zzp2); \
|
|
Int32 yyn = (zzn); \
|
|
while (yyn > 0) { \
|
|
mswap(ptr[yyp1], ptr[yyp2]); \
|
|
yyp1++; yyp2++; yyn--; \
|
|
} \
|
|
}
|
|
|
|
static
|
|
__inline__
|
|
UChar mmed3 ( UChar a, UChar b, UChar c )
|
|
{
|
|
UChar t;
|
|
if (a > b) { t = a; a = b; b = t; };
|
|
if (b > c) {
|
|
b = c;
|
|
if (a > b) b = a;
|
|
}
|
|
return b;
|
|
}
|
|
|
|
#define mmin(a,b) ((a) < (b)) ? (a) : (b)
|
|
|
|
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
|
|
stackHi[sp] = hz; \
|
|
stackD [sp] = dz; \
|
|
sp++; }
|
|
|
|
#define mpop(lz,hz,dz) { sp--; \
|
|
lz = stackLo[sp]; \
|
|
hz = stackHi[sp]; \
|
|
dz = stackD [sp]; }
|
|
|
|
|
|
#define mnextsize(az) (nextHi[az]-nextLo[az])
|
|
|
|
#define mnextswap(az,bz) \
|
|
{ Int32 tz; \
|
|
tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
|
|
tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
|
|
tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
|
|
|
|
|
|
#define MAIN_QSORT_SMALL_THRESH 20
|
|
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
|
|
#define MAIN_QSORT_STACK_SIZE 100
|
|
|
|
static
|
|
void mainQSort3 ( UInt32* ptr,
|
|
UChar* block,
|
|
UInt16* quadrant,
|
|
Int32 nblock,
|
|
Int32 loSt,
|
|
Int32 hiSt,
|
|
Int32 dSt,
|
|
Int32* budget )
|
|
{
|
|
Int32 unLo, unHi, ltLo, gtHi, n, m, med;
|
|
Int32 sp, lo, hi, d;
|
|
|
|
Int32 stackLo[MAIN_QSORT_STACK_SIZE];
|
|
Int32 stackHi[MAIN_QSORT_STACK_SIZE];
|
|
Int32 stackD [MAIN_QSORT_STACK_SIZE];
|
|
|
|
Int32 nextLo[3];
|
|
Int32 nextHi[3];
|
|
Int32 nextD [3];
|
|
|
|
sp = 0;
|
|
mpush ( loSt, hiSt, dSt );
|
|
|
|
while (sp > 0) {
|
|
|
|
AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
|
|
|
|
mpop ( lo, hi, d );
|
|
if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
|
|
d > MAIN_QSORT_DEPTH_THRESH) {
|
|
mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
|
|
if (*budget < 0) return;
|
|
continue;
|
|
}
|
|
|
|
med = (Int32)
|
|
mmed3 ( block[ptr[ lo ]+d],
|
|
block[ptr[ hi ]+d],
|
|
block[ptr[ (lo+hi)>>1 ]+d] );
|
|
|
|
unLo = ltLo = lo;
|
|
unHi = gtHi = hi;
|
|
|
|
while (True) {
|
|
while (True) {
|
|
if (unLo > unHi) break;
|
|
n = ((Int32)block[ptr[unLo]+d]) - med;
|
|
if (n == 0) {
|
|
mswap(ptr[unLo], ptr[ltLo]);
|
|
ltLo++; unLo++; continue;
|
|
};
|
|
if (n > 0) break;
|
|
unLo++;
|
|
}
|
|
while (True) {
|
|
if (unLo > unHi) break;
|
|
n = ((Int32)block[ptr[unHi]+d]) - med;
|
|
if (n == 0) {
|
|
mswap(ptr[unHi], ptr[gtHi]);
|
|
gtHi--; unHi--; continue;
|
|
};
|
|
if (n < 0) break;
|
|
unHi--;
|
|
}
|
|
if (unLo > unHi) break;
|
|
mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
|
|
}
|
|
|
|
AssertD ( unHi == unLo-1, "mainQSort3(2)" );
|
|
|
|
if (gtHi < ltLo) {
|
|
mpush(lo, hi, d+1 );
|
|
continue;
|
|
}
|
|
|
|
n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
|
|
m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
|
|
|
|
n = lo + unLo - ltLo - 1;
|
|
m = hi - (gtHi - unHi) + 1;
|
|
|
|
nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
|
|
nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
|
|
nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
|
|
|
|
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
|
|
if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
|
|
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
|
|
|
|
AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
|
|
AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
|
|
|
|
mpush (nextLo[0], nextHi[0], nextD[0]);
|
|
mpush (nextLo[1], nextHi[1], nextD[1]);
|
|
mpush (nextLo[2], nextHi[2], nextD[2]);
|
|
}
|
|
}
|
|
|
|
#undef mswap
|
|
#undef mvswap
|
|
#undef mpush
|
|
#undef mpop
|
|
#undef mmin
|
|
#undef mnextsize
|
|
#undef mnextswap
|
|
#undef MAIN_QSORT_SMALL_THRESH
|
|
#undef MAIN_QSORT_DEPTH_THRESH
|
|
#undef MAIN_QSORT_STACK_SIZE
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/* Pre:
|
|
nblock > N_OVERSHOOT
|
|
block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
|
|
((UChar*)block32) [0 .. nblock-1] holds block
|
|
ptr exists for [0 .. nblock-1]
|
|
|
|
Post:
|
|
((UChar*)block32) [0 .. nblock-1] holds block
|
|
All other areas of block32 destroyed
|
|
ftab [0 .. 65536 ] destroyed
|
|
ptr [0 .. nblock-1] holds sorted order
|
|
if (*budget < 0), sorting was abandoned
|
|
*/
|
|
|
|
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
|
|
#define SETMASK (1 << 21)
|
|
#define CLEARMASK (~(SETMASK))
|
|
|
|
static
|
|
void mainSort ( UInt32* ptr,
|
|
UChar* block,
|
|
UInt16* quadrant,
|
|
UInt32* ftab,
|
|
Int32 nblock,
|
|
Int32 verb,
|
|
Int32* budget )
|
|
{
|
|
Int32 i, j, k, ss, sb;
|
|
Int32 runningOrder[256];
|
|
Bool bigDone[256];
|
|
Int32 copyStart[256];
|
|
Int32 copyEnd [256];
|
|
UChar c1;
|
|
Int32 numQSorted;
|
|
UInt16 s;
|
|
if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
|
|
|
|
/*-- set up the 2-byte frequency table --*/
|
|
for (i = 65536; i >= 0; i--) ftab[i] = 0;
|
|
|
|
j = block[0] << 8;
|
|
i = nblock-1;
|
|
for (; i >= 3; i -= 4) {
|
|
quadrant[i] = 0;
|
|
j = (j >> 8) | ( ((UInt16)block[i]) << 8);
|
|
ftab[j]++;
|
|
quadrant[i-1] = 0;
|
|
j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
|
|
ftab[j]++;
|
|
quadrant[i-2] = 0;
|
|
j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
|
|
ftab[j]++;
|
|
quadrant[i-3] = 0;
|
|
j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
|
|
ftab[j]++;
|
|
}
|
|
for (; i >= 0; i--) {
|
|
quadrant[i] = 0;
|
|
j = (j >> 8) | ( ((UInt16)block[i]) << 8);
|
|
ftab[j]++;
|
|
}
|
|
|
|
/*-- (emphasises close relationship of block & quadrant) --*/
|
|
for (i = 0; i < BZ_N_OVERSHOOT; i++) {
|
|
block [nblock+i] = block[i];
|
|
quadrant[nblock+i] = 0;
|
|
}
|
|
|
|
if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
|
|
|
|
/*-- Complete the initial radix sort --*/
|
|
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
|
|
|
|
s = block[0] << 8;
|
|
i = nblock-1;
|
|
for (; i >= 3; i -= 4) {
|
|
s = (s >> 8) | (block[i] << 8);
|
|
j = ftab[s] -1;
|
|
ftab[s] = j;
|
|
ptr[j] = i;
|
|
s = (s >> 8) | (block[i-1] << 8);
|
|
j = ftab[s] -1;
|
|
ftab[s] = j;
|
|
ptr[j] = i-1;
|
|
s = (s >> 8) | (block[i-2] << 8);
|
|
j = ftab[s] -1;
|
|
ftab[s] = j;
|
|
ptr[j] = i-2;
|
|
s = (s >> 8) | (block[i-3] << 8);
|
|
j = ftab[s] -1;
|
|
ftab[s] = j;
|
|
ptr[j] = i-3;
|
|
}
|
|
for (; i >= 0; i--) {
|
|
s = (s >> 8) | (block[i] << 8);
|
|
j = ftab[s] -1;
|
|
ftab[s] = j;
|
|
ptr[j] = i;
|
|
}
|
|
|
|
/*--
|
|
Now ftab contains the first loc of every small bucket.
|
|
Calculate the running order, from smallest to largest
|
|
big bucket.
|
|
--*/
|
|
for (i = 0; i <= 255; i++) {
|
|
bigDone [i] = False;
|
|
runningOrder[i] = i;
|
|
}
|
|
|
|
{
|
|
Int32 vv;
|
|
Int32 h = 1;
|
|
do h = 3 * h + 1; while (h <= 256);
|
|
do {
|
|
h = h / 3;
|
|
for (i = h; i <= 255; i++) {
|
|
vv = runningOrder[i];
|
|
j = i;
|
|
while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
|
|
runningOrder[j] = runningOrder[j-h];
|
|
j = j - h;
|
|
if (j <= (h - 1)) goto zero;
|
|
}
|
|
zero:
|
|
runningOrder[j] = vv;
|
|
}
|
|
} while (h != 1);
|
|
}
|
|
|
|
/*--
|
|
The main sorting loop.
|
|
--*/
|
|
|
|
numQSorted = 0;
|
|
|
|
for (i = 0; i <= 255; i++) {
|
|
|
|
/*--
|
|
Process big buckets, starting with the least full.
|
|
Basically this is a 3-step process in which we call
|
|
mainQSort3 to sort the small buckets [ss, j], but
|
|
also make a big effort to avoid the calls if we can.
|
|
--*/
|
|
ss = runningOrder[i];
|
|
|
|
/*--
|
|
Step 1:
|
|
Complete the big bucket [ss] by quicksorting
|
|
any unsorted small buckets [ss, j], for j != ss.
|
|
Hopefully previous pointer-scanning phases have already
|
|
completed many of the small buckets [ss, j], so
|
|
we don't have to sort them at all.
|
|
--*/
|
|
for (j = 0; j <= 255; j++) {
|
|
if (j != ss) {
|
|
sb = (ss << 8) + j;
|
|
if ( ! (ftab[sb] & SETMASK) ) {
|
|
Int32 lo = ftab[sb] & CLEARMASK;
|
|
Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
|
|
if (hi > lo) {
|
|
if (verb >= 4)
|
|
VPrintf4 ( " qsort [0x%x, 0x%x] "
|
|
"done %d this %d\n",
|
|
ss, j, numQSorted, hi - lo + 1 );
|
|
mainQSort3 (
|
|
ptr, block, quadrant, nblock,
|
|
lo, hi, BZ_N_RADIX, budget
|
|
);
|
|
numQSorted += (hi - lo + 1);
|
|
if (*budget < 0) return;
|
|
}
|
|
}
|
|
ftab[sb] |= SETMASK;
|
|
}
|
|
}
|
|
|
|
AssertH ( !bigDone[ss], 1006 );
|
|
|
|
/*--
|
|
Step 2:
|
|
Now scan this big bucket [ss] so as to synthesise the
|
|
sorted order for small buckets [t, ss] for all t,
|
|
including, magically, the bucket [ss,ss] too.
|
|
This will avoid doing Real Work in subsequent Step 1's.
|
|
--*/
|
|
{
|
|
for (j = 0; j <= 255; j++) {
|
|
copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
|
|
copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
|
|
}
|
|
for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
|
|
k = ptr[j]-1; if (k < 0) k += nblock;
|
|
c1 = block[k];
|
|
if (!bigDone[c1])
|
|
ptr[ copyStart[c1]++ ] = k;
|
|
}
|
|
for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
|
|
k = ptr[j]-1; if (k < 0) k += nblock;
|
|
c1 = block[k];
|
|
if (!bigDone[c1])
|
|
ptr[ copyEnd[c1]-- ] = k;
|
|
}
|
|
}
|
|
|
|
AssertH ( (copyStart[ss]-1 == copyEnd[ss])
|
|
||
|
|
/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
|
|
Necessity for this case is demonstrated by compressing
|
|
a sequence of approximately 48.5 million of character
|
|
251; 1.0.0/1.0.1 will then die here. */
|
|
(copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
|
|
1007 )
|
|
|
|
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
|
|
|
|
/*--
|
|
Step 3:
|
|
The [ss] big bucket is now done. Record this fact,
|
|
and update the quadrant descriptors. Remember to
|
|
update quadrants in the overshoot area too, if
|
|
necessary. The "if (i < 255)" test merely skips
|
|
this updating for the last bucket processed, since
|
|
updating for the last bucket is pointless.
|
|
|
|
The quadrant array provides a way to incrementally
|
|
cache sort orderings, as they appear, so as to
|
|
make subsequent comparisons in fullGtU() complete
|
|
faster. For repetitive blocks this makes a big
|
|
difference (but not big enough to be able to avoid
|
|
the fallback sorting mechanism, exponential radix sort).
|
|
|
|
The precise meaning is: at all times:
|
|
|
|
for 0 <= i < nblock and 0 <= j <= nblock
|
|
|
|
if block[i] != block[j],
|
|
|
|
then the relative values of quadrant[i] and
|
|
quadrant[j] are meaningless.
|
|
|
|
else {
|
|
if quadrant[i] < quadrant[j]
|
|
then the string starting at i lexicographically
|
|
precedes the string starting at j
|
|
|
|
else if quadrant[i] > quadrant[j]
|
|
then the string starting at j lexicographically
|
|
precedes the string starting at i
|
|
|
|
else
|
|
the relative ordering of the strings starting
|
|
at i and j has not yet been determined.
|
|
}
|
|
--*/
|
|
bigDone[ss] = True;
|
|
|
|
if (i < 255) {
|
|
Int32 bbStart = ftab[ss << 8] & CLEARMASK;
|
|
Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
|
|
Int32 shifts = 0;
|
|
|
|
while ((bbSize >> shifts) > 65534) shifts++;
|
|
|
|
for (j = bbSize-1; j >= 0; j--) {
|
|
Int32 a2update = ptr[bbStart + j];
|
|
UInt16 qVal = (UInt16)(j >> shifts);
|
|
quadrant[a2update] = qVal;
|
|
if (a2update < BZ_N_OVERSHOOT)
|
|
quadrant[a2update + nblock] = qVal;
|
|
}
|
|
AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
|
|
}
|
|
|
|
}
|
|
|
|
if (verb >= 4)
|
|
VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
|
|
nblock, numQSorted, nblock - numQSorted );
|
|
}
|
|
|
|
#undef BIGFREQ
|
|
#undef SETMASK
|
|
#undef CLEARMASK
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/* Pre:
|
|
nblock > 0
|
|
arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
|
|
((UChar*)arr2) [0 .. nblock-1] holds block
|
|
arr1 exists for [0 .. nblock-1]
|
|
|
|
Post:
|
|
((UChar*)arr2) [0 .. nblock-1] holds block
|
|
All other areas of block destroyed
|
|
ftab [ 0 .. 65536 ] destroyed
|
|
arr1 [0 .. nblock-1] holds sorted order
|
|
*/
|
|
void BZ2_blockSort ( EState* s )
|
|
{
|
|
UInt32* ptr = s->ptr;
|
|
UChar* block = s->block;
|
|
UInt32* ftab = s->ftab;
|
|
Int32 nblock = s->nblock;
|
|
Int32 verb = s->verbosity;
|
|
Int32 wfact = s->workFactor;
|
|
UInt16* quadrant;
|
|
Int32 budget;
|
|
Int32 budgetInit;
|
|
Int32 i;
|
|
|
|
if (nblock < 10000) {
|
|
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
|
|
} else {
|
|
/* Calculate the location for quadrant, remembering to get
|
|
the alignment right. Assumes that &(block[0]) is at least
|
|
2-byte aligned -- this should be ok since block is really
|
|
the first section of arr2.
|
|
*/
|
|
i = nblock+BZ_N_OVERSHOOT;
|
|
if (i & 1) i++;
|
|
quadrant = (UInt16*)(&(block[i]));
|
|
|
|
/* (wfact-1) / 3 puts the default-factor-30
|
|
transition point at very roughly the same place as
|
|
with v0.1 and v0.9.0.
|
|
Not that it particularly matters any more, since the
|
|
resulting compressed stream is now the same regardless
|
|
of whether or not we use the main sort or fallback sort.
|
|
*/
|
|
if (wfact < 1 ) wfact = 1;
|
|
if (wfact > 100) wfact = 100;
|
|
budgetInit = nblock * ((wfact-1) / 3);
|
|
budget = budgetInit;
|
|
|
|
mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
|
|
if (verb >= 3)
|
|
VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
|
|
budgetInit - budget,
|
|
nblock,
|
|
(float)(budgetInit - budget) /
|
|
(float)(nblock==0 ? 1 : nblock) );
|
|
if (budget < 0) {
|
|
if (verb >= 2)
|
|
VPrintf0 ( " too repetitive; using fallback"
|
|
" sorting algorithm\n" );
|
|
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
|
|
}
|
|
}
|
|
|
|
s->origPtr = -1;
|
|
for (i = 0; i < s->nblock; i++)
|
|
if (ptr[i] == 0)
|
|
{ s->origPtr = i; break; };
|
|
|
|
AssertH( s->origPtr != -1, 1003 );
|
|
}
|
|
|
|
|
|
/*-------------------------------------------------------------*/
|
|
/*--- end blocksort.c ---*/
|
|
/*-------------------------------------------------------------*/
|