9 cblock_t MTF (cblock_t in)
\r
16 out_p = out.data = malloc(in.count + 4);
\r
19 *out_p++ = in.count&255;
\r
20 *out_p++ = (in.count>>8)&255;
\r
21 *out_p++ = (in.count>>16)&255;
\r
22 *out_p++ = (in.count>>24)&255;
\r
24 for (i=0 ; i<256 ; i++)
\r
27 for (i=0 ; i<in.count ; i++)
\r
33 // shuffle b indexes to 0
\r
34 for (j=0 ; j<256 ; j++)
\r
35 if (index[j] < code)
\r
40 out.count = out_p - out.data;
\r
46 //==========================================================================
\r
51 int bwtCompare (const void *elem1, const void *elem2)
\r
60 for (i=0 ; i<bwt_size ; i++)
\r
68 if (++i1 == bwt_size)
\r
70 if (++i2 == bwt_size)
\r
82 cblock_t BWT (cblock_t in)
\r
89 bwt_size = in.count;
\r
92 sorted = malloc(in.count*sizeof(*sorted));
\r
93 for (i=0 ; i<in.count ; i++)
\r
95 qsort (sorted, in.count, sizeof(*sorted), bwtCompare);
\r
97 out_p = out.data = malloc(in.count + 8);
\r
100 *out_p++ = in.count&255;
\r
101 *out_p++ = (in.count>>8)&255;
\r
102 *out_p++ = (in.count>>16)&255;
\r
103 *out_p++ = (in.count>>24)&255;
\r
105 // write head index
\r
106 for (i=0 ; i<in.count ; i++)
\r
107 if (sorted[i] == 0)
\r
110 *out_p++ = (i>>8)&255;
\r
111 *out_p++ = (i>>16)&255;
\r
112 *out_p++ = (i>>24)&255;
\r
114 // write the L column
\r
115 for (i=0 ; i<in.count ; i++)
\r
116 *out_p++ = in.data[(sorted[i]+in.count-1)%in.count];
\r
120 out.count = out_p - out.data;
\r
125 //==========================================================================
\r
127 typedef struct hnode_s
\r
135 hnode_t hnodes[512];
\r
136 unsigned charbits[256];
\r
137 int charbitscount[256];
\r
139 int SmallestNode (void)
\r
142 int best, bestnode;
\r
146 for (i=0 ; i<numhnodes ; i++)
\r
148 if (hnodes[i].used)
\r
150 if (!hnodes[i].count)
\r
152 if (hnodes[i].count < best)
\r
154 best = hnodes[i].count;
\r
159 if (bestnode == -1)
\r
162 hnodes[bestnode].used = true;
\r
166 void BuildChars (int nodenum, unsigned bits, int bitcount)
\r
173 Error ("bitcount > 32");
\r
174 charbits[nodenum] = bits;
\r
175 charbitscount[nodenum] = bitcount;
\r
179 node = &hnodes[nodenum];
\r
181 BuildChars (node->children[0], bits, bitcount+1);
\r
183 BuildChars (node->children[1], bits, bitcount+1);
\r
191 cblock_t Huffman (cblock_t in)
\r
202 memset (hnodes, 0, sizeof(hnodes));
\r
203 for (i=0 ; i<in.count ; i++)
\r
204 hnodes[in.data[i]].count++;
\r
206 // normalize counts
\r
209 for (i=0 ; i<256 ; i++)
\r
211 if (hnodes[i].count > max)
\r
213 max = hnodes[i].count;
\r
218 Error ("Huffman: max == 0");
\r
220 for (i=0 ; i<256 ; i++)
\r
222 hnodes[i].count = (hnodes[i].count*255+max-1) / max;
\r
227 while (numhnodes != 511)
\r
229 node = &hnodes[numhnodes];
\r
231 // pick two lowest counts
\r
232 node->children[0] = SmallestNode ();
\r
233 if (node->children[0] == -1)
\r
236 node->children[1] = SmallestNode ();
\r
237 if (node->children[1] == -1)
\r
239 if (node->children[0] != numhnodes-1)
\r
240 Error ("Bad smallestnode");
\r
243 node->count = hnodes[node->children[0]].count +
\r
244 hnodes[node->children[1]].count;
\r
248 BuildChars (numhnodes-1, 0, 0);
\r
250 out_p = out.data = malloc(in.count*2 + 1024);
\r
251 memset (out_p, 0, in.count*2+1024);
\r
254 *out_p++ = in.count&255;
\r
255 *out_p++ = (in.count>>8)&255;
\r
256 *out_p++ = (in.count>>16)&255;
\r
257 *out_p++ = (in.count>>24)&255;
\r
259 // save out the 256 normalized counts so the tree can be recreated
\r
260 for (i=0 ; i<256 ; i++)
\r
261 *out_p++ = hnodes[i].count;
\r
265 for (i=0 ; i<in.count ; i++)
\r
267 c = charbitscount[in.data[i]];
\r
268 bits = charbits[in.data[i]];
\r
273 out_p[outbits>>3] |= 1<<(outbits&7);
\r
278 out_p += (outbits+7)>>3;
\r
280 out.count = out_p - out.data;
\r
285 //==========================================================================
\r
292 #define RLE_CODE 0xe8
\r
293 #define RLE_TRIPPLE 0xe9
\r
295 int rle_counts[256];
\r
296 int rle_bytes[256];
\r
298 cblock_t RLE (cblock_t in)
\r
306 out_p = out.data = malloc (in.count*2);
\r
309 *out_p++ = in.count&255;
\r
310 *out_p++ = (in.count>>8)&255;
\r
311 *out_p++ = (in.count>>16)&255;
\r
312 *out_p++ = (in.count>>24)&255;
\r
314 for (i=0 ; i<in.count ; )
\r
320 while (i<in.count && repeat < 255 && in.data[i] == val)
\r
326 rle_counts[repeat]++;
\r
327 if (repeat > 3 || val == RLE_CODE)
\r
329 *out_p++ = RLE_CODE;
\r
340 out.count = out_p - out.data;
\r
344 //==========================================================================
\r
346 unsigned lzss_head[256];
\r
347 unsigned lzss_next[0x20000];
\r
354 #define BACK_WINDOW 0x10000
\r
355 #define BACK_BITS 16
\r
356 #define FRONT_WINDOW 16
\r
357 #define FRONT_BITS 4
\r
358 cblock_t LZSS (cblock_t in)
\r
365 int bestlength, beststart;
\r
368 if (in.count >= sizeof(lzss_next)/4)
\r
369 Error ("LZSS: too big");
\r
371 memset (lzss_head, -1, sizeof(lzss_head));
\r
373 out_p = out.data = malloc (in.count*2);
\r
374 memset (out.data, 0, in.count*2);
\r
377 *out_p++ = in.count&255;
\r
378 *out_p++ = (in.count>>8)&255;
\r
379 *out_p++ = (in.count>>16)&255;
\r
380 *out_p++ = (in.count>>24)&255;
\r
383 for (i=0 ; i<in.count ; )
\r
391 max = FRONT_WINDOW;
\r
392 if (i + max > in.count)
\r
393 max = in.count - i;
\r
395 start = lzss_head[val];
\r
396 while (start != -1 && start >= i-BACK_WINDOW)
\r
398 // count match length
\r
399 for (j=0 ; j<max ; j++)
\r
400 if (in.data[start+j] != in.data[i+j])
\r
402 if (j > bestlength)
\r
407 start = lzss_next[start];
\r
411 // slow simple search
\r
412 // search for a match
\r
413 max = FRONT_WINDOW;
\r
414 if (i + max > in.count)
\r
415 max = in.count - i;
\r
417 start = i - BACK_WINDOW;
\r
422 for ( ; start < i ; start++)
\r
424 if (in.data[start] != val)
\r
426 // count match length
\r
427 for (j=0 ; j<max ; j++)
\r
428 if (in.data[start+j] != in.data[i+j])
\r
430 if (j > bestlength)
\r
437 beststart = BACK_WINDOW - (i-beststart);
\r
439 if (bestlength < 3)
\r
440 { // output a single char
\r
443 out_p[outbits>>3] |= 1<<(outbits&7); // set bit to mark char
\r
445 for (j=0 ; j<8 ; j++, outbits++)
\r
447 out_p[outbits>>3] |= 1<<(outbits&7);
\r
450 { // output a phrase
\r
451 outbits++; // leave a 0 bit to mark phrase
\r
452 for (j=0 ; j<BACK_BITS ; j++, outbits++)
\r
453 if (beststart & (1<<j) )
\r
454 out_p[outbits>>3] |= 1<<(outbits&7);
\r
455 for (j=0 ; j<FRONT_BITS ; j++, outbits++)
\r
456 if (bestlength & (1<<j) )
\r
457 out_p[outbits>>3] |= 1<<(outbits&7);
\r
460 while (bestlength--)
\r
463 lzss_next[i] = lzss_head[val];
\r
464 lzss_head[val] = i;
\r
469 out_p += (outbits+7)>>3;
\r
470 out.count = out_p - out.data;
\r
474 //==========================================================================
\r
476 #define MIN_REPT 15
\r
478 #define HUF_TOKENS (256+MAX_REPT)
\r
480 unsigned charbits1[256][HUF_TOKENS];
\r
481 int charbitscount1[256][HUF_TOKENS];
\r
483 hnode_t hnodes1[256][HUF_TOKENS*2];
\r
484 int numhnodes1[256];
\r
486 int order0counts[256];
\r
493 int SmallestNode1 (hnode_t *hnodes, int numhnodes)
\r
496 int best, bestnode;
\r
500 for (i=0 ; i<numhnodes ; i++)
\r
502 if (hnodes[i].used)
\r
504 if (!hnodes[i].count)
\r
506 if (hnodes[i].count < best)
\r
508 best = hnodes[i].count;
\r
513 if (bestnode == -1)
\r
516 hnodes[bestnode].used = true;
\r
526 void BuildChars1 (int prev, int nodenum, unsigned bits, int bitcount)
\r
530 if (nodenum < HUF_TOKENS)
\r
533 Error ("bitcount > 32");
\r
534 charbits1[prev][nodenum] = bits;
\r
535 charbitscount1[prev][nodenum] = bitcount;
\r
539 node = &hnodes1[prev][nodenum];
\r
541 BuildChars1 (prev, node->children[0], bits, bitcount+1);
\r
543 BuildChars1 (prev, node->children[1], bits, bitcount+1);
\r
552 void BuildTree1 (int prev)
\r
554 hnode_t *node, *nodebase;
\r
558 numhnodes = HUF_TOKENS;
\r
559 nodebase = hnodes1[prev];
\r
562 node = &nodebase[numhnodes];
\r
564 // pick two lowest counts
\r
565 node->children[0] = SmallestNode1 (nodebase, numhnodes);
\r
566 if (node->children[0] == -1)
\r
569 node->children[1] = SmallestNode1 (nodebase, numhnodes);
\r
570 if (node->children[1] == -1)
\r
573 node->count = nodebase[node->children[0]].count +
\r
574 nodebase[node->children[1]].count;
\r
577 numhnodes1[prev] = numhnodes-1;
\r
578 BuildChars1 (prev, numhnodes-1, 0, 0);
\r
587 void Huffman1_Count (cblock_t in)
\r
595 for (i=0 ; i<in.count ; i++)
\r
599 hnodes1[prev][v].count++;
\r
602 for (rept=1 ; i+rept < in.count && rept < MAX_REPT ; rept++)
\r
603 if (in.data[i+rept] != v)
\r
605 if (rept > MIN_REPT)
\r
607 hnodes1[prev][255+rept].count++;
\r
620 byte scaled[256][HUF_TOKENS];
\r
621 void Huffman1_Build (FILE *f)
\r
627 for (i=0 ; i<256 ; i++)
\r
629 // normalize and save the counts
\r
631 for (j=0 ; j<HUF_TOKENS ; j++)
\r
633 if (hnodes1[i][j].count > max)
\r
634 max = hnodes1[i][j].count;
\r
639 for (j=0 ; j<HUF_TOKENS ; j++)
\r
640 { // easy to overflow 32 bits here!
\r
641 v = (hnodes1[i][j].count*(double)255+max-1)/max;
\r
644 scaled[i][j] = hnodes1[i][j].count = v;
\r
649 { // must have two tokens
\r
651 scaled[i][0] = hnodes1[i][0].count = 1;
\r
653 scaled[i][1] = hnodes1[i][1].count = 1;
\r
660 // count up the total bits
\r
662 for (i=0 ; i<256 ; i++)
\r
663 for (j=0 ; j<256 ; j++)
\r
664 total += charbitscount1[i][j] * hnodes1[i][j].count;
\r
666 total = (total+7)/8;
\r
667 printf ("%i bytes huffman1 compressed\n", total);
\r
670 fwrite (scaled, 1, sizeof(scaled), f);
\r
677 Order 1 compression with pre-built table
\r
680 cblock_t Huffman1 (cblock_t in)
\r
691 out_p = out.data = malloc(in.count*2 + 1024);
\r
692 memset (out_p, 0, in.count*2+1024);
\r
695 *out_p++ = in.count&255;
\r
696 *out_p++ = (in.count>>8)&255;
\r
697 *out_p++ = (in.count>>16)&255;
\r
698 *out_p++ = (in.count>>24)&255;
\r
703 for (i=0 ; i<in.count ; i++)
\r
707 c = charbitscount1[prev][v];
\r
708 bits = charbits1[prev][v];
\r
715 out_p[outbits>>3] |= 1<<(outbits&7);
\r
721 // check for repeat encodes
\r
722 for (rept=1 ; i+rept < in.count && rept < MAX_REPT ; rept++)
\r
723 if (in.data[i+rept] != v)
\r
725 if (rept > MIN_REPT)
\r
727 c = charbitscount1[prev][255+rept];
\r
728 bits = charbits1[prev][255+rept];
\r
735 out_p[outbits>>3] |= 1<<(outbits&7);
\r
743 out_p += (outbits+7)>>3;
\r
745 out.count = out_p - out.data;
\r