9 cblock_t MTF (cblock_t in)
16 out_p = out.data = malloc(in.count + 4);
19 *out_p++ = in.count&255;
20 *out_p++ = (in.count>>8)&255;
21 *out_p++ = (in.count>>16)&255;
22 *out_p++ = (in.count>>24)&255;
24 for (i=0 ; i<256 ; i++)
27 for (i=0 ; i<in.count ; i++)
33 // shuffle b indexes to 0
34 for (j=0 ; j<256 ; j++)
40 out.count = out_p - out.data;
46 //==========================================================================
51 int bwtCompare (const void *elem1, const void *elem2)
60 for (i=0 ; i<bwt_size ; i++)
82 cblock_t BWT (cblock_t in)
92 sorted = malloc(in.count*sizeof(*sorted));
93 for (i=0 ; i<in.count ; i++)
95 qsort (sorted, in.count, sizeof(*sorted), bwtCompare);
97 out_p = out.data = malloc(in.count + 8);
100 *out_p++ = in.count&255;
101 *out_p++ = (in.count>>8)&255;
102 *out_p++ = (in.count>>16)&255;
103 *out_p++ = (in.count>>24)&255;
106 for (i=0 ; i<in.count ; i++)
110 *out_p++ = (i>>8)&255;
111 *out_p++ = (i>>16)&255;
112 *out_p++ = (i>>24)&255;
114 // write the L column
115 for (i=0 ; i<in.count ; i++)
116 *out_p++ = in.data[(sorted[i]+in.count-1)%in.count];
120 out.count = out_p - out.data;
125 //==========================================================================
127 typedef struct hnode_s
136 unsigned charbits[256];
137 int charbitscount[256];
139 int SmallestNode (void)
146 for (i=0 ; i<numhnodes ; i++)
150 if (!hnodes[i].count)
152 if (hnodes[i].count < best)
154 best = hnodes[i].count;
162 hnodes[bestnode].used = true;
166 void BuildChars (int nodenum, unsigned bits, int bitcount)
173 Error ("bitcount > 32");
174 charbits[nodenum] = bits;
175 charbitscount[nodenum] = bitcount;
179 node = &hnodes[nodenum];
181 BuildChars (node->children[0], bits, bitcount+1);
183 BuildChars (node->children[1], bits, bitcount+1);
191 cblock_t Huffman (cblock_t in)
202 memset (hnodes, 0, sizeof(hnodes));
203 for (i=0 ; i<in.count ; i++)
204 hnodes[in.data[i]].count++;
209 for (i=0 ; i<256 ; i++)
211 if (hnodes[i].count > max)
213 max = hnodes[i].count;
218 Error ("Huffman: max == 0");
220 for (i=0 ; i<256 ; i++)
222 hnodes[i].count = (hnodes[i].count*255+max-1) / max;
227 while (numhnodes != 511)
229 node = &hnodes[numhnodes];
231 // pick two lowest counts
232 node->children[0] = SmallestNode ();
233 if (node->children[0] == -1)
236 node->children[1] = SmallestNode ();
237 if (node->children[1] == -1)
239 if (node->children[0] != numhnodes-1)
240 Error ("Bad smallestnode");
243 node->count = hnodes[node->children[0]].count +
244 hnodes[node->children[1]].count;
248 BuildChars (numhnodes-1, 0, 0);
250 out_p = out.data = malloc(in.count*2 + 1024);
251 memset (out_p, 0, in.count*2+1024);
254 *out_p++ = in.count&255;
255 *out_p++ = (in.count>>8)&255;
256 *out_p++ = (in.count>>16)&255;
257 *out_p++ = (in.count>>24)&255;
259 // save out the 256 normalized counts so the tree can be recreated
260 for (i=0 ; i<256 ; i++)
261 *out_p++ = hnodes[i].count;
265 for (i=0 ; i<in.count ; i++)
267 c = charbitscount[in.data[i]];
268 bits = charbits[in.data[i]];
273 out_p[outbits>>3] |= 1<<(outbits&7);
278 out_p += (outbits+7)>>3;
280 out.count = out_p - out.data;
285 //==========================================================================
292 #define RLE_CODE 0xe8
293 #define RLE_TRIPPLE 0xe9
298 cblock_t RLE (cblock_t in)
306 out_p = out.data = malloc (in.count*2);
309 *out_p++ = in.count&255;
310 *out_p++ = (in.count>>8)&255;
311 *out_p++ = (in.count>>16)&255;
312 *out_p++ = (in.count>>24)&255;
314 for (i=0 ; i<in.count ; )
320 while (i<in.count && repeat < 255 && in.data[i] == val)
326 rle_counts[repeat]++;
327 if (repeat > 3 || val == RLE_CODE)
340 out.count = out_p - out.data;
344 //==========================================================================
346 unsigned lzss_head[256];
347 unsigned lzss_next[0x20000];
354 #define BACK_WINDOW 0x10000
356 #define FRONT_WINDOW 16
358 cblock_t LZSS (cblock_t in)
365 int bestlength, beststart;
368 if (in.count >= sizeof(lzss_next)/4)
369 Error ("LZSS: too big");
371 memset (lzss_head, -1, sizeof(lzss_head));
373 out_p = out.data = malloc (in.count*2);
374 memset (out.data, 0, in.count*2);
377 *out_p++ = in.count&255;
378 *out_p++ = (in.count>>8)&255;
379 *out_p++ = (in.count>>16)&255;
380 *out_p++ = (in.count>>24)&255;
383 for (i=0 ; i<in.count ; )
392 if (i + max > in.count)
395 start = lzss_head[val];
396 while (start != -1 && start >= i-BACK_WINDOW)
398 // count match length
399 for (j=0 ; j<max ; j++)
400 if (in.data[start+j] != in.data[i+j])
407 start = lzss_next[start];
411 // slow simple search
412 // search for a match
414 if (i + max > in.count)
417 start = i - BACK_WINDOW;
422 for ( ; start < i ; start++)
424 if (in.data[start] != val)
426 // count match length
427 for (j=0 ; j<max ; j++)
428 if (in.data[start+j] != in.data[i+j])
437 beststart = BACK_WINDOW - (i-beststart);
440 { // output a single char
443 out_p[outbits>>3] |= 1<<(outbits&7); // set bit to mark char
445 for (j=0 ; j<8 ; j++, outbits++)
447 out_p[outbits>>3] |= 1<<(outbits&7);
451 outbits++; // leave a 0 bit to mark phrase
452 for (j=0 ; j<BACK_BITS ; j++, outbits++)
453 if (beststart & (1<<j) )
454 out_p[outbits>>3] |= 1<<(outbits&7);
455 for (j=0 ; j<FRONT_BITS ; j++, outbits++)
456 if (bestlength & (1<<j) )
457 out_p[outbits>>3] |= 1<<(outbits&7);
463 lzss_next[i] = lzss_head[val];
469 out_p += (outbits+7)>>3;
470 out.count = out_p - out.data;
474 //==========================================================================
478 #define HUF_TOKENS (256+MAX_REPT)
480 unsigned charbits1[256][HUF_TOKENS];
481 int charbitscount1[256][HUF_TOKENS];
483 hnode_t hnodes1[256][HUF_TOKENS*2];
486 int order0counts[256];
493 int SmallestNode1 (hnode_t *hnodes, int numhnodes)
500 for (i=0 ; i<numhnodes ; i++)
504 if (!hnodes[i].count)
506 if (hnodes[i].count < best)
508 best = hnodes[i].count;
516 hnodes[bestnode].used = true;
526 void BuildChars1 (int prev, int nodenum, unsigned bits, int bitcount)
530 if (nodenum < HUF_TOKENS)
533 Error ("bitcount > 32");
534 charbits1[prev][nodenum] = bits;
535 charbitscount1[prev][nodenum] = bitcount;
539 node = &hnodes1[prev][nodenum];
541 BuildChars1 (prev, node->children[0], bits, bitcount+1);
543 BuildChars1 (prev, node->children[1], bits, bitcount+1);
552 void BuildTree1 (int prev)
554 hnode_t *node, *nodebase;
558 numhnodes = HUF_TOKENS;
559 nodebase = hnodes1[prev];
562 node = &nodebase[numhnodes];
564 // pick two lowest counts
565 node->children[0] = SmallestNode1 (nodebase, numhnodes);
566 if (node->children[0] == -1)
569 node->children[1] = SmallestNode1 (nodebase, numhnodes);
570 if (node->children[1] == -1)
573 node->count = nodebase[node->children[0]].count +
574 nodebase[node->children[1]].count;
577 numhnodes1[prev] = numhnodes-1;
578 BuildChars1 (prev, numhnodes-1, 0, 0);
587 void Huffman1_Count (cblock_t in)
595 for (i=0 ; i<in.count ; i++)
599 hnodes1[prev][v].count++;
602 for (rept=1 ; i+rept < in.count && rept < MAX_REPT ; rept++)
603 if (in.data[i+rept] != v)
607 hnodes1[prev][255+rept].count++;
620 byte scaled[256][HUF_TOKENS];
621 void Huffman1_Build (FILE *f)
627 for (i=0 ; i<256 ; i++)
629 // normalize and save the counts
631 for (j=0 ; j<HUF_TOKENS ; j++)
633 if (hnodes1[i][j].count > max)
634 max = hnodes1[i][j].count;
639 for (j=0 ; j<HUF_TOKENS ; j++)
640 { // easy to overflow 32 bits here!
641 v = (hnodes1[i][j].count*(double)255+max-1)/max;
644 scaled[i][j] = hnodes1[i][j].count = v;
649 { // must have two tokens
651 scaled[i][0] = hnodes1[i][0].count = 1;
653 scaled[i][1] = hnodes1[i][1].count = 1;
660 // count up the total bits
662 for (i=0 ; i<256 ; i++)
663 for (j=0 ; j<256 ; j++)
664 total += charbitscount1[i][j] * hnodes1[i][j].count;
667 printf ("%i bytes huffman1 compressed\n", total);
670 fwrite (scaled, 1, sizeof(scaled), f);
677 Order 1 compression with pre-built table
680 cblock_t Huffman1 (cblock_t in)
691 out_p = out.data = malloc(in.count*2 + 1024);
692 memset (out_p, 0, in.count*2+1024);
695 *out_p++ = in.count&255;
696 *out_p++ = (in.count>>8)&255;
697 *out_p++ = (in.count>>16)&255;
698 *out_p++ = (in.count>>24)&255;
703 for (i=0 ; i<in.count ; i++)
707 c = charbitscount1[prev][v];
708 bits = charbits1[prev][v];
715 out_p[outbits>>3] |= 1<<(outbits&7);
721 // check for repeat encodes
722 for (rept=1 ; i+rept < in.count && rept < MAX_REPT ; rept++)
723 if (in.data[i+rept] != v)
727 c = charbitscount1[prev][255+rept];
728 bits = charbits1[prev][255+rept];
735 out_p[outbits>>3] |= 1<<(outbits&7);
743 out_p += (outbits+7)>>3;
745 out.count = out_p - out.data;