
// STATIC HUFFMAN ENCODING
// with 16-bit .INClude SFXed output file
// copyright (c) 1998 by Z0MBiE/29A

// 26-12-98  23:20:30..

#include <stdio.h>
#include <stdlib.h>

// Print errmsg verbatim on standard output and terminate the program
// with exit status 1. Never returns to the caller.
void error(const char *errmsg)
  {
    fputs(errmsg, stdout);
    exit(1);
  }

// Huffman tree state. Nodes 0..255 are the leaves (one per byte value);
// nodes from 256 upwards are internal nodes allocated by htree_build().
int htree_count[511];   // occurrence count of a node (internal node: sum of its two children)
int htree_next [511];   // index of the parent node, or -1 while the node has no parent
int htree_prev0[511];   // internal node: child that was assigned bit 0
int htree_prev1[511];   // internal node: child that was assigned bit 1
int htree_bit  [511];   // bit (0 or 1) assigned to this node when it was merged under its parent
int htree_code [256];   // per byte value: code bits; the bit chosen directly below the root is bit 0
int htree_len  [256];   // per byte value: code length in bits (0 for a leaf that has no parent)
int htree_max;          // number of nodes in use, i.e. index of the next free node

// Clear the occurrence counters of all 256 byte values (nodes 0..255)
// so that a new frequency count can start from zero.
void htree_init(void)
  {
    int sym = 0;
    while (sym < 256)
      {
        htree_count[sym] = 0;
        sym++;
      }
  }

// Count one occurrence of the byte c in the frequency table.
//
// c is a plain char, whose signedness is implementation-defined: where it
// is signed, bytes 0x80..0xFF arrive as negative values and would index
// memory in front of htree_count. Converting to unsigned char maps every
// byte to its leaf index 0..255.
void htree_update(char c)
  {
    htree_count[(unsigned char)c]++;
  }

// Find the parentless node with the smallest non-zero count.
//
// n is a node index to exclude from the search (pass -1 to exclude nothing).
// Only nodes below htree_max are examined. When several candidates share
// the smallest count, the one with the lowest index wins.
// Returns the node index, or -1 if no node qualifies.
int htree_findmin(int n)
  {
    int best_node  = -1;
    int best_count = -1;   // -1 means "no candidate seen yet"

    for (int node = 0; node < htree_max; node++)
      {
        if (node == n) continue;               // explicitly excluded
        if (htree_next[node] != -1) continue;  // already has a parent

        int cnt = htree_count[node];
        if (cnt == 0) continue;                // unused node

        if (best_count == -1 || cnt < best_count)
          {
            best_count = cnt;
            best_node  = node;
          }
      }

    return best_node;
  }

// Build the Huffman tree from the byte counts in htree_count[0..255] and
// derive the code (htree_code) and code length (htree_len) of every byte.
//
// Leaves are nodes 0..255. Each step merges the two parentless nodes with
// the smallest non-zero counts under a new internal node allocated at index
// htree_max. The program is terminated through error() when
//   "error 1": no byte has been counted at all,
//   "error 2": the node tables are full,
//   "error 3": the root does not account for every counted byte,
//   "error 4": a code would need 32 or more bits.
//
// Input consisting of a single distinct byte value yields a tree whose root
// is that leaf; its code has length 0.
void htree_build(void)
  {
    int a, b, total;
    int root = -1;

    // no node has a parent yet
    for (int i = 0; i < 511; i++) htree_next[i] = -1;

    // number of counted bytes; the root must carry exactly this count
    total = 0;
    for (int i = 0; i < 256; i++) total += htree_count[i];

    htree_max = 256;
    for (;;)
      {
        a = htree_findmin(-1);
        if (a == -1) error("error 1\n");
        b = htree_findmin(a);
        if (b == -1)
          {
            // a is the only parentless node left, so it is the root.
            // This happens when the input holds one distinct byte value.
            root = a;
            break;
          }

        // refuse to allocate past the end of the node tables
        if (htree_max >= 511) error("error 2\n");

        root = htree_max++;
        htree_next[a] = root;
        htree_next[b] = root;
        htree_prev0[root] = a;
        htree_prev1[root] = b;
        htree_bit[a] = 0;
        htree_bit[b] = 1;
        htree_count[root] = htree_count[a] + htree_count[b];

        // the node that carries the whole input is the root
        if (htree_count[root] == total) break;
      }

    if (root < 0 || htree_count[root] != total) error("error 3\n");

    // Walk from every leaf up to the root. Each step shifts the bits
    // collected so far to the left, so the bit chosen directly below the
    // root ends up in bit 0 of the code.
    for (int i = 0; i < 256; i++)
      {
        htree_code[i] = 0;
        htree_len [i] = 0;

        a = i;
        while (htree_next[a] != -1)
          {
            // a 32nd bit does not fit; stop before the shift would overflow
            if (htree_len[i] >= 31)
              error("error 4\n");

            htree_code[i] = (htree_code[i] << 1) | htree_bit[a];
            htree_len[i]++;

            a = htree_next[a];
          }
      }

  }

// Compress ifile into ofile, an assembly include file that contains the
// decompressed size, a decoder routine and the packed data as "db" lines.
//
// Pass 1 counts the input bytes and builds the Huffman tree. The decoder is
// then emitted as one label per tree node: an internal node executes
// "int 3" and branches on the carry flag to its bit-0 child (jnc) or bit-1
// child (jmp), a leaf loads its byte value into al and returns. Pass 2
// re-reads the input and writes the code bits, least significant bit first.
//
// Any failure terminates the program through error().
void compressfile(const char *ifile, const char *ofile)
  {
    // fixed part of the generated decoder, emitted unchanged
    static const char stub[] =
      "h_decompressfile:\n\n"
      "    mov     ax, 2503h\n"
      "    lea     dx, h_int03\n"
      "    int     21h\n"
      "    lea     si, h_compressed_data\n"
      "    lea     di, h_decompressed_data\n"
      "    xor     edx, edx\n"
      "    xor     cx, cx\n"
      "    mov     bx, h_decompressed_size\n"
      "h_1:call    h_decompressbyte \n"
      "    stosb\n"
      "    dec     bx\n"
      "    jnz     h_1\n"
      "    ret\n\n"
      "h_int03:\n\n"
      "    or      cx, cx\n"
      "    jnz     h_2\n"
      "    lodsb\n"
      "    movzx   eax, al\n"
      "    shl     eax, cl\n"
      "    or      edx, eax\n"
      "    add     cx, 8\n"
      "h_2:dec     cx\n"
      "    shr     edx, 1\n"
      "    retf    2\n\n"
      "h_decompressbyte:\n\n";

    FILE *i, *o;
    int ch;                    // fgetc() result: 0..255 or EOF, so it must be an int
    int s, len;
    unsigned long long code;   // bit accumulator: up to 7 pending bits plus one 31-bit code

    i = fopen(ifile,"rb");
    if (i == NULL) error("input file reading error\n");
    o = fopen(ofile,"wb");
    if (o == NULL) error("output file writing error\n");

    htree_init();
    printf("- pass 1 - calculating frequencies\n");
    s = 0;
    // comparing against EOF (instead of polling feof) also ends the loop
    // when a read error occurs
    while ((ch = fgetc(i)) != EOF)
      {
        htree_update((char)ch);
        s++;
      }
    if (ferror(i)) error("input file reading error\n");

    printf("- building tree\n");
    htree_build();

    printf("- converting dictionary\n");

    fprintf(o,"; generated, 16-bit .INC file. do not edit\n\n");
    fprintf(o,"h_decompressed_size equ %i\n\n",s);
    fputs(stub, o);

    // Emit the nodes from the highest index downwards. The first label
    // written is the root, so h_decompressbyte falls straight into it.
    for (int a=htree_max-1; a>=0; a--)
      {
        if (htree_count[a] == 0) continue;

        fprintf(o,"h%i:\n",a);
        if (a > 255)
          {
            fprintf(o,"    int     3\n");
            fprintf(o,"    jnc     h%i\n",htree_prev0[a]);
            fprintf(o,"    jmp     h%i\n",htree_prev1[a]);
          }
        else
          {
            fprintf(o,"    mov     al, %i\n",a);
            fprintf(o,"    ret\n");
          }
        htree_count[a] = 0;
      }

    fprintf(o,"\nh_compressed_data:\n\n");

    printf("- pass 2 - packing\n");
    rewind(i);

    code = 0;
    len  = 0;

    while ((ch = fgetc(i)) != EOF)
      {
        // ch is 0..255 here, a valid index into the code tables; the
        // 64-bit accumulator keeps long codes from overflowing on the shift
        code |= (unsigned long long)htree_code[ch] << len;
        len += htree_len[ch];

        while (len >= 8)
          {
            fprintf(o,"db %i\n",(int)(code & 255));
            code >>= 8;
            len -= 8;
          }
      }
    if (ferror(i)) error("input file reading error\n");

    // flush the last, partially filled byte
    if (len != 0)
      fprintf(o,"db %i\n",(int)(code & 255));

    fprintf(o,"\nh_decompressed_data:\n\n");

    fclose(i);

    // a failed write may only surface when the stream is flushed on close
    {
      int failed = ferror(o);
      if (fclose(o) != 0) failed = 1;
      if (failed) error("output file writing error\n");
    }

  }

// Program entry point: prints the banner, requires exactly two arguments
// (input file name and output file name) and runs the compressor.
// Returns 0 on success; every failure path ends in error(), which exits
// with status 1.
int main(int argc, char *argv[])
  {
    printf("Huffman Encoding (static) demo  32-bit/CPP  by Z0MBiE/29A\n");
    if (argc != 3) error("syntax: huff <infile> <outfile>\n");

    compressfile(argv[1], argv[2]);

    return 0;
  }

