
                        Advanced Disassembler Engine
                                   v2.03
                               (x) 1999-2004
                         http://z0mbie.host.net/

                                  ABSTRACT
                                  --------

     ADE  is  based  on  the LDE engine. Its purpose is to split a given
 opcode  into  an  easily  modifiable structure, and then to assemble this
 structure back into an opcode.

                             PUBLIC SUBROUTINES
                             ------------------

     ADE has 3 public subroutines:

       ade32_init()   -- to initialize internal flag table,
       ade32_disasm() -- to disassemble opcode into structure,
       ade32_asm()    -- to assemble structure into opcode.

                                ade32_init()
                                ------------

     void __cdecl ade32_init(DWORD flagtable[512]);

     This  subroutine  initializes  the internal flag table, which is 2048
 bytes  in size. This table contains 512 DWORD entries: the 1st half is for
 normal  opcodes,  and  the  2nd half is for 0F-prefixed opcodes. Each DWORD
 entry  is  a  bitset  of  C_xxx  flags. Because the data is packed with the
 Huffman algorithm, the total subroutine size is about 500 bytes.
     Once  flag  table  is  initialized,  you can pass it to ade32_disasm()
 subroutine to disassemble opcodes.

                               ade32_disasm()
                               --------------

     DWORD __cdecl ade32_disasm(IN BYTE* opcode,
                                IN OUT disasm_struct* s,
                                IN DWORD flagtable[512]);

     This  subroutine  disassembles opcode into disasm_struct* s structure,
 using previously initialized flag table.
     Before  passing  disasm_struct*  s  to this subroutine, you should set
 s.disasm_defaddr   and   s.disasm_defdata   records   to  4,  in  case  of
 disassembling standard 32-bit code.
     On  successful  return,  EAX  contains total length of the opcode, and
 the  same  is  in s.disasm_len. All other fields of the structure are also
 filled  in.  Records  s.disasm_defaddr and s.disasm_defdata may be changed
 from 4 to 2 and vice versa, in case of prefixes 66/67 encountered.
     On  error,  EAX  contains  0,  and disasm_len is set to 0; other field
 values are undefined.

                                ade32_asm()
                                -----------

     DWORD __cdecl ade32_asm(OUT BYTE* opcode,
                             IN OUT disasm_struct* s);

     This subroutine assembles structure s into opcode, using the following
 algorithm:

        if (s.disasm_flag & C_SEG)     *opcode++ = s.disasm_seg;
        if (s.disasm_flag & C_LOCK)    *opcode++ = 0xF0;
        if (s.disasm_flag & C_REP)     *opcode++ = s.disasm_rep;
        if (s.disasm_flag & C_67)      *opcode++ = 0x67;
        if (s.disasm_flag & C_66)      *opcode++ = 0x66;
        *opcode++ = s.disasm_opcode;
        if (s.disasm_flag & C_OPCODE2) *opcode++ = s.disasm_opcode2;
        if (s.disasm_flag & C_MODRM)   *opcode++ = s.disasm_modrm;
        if (s.disasm_flag & C_SIB)     *opcode++ = s.disasm_sib;
        for (DWORD i=0; i<s.disasm_addrsize; i++)
          *opcode++ = s.disasm_addr[i];
        for (DWORD i=0; i<s.disasm_datasize; i++)
          *opcode++ = s.disasm_data[i];

     Size of the produced opcode is returned into EAX and in s.disasm_len.

     If no changes have been made to the structure s, produced opcode should
 be  the  same  as  passed  to  ade32_disasm(), except prefix order in some
 cases.

                          DISASM_STRUCT structure
                          -----------------------

   struct disasm_struct
   {
     IN OUT BYTE  disasm_defaddr;       -- specify 4 for 32-bit code
     IN OUT BYTE  disasm_defdata;       -- specify 4 for 32-bit code
     OUT DWORD    disasm_len;           -- total length of opcode or 0
     OUT DWORD    disasm_flag;          -- bitset of C_xxx
     OUT DWORD    disasm_addrsize;      -- size of address (or 0 if no addr)
     OUT DWORD    disasm_datasize;      -- size of data    (or 0 if no data)
     OUT BYTE     disasm_rep;           -- REP prefix value (if C_REP)
     OUT BYTE     disasm_seg;           -- SEG prefix value (if C_SEG)
     OUT BYTE     disasm_opcode;        -- opcode value (present if no error)
     OUT BYTE     disasm_opcode2;       -- 2nd opcode value (if C_OPCODE2)
     OUT BYTE     disasm_modrm;         -- MODRM value (if C_MODRM)
     OUT BYTE     disasm_sib;           -- SIB value (if C_SIB)
     OUT BYTE     disasm_addr[8];       -- address (if disasm_addrsize!=0)
     OUT BYTE     disasm_data[8];       -- data (if disasm_datasize!=0)
   };

                                C_xxx FLAGS
                                -----------

   C_SEG        -- There is SEGment prefix.
                   If this flag is set, disasm_seg field contains
                   segment prefix value (26,2E,36,3E,64,65).
                   Opcodes with multiple SEGment prefixes are discarded,
                   i.e. not disassembled.

   C_LOCK       -- There is LOCK prefix (F0).

   C_REP        -- There is REP prefix.
                   If this flag is set, disasm_rep field contains
                   rep prefix value (F2,F3).
                   Opcodes with multiple REP prefixes are discarded,
                   i.e. not disassembled.

   C_67         -- There is address-size modifier prefix.
                   Opcodes with multiple 67 prefixes are discarded.

   C_66         -- There is operand-size modifier prefix.
                   Opcodes with multiple 66 prefixes are discarded.

   C_OPCODE2    -- There is 2nd opcode.
                   This means that 1st opcode was 0F,
                   and second opcode's value is in disasm_opcode2.

   C_MODRM      -- There is MODRM byte.
                   If this flag is set, MODRM-byte value is in disasm_modrm.

   C_SIB        -- There is SIB byte.
                   If this flag is set, SIB-byte value is in disasm_sib field.

   C_ADDR1      -- \   When combined, these bitfields give      \
   C_ADDR2      --  }    size of address within opcode.          | total
   C_ADDR4      -- /     Possible values: 0..7                   | address
                                                                 |
   C_ADDR67     -- There is also address of size disasm_defaddr./

   C_DATA1      -- \   When combined, these bitfields give      \
   C_DATA2      --  }    size of data within opcode.             | total
   C_DATA4      -- /     Possible values: 0..7                   | data
                                                                 |
   C_DATA66     -- There is also data of size disasm_defdata.   /

   C_REL        -- Opcode has relative argument.
                   Such opcodes as jmp(E9,EB),call(E8),jxx(7x,0F8x) have this
                   flag set.

   C_STOP       -- Opcode performs control transfer (ret,iret,jmp,jmp modrm).
                   This means that it is incorrect to disassemble
                   opcode next to current one, if no additional
                   information is given.

   C_BAD        -- Opcode shouldn't be in PE EXE/DLL.
                   PE_STAT program was used to find out opcode statistics,
                   and all unusual opcodes for PE files were
                   marked with this flag.
                   It should be used to better distinguish between data/code.

                                   Other
                                   -----

  Instructions 00 00 and FF FF are discarded, i.e. ade32_disasm() returns 0.

                            USAGE EXAMPLE (C++)
                            -------------------

  // initialize flag table

  DWORD ade32_flagtable[512];
  ade32_init(ade32_flagtable);

  // disassemble code section opcode by opcode

  for(DWORD ip = 0x00401000; ; )
  {
    disasm_struct s = {4,4};   // prepare to disasm 32-bit code

    // disassemble opcode
    DWORD len = ade32_disasm((BYTE*)ip, &s, ade32_flagtable);
    if (len == 0) break; // cant disassemble?

    // dump length, offset and hex bytes
    printf("(%i) %08X ",len,ip);
    for(DWORD j=0; j<len; j++) printf(" %02X",*(BYTE*)(ip+j));
    printf("\n");

    // assemble structure into new opcode
    BYTE fuck[64];
    DWORD newlen = ade32_asm(fuck, &s);

    // compare stuff
    assert( (newlen == len) && (memcmp(fuck, (BYTE*)ip, len)==0) );

    ip += len;
  } // for ip

                                   * * *
