-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathDisasm.cpp
2900 lines (2826 loc) · 121 KB
/
Disasm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
////////////////////////////////////////////////////////////////////////////////
// //
// OllyDbg Disassembling Engine v2.01 //
// //
// Copyright (c) 2007-2013 Oleh Yuschuk, [email protected] //
// //
// This code is part of the OllyDbg Disassembler v2.01 //
// //
// Disassembling engine is free software; you can redistribute it and/or //
// modify it under the terms of the GNU General Public License as published //
// by the Free Software Foundation; either version 3 of the License, or (at //
// your option) any later version. //
// //
// This code is distributed in the hope that it will be useful, but WITHOUT //
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for //
// more details. //
// //
// You should have received a copy of the GNU General Public License along //
// with this program. If not, see <http://www.gnu.org/licenses/>. //
// //
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// //
// This is a fast disassembler that can be used to determine the length of //
// the binary 80x86 32-bit command and its attributes, to convert it to the //
// human-readable text form, highlight its operands, and create hexadecimal //
// dump of the binary command. //
// //
// It is a stripped down version of the disassembler used by OllyDbg 2.01. //
// It can't analyse and comment the contents of the operands, or predict the //
// results of the command execution. Analysis-dependent features are not //
// included either. Most other features are kept. //
// //
// Disassembler supports integer, FPU, MMX, 3DNow, SSE1-SSE4.1 and AVX //
// instructions. 64-bit mode, AVX2, FMA and XOP are not (yet) supported. //
// //
// This code can be compiled either in ASCII or UNICODE mode. It is reentrant //
// (thread-safe, feature not available in the original OllyDbg code). //
// //
// Typical operation speed on 3-GHz Phenom II in MASM mode is: //
// //
// Command length and info: 130 ns/command (7,700,000 commands/s) //
// Disassembly: 290 ns/command (3,400,000 commands/s) //
// Disassembly, dump, highlighting: 350 ns/command (2,800,000 commands/s) //
// //
////////////////////////////////////////////////////////////////////////////////
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#pragma hdrstop
#include "disasm.h"
////////////////////////////////////////////////////////////////////////////////
//////////////////////////////// SYMBOLIC NAMES ////////////////////////////////
// Symbolic names of registers. Every table is indexed by the register number
// in its 'natural' order (as understood by CPU), not alphabetically.

// 8-bit general-purpose registers.
const tchar *regname8[NREG] = {
  T("AL"), T("CL"), T("DL"), T("BL"), T("AH"), T("CH"), T("DH"), T("BH")
};

// 16-bit general-purpose registers.
const tchar *regname16[NREG] = {
  T("AX"), T("CX"), T("DX"), T("BX"), T("SP"), T("BP"), T("SI"), T("DI")
};

// 32-bit general-purpose registers.
const tchar *regname32[NREG] = {
  T("EAX"), T("ECX"), T("EDX"), T("EBX"), T("ESP"), T("EBP"), T("ESI"), T("EDI")
};

// Segment registers. The last two entries are placeholders; note that their
// text already ends with a colon.
const tchar *segname[NREG] = {
  T("ES"), T("CS"), T("SS"), T("DS"), T("FS"), T("GS"), T("SEG6:"), T("SEG7:")
};

// FPU registers, classical form with parentheses.
const tchar *fpulong[NREG] = {
  T("ST(0)"), T("ST(1)"), T("ST(2)"), T("ST(3)"),
  T("ST(4)"), T("ST(5)"), T("ST(6)"), T("ST(7)")
};

// FPU registers, short form without parentheses.
const tchar *fpushort[NREG] = {
  T("ST0"), T("ST1"), T("ST2"), T("ST3"), T("ST4"), T("ST5"), T("ST6"), T("ST7")
};

// MMX/3DNow! registers.
const tchar *mmxname[NREG] = {
  T("MM0"), T("MM1"), T("MM2"), T("MM3"), T("MM4"), T("MM5"), T("MM6"), T("MM7")
};

// 128-bit SSE registers.
const tchar *sse128[NREG] = {
  T("XMM0"), T("XMM1"), T("XMM2"), T("XMM3"),
  T("XMM4"), T("XMM5"), T("XMM6"), T("XMM7")
};

// 256-bit SSE registers.
const tchar *sse256[NREG] = {
  T("YMM0"), T("YMM1"), T("YMM2"), T("YMM3"),
  T("YMM4"), T("YMM5"), T("YMM6"), T("YMM7")
};

// Control registers.
const tchar *crname[NREG] = {
  T("CR0"), T("CR1"), T("CR2"), T("CR3"), T("CR4"), T("CR5"), T("CR6"), T("CR7")
};

// Debug registers.
const tchar *drname[NREG] = {
  T("DR0"), T("DR1"), T("DR2"), T("DR3"), T("DR4"), T("DR5"), T("DR6"), T("DR7")
};
// Names of data types, indexed by the size of the data in bytes (0..32).
// Sizes that have no name are NULL. Depending on ssesizemode, the name of the
// 16-byte type (DQWORD) may be replaced by XMMWORD and that of the 32-byte
// type (QQWORD) by YMMWORD when memory operand is decoded.
const tchar *sizename[33] = {
  NULL, T("BYTE"), T("WORD"), NULL, T("DWORD"), NULL, T("FWORD"), NULL, //  0..7
  T("QWORD"), NULL, T("TBYTE"), NULL, NULL, NULL, NULL, NULL,           //  8..15
  T("DQWORD"), NULL, NULL, NULL, NULL, NULL, NULL, NULL,                // 16..23
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                       // 24..31
  T("QQWORD")                                                           // 32
};

// Keywords that introduce immediate data, same layout as sizename[]. HLA uses
// sizename[] instead of sizekey[].
const tchar *sizekey[33] = {
  NULL, T("DB"), T("DW"), NULL, T("DD"), NULL, T("DF"), NULL,           //  0..7
  T("DQ"), NULL, T("DT"), NULL, NULL, NULL, NULL, NULL,                 //  8..15
  T("DDQ"), NULL, NULL, NULL, NULL, NULL, NULL, NULL,                   // 16..23
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                       // 24..31
  T("DQQ")                                                              // 32
};

// Keywords that introduce immediate data in AT&T format, same layout as
// sizename[].
const tchar *sizeatt[33] = {
  NULL, T(".BYTE"), T(".WORD"), NULL, T(".LONG"), NULL, T(".FWORD"), NULL,
  T(".QUAD"), NULL, T(".TBYTE"), NULL, NULL, NULL, NULL, NULL,          //  8..15
  T(".DQUAD"), NULL, NULL, NULL, NULL, NULL, NULL, NULL,                // 16..23
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                       // 24..31
  T(".QQUAD")                                                           // 32
};
// Names of comparison predicates, indexed by predicate code. SSE commands use
// codes [0..7], VEX commands use the whole range [0..31].
const tchar *ssepredicate[32] = {
  T("EQ"),                             // 0x00
  T("LT"),                             // 0x01
  T("LE"),                             // 0x02
  T("UNORD"),                          // 0x03
  T("NEQ"),                            // 0x04
  T("NLT"),                            // 0x05
  T("NLE"),                            // 0x06
  T("ORD"),                            // 0x07
  T("EQ_UQ"),                          // 0x08
  T("NGE"),                            // 0x09
  T("NGT"),                            // 0x0A
  T("FALSE"),                          // 0x0B
  T("NEQ_OQ"),                         // 0x0C
  T("GE"),                             // 0x0D
  T("GT"),                             // 0x0E
  T("TRUE"),                           // 0x0F
  T("EQ_OS"),                          // 0x10
  T("LT_OQ"),                          // 0x11
  T("LE_OQ"),                          // 0x12
  T("UNORD_S"),                        // 0x13
  T("NEQ_US"),                         // 0x14
  T("NLT_UQ"),                         // 0x15
  T("NLE_UQ"),                         // 0x16
  T("ORD_S"),                          // 0x17
  T("EQ_US"),                          // 0x18
  T("NGE_UQ"),                         // 0x19
  T("NGT_UQ"),                         // 0x1A
  T("FALSE_OS"),                       // 0x1B
  T("NEQ_OS"),                         // 0x1C
  T("GE_OQ"),                          // 0x1D
  T("GT_OQ"),                          // 0x1E
  T("TRUE_US")                         // 0x1F
};
////////////////////////////////////////////////////////////////////////////////
///////////////////////////////// DISASSEMBLER /////////////////////////////////
// Working state that the decoding routines share while one command is being
// disassembled. Helper functions receive a pointer to this structure, read
// the configuration and prefix information from it and accumulate warnings
// in da->warnings.
typedef struct t_imdata { // Intermediate disassembler data
t_disasm *da; // Result of disassembly (warnings are OR'ed into it)
ulong damode; // Disassembling mode, set of DA_xxx
t_config *config; // Disassembler configuration (style, number formats)
// Optional callback, may be NULL. Called with the constant part of a memory
// address; if it returns non-zero, text placed into s is used instead of
// the number. Not called for addresses relative to FS and GS.
int (*decodeaddress)(tchar *s,ulong addr);
ulong prefixlist; // List of command's prefixes, PF_xxx
int ssesize; // Size of SSE operands (16/32 bytes)
ulong immsize1; // Size of first immediate constant
ulong immsize2; // Size of second immediate constant
ulong mainsize; // Size of command with prefixes
ulong modsize; // Size of ModRegRM/SIB bytes
ulong dispsize; // Size of address offset
int usesdatasize; // May have data size prefix
int usesaddrsize; // May have address size prefix
int usessegment; // May have segment override prefix
} t_imdata;
// Built-in configuration for MASM-style output. Fields are initialized
// positionally, so the order of the values must follow the declaration of
// t_config (declared elsewhere; comments below name the purpose of each
// value). NOTE(review): presumably used when caller supplies no configuration
// of its own - confirm where Disasm() selects the configuration.
static t_config defconfig = { // Default disassembler configuration
DAMODE_MASM, // Main style, one of DAMODE_xxx
NUM_STD|NUM_DECIMAL, // Constant part of address, NUM_xxx
NUM_STD|NUM_LONG, // Jump/call destination, NUM_xxx
NUM_STD|NUM_LONG, // Binary constants, NUM_xxx
NUM_STD|NUM_DECIMAL, // Numeric constants, NUM_xxx
0, // Force lowercase display
0, // Tab between mnemonic and arguments
0, // Extra space between arguments
0, // Use RET instead of RETN
1, // Use short form of string commands
0, // Display default segments in listing
1, // Always show memory size
0, // Show NEAR modifiers
1, // How to decode size of SSE operands
0, // How to decode jump hints
0, // How to decode size-sensitive mnemonics
0, // How to decode top of FPU stack
0 // Highlight operands
};
// Built-in configuration for AT&T-style output: 0x-prefixed hexadecimal
// numbers (NUM_X), lowercase text, tab after mnemonic and extra space between
// the arguments, memory size not shown. Fields are initialized positionally,
// so the order of the values must follow the declaration of t_config
// (declared elsewhere).
static t_config attconfig = { // AT&T disassembler configuration
DAMODE_ATT, // Main style, one of DAMODE_xxx
NUM_X|NUM_DECIMAL, // Constant part of address, NUM_xxx
NUM_X|NUM_LONG, // Jump/call destination, NUM_xxx
NUM_X|NUM_LONG, // Binary constants, NUM_xxx
NUM_X|NUM_DECIMAL, // Numeric constants, NUM_xxx
1, // Force lowercase display
1, // Tab between mnemonic and arguments
1, // Extra space between arguments
0, // Use RET instead of RETN
1, // Use short form of string commands
0, // Display default segments in listing
0, // Always show memory size
0, // Show NEAR modifiers
1, // How to decode size of SSE operands
0, // How to decode jump hints
0, // How to decode size-sensitive mnemonics
0, // How to decode top of FPU stack
0 // Highlight operands
};
////////////////////////////////////////////////////////////////////////////////
////////////////////////////// SERVICE FUNCTIONS ///////////////////////////////
// Digit tables, indexed by the value of a nibble (0..15). They are also used
// to produce decimal digits (index 0..9).
static tchar hexcharu[16] = { // Nibble-to-hexdigit table, uppercase
T('0'), T('1'), T('2'), T('3'), T('4'), T('5'), T('6'), T('7'),
T('8'), T('9'), T('A'), T('B'), T('C'), T('D'), T('E'), T('F') };
static tchar hexcharl[16] = { // Nibble-to-hexdigit table, lowercase
T('0'), T('1'), T('2'), T('3'), T('4'), T('5'), T('6'), T('7'),
T('8'), T('9'), T('a'), T('b'), T('c'), T('d'), T('e'), T('f') };
// Lowercase conversion table for character codes 0..255. It is filled by
// Preparedisasm() and is all zeros before that call, so Tcopycase() must not
// be used with lowercase=1 until the tables are prepared.
static tchar cvtlower[256];
// Bounded string copy. Transfers characters from src to dest until either the
// end of src is reached or n-1 characters are copied, then terminates dest
// with null. Returns number of copied characters, terminal null not counted.
// If n is zero or negative, dest is left untouched and 0 is returned.
// Attention, pointers are not verified!
static int Tstrcopy(tchar *dest,int n,const tchar *src) {
  int count=0;
  if (n>0) {
    while (count<n-1 && src[count]!=T('\0')) {
      dest[count]=src[count];
      count++;
    }
    dest[count]=T('\0');
  }
  return count;
}
// Copies at most n-1 characters from src to dest and assures that dest is
// null-terminated. If lowercase is non-zero, simultaneously converts text to
// lower case using table cvtlower[] (filled by Preparedisasm()). Characters
// with codes outside of the table (negative if tchar is signed, or 256 and
// above in UNICODE mode) are copied unchanged instead of indexing the table
// out of its bounds. Returns number of copied characters, not including the
// terminal null; if n is zero or negative, does nothing and returns 0.
// Attention, does not check that pointers are correct!
static int Tcopycase(tchar *dest,int n,const tchar *src,int lowercase) {
  int i;
  tchar c;
  if (n<=0)
    return 0;
  for (i=0; i<n-1; i++) {
    c=src[i];
    if (c==T('\0')) break;
    // Table lookup is much faster than call to tolower(), but table has only
    // 256 entries. Cast to unsigned maps negative codes to large values, so
    // single comparison rejects both kinds of out-of-range characters.
    if (lowercase && (unsigned long)c<256UL)
      c=cvtlower[c];
    dest[i]=c;
  }
  dest[i]=T('\0');
  return i;
}
// Dumps ncode bytes of code to the string s as hexadecimal text, two digits
// per byte without separators. Digits A..F are uppercase unless lowercase is
// non-zero. Returns length of resulting text in characters, not including
// terminal zero (0 if ncode is zero or negative). Attention, does not check
// that input parameters are correct or that s has sufficient length
// (2*ncode+1 characters are necessary)!
static int Thexdump(tchar *s,uchar *code,int ncode,int lowercase) {
  int d,n;
  // Pointer to digit table must be an ordinary local variable: if it were
  // static, concurrent calls with different case would share it and the
  // function would no longer be reentrant.
  const tchar *hexchar;
  hexchar=(lowercase?hexcharl:hexcharu);
  n=0;
  while (ncode>0) {
    d=*code++;
    s[n++]=hexchar[(d>>4) & 0x0F];     // High nibble first
    s[n++]=hexchar[d & 0x0F];
    ncode--;
  }
  s[n]=T('\0');
  return n;
}
// Converts unsigned 1-, 2- or 4-byte number u to text, according to the
// configuration in im->config and type of argument arg (B_xxx; only bits
// B_MODMASK are used). Type of argument selects which number format applies:
// memmode for addresses, jmpmode for jump and call destinations, binconstmode
// for binary constants and constmode for all the rest. Number is decoded as
// decimal if format allows decimals and argument is either explicitly signed
// or unsigned, or is small (below DECLIMIT) and is neither binary constant
// nor jump destination; otherwise it is decoded as hexadecimal. Any size
// other than 1 or 2 is treated as 4. String s must be at least SHORTNAME
// characters long. Returns length of resulting text in characters, not
// including the terminal zero.
static int Hexprint(int size,tchar *s,ulong u,const t_imdata *im,ulong arg) {
  int i,k,ndigit,lastdigit;
  ulong nummode,mod;
  tchar buf[SHORTNAME];
  // Pointer to digit table must be an ordinary local variable: if it were
  // static, concurrent calls with different configurations would share it and
  // the function would no longer be reentrant.
  const tchar *hexchar;
  if (size==1)
    u&=0x000000FF;                     // 8-bit number
  else if (size==2)
    u&=0x0000FFFF;                     // 16-bit number
  else
    size=4;                            // Correct possible errors
  mod=arg & B_MODMASK;
  if (mod==B_ADDR)
    nummode=im->config->memmode;
  else if (mod==B_JMPCALL || mod==B_JMPCALLFAR)
    nummode=im->config->jmpmode;
  else if (mod==B_BINARY)
    nummode=im->config->binconstmode;
  else
    nummode=im->config->constmode;
  hexchar=(im->config->lowercase?hexcharl:hexcharu);
  // Text is built backwards, from the last character of buf to the first.
  buf[SHORTNAME-1]=T('\0');
  k=SHORTNAME-1;
  lastdigit=0;
  if ((nummode & NUM_DECIMAL)!=0 && (mod==B_SIGNED || mod==B_UNSIGNED ||
    (u<DECLIMIT && mod!=B_BINARY && mod!=B_JMPCALL && mod!=B_JMPCALLFAR))
  ) {
    // Decode as decimal unsigned number.
    if ((nummode & NUM_STYLE)==NUM_OLLY && u>=10)
      buf[--k]=T('.');                 // Period marks decimals in OllyDbg
    do {
      buf[--k]=hexchar[u%10];
      u/=10;
    } while (u!=0);
  }
  else {
    // Decode as hexadecimal number.
    if (nummode & NUM_LONG)            // 2, 4 or 8 significant digits
      ndigit=size*2;
    else
      ndigit=1;
    if ((nummode & NUM_STYLE)==NUM_STD)
      buf[--k]=T('h');                 // Suffix, placed first as text grows
                                       // backwards
    // At least one digit is always produced, so lastdigit finally keeps the
    // most significant digit of the text.
    for (i=0; i<ndigit || u!=0; i++) {
      lastdigit=u & 0x0F;
      buf[--k]=hexchar[lastdigit];
      u=(u>>4) & 0x0FFFFFFF;
    }
    if ((nummode & NUM_STYLE)==NUM_X) {
      buf[--k]=T('x');                 // Prefix 0x
      buf[--k]=T('0');
    }
    else if (lastdigit>=10 &&
      ((nummode & NUM_STYLE)!=NUM_OLLY || i<(mod==B_BINARY?size*2:8)))
      // Number begins with a letter, prepend zero. In OllyDbg style this is
      // done only if number has less than full count of digits.
      buf[--k]=T('0');
  }
  return Tstrcopy(s,SHORTNAME,buf+k);
}
////////////////////////////////////////////////////////////////////////////////
///////////////////////// INTERNAL DISASSEMBLER TABLES /////////////////////////
// All tables below are filled by Preparedisasm(). cmdchain is allocated there
// (NCHAIN elements) and released by Finishdisasm(); NULL means that tables
// are not yet prepared. The remaining tables have one descriptor for each of
// the 256 possible values of the ModRM or SIB byte.
t_chain *cmdchain; // Commands sorted by first CMDMASK bits
t_modrm modrm16[256]; // 16-bit ModRM decodings
t_modrm modrm32[256]; // 32-bit ModRM decodings without SIB
t_modrm sib0[256]; // ModRM-SIB decodings with Mod=00
t_modrm sib1[256]; // ModRM-SIB decodings with Mod=01
t_modrm sib2[256]; // ModRM-SIB decodings with Mod=10
// Service function, fills ModRM-SIB decoding table for the cases where
// displacement always follows SIB byte (Mod=01 and Mod=10). table points to
// 256 descriptors indexed by SIB byte, dispsize is the size of displacement
// in bytes (1 or 4).
static void Preparesibdisp(t_modrm *table,int dispsize) {
  int n,c,reg,sreg,scale;
  t_modrm *psib;
  memset(table,0,256*sizeof(t_modrm));
  for (c=0x00,psib=table; c<=0xFF; c++,psib++) {
    psib->features=OP_MEMORY|OP_INDEXED|OP_OPCONST;
    psib->reg=REG_UNDEF;
    reg=c & 0x07;                      // Base register
    sreg=(c>>3) & 0x07;                // Index register
    scale=1<<((c>>6) & 0x03);          // Scale of index: 1, 2, 4 or 8
    psib->size=2+dispsize;             // ModRM, SIB and displacement
    psib->dispsize=dispsize;
    psib->defseg=((reg==REG_ESP || reg==REG_EBP)?SEG_SS:SEG_DS);
    psib->scale[reg]=1;
    psib->basereg=reg;
    psib->aregs=(1<<reg);
    // Intel-style register part: INDEX*SCALE+BASE. Index code ESP means
    // that there is no index register.
    if (sreg!=REG_ESP) {
      psib->scale[sreg]+=(uchar)scale; // Base and index may coincide
      n=Tstrcopy(psib->ardec,SHORTNAME,regname32[sreg]);
      psib->aregs|=(1<<sreg);
      if (scale>1) {
        psib->ardec[n++]=T('*');
        psib->ardec[n++]=(tchar)(T('0')+scale);
      }
    }
    else
      n=0;
    if (n!=0) psib->ardec[n++]=T('+');
    Tstrcopy(psib->ardec+n,SHORTNAME-n,regname32[reg]);
    // AT&T-style register part: %BASE,%INDEX,SCALE.
    psib->aratt[0]=T('%'); n=1;
    n+=Tstrcopy(psib->aratt+n,SHORTNAME-n,regname32[reg]);
    if (sreg!=REG_ESP) {
      n+=Tstrcopy(psib->aratt+n,SHORTNAME-n,T(",%"));
      n+=Tstrcopy(psib->aratt+n,SHORTNAME-n,regname32[sreg]);
      if (scale>1) {
        psib->aratt[n++]=T(',');
        psib->aratt[n++]=(tchar)(T('0')+scale);
        psib->aratt[n]=T('\0');
      }
    }
  }
}

// Initializes disassembler tables: chains of command descriptors (cmdchain),
// ModRM and SIB decodings (modrm16, modrm32, sib0, sib1, sib2) and lowercase
// conversion table (cvtlower). Call this function once during startup.
// Repeated calls after successful initialization do nothing. Returns 0 on
// success and -1 if initialization was unsuccessful (low memory or too many
// commands for NCHAIN links). In the last case, continuation is not possible
// and program must terminate; memory allocated so far is released and
// cmdchain is reset to NULL, so that later call can't mistake half-filled
// tables for initialized ones.
int Preparedisasm(void) {
  int n,c,reg,sreg,scale,nchain;
  ulong u,code,mask;
  const t_bincmd *pcmd;
  t_chain *pchain;
  t_modrm *pmrm,*psib;
  if (cmdchain!=NULL)
    return 0;                          // Already initialized
  // Sort command descriptors into command chains by first CMDMASK bits. First
  // CMDMASK+1 elements of cmdchain are the heads of the chains, remaining
  // elements are used as links when chain gets longer.
  cmdchain=(t_chain *)malloc(NCHAIN*sizeof(t_chain));
  if (cmdchain==NULL)                  // Low memory
    return -1;
  memset(cmdchain,0,NCHAIN*sizeof(t_chain));
  nchain=CMDMASK+1;                    // Number of command chains
  for (pcmd=bincmd; pcmd->length!=0; pcmd++) {
    if ((pcmd->cmdtype & D_CMDTYPE)==D_PSEUDO)
      continue;                        // Pseudocommand, for search models only
    code=pcmd->code;
    mask=pcmd->mask & CMDMASK;
    for (u=0; u<CMDMASK+1; u++) {
      if (((u ^ code) & mask)!=0)
        continue;                      // Command has different first bytes
      pchain=cmdchain+u;
      while (pchain->pcmd!=NULL && pchain->pnext!=NULL)
        pchain=pchain->pnext;          // Walk chain to the end
      if (pchain->pcmd==NULL)
        pchain->pcmd=pcmd;
      else if (nchain>=NCHAIN) {
        // Too many commands. Release the chains, otherwise memory leaks and
        // next call reports success without preparing the tables.
        free(cmdchain);
        cmdchain=NULL;
        return -1;
      }
      else {
        pchain->pnext=cmdchain+nchain; // Prolongate chain
        pchain=pchain->pnext;
        pchain->pcmd=pcmd;
        nchain++;
      }
    }
  }
  // Prepare 16-bit ModRM decodings.
  memset(modrm16,0,sizeof(modrm16));
  for (c=0x00,pmrm=modrm16; c<=0xFF; c++,pmrm++) {
    reg=c & 0x07;
    if ((c & 0xC0)==0xC0) {
      // Register in ModRM.
      pmrm->size=1;
      pmrm->features=0;                // Register, its type as yet unknown
      pmrm->reg=reg;
      pmrm->defseg=SEG_UNDEF;
      pmrm->basereg=REG_UNDEF;
    }
    else if ((c & 0xC7)==0x06) {
      // Special case of immediate address.
      pmrm->size=3;
      pmrm->dispsize=2;
      pmrm->features=OP_MEMORY|OP_OPCONST|OP_ADDR16;
      pmrm->reg=REG_UNDEF;
      pmrm->defseg=SEG_DS;
      pmrm->basereg=REG_UNDEF;
    }
    else {
      pmrm->features=OP_MEMORY|OP_INDEXED|OP_ADDR16;
      if ((c & 0xC0)==0x40) {
        pmrm->dispsize=1; pmrm->features|=OP_OPCONST; }
      else if ((c & 0xC0)==0x80) {
        pmrm->dispsize=2; pmrm->features|=OP_OPCONST; }
      pmrm->size=pmrm->dispsize+1;
      pmrm->reg=REG_UNDEF;
      switch (reg) {
        case 0:
          pmrm->scale[REG_EBX]=1; pmrm->scale[REG_ESI]=1;
          pmrm->defseg=SEG_DS;
          tstrcpy(pmrm->ardec,T("BX+SI"));
          tstrcpy(pmrm->aratt,T("%BX,%SI"));
          pmrm->aregs=(1<<REG_EBX)|(1<<REG_ESI);
          pmrm->basereg=REG_ESI; break;
        case 1:
          pmrm->scale[REG_EBX]=1; pmrm->scale[REG_EDI]=1;
          pmrm->defseg=SEG_DS;
          tstrcpy(pmrm->ardec,T("BX+DI"));
          tstrcpy(pmrm->aratt,T("%BX,%DI"));
          pmrm->aregs=(1<<REG_EBX)|(1<<REG_EDI);
          pmrm->basereg=REG_EDI; break;
        case 2:
          pmrm->scale[REG_EBP]=1; pmrm->scale[REG_ESI]=1;
          pmrm->defseg=SEG_SS;
          tstrcpy(pmrm->ardec,T("BP+SI"));
          tstrcpy(pmrm->aratt,T("%BP,%SI"));
          pmrm->aregs=(1<<REG_EBP)|(1<<REG_ESI);
          pmrm->basereg=REG_ESI; break;
        case 3:
          pmrm->scale[REG_EBP]=1; pmrm->scale[REG_EDI]=1;
          pmrm->defseg=SEG_SS;
          tstrcpy(pmrm->ardec,T("BP+DI"));
          tstrcpy(pmrm->aratt,T("%BP,%DI"));
          pmrm->aregs=(1<<REG_EBP)|(1<<REG_EDI);
          pmrm->basereg=REG_EDI; break;
        case 4:
          pmrm->scale[REG_ESI]=1;
          pmrm->defseg=SEG_DS;
          tstrcpy(pmrm->ardec,T("SI"));
          tstrcpy(pmrm->aratt,T("%SI"));
          pmrm->aregs=(1<<REG_ESI);
          pmrm->basereg=REG_ESI; break;
        case 5:
          pmrm->scale[REG_EDI]=1;
          pmrm->defseg=SEG_DS;
          tstrcpy(pmrm->ardec,T("DI"));
          tstrcpy(pmrm->aratt,T("%DI"));
          pmrm->aregs=(1<<REG_EDI);
          pmrm->basereg=REG_EDI; break;
        case 6:
          pmrm->scale[REG_EBP]=1;
          pmrm->defseg=SEG_SS;
          tstrcpy(pmrm->ardec,T("BP"));
          tstrcpy(pmrm->aratt,T("%BP"));
          pmrm->aregs=(1<<REG_EBP);
          pmrm->basereg=REG_EBP; break;
        case 7:
          pmrm->scale[REG_EBX]=1;
          pmrm->defseg=SEG_DS;
          tstrcpy(pmrm->ardec,T("BX"));
          tstrcpy(pmrm->aratt,T("%BX"));
          pmrm->aregs=(1<<REG_EBX);
          pmrm->basereg=REG_EBX;
          break;
      }
    }
  }
  // Prepare 32-bit ModRM decodings without SIB.
  memset(modrm32,0,sizeof(modrm32));
  for (c=0x00,pmrm=modrm32; c<=0xFF; c++,pmrm++) {
    reg=c & 0x07;
    if ((c & 0xC0)==0xC0) {
      // Register in ModRM.
      pmrm->size=1;
      pmrm->features=0;                // Register, its type as yet unknown
      pmrm->reg=reg;
      pmrm->defseg=SEG_UNDEF;
      pmrm->basereg=REG_UNDEF;
    }
    else if ((c & 0xC7)==0x05) {
      // Special case of 32-bit immediate address.
      pmrm->size=5;
      pmrm->dispsize=4;
      pmrm->features=OP_MEMORY|OP_OPCONST;
      pmrm->reg=REG_UNDEF;
      pmrm->defseg=SEG_DS;
      pmrm->basereg=REG_UNDEF;
    }
    else {
      // Regular memory address.
      pmrm->features=OP_MEMORY;
      pmrm->reg=REG_UNDEF;
      if ((c & 0xC0)==0x40) {
        pmrm->dispsize=1;              // 8-bit sign-extended displacement
        pmrm->features|=OP_OPCONST; }
      else if ((c & 0xC0)==0x80) {
        pmrm->dispsize=4;              // 32-bit displacement
        pmrm->features|=OP_OPCONST; }
      if (reg==REG_ESP) {
        // SIB byte follows, decode with sib0, sib1 or sib2 according to Mod.
        if ((c & 0xC0)==0x00) pmrm->psib=sib0;
        else if ((c & 0xC0)==0x40) pmrm->psib=sib1;
        else pmrm->psib=sib2;
        pmrm->basereg=REG_UNDEF;
      }
      else {
        pmrm->size=1+pmrm->dispsize;
        pmrm->features|=OP_INDEXED;
        pmrm->defseg=(reg==REG_EBP?SEG_SS:SEG_DS);
        pmrm->scale[reg]=1;
        tstrcpy(pmrm->ardec,regname32[reg]);
        pmrm->aratt[0]=T('%');
        Tstrcopy(pmrm->aratt+1,SHORTNAME-1,regname32[reg]);
        pmrm->aregs=(1<<reg);
        pmrm->basereg=reg;
      }
    }
  }
  // Prepare 32-bit ModRM decodings with SIB, case Mod=00: usually no disp.
  memset(sib0,0,sizeof(sib0));
  for (c=0x00,psib=sib0; c<=0xFF; c++,psib++) {
    psib->features=OP_MEMORY;
    psib->reg=REG_UNDEF;
    reg=c & 0x07;                      // Base register
    sreg=(c>>3) & 0x07;                // Index register
    scale=1<<((c>>6) & 0x03);          // Scale of index: 1, 2, 4 or 8
    // Index code ESP means that there is no index register.
    if (sreg!=REG_ESP) {
      psib->scale[sreg]=(uchar)scale;
      n=Tstrcopy(psib->ardec,SHORTNAME,regname32[sreg]);
      psib->aregs=(1<<sreg);
      psib->features|=OP_INDEXED;
      if (scale>1) {
        psib->ardec[n++]=T('*');
        psib->ardec[n++]=(tchar)(T('0')+scale);
        psib->ardec[n]=T('\0');
      }
    }
    else
      n=0;
    if (reg==REG_EBP) {
      // With Mod=00, base code EBP means 32-bit displacement without base.
      psib->size=6;
      psib->dispsize=4;
      psib->features|=OP_OPCONST;
      psib->defseg=SEG_DS;
      psib->basereg=REG_UNDEF;
    }
    else {
      psib->size=2;
      psib->defseg=((reg==REG_ESP || reg==REG_EBP)?SEG_SS:SEG_DS);
      psib->scale[reg]++;              // Base and index may coincide
      psib->features|=OP_INDEXED;
      if (n!=0) psib->ardec[n++]=T('+');
      Tstrcopy(psib->ardec+n,SHORTNAME-n,regname32[reg]);
      psib->aregs|=(1<<reg);
      psib->basereg=reg;
    }
    // AT&T-style register part: %BASE,%INDEX,SCALE, base may be absent.
    if (reg!=REG_EBP) {
      psib->aratt[0]=T('%'); n=1;
      n+=Tstrcopy(psib->aratt+n,SHORTNAME-n,regname32[reg]);
    }
    else
      n=0;
    if (sreg!=REG_ESP) {
      n+=Tstrcopy(psib->aratt+n,SHORTNAME-n,T(",%"));
      n+=Tstrcopy(psib->aratt+n,SHORTNAME-n,regname32[sreg]);
      if (scale>1) {
        psib->aratt[n++]=T(',');
        psib->aratt[n++]=(tchar)(T('0')+scale);
        psib->aratt[n]=T('\0');
      }
    }
  }
  // Prepare 32-bit ModRM decodings with SIB, case Mod=01: 8-bit displacement.
  Preparesibdisp(sib1,1);
  // Prepare 32-bit ModRM decodings with SIB, case Mod=10: 32-bit displacement.
  Preparesibdisp(sib2,4);
  // Fill lowercase conversion table. This table replaces tolower(). When
  // compiled with Borland C++ Builder, spares significant time.
  for (c=0; c<256; c++)
    cvtlower[c]=(tchar)ttolower(c);
  // Report success.
  return 0;
}
// Releases command chains allocated by Preparedisasm() and marks tables as
// not initialized. Call this function once during shutdown, when
// disassembling service is no longer necessary. Safe to call if tables were
// never prepared.
void Finishdisasm(void) {
  free(cmdchain);                      // free(NULL) does nothing
  cmdchain=NULL;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////// AUXILIARY ROUTINES //////////////////////////////
// Converts index of 8-bit register to the index of 32-bit register that
// contains it: indices 0..3 (AL, CL, DL, BL) are returned as is, indices 4
// and above (AH, CH, DH, BH) are decreased by 4. Returns REG_UNDEF if index
// is outside of 0..NREG-1.
static int Byteregtodwordreg(int bytereg) {
  if (bytereg>=0 && bytereg<NREG)
    return (bytereg<4?bytereg:bytereg-4);
  return REG_UNDEF;
}
// Determines segment register that applies to the memory operand. If command
// has no segment override prefix, this is the default segment defseg.
// Otherwise returns index of the segment selected by prefix and adds warnings
// to im->da: DAW_DEFSEG if prefix repeats the default segment (superfluous),
// and DAW_SEGPREFIX if operand is marked with B_NOSEG in arg. Note that
// Disasm() assures that two segment override bits in im->prefixlist can't be
// set simultaneously; an unrecognized combination is treated as no prefix.
static int Getsegment(t_imdata *im,int arg,int defseg) {
  int seg;
  ulong override;
  override=im->prefixlist & PF_SEGMASK;
  if (override==0)
    return defseg;                     // Optimization for most frequent case
  // Translate prefix bit to the index of segment register.
  switch (override) {
    case PF_ES: seg=SEG_ES; break;
    case PF_CS: seg=SEG_CS; break;
    case PF_SS: seg=SEG_SS; break;
    case PF_DS: seg=SEG_DS; break;
    case PF_FS: seg=SEG_FS; break;
    case PF_GS: seg=SEG_GS; break;
    default: return defseg;            // Not a valid single override
  }
  // Warnings are the same for all segments.
  if (seg==defseg)
    im->da->warnings|=DAW_DEFSEG;
  if (arg & B_NOSEG)
    im->da->warnings|=DAW_SEGPREFIX;
  return seg;
}
// Decodes generalized memory address to text in the style selected by
// im->config->disasmmode and places it into s.
//   im        - intermediate disassembler data (configuration, prefixes and
//               optional callback decodeaddress);
//   arg       - type and flags of operand, B_xxx;
//   datasize  - size of operand in bytes, used as index to sizename[];
//   seg       - index of segment register that applies to the address;
//   regpart   - text of register part of address, may be empty string;
//   constpart - constant part of address (offset);
//   s         - resulting text. Writes are limited by TEXTLEN, so buffer is
//               expected to be at least TEXTLEN characters long.
// If im->decodeaddress is not NULL and segment is neither FS nor GS, callback
// is asked to replace the constant by a symbolic label. Small negative
// offsets (above NEGLIMIT) are displayed as subtraction. Address that
// consists only of zero offset is displayed as explicit 0 in all styles.
static void Memaddrtotext(t_imdata *im,int arg,int datasize,int seg,
  const tchar *regpart,long constpart,tchar *s) {
  int n;
  tchar label[TEXTLEN];
  if (im->config->disasmmode==DAMODE_ATT) {
    // AT&T memory address syntax is so different from Intel that I process it
    // separately from the rest.
    n=0;
    if ((arg & B_MODMASK)==B_JMPCALL)
      s[n++]=T('*');                   // Marks indirect jump or call
    // On request, I show only explicit segments.
    if ((im->config->putdefseg && (arg & B_NOSEG)==0) ||
      (im->prefixlist & PF_SEGMASK)!=0
    ) {
      s[n++]=T('%');
      n+=Tcopycase(s+n,TEXTLEN-n,segname[seg],im->config->lowercase);
      s[n++]=T(':');
    }
    // Add constant part (offset). It can be omitted only if register part is
    // present; absolute address 0 (like in FS:[0]) must be shown explicitly,
    // otherwise operand has no address at all.
    if (constpart<0 && constpart>NEGLIMIT) {
      s[n++]=T('-');
      n+=Hexprint(((im->prefixlist & PF_ASIZE)?2:4),s+n,-constpart,im,B_ADDR);
    }
    else if (constpart!=0 || regpart[0]==T('\0')) {
      if (seg!=SEG_FS && seg!=SEG_GS &&
        im->decodeaddress!=NULL &&
        im->decodeaddress(label,constpart)!=0)
        n+=Tstrcopy(s+n,TEXTLEN-n,label);
      else
        n+=Hexprint(((im->prefixlist & PF_ASIZE)?2:4),s+n,constpart,im,B_ADDR);
    }
    // Add register part of address, may be absent.
    if (regpart[0]!=T('\0')) {
      n+=Tstrcopy(s+n,TEXTLEN-n,T("("));
      n+=Tcopycase(s+n,TEXTLEN-n,regpart,im->config->lowercase);
      n+=Tstrcopy(s+n,TEXTLEN-n,T(")"));
    }
  }
  else {
    // Mark far and near jump/call addresses.
    if ((arg & B_MODMASK)==B_JMPCALLFAR)
      n=Tcopycase(s,TEXTLEN,T("FAR "),im->config->lowercase);
    else if (im->config->shownear && (arg & B_MODMASK)==B_JMPCALL)
      n=Tcopycase(s,TEXTLEN,T("NEAR "),im->config->lowercase);
    else
      n=0;
    // In all styles except MASM, opening bracket precedes size and segment.
    if (im->config->disasmmode!=DAMODE_MASM) {
      s[n++]=T('[');
      if ((im->prefixlist & PF_ASIZE)!=0 && regpart[0]==T('\0'))
        n+=Tcopycase(s+n,TEXTLEN-n,T("SMALL "),im->config->lowercase);
    }
    // If operand is longer than 32 bytes or of type B_ANYMEM (memory contents
    // unimportant), its size is not displayed. Otherwise, bit B_SHOWSIZE
    // indicates that explicit operand's size can't be omitted.
    if (datasize<=32 && (arg & B_ARGMASK)!=B_ANYMEM &&
      (im->config->showmemsize!=0 || (arg & B_SHOWSIZE)!=0)
    ) {
      if (im->config->disasmmode==DAMODE_HLA)
        n+=Tcopycase(s+n,TEXTLEN-n,T("TYPE "),im->config->lowercase);
      if ((arg & B_ARGMASK)==B_INTPAIR && im->config->disasmmode==DAMODE_IDEAL){
        // If operand is a pair of integers (BOUND), Borland in IDEAL mode
        // expects size of single integer, whereas MASM requires size of the
        // whole pair.
        n+=Tcopycase(s+n,TEXTLEN-n,sizename[datasize/2],im->config->lowercase);
        s[n++]=T(' ');
      }
      else if (datasize==16 && im->config->ssesizemode==1)
        n+=Tcopycase(s+n,TEXTLEN-n,T("XMMWORD "),im->config->lowercase);
      else if (datasize==32 && im->config->ssesizemode==1)
        n+=Tcopycase(s+n,TEXTLEN-n,T("YMMWORD "),im->config->lowercase);
      else {
        n+=Tcopycase(s+n,TEXTLEN-n,sizename[datasize],im->config->lowercase);
        s[n++]=T(' ');
      }
      if (im->config->disasmmode==DAMODE_MASM)
        n+=Tcopycase(s+n,TEXTLEN-n,T("PTR "),im->config->lowercase);
    }
    // On request, I show only explicit segments.
    if ((im->config->putdefseg && (arg & B_NOSEG)==0) ||
      (im->prefixlist & PF_SEGMASK)!=0
    ) {
      n+=Tcopycase(s+n,TEXTLEN-n,segname[seg],im->config->lowercase);
      s[n++]=T(':');
    }
    // In MASM style, opening bracket follows size and segment.
    if (im->config->disasmmode==DAMODE_MASM) {
      s[n++]=T('[');
      if ((im->prefixlist & PF_ASIZE)!=0 && regpart[0]==T('\0'))
        n+=Tcopycase(s+n,TEXTLEN-n,T("SMALL "),im->config->lowercase);
    }
    // Add register part of address, may be absent.
    if (regpart[0]!=T('\0'))
      n+=Tcopycase(s+n,TEXTLEN-n,regpart,im->config->lowercase);
    // Add constant part (offset). Zero offset is omitted only if register
    // part is present.
    if (regpart[0]!=T('\0') && constpart<0 && constpart>NEGLIMIT) {
      s[n++]=T('-');
      n+=Hexprint(((im->prefixlist & PF_ASIZE)?2:4),s+n,-constpart,im,B_ADDR);
    }
    else if (constpart!=0 || regpart[0]==T('\0')) {
      if (regpart[0]!=T('\0')) s[n++]=T('+');
      if (seg!=SEG_FS && seg!=SEG_GS &&
        im->decodeaddress!=NULL &&
        im->decodeaddress(label,constpart)!=0)
        n+=Tstrcopy(s+n,TEXTLEN-n,label);
      else
        n+=Hexprint(((im->prefixlist & PF_ASIZE)?2:4),s+n,constpart,im,B_ADDR);
    }
    n+=Tstrcopy(s+n,TEXTLEN-n,T("]"));
  }
  s[n]=T('\0');
}
// Service function: maps the type of an MMX, 3DNow! or SSE operand to its
// granularity, i.e. the size in bytes of a single element packed into the
// operand. Only the bits of arg selected by B_ARGMASK are examined. Operand
// types that are not listed below are reported with granularity 1.
static int Getgranularity(ulong arg) {
  switch (arg & B_ARGMASK) {
    // Operands made of 8-bit elements.
    case B_MREG8x8:                    // MMX register as 8 8-bit integers
    case B_MMX8x8:                     // MMX reg/memory as 8 8-bit integers
    case B_MMX8x8DI:                   // MMX 8 8-bit integers at [DS:(E)DI]
    case B_XMM0I8x16:                  // XMM0 as 16 8-bit integers
    case B_SREGI8x16:                  // SSE register as 16 8-bit sigints
    case B_SVEXI8x16:                  // SSE reg in VEX as 16 8-bit sigints
    case B_SIMMI8x16:                  // SSE reg in immediate 8-bit constant
    case B_SSEI8x16:                   // SSE reg/memory as 16 8-bit sigints
    case B_SSEI8x16DI:                 // SSE 16 8-bit sigints at [DS:(E)DI]
    case B_SSEI8x8L:                   // Low 8 8-bit ints in SSE reg/memory
    case B_SSEI8x4L:                   // Low 4 8-bit ints in SSE reg/memory
    case B_SSEI8x2L:                   // Low 2 8-bit ints in SSE reg/memory
      return 1;
    // Operands made of 16-bit elements.
    case B_MREG16x4:                   // MMX register as 4 16-bit integers
    case B_MMX16x4:                    // MMX reg/memory as 4 16-bit integers
    case B_SREGI16x8:                  // SSE register as 8 16-bit sigints
    case B_SVEXI16x8:                  // SSE reg in VEX as 8 16-bit sigints
    case B_SSEI16x8:                   // SSE reg/memory as 8 16-bit sigints
    case B_SSEI16x4L:                  // Low 4 16-bit ints in SSE reg/memory
    case B_SSEI16x2L:                  // Low 2 16-bit ints in SSE reg/memory
      return 2;
    // Operands made of 32-bit elements, floats first, then integers.
    case B_MREG32x2:                   // MMX register as 2 32-bit integers
    case B_MMX32x2:                    // MMX reg/memory as 2 32-bit integers
    case B_3DREG:                      // 3DNow! register as 2 32-bit floats
    case B_3DNOW:                      // 3DNow! reg/memory as 2 32-bit floats
    case B_SREGF32x4:                  // SSE register as 4 32-bit floats
    case B_SVEXF32x4:                  // SSE reg in VEX as 4 32-bit floats
    case B_SREGF32L:                   // Low 32-bit float in SSE register
    case B_SVEXF32L:                   // Low 32-bit float in SSE in VEX
    case B_SREGF32x2L:                 // Low 2 32-bit floats in SSE register
    case B_SSEF32x4:                   // SSE reg/memory as 4 32-bit floats
    case B_SSEF32L:                    // Low 32-bit float in SSE reg/memory
    case B_SSEF32x2L:                  // Low 2 32-bit floats in SSE reg/memory
    case B_XMM0I32x4:                  // XMM0 as 4 32-bit integers
    case B_SREGI32x4:                  // SSE register as 4 32-bit sigints
    case B_SVEXI32x4:                  // SSE reg in VEX as 4 32-bit sigints
    case B_SREGI32L:                   // Low 32-bit sigint in SSE register
    case B_SREGI32x2L:                 // Low 2 32-bit sigints in SSE register
    case B_SSEI32x4:                   // SSE reg/memory as 4 32-bit sigints
    case B_SSEI32x2L:                  // Low 2 32-bit sigints in SSE reg/memory
      return 4;
    // Operands made of 64-bit elements, floats first, then integers.
    case B_MREG64:                     // MMX register as 1 64-bit integer
    case B_MMX64:                      // MMX reg/memory as 1 64-bit integer
    case B_XMM0I64x2:                  // XMM0 as 2 64-bit integers
    case B_SREGF64x2:                  // SSE register as 2 64-bit floats
    case B_SVEXF64x2:                  // SSE reg in VEX as 2 64-bit floats
    case B_SREGF64L:                   // Low 64-bit float in SSE register
    case B_SVEXF64L:                   // Low 64-bit float in SSE in VEX
    case B_SSEF64x2:                   // SSE reg/memory as 2 64-bit floats
    case B_SSEF64L:                    // Low 64-bit float in SSE reg/memory
    case B_SREGI64x2:                  // SSE register as 2 64-bit sigints
    case B_SVEXI64x2:                  // SSE reg in VEX as 2 64-bit sigints
    case B_SSEI64x2:                   // SSE reg/memory as 2 64-bit sigints
    case B_SREGI64L:                   // Low 64-bit sigint in SSE register
      return 8;
    default:
      return 1;                        // Treat unknown ops as string of bytes
  }
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////// OPERAND DECODING ROUTINES ///////////////////////////
// Decodes an integer register operand of 1, 2 or 4 bytes. Fills the operand
// descriptor op (features, sizes, register index, used/modified register
// masks), merges the same register masks into im->da, raises DAW_NOESP when
// an operand flagged B_NOESP turns out to be ESP and, if DA_TEXT is requested,
// writes the register name to op->text. ATTENTION, the caller is responsible
// for setting usesdatasize and usesaddrsize!
static void Operandintreg(t_imdata *im,ulong datasize,int index,t_operand *op) {
  int pos,container,regbit;
  op->features=OP_REGISTER;
  op->opsize=op->granularity=datasize;
  op->reg=index;
  op->seg=SEG_UNDEF;
  // Register usage is tracked per container register, so a byte register is
  // first translated with Byteregtodwordreg(); wider registers map directly.
  container=(datasize==1?Byteregtodwordreg(index):index);
  regbit=(1<<container);
  // Operand counts as used unless it is flagged B_CHG.
  if ((op->arg & B_CHG)==0) {
    op->uses=regbit;
    im->da->uses|=regbit;
  }
  // Operands flagged B_CHG or B_UPD count as modified.
  if (op->arg & (B_CHG|B_UPD)) {
    op->modifies=regbit;
    im->da->modifies|=regbit;
  }
  // Command does not allow ESP here, warn if it is nevertheless selected.
  if (container==REG_ESP && (op->arg & B_NOESP)!=0)
    im->da->warnings|=DAW_NOESP;
  // Text representation is produced only on request.
  if ((im->damode & DA_TEXT)==0)
    return;
  pos=0;
  if (im->config->disasmmode==DAMODE_ATT) {
    // AT&T syntax: '*' precedes operands of type B_JMPCALL and '%' precedes
    // every register name.
    if ((op->arg & B_MODMASK)==B_JMPCALL)
      op->text[pos++]=T('*');
    op->text[pos++]=T('%');
  }
  // Select table of register names according to the operand size.
  switch (datasize) {
    case 4:
      Tcopycase(op->text+pos,TEXTLEN-pos,regname32[index],
        im->config->lowercase);
      break;
    case 1:
      Tcopycase(op->text+pos,TEXTLEN-pos,regname8[index],
        im->config->lowercase);
      break;
    default:                           // Any other size is taken as 16-bit
      Tcopycase(op->text+pos,TEXTLEN-pos,regname16[index],
        im->config->lowercase);
      break;
  }
}
// Decodes 16/32-bit memory address in ModRM/SIB bytes. Returns full length of
// address (ModRM+SIB+displacement) in bytes, 0 if ModRM indicates register
// operand and -1 on error. ATTENTION, calling routine must set usesdatasize,
// granularity (preset to datasize) and reg together with OP_MODREG by itself!
static int Operandmodrm(t_imdata *im,ulong datasize,uchar *cmd,ulong cmdsize,
t_operand *op) {
tchar *ardec;
t_modrm *pmrm;
if (cmdsize==0) {
im->da->errors|=DAE_CROSS; // Command crosses end of memory block
return -1; };
// Decode ModRM/SIB. Most of the work is already done in Preparedisasm(), we
// only need to find corresponding t_modrm.
if (im->prefixlist & PF_ASIZE) {
pmrm=modrm16+cmd[0]; // 16-bit address
im->modsize=1; }
else {
pmrm=modrm32+cmd[0];
if (pmrm->psib==NULL)
im->modsize=1; // No SIB byte
else {
if (cmdsize<2) {
im->da->errors|=DAE_CROSS; // Command crosses end of memory block
return -1; };
pmrm=pmrm->psib+cmd[1];
im->modsize=2; // Both ModRM and SIB
};
};
// Check whether ModRM indicates register operand and immediately return if
// true. As a side effect, modsize is already set.
if ((cmd[0] & 0xC0)==0xC0)
return 0;
// Operand in memory.
op->opsize=datasize;
op->granularity=datasize; // Default, may be overriden later
op->reg=REG_UNDEF;