diff --git a/bindings/python/sample.py b/bindings/python/sample.py index 2e2830ee..db3e70dc 100755 --- a/bindings/python/sample.py +++ b/bindings/python/sample.py @@ -19,50 +19,70 @@ def test_ks(arch, mode, code, syntax=0): print("%02x " % i, end='') print("]") + return bytes(encoding) + # test symbol resolver -def test_sym_resolver(): +def test_sym_resolver(arch, mode, code, base, symbol_table): def sym_resolver(symbol, value): # is this the missing symbol we want to handle? - if symbol == "_l1": + if symbol in symbol_table: # put value of this symbol in @value - value = 0x1002 + value[0] = symbol_table[symbol] # we handled this symbol, so return true return True # we did not handle this symbol, so return false return False - ks = Ks(KS_ARCH_X86, KS_MODE_32) + ks = Ks(arch, mode) # register callback for symbol resolver ks.sym_resolver = sym_resolver - CODE = b"jmp _l1; nop; _l1:" - encoding, count = ks.asm(CODE, 0x1000) + encoding, count = ks.asm(code, base) - print("%s = [ " % CODE, end='') + print("%s = [ " % code, end='') for i in encoding: print("%02x " % i, end='') print("]") + return bytes(encoding) + if __name__ == '__main__': # X86 - test_ks(KS_ARCH_X86, KS_MODE_16, b"add eax, ecx") - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, ecx") - test_ks(KS_ARCH_X86, KS_MODE_64, b"add rax, rcx") - test_ks(KS_ARCH_X86, KS_MODE_32, b"add %ecx, %eax", KS_OPT_SYNTAX_ATT) - test_ks(KS_ARCH_X86, KS_MODE_64, b"add %rcx, %rax", KS_OPT_SYNTAX_ATT) - - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 0x15") - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15h"); - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15") - - # RADIX16 syntax Intel (default syntax) - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15", KS_OPT_SYNTAX_RADIX16) + encoding = test_ks(KS_ARCH_X86, KS_MODE_16, b"add eax, ecx") + assert encoding == bytes.fromhex("66 01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, ecx") + assert encoding == bytes.fromhex("01 c8") + + encoding = test_ks(KS_ARCH_X86, 
KS_MODE_64, b"add rax, rcx") + assert encoding == bytes.fromhex("48 01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add %ecx, %eax", KS_OPT_SYNTAX_ATT) + assert encoding == bytes.fromhex("01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_64, b"add %rcx, %rax", KS_OPT_SYNTAX_ATT) + assert encoding == bytes.fromhex("48 01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 0x15") + assert encoding == bytes.fromhex("83 c0 15") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15h"); + assert encoding == bytes.fromhex("83 c0 15") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15") + assert encoding == bytes.fromhex("83 c0 0f") + + # RADIX16 syntax for Intel + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15", KS_OPT_SYNTAX_RADIX16) + assert encoding == bytes.fromhex("83 c0 15") + # RADIX16 syntax for AT&T - test_ks(KS_ARCH_X86, KS_MODE_32, b"add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT) + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT) + assert encoding == bytes.fromhex("83 c0 15") # ARM test_ks(KS_ARCH_ARM, KS_MODE_ARM, b"sub r1, r2, r5") @@ -95,4 +115,8 @@ def sym_resolver(symbol, value): test_ks(KS_ARCH_SYSTEMZ, KS_MODE_BIG_ENDIAN, b"a %r0, 4095(%r15,%r1)") # test symbol resolver - test_sym_resolver() + + # X64 - Backward jump + encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0x1000}) + assert encoding == bytes.fromhex("eb fe 90") + diff --git a/llvm/keystone/ks.cpp b/llvm/keystone/ks.cpp index 736a0fbc..7e7a41a6 100644 --- a/llvm/keystone/ks.cpp +++ b/llvm/keystone/ks.cpp @@ -554,11 +554,17 @@ ks_err ks_close(ks_engine *ks) KEYSTONE_EXPORT ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value) { - ks->MAI->setRadix(16); switch(type) { case KS_OPT_SYNTAX: if (ks->arch != KS_ARCH_X86) return KS_ERR_OPT_INVALID; + + // Reset to radix 10, the default, first. 
When + // KS_OPT_SYNTAX_RADIX16 is given, this will be set to 16 again. + // This allows switching the radix on a keystone instance from 16 + // to 10 again. + ks->MAI->setRadix(10); + switch(value) { default: return KS_ERR_OPT_INVALID; diff --git a/llvm/lib/MC/MCAsmInfo.cpp b/llvm/lib/MC/MCAsmInfo.cpp index 0939eb86..234cf3d8 100644 --- a/llvm/lib/MC/MCAsmInfo.cpp +++ b/llvm/lib/MC/MCAsmInfo.cpp @@ -49,6 +49,7 @@ MCAsmInfo::MCAsmInfo() { Code32Directive = ".code32"; Code64Directive = ".code64"; AssemblerDialect = 0; + Radix = 10; AllowAtInName = false; SupportsQuotedNames = true; UseDataRegionDirectives = false; diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 2e656e7d..4972cbc5 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -208,7 +208,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, ks_sym_resolver resolver = (ks_sym_resolver)KsSymResolver; if (resolver(Sym.getName().str().c_str(), &imm)) { // resolver handled this symbol - Value = imm; + Value += imm; IsResolved = true; } else { // resolver did not handle this symbol diff --git a/suite/regress/x64_radix.py b/suite/regress/x64_radix.py new file mode 100644 index 00000000..da646da1 --- /dev/null +++ b/suite/regress/x64_radix.py @@ -0,0 +1,63 @@ +#!/usr/bin/python + +# Test radix configuration for X86 + +# Github issue: #481 #436 #538 +# Author: endofunky + +from keystone import * + +import regress + + +class TestX86(regress.RegressTest): + def runTest(self): + # Default value without ks_option + ks = Ks(KS_ARCH_X86, KS_MODE_64) + + encoding, _ = ks.asm(b"add eax, 0x15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + encoding, _ = ks.asm(b"add eax, 15h", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x0F]) + + # NASM + RADIX16 + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.syntax = KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16 + 
encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + # AT&T + RADIX16 + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.syntax = KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16 + encoding, _ = ks.asm(b"add $15, %eax", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + # Default with symbol resolver set (#481) + def sym_resolver(symbol, value): + return False + + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.sym_resolver = sym_resolver + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x0F]) + + # Switching from 16 to 10 + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.syntax = KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16 + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + ks.syntax = KS_OPT_SYNTAX_NASM + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x0F]) + + +if __name__ == "__main__": + regress.main() diff --git a/suite/regress/x64_sym_resolver.py b/suite/regress/x64_sym_resolver.py index 1bf6a121..5b7189fa 100755 --- a/suite/regress/x64_sym_resolver.py +++ b/suite/regress/x64_sym_resolver.py @@ -4,22 +4,30 @@ # Github issue: #244 # Author: Duncan (mrexodia) +# Author: endofunky from keystone import * import regress + class TestX86(regress.RegressTest): def runTest(self): + symbol_table = { + b"ZwQueryInformationProcess": 0x7FF98A050840, + b"_l1": 0x1000, + b"_l2": 0x1002, + b"_l3": 0xAABBCCDD, + } + def sym_resolver(symbol, value): # is this the missing symbol we want to handle? - if symbol == "ZwQueryInformationProcess": + if symbol in symbol_table: # put value of this symbol in @value - value = 0x7FF98A050840 + value[0] = symbol_table[symbol] # we handled this symbol, so return true - print 'sym_resolver called!' 
return True - + # we did not handle this symbol, so return false return False @@ -28,11 +36,23 @@ def sym_resolver(symbol, value): ks.sym_resolver = sym_resolver encoding, _ = ks.asm(b"call 0x7FF98A050840", 0x7FF98A081A38) - self.assertEqual(encoding, [ 0xE8, 0x03, 0xEE, 0xFC, 0xFF ]) + self.assertEqual(encoding, [0xE8, 0x03, 0xEE, 0xFC, 0xFF]) encoding, _ = ks.asm(b"call ZwQueryInformationProcess", 0x7FF98A081A38) - self.assertEqual(encoding, [ 0xE8, 0x03, 0xEE, 0xFC, 0xFF ]) + self.assertEqual(encoding, [0xE8, 0x03, 0xEE, 0xFC, 0xFF]) + + encoding, _ = ks.asm(b"mov rax, 80", 0x1000) + self.assertEqual(encoding, [0x48, 0xC7, 0xC0, 0x50, 0x00, 0x00, 0x00]) + + encoding, _ = ks.asm(b"jmp _l1; nop", 0x1000) + self.assertEqual(encoding, [0xEB, 0xFE, 0x90]) + + encoding, _ = ks.asm(b"jmp _l2; nop", 0x1000) + self.assertEqual(encoding, [0xEB, 0x00, 0x90]) + + encoding, _ = ks.asm(b"jmp _l3; nop", 0x1000) + self.assertEqual(encoding, [0xE9, 0xD8, 0xBC, 0xBB, 0xAA, 0x90]) -if __name__ == '__main__': +if __name__ == "__main__": regress.main()