Skip to content

Commit 75dfc18

Browse files
committed
feat: tool disasm for FLE
1 parent df52896 commit 75dfc18

File tree

5 files changed

+359
-8
lines changed

5 files changed

+359
-8
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ compile_commands.json
66
tests/cases/*/build
77

88
cc
9+
disasm
910
exec
1011
fle_base
1112
ld

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ OBJS = $(SRCS:.cpp=.o)
1717
BASE_EXEC = fle_base
1818

1919
# 工具名称
20-
TOOLS = cc ld nm objdump readfle exec
20+
TOOLS = cc ld nm objdump readfle exec disasm
2121

2222
# 默认目标
2323
all: check_compiler $(TOOLS)

include/fle.hpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ using json = nlohmann::ordered_json;
1010

1111
// 重定位类型
1212
enum class RelocationType {
13-
R_X86_64_32, // 32位绝对寻址
14-
R_X86_64_PC32, // 32位相对寻址
15-
R_X86_64_64, // 64位绝对寻址
16-
R_X86_64_32S, // 32位有符号绝对寻址
13+
R_X86_64_32, // 32 位绝对寻址
14+
R_X86_64_PC32, // 32 位相对寻址
15+
R_X86_64_64, // 64 位绝对寻址
16+
R_X86_64_32S, // 32 位有符号绝对寻址
1717
};
1818

1919
// 重定位项
@@ -72,13 +72,13 @@ struct SectionHeader {
7272

7373
struct ProgramHeader {
7474
std::string name; // 段名
75-
uint64_t vaddr; // 虚拟地址(改用64位
75+
uint64_t vaddr; // 虚拟地址(64位
7676
uint64_t size; // 段大小
7777
uint32_t flags; // 权限
7878
};
7979

8080
struct FLEObject {
81-
std::string name; // object name
81+
std::string name; // Object name
8282
std::string type; // ".obj" or ".exe"
8383
std::map<std::string, FLESection> sections; // Section name -> section data
8484
std::vector<Symbol> symbols; // Global symbol table
@@ -218,3 +218,15 @@ FLEObject FLE_ld(const std::vector<FLEObject>& objects);
218218
* @param obj The FLE object to read
219219
*/
220220
void FLE_readfle(const FLEObject& obj);
221+
222+
/**
223+
* Disassemble data from specified section
224+
* @param obj The FLE object
225+
* @param section_name Name of section to disassemble
226+
*
227+
* Example output format:
228+
* 0000: 55 push rbp
229+
* 0001: 48 89 e5 mov rbp, rsp
230+
* 0004: 48 83 ec 10 sub rsp, 0x10
231+
*/
232+
void FLE_disasm(const FLEObject& obj, const std::string& section_name);

src/base/disasm.cpp

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
#include "fle.hpp"
2+
#include <array>
3+
#include <cstdio>
4+
#include <filesystem>
5+
#include <fstream>
6+
#include <iomanip>
7+
#include <iostream>
8+
#include <map>
9+
#include <memory>
10+
#include <sstream>
11+
#include <stdexcept>
12+
#include <string>
13+
14+
// Helper: run a shell command and capture everything it writes to stdout.
//
// @param cmd  Command line handed to popen() (interpreted by the shell).
// @return     The command's complete standard output.
// @throws std::runtime_error if the pipe cannot be opened, or if the command
//         terminates with a non-zero status (e.g. the program is missing or
//         reported an error). Previously such failures were silently turned
//         into an empty/partial result.
std::string exec_command(const std::string& cmd)
{
    // A dedicated deleter type (instead of decltype(&pclose)) avoids the
    // "ignoring attributes on template argument" warning some compilers emit.
    struct PipeCloser {
        void operator()(FILE* fp) const noexcept
        {
            if (fp != nullptr) {
                pclose(fp);
            }
        }
    };

    std::unique_ptr<FILE, PipeCloser> pipe(popen(cmd.c_str(), "r"));
    if (!pipe) {
        throw std::runtime_error("Failed to execute command: " + cmd);
    }

    std::array<char, 128> buffer{};
    std::string result;
    // fgets() NUL-terminates each chunk, so appending the C string is safe.
    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
        result += buffer.data();
    }

    // Close explicitly so the termination status can be inspected; release()
    // prevents the deleter from closing the stream a second time.
    const int status = pclose(pipe.release());
    if (status != 0) {
        throw std::runtime_error("Command failed (status " + std::to_string(status) + "): " + cmd);
    }

    return result;
}
31+
32+
// Helper: return a copy of str without leading and trailing whitespace
// (space, tab, newline, carriage return). A string consisting only of
// whitespace yields an empty string.
std::string trim(const std::string& str)
{
    static const char* const kWhitespace = " \t\n\r";

    const std::string::size_type begin = str.find_first_not_of(kWhitespace);
    if (begin == std::string::npos) {
        return std::string();
    }

    const std::string::size_type end = str.find_last_not_of(kWhitespace);
    return std::string(str, begin, end - begin + 1);
}
41+
42+
// Helper: format an address as lowercase hexadecimal, zero-padded to a
// minimum of four digits (wider values are printed in full).
std::string format_address(uint64_t addr)
{
    char text[17]; // up to 16 hex digits for a 64-bit value, plus the NUL
    std::snprintf(text, sizeof text, "%04llx", static_cast<unsigned long long>(addr));
    return std::string(text);
}
49+
50+
// 辅助函数:获取重定位类型的字符串表示
51+
std::string get_reloc_type_str(RelocationType type)
52+
{
53+
switch (type) {
54+
case RelocationType::R_X86_64_32:
55+
return "R_X86_64_32";
56+
case RelocationType::R_X86_64_PC32:
57+
return "R_X86_64_PC32";
58+
case RelocationType::R_X86_64_64:
59+
return "R_X86_64_64";
60+
case RelocationType::R_X86_64_32S:
61+
return "R_X86_64_32S";
62+
default:
63+
return "UNKNOWN";
64+
}
65+
}
66+
67+
// Helper: decide whether a section holds code. A section counts as code when
// its name contains the substring ".text" anywhere.
bool is_code_section(const std::string& section_name)
{
    const std::string::size_type hit = section_name.find(".text");
    const bool contains_text = (hit != std::string::npos);
    return contains_text;
}
72+
73+
// Helper: render bytes of data, starting at offset, as two-digit lowercase
// hex values separated by single spaces. At most max_len bytes are rendered
// and rendering stops early at the end of data.
std::string format_data_bytes(const std::vector<uint8_t>& data, size_t offset, size_t max_len = 16)
{
    static const char kHexDigits[] = "0123456789abcdef";

    std::string rendered;
    for (size_t idx = 0; idx < max_len; ++idx) {
        if (offset + idx >= data.size()) {
            break;
        }
        if (idx != 0) {
            rendered += ' ';
        }
        const uint8_t value = data[offset + idx];
        rendered += kHexDigits[value >> 4];
        rendered += kHexDigits[value & 0x0F];
    }
    return rendered;
}
84+
85+
// Helper: length of the NUL-terminated string that starts at offset, counting
// one extra byte for the terminator. When no terminator is found before the
// end of data, the result is still "bytes scanned + 1".
size_t get_string_length(const std::vector<uint8_t>& data, size_t offset)
{
    size_t cursor = offset;
    for (; cursor < data.size(); ++cursor) {
        if (data[cursor] == 0) {
            break;
        }
    }
    // +1 accounts for the terminating null character.
    return (cursor - offset) + 1;
}
94+
95+
// Helper: render string bytes as a quoted, escaped comment of the form
//   # "text"
//
// @param data    Section bytes.
// @param offset  Index of the first character of the string within data.
// @param len     String length INCLUDING the terminating null byte; the
//                terminator itself is not rendered.
// @return        The comment text. Newline, tab, carriage return, double
//                quote and backslash use C-style escapes; every other
//                non-printable byte is written as \xNN.
//
// Robustness: len == 0 yields an empty quoted string (no unsigned underflow),
// and the rendered range is clamped so it never reads past the end of data.
std::string format_string_comment(const std::vector<uint8_t>& data, size_t offset, size_t len)
{
    const size_t requested = (len == 0) ? 0 : len - 1; // drop the null terminator
    const size_t available = (offset < data.size()) ? data.size() - offset : 0;
    const size_t count = (requested < available) ? requested : available;

    std::stringstream ss;
    ss << "# \"";
    for (size_t i = 0; i < count; ++i) {
        const unsigned char byte = data[offset + i];
        const char c = static_cast<char>(byte);
        if (c == '\n')
            ss << "\\n";
        else if (c == '\t')
            ss << "\\t";
        else if (c == '\r')
            ss << "\\r";
        else if (c == '\"')
            ss << "\\\"";
        else if (c == '\\')
            ss << "\\\\";
        else if (isprint(byte)) // must receive an unsigned char value: a negative char is undefined behaviour
            ss << c;
        else
            ss << "\\x" << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(byte);
    }
    ss << "\"";
    return ss.str();
}
120+
121+
void FLE_disasm(const FLEObject& obj, const std::string& section_name)
122+
{
123+
// 查找指定的段
124+
auto it = obj.sections.find(section_name);
125+
if (it == obj.sections.end()) {
126+
throw std::runtime_error("Section not found: " + section_name);
127+
}
128+
129+
const auto& section = it->second;
130+
const auto& data = section.data;
131+
132+
if (data.empty()) {
133+
throw std::runtime_error("Section is empty");
134+
}
135+
136+
// 创建偏移到重定位信息的映射
137+
std::map<uint64_t, std::vector<const Relocation*>> reloc_map;
138+
// 创建偏移到符号的映射
139+
std::map<uint64_t, const Symbol*> symbol_map;
140+
141+
if (obj.type == ".obj") {
142+
for (const auto& reloc : section.relocs) {
143+
reloc_map[reloc.offset].push_back(&reloc);
144+
}
145+
146+
for (const auto& sym : obj.symbols) {
147+
if (sym.section == section_name) {
148+
symbol_map[sym.offset] = &sym;
149+
}
150+
}
151+
}
152+
153+
std::cout << "Disassembly of section " << section_name << ":" << std::endl;
154+
155+
// 如果是数据段,直接显示数据
156+
if (!is_code_section(section_name)) {
157+
// 首先获取所有符号的偏移量并排序
158+
std::vector<std::pair<uint64_t, const Symbol*>> sorted_symbols;
159+
for (const auto& [offset, sym] : symbol_map) {
160+
sorted_symbols.push_back({ offset, sym });
161+
}
162+
std::sort(sorted_symbols.begin(), sorted_symbols.end());
163+
164+
// 遍历所有符号
165+
for (const auto& [sym_offset, sym] : sorted_symbols) {
166+
std::cout << std::endl;
167+
std::cout << sym->name << ":" << std::endl;
168+
169+
// 确定数据长度
170+
size_t data_len = sym->size;
171+
bool is_string = false;
172+
if (section_name.find(".rodata.str") != std::string::npos) {
173+
// 对于字符串段,使用实际字符串长度
174+
data_len = get_string_length(data, sym_offset);
175+
is_string = true;
176+
}
177+
178+
// 按每16字节一行输出数据
179+
for (size_t i = 0; i < data_len; i += 16) {
180+
size_t chunk_size = std::min(size_t(16), data_len - i);
181+
std::cout << format_address(sym_offset + i) << ": "
182+
<< std::left << std::setfill(' ') << std::setw(50)
183+
<< format_data_bytes(data, sym_offset + i, chunk_size);
184+
185+
// 对于第一行,输出重定位信息和字符串内容
186+
if (i == 0) {
187+
if (obj.type == ".obj") {
188+
auto reloc_it = reloc_map.find(sym_offset);
189+
if (reloc_it != reloc_map.end()) {
190+
std::cout << "# ";
191+
for (const auto* reloc : reloc_it->second) {
192+
std::cout << get_reloc_type_str(reloc->type) << " "
193+
<< reloc->symbol;
194+
if (reloc->addend != 0) {
195+
std::cout << std::showpos << std::dec << reloc->addend;
196+
}
197+
std::cout << " ";
198+
}
199+
}
200+
}
201+
if (is_string) {
202+
if (obj.type == ".obj")
203+
std::cout << " ";
204+
std::cout << format_string_comment(data, sym_offset, data_len);
205+
}
206+
}
207+
std::cout << std::endl;
208+
}
209+
}
210+
return;
211+
}
212+
213+
// 对于代码段,使用 objdump 反汇编
214+
// 创建临时文件来存储段数据
215+
std::filesystem::path temp_dir = std::filesystem::temp_directory_path();
216+
std::string temp_file = (temp_dir / "section.bin").string();
217+
218+
// 写入段数据
219+
std::ofstream out(temp_file, std::ios::binary);
220+
out.write(reinterpret_cast<const char*>(data.data()), data.size());
221+
out.close();
222+
223+
// 构造 objdump 命令
224+
std::stringstream cmd;
225+
cmd << "objdump -D -b binary -m i386:x86-64 " << temp_file;
226+
227+
try {
228+
std::string output = exec_command(cmd.str());
229+
std::cout << "Disassembly of section " << section_name << ":" << std::endl;
230+
231+
std::istringstream iss(output);
232+
std::string line;
233+
bool start_processing = false;
234+
uint64_t next_addr = 0;
235+
236+
while (std::getline(iss, line)) {
237+
if (line.find("Disassembly of section") != std::string::npos) {
238+
start_processing = true;
239+
continue;
240+
}
241+
242+
if (!start_processing || line.empty() || line.find('>') != std::string::npos)
243+
continue;
244+
245+
line = trim(line);
246+
if (line.empty() || !std::isxdigit(line[0]))
247+
continue;
248+
249+
size_t colon_pos = line.find(':');
250+
if (colon_pos != std::string::npos) {
251+
// 提取当前指令的地址
252+
std::string addr_str = line.substr(0, colon_pos);
253+
uint64_t addr;
254+
std::stringstream ss;
255+
ss << std::hex << addr_str;
256+
ss >> addr;
257+
258+
// 检查是否有符号在这个地址
259+
if (obj.type == ".obj") {
260+
auto sym_it = symbol_map.find(addr);
261+
if (sym_it != symbol_map.end()) {
262+
std::cout << std::endl; // 在符号前添加空行
263+
std::cout << sym_it->second->name << ":" << std::endl;
264+
}
265+
}
266+
267+
// 提取机器码和指令
268+
std::string rest = line.substr(colon_pos + 1);
269+
rest = trim(rest);
270+
271+
size_t instr_pos = rest.find_first_not_of("0123456789abcdef ");
272+
if (instr_pos != std::string::npos) {
273+
std::string bytes = trim(rest.substr(0, instr_pos));
274+
std::string instr = trim(rest.substr(instr_pos));
275+
276+
// 如果是.obj文件,去掉objdump的注释
277+
if (obj.type == ".obj") {
278+
size_t comment_pos = instr.find('#');
279+
if (comment_pos != std::string::npos) {
280+
instr = trim(instr.substr(0, comment_pos));
281+
}
282+
}
283+
284+
// 计算指令长度
285+
std::istringstream byte_stream(bytes);
286+
std::string byte;
287+
size_t instr_len = 0;
288+
while (byte_stream >> byte) {
289+
if (byte.length() == 2)
290+
instr_len++;
291+
}
292+
next_addr = addr + instr_len;
293+
294+
// 基本输出
295+
std::cout << format_address(addr) << ": "
296+
<< std::left << std::setfill(' ') << std::setw(30) << bytes
297+
<< std::left << std::setw(30) << instr;
298+
299+
// 检查这条指令范围内的重定位信息
300+
if (obj.type == ".obj") {
301+
bool has_reloc = false;
302+
for (uint64_t offset = addr; offset < next_addr; offset++) {
303+
auto reloc_it = reloc_map.find(offset);
304+
if (reloc_it != reloc_map.end()) {
305+
if (!has_reloc) {
306+
std::cout << "# ";
307+
has_reloc = true;
308+
}
309+
for (const auto* reloc : reloc_it->second) {
310+
std::cout << get_reloc_type_str(reloc->type) << " "
311+
<< reloc->symbol;
312+
if (reloc->addend != 0) {
313+
std::cout << std::showpos << std::dec << reloc->addend;
314+
}
315+
std::cout << " ";
316+
}
317+
}
318+
}
319+
}
320+
std::cout << std::endl;
321+
}
322+
}
323+
}
324+
325+
std::filesystem::remove(temp_file);
326+
327+
} catch (const std::exception& e) {
328+
std::filesystem::remove(temp_file);
329+
throw;
330+
}
331+
}

0 commit comments

Comments
 (0)