Skip to content

Commit

Permalink
added proper SCOP support. still needs some work
Browse files Browse the repository at this point in the history
  • Loading branch information
elfmaster committed Jan 15, 2019
1 parent 24a0e22 commit 9b77363
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 7 deletions.
49 changes: 49 additions & 0 deletions PARSING_DETAILS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
## LibElfmaster Parsing Details

It is the goal of libelfmaster to accomplish the already existing goals that
have been outlined -- be the most sophisticated and secure parser for forensics
reconstruction of all binaries, including anomalous ones.

## SCOP (Secure code partitioning)

--1/15/2019

The text segment is partitioned into 3 PT_LOAD segments, although the way it's
actually partitioned is contingent upon the linker, /bin/ld, which looks at the
section header sh_flags, i.e. SHF_ALLOC is put into a PF_R segment, whereas
SHF_ALLOC|SHF_EXECINSTR is put into a PF_R|PF_X segment, so it is conceivable
that depending on the ordering there could be only two PT_LOAD segments for the
text, or even 4 PT_LOAD segments for the text, as the linker applies these
PT_LOAD partitions in the order that it reads the section headers.

```uint64_t elf_text_base(elfobj_t *)``` previously this returned the base
address of the first PT_LOAD at offset 0, or that is the first PF_R|PF_X. Now
we must consider how to handle SCOP binaries. Here is my proposition:

Based on testing ```if (elf_flags(obj, ELF_SCOP_F))``` which denotes that SCOP
is enabled...

```uint64_t elf_text_base()``` gives the base address of the first PT_LOAD
which is probably PF_R. We then have elf_executable_text_base() which will
return the (In most cases) second load segment base, which is the one that's
actually executable.

We have also had ```size_t elf_text_filesz(elfobj_t *)``` which returns the
p_filesz of the text segment. We now have in addition ```ssize_t
elf_scop_text_filesz(elfobj_t *)``` which returns -1 on error; otherwise it
returns the size of all of the LOAD segments that relate to the partitioned
text segments, i.e. it adds them all up and gives the sum total.

Status: Finished
TODO: Handle SCOP scenarios where one of the PT_LOADs (say, out of 3) has been modified
to be executable, i.e. phdr[text + 0] |= PF_X; which will throw off the way that
libelfmaster handles SCOP parsing. This is an easy fix in ELF_LOAD_F_STRICT_F cases
because we can simply follow the sh_flags of the section headers to see which segments
they correspond to (SHF_ALLOC, SHF_ALLOC|SHF_EXECINSTR, etc.), just like the linker does, but for
forensics mode this doesn't cut it. More heuristics to be added.

## Handle gcc -nostdlib -N -static t.c -o t binaries

Single PT_LOAD segments of RWX usually have a p_vaddr that is not page aligned,
but will be at runtime. This also causes issues with many parsers. We
currently must handle this more appropriately.
7 changes: 6 additions & 1 deletion examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ all:
$(CC) -g objdump_libelfmaster.c ../src/libelfmaster.a -o objdump_elfmaster -lcapstone
$(CC) -fPIC -pie -Wl,-z,separate-code,-z,relro,-z,now test.c -o test_scop
$(CC) ../utils/stripx.c -o stripx
$(CC) -N -static -m32 -nostdlib nostdlib.c -o nostdlib32
$(CC) -N -static -nostdlib nostdlib.c -o nostdlib
$(CC) -O2 -g elf_text.c ../src/libelfmaster.a -o elf_text
$(CC) -no-pie -Wl,-z,separate-code test.c -o test_scop_binary

./stripx test_stripped
./stripx test32_stripped
clean:
rm elfparse ldd plt_dump plt_dump2 sections eh_frame test test2 test32bit_pie test_pie test_stripped test32bit stripx symbols checksec test32_stripped
rm elfparse ldd plt_dump plt_dump2 sections eh_frame test test2 test32bit_pie test_pie test_stripped test32bit stripx symbols checksec test32_stripped test_scop_binary elf_text nostdlib nostdlib32 test_scop test32_scop
30 changes: 30 additions & 0 deletions examples/merged.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
*/

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <elf.h>
#include <sys/types.h>
#include <search.h>
#include <sys/time.h>
#include "../include/libelfmaster.h"

/*
 * Example driver: opens the ELF object named by argv[1] with the
 * ELF_LOAD_F_SMART|ELF_LOAD_F_FORENSICS load flags and, when the object
 * carries ELF_MERGED_SEGMENTS_F, prints the value of elf_text_base().
 *
 * Returns 0 on success and -1 when the object cannot be opened. When no
 * path is given, the usage line is printed and the process exits with
 * EXIT_SUCCESS.
 */
int main(int argc, char **argv)
{
	elfobj_t obj;
	elf_error_t error;

	if (argc < 2) {
		printf("Usage: %s <binary>\n", argv[0]);
		exit(EXIT_SUCCESS);
	}
	if (elf_open_object(argv[1], &obj,
	    ELF_LOAD_F_SMART|ELF_LOAD_F_FORENSICS, &error) == false) {
		fprintf(stderr, "%s\n", elf_error_msg(&error));
		return -1;
	}
	if (elf_flags(&obj, ELF_MERGED_SEGMENTS_F) == true) {
		/*
		 * elf_text_base() returns uint64_t. "%lx" only matches that
		 * type where long is 64 bits wide, so widen explicitly and
		 * use "%llx" to keep the format and argument in agreement
		 * on 32-bit builds as well.
		 */
		printf("Merged code segment: %#llx\n",
		    (unsigned long long)elf_text_base(&obj));
	}
	elf_close_object(&obj);
	return 0;
}
4 changes: 4 additions & 0 deletions examples/nostdlib.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/*
 * Bare entry point for the -nostdlib example builds: the body is a
 * single no-op instruction.
 */
int _start(void)
{
	__asm__("nop");
}
2 changes: 1 addition & 1 deletion examples/objdump_libelfmaster.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <inttypes.h>

#include <string.h>
#include "../include/libelfmaster.h"
#include <capstone/capstone.h>

Expand Down
14 changes: 13 additions & 1 deletion include/libelfmaster.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ typedef enum elf_obj_flags {
ELF_SYMTAB_RECONSTRUCTION_F = (1 << 15), /* .symtab is being reconstructed */
ELF_FORENSICS_F = (1 << 16), /* elf sections at the least are reconstructed */
ELF_DT_DEBUG_F = (1 << 17),
ELF_SCOP_F = (1 << 18) /* secure code partitioning */
ELF_SCOP_F = (1 << 18), /* secure code partitioning */
ELF_MERGED_SEGMENTS_F = (1 << 19) /* Merged text+data segment, i.e. gcc -nostdlib -N -static test.c -o test */
} elf_obj_flags_t;

/*
Expand Down Expand Up @@ -277,6 +278,10 @@ typedef struct elfobj {
#define ELF_PT_LOAD_MISC_F (1 << 2)
/* Handle SCOP cases */
#define ELF_PT_LOAD_TEXT_RDONLY_F (1 << 3)
/*
* Handle merged text and data
*/
#define ELF_PT_LOAD_MERGED_F (1 << 4)

struct pt_load *pt_load;
size_t load_count;
Expand Down Expand Up @@ -757,4 +762,11 @@ typedef enum typewidth {

bool elf_read_address(elfobj_t *, uint64_t, uint64_t *, typewidth_t);
bool elf_read_offset(elfobj_t *, uint64_t, uint64_t *, typewidth_t);

/*
 * Return the sum of the file sizes of the first (up to three) PT_LOAD
 * segments of the object, i.e. the combined size of the partitioned text
 * LOAD segments of a SCOP binary. No memory is allocated.
 * Returns -1 if iterating over the segments fails.
 */
ssize_t elf_scop_text_filesz(elfobj_t *);

#endif
92 changes: 88 additions & 4 deletions src/libelfmaster.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,39 @@ elf_text_filesz(elfobj_t *obj)
return obj->text_segment_filesz;
}

/*
 * Add up the file sizes of the first three PT_LOAD segments produced by
 * the segment iterator, which for a SCOP binary are the partitioned text
 * segments. Any PT_LOAD beyond the third is ignored.
 *
 * Returns the accumulated size once the iterator stops yielding segments,
 * or -1 if the iterator reports ELF_ITER_ERROR.
 *
 * NOTE(review): ELF_SCOP_F is not checked here, so for a non-SCOP object
 * the result is still the sum of its first three PT_LOAD segments --
 * confirm callers test elf_flags(obj, ELF_SCOP_F) first.
 */
ssize_t
elf_scop_text_filesz(elfobj_t *obj)
{
	elf_segment_iterator_t seg_iter;
	struct elf_segment seg;
	uint32_t loads_seen = 0;
	size_t sum = 0;

	elf_segment_iterator_init(obj, &seg_iter);
	for (;;) {
		elf_iterator_res_t status;

		status = elf_segment_iterator_next(&seg_iter, &seg);
		if (status == ELF_ITER_ERROR)
			return -1;
		/* ELF_ITER_DONE, or any other non-OK result, ends the walk. */
		if (status != ELF_ITER_OK)
			return sum;
		/* Only the first three PT_LOAD entries contribute. */
		if (seg.type != PT_LOAD || loads_seen >= 3)
			continue;
		sum += seg.filesz;
		loads_seen++;
	}
}

const char *
elf_pathname(elfobj_t *obj)
{
Expand Down Expand Up @@ -427,13 +460,22 @@ elf_reloc_type_string(struct elfobj *obj, uint32_t r_type)
return "R_UNKNOWN";
}

/*
* In the event of SCOP we will give the base address
* of the first LOAD segment that is RDONLY. For more
* granularity use the elf_executable_text_base()
* function when elf_flags(obj, ELF_SCOP_F) == true
* If you need to locate the base of where the executable
* region actually begins.
*/
uint64_t
elf_text_base(struct elfobj *obj)
{
size_t i;

for (i = 0; i < obj->load_count; i++) {
if (obj->pt_load[i].flag & ELF_PT_LOAD_TEXT_F) {
if ((obj->pt_load[i].flag & ELF_PT_LOAD_TEXT_F) |
(obj->pt_load[i].flag & ELF_PT_LOAD_TEXT_RDONLY_F)) {
switch(obj->e_class) {
case elfclass32:
return obj->pt_load[i].phdr32.p_vaddr;
Expand All @@ -445,13 +487,18 @@ elf_text_base(struct elfobj *obj)
return 0;
}

/*
* Same logic as elf_text_base for taking SCOP
* binaries into consideration. See PARSING_DETAILS.md
*/
uint64_t
elf_text_offset(struct elfobj *obj)
{
size_t i;

for (i = 0; i < obj->load_count; i++) {
if (obj->pt_load[i].flag & ELF_PT_LOAD_TEXT_F) {
if ((obj->pt_load[i].flag & ELF_PT_LOAD_TEXT_F) |
(obj->pt_load[i].flag & ELF_PT_LOAD_TEXT_RDONLY_F)) {
switch(obj->e_class) {
case elfclass32:
return obj->pt_load[i].phdr32.p_offset;
Expand Down Expand Up @@ -1673,7 +1720,8 @@ elf_open_object(const char *path, struct elfobj *obj, uint64_t load_flags,
elf_error_t *error)
{
int fd;
uint32_t i, phdr_count = 0;
uint32_t i;
uint32_t phdr_count = 0, possible_merge_index = 0;
unsigned int open_flags = (load_flags & ELF_LOAD_F_MODIFY) ? O_RDWR : O_RDONLY;
unsigned int mmap_perms = PROT_READ|PROT_WRITE; // always need +write even with MAP_PRIVATE
unsigned int mmap_flags = MAP_PRIVATE;
Expand Down Expand Up @@ -2011,7 +2059,8 @@ elf_open_object(const char *path, struct elfobj *obj, uint64_t load_flags,
obj->text_segment_filesz = obj->phdr32[i].p_filesz;
} else if (obj->phdr32[i].p_type == PT_LOAD && text_found == false) {
/*
* TODO: This will not catch text segments marked as RWX.
* Handle cases where there is a single R+X text segment and
* no data segment.
*/
if ((obj->phdr32[i].p_flags & (PF_R|PF_X)) == (PF_R|PF_X)) {
obj->pt_load[obj->load_count].flag |= ELF_PT_LOAD_TEXT_F;
Expand All @@ -2020,6 +2069,20 @@ elf_open_object(const char *path, struct elfobj *obj, uint64_t load_flags,
&obj->phdr32[i], sizeof(Elf32_Phdr));
obj->text_segment_filesz = obj->phdr32[i].p_filesz;
obj->text_address = obj->phdr32[i].p_vaddr;
/*
* Handle cases where the text and data segment are merged into one RWX
* PT_LOAD segment. Note a possible_merge_index, and then if we haven't
* found a data segment we can finally assign it the ELF_PT_LOAD_MERGED_F
* flag
*/
} else if ((obj->phdr32[i].p_flags & (PF_R|PF_W|PF_X)) == (PF_R|PF_W|PF_X)) {
obj->pt_load[obj->load_count].flag |= ELF_PT_LOAD_TEXT_F;
text_found = true;
memcpy(&obj->pt_load[obj->load_count++].phdr32,
&obj->phdr32[i], sizeof(Elf32_Phdr));
obj->text_segment_filesz = obj->phdr32[i].p_filesz;
obj->text_address = obj->phdr32[i].p_vaddr;
possible_merge_index = i;
}
} else if (obj->phdr32[i].p_type == PT_LOAD) {
if (data_found == true) {
Expand All @@ -2035,6 +2098,12 @@ elf_open_object(const char *path, struct elfobj *obj, uint64_t load_flags,
}

}
if (data_found == false) {
obj->pt_load[possible_merge_index].flag = ELF_PT_LOAD_DATA_F|ELF_PT_LOAD_TEXT_F|
ELF_PT_LOAD_MERGED_F;
obj->flags |= ELF_MERGED_SEGMENTS_F;
}

}
break;
case ELFCLASS64:
Expand Down Expand Up @@ -2279,7 +2348,16 @@ elf_open_object(const char *path, struct elfobj *obj, uint64_t load_flags,
&obj->phdr64[i], sizeof(Elf64_Phdr));
obj->text_segment_filesz = obj->phdr64[i].p_filesz;
obj->text_address = obj->phdr64[i].p_vaddr;
} else if ((obj->phdr64[i].p_flags & (PF_R|PF_W|PF_X)) == (PF_R|PF_W|PF_X)) {
obj->pt_load[obj->load_count].flag |= ELF_PT_LOAD_TEXT_F;
text_found = true;
memcpy(&obj->pt_load[obj->load_count++].phdr64,
&obj->phdr64[i], sizeof(Elf64_Phdr));
obj->text_segment_filesz = obj->phdr64[i].p_filesz;
obj->text_address = obj->phdr64[i].p_vaddr;
possible_merge_index = i;
}

} else if (obj->phdr64[i].p_type == PT_LOAD) {
if (data_found == true) {
obj->pt_load[obj->load_count].flag |= ELF_PT_LOAD_MISC_F;
Expand All @@ -2293,6 +2371,12 @@ elf_open_object(const char *path, struct elfobj *obj, uint64_t load_flags,
obj->data_segment_filesz = obj->phdr64[i].p_filesz;
}
}
if (data_found == false) {
obj->pt_load[possible_merge_index].flag = ELF_PT_LOAD_DATA_F|ELF_PT_LOAD_TEXT_F|
ELF_PT_LOAD_MERGED_F;
obj->flags |= ELF_MERGED_SEGMENTS_F;
}

}
break;
default:
Expand Down

1 comment on commit 9b77363

@elfmaster
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also added support for executable built with a single PT_LOAD merged into a data segment. This is when a text and data segment phdr get the special flag ELF_PT_LOAD_MERGED_F assigned internally.

Please sign in to comment.