学习一下内存dump文件后修复的原理。意识流分析,可能有些乱,敬请谅解(

函数分析

先单独分析函数

ObElfReader::Load

加载so文件的各类结构,以下是用到的函数

ObElfReader::FixDumpSoPhdr

修正dump出来的so的程序头表:把每个LOAD段的memsz/filesz扩大到下一个LOAD段的起始地址(最后一个LOAD段扩大到文件末尾)。这样dump的范围不仅包含LOAD段本身,还包含相邻两个LOAD段之间不属于任何LOAD段的内存,因为有些数据可能留存在两个LOAD段之间。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
void ObElfReader::FixDumpSoPhdr() {
if (dump_so_base_ != 0) {
std::vector<Elf_Phdr*> loaded_phdrs;
//遍历program header table,将字段是LOAD的全部保存起来
for (auto i = 0; i < phdr_num_; i++) {
auto phdr = &phdr_table_[i];
if(phdr->p_type != PT_LOAD) continue;
loaded_phdrs.push_back(phdr);
}
//按照起始地址大小进行从小到大排序
std::sort(loaded_phdrs.begin(), loaded_phdrs.end(),
[](Elf_Phdr * first, Elf_Phdr * second) {
return first->p_vaddr < second->p_vaddr;
});
if (!loaded_phdrs.empty()) {
for (unsigned long i = 0, total = loaded_phdrs.size(); i < total; i++) {
auto phdr = loaded_phdrs[i];
//通过后一个保存的LOAD段和前一个之间的差值计算offset
if (i != total - 1) {
auto nphdr = loaded_phdrs[i+1];
phdr->p_memsz = nphdr->p_vaddr - phdr->p_vaddr;
} else {
//最后一个直接和文件结尾计算差值
phdr->p_memsz = file_size - phdr->p_vaddr;
}
//相当于把俩LOAD中间所有的内存全部提取出来,防止遗漏两个LOAD之间不是LOAD段的内存
phdr->p_filesz = phdr->p_memsz;
}
}
}

auto phdr = phdr_table_;
for(auto i = 0; i < phdr_num_; i++) {
phdr->p_paddr = phdr->p_vaddr;
phdr->p_filesz = phdr->p_memsz; // 把文件大小扩展成LOAD之间的大小
phdr->p_offset = phdr->p_vaddr;
phdr++;
}
}

ObElfReader::haveDynamicSectionInLoadableSegment

判断LOAD段里面有没有dynamic section,和名字描述的一样

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
bool ObElfReader::haveDynamicSectionInLoadableSegment() {
Elf_Addr min_vaddr, max_vaddr;
phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr, &max_vaddr);

const Elf_Phdr* phdr = phdr_table_;
const Elf_Phdr* phdr_limit = phdr + phdr_num_;

for (phdr = phdr_table_; phdr < phdr_limit; phdr++) {
//不是dynamic段就跳过
if (phdr->p_type != PT_DYNAMIC) {
continue;
}
//当前dynamic内存段被LOAD段包裹住的话 返回true
if (phdr->p_vaddr > min_vaddr && (phdr->p_vaddr + phdr->p_memsz) < max_vaddr) {
return true;
}
break;
}
return false;
}

ElfReader::FindPhdr

返回程序头表在内存中的地址,即它在已加载的段中出现的位置。这与“phdr_table_”不同:“phdr_table_”是临时的,会在库重定位之前被释放。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
bool ElfReader::FindPhdr() {
const Elf_Phdr* phdr_limit = phdr_table_ + phdr_num_;

// 如果有PT_PHDR这个类型直接读取
for (const Elf_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {
if (phdr->p_type == PT_PHDR) {
return CheckPhdr((uint8_t*)load_bias_ + phdr->p_vaddr);
}
}

// 没有的话,如果LOAD的最开始就是elf文件的最开始,直接读取header然后找到phdr
for (const Elf_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {
if (phdr->p_type == PT_LOAD) {
if (phdr->p_offset == 0) {
uint8_t *elf_addr = (uint8_t*)load_bias_ + phdr->p_vaddr;
const Elf_Ehdr* ehdr = (const Elf_Ehdr*)(void*)elf_addr;
Elf_Addr offset = ehdr->e_phoff;
return CheckPhdr((uint8_t*)ehdr + offset);
}
break;
}
}

FLOGE("can't find loaded phdr for \"%s\"", name_);
return false;
}

ElfRebuilder::Rebuild

开始进行rebuild

1
2
3
4
5
6
7
// Runs the rebuild stages in order: program headers, so-info parsing, section
// headers, relocations, final assembly. Stops at the first stage that returns
// false and returns false; returns true only if every stage succeeds.
bool ElfRebuilder::Rebuild() {
    if (!RebuildPhdr()) {
        return false;
    }
    if (!ReadSoInfo()) {
        return false;
    }
    if (!RebuildShdr()) {
        return false;
    }
    if (!RebuildRelocs()) {
        return false;
    }
    return RebuildFin();
}

ElfRebuilder::RebuildPhdr

重建phdr,和前面的FixDumpSoPhdr差不多

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
bool ElfRebuilder::RebuildPhdr() {
FLOGD("=============LoadDynamicSectionFromBaseSource==========RebuildPhdr=========================");


auto phdr = (Elf_Phdr*)elf_reader_->loaded_phdr();
//遍历所有program header
for(auto i = 0; i < elf_reader_->phdr_count(); i++) {
phdr->p_filesz = phdr->p_memsz; // 扩大范围
phdr->p_paddr = phdr->p_vaddr;
phdr->p_offset = phdr->p_vaddr;
phdr++;
}
FLOGD("=====================RebuildPhdr End======================");
return true;
}

ElfRebuilder::ReadSoInfo

实际上感觉就是解析dynamic段的各类信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// Fills `si` from the already-loaded image: load bias, program header table,
// load range, the dynamic section, the ARM exidx table, and the dynamic entries
// handled by the switch below.
// Returns false when no dynamic section is available, true otherwise.
// NOTE: this is an excerpt; the remaining switch cases are elided.
bool ElfRebuilder::ReadSoInfo() {
FLOGD("=======================ReadSoInfo=========================");
// Record the base / load bias, the loaded program header table and its entry count.
si.base = si.load_bias = elf_reader_->load_bias();
si.phdr = elf_reader_->loaded_phdr();
si.phnum = elf_reader_->phdr_count();
auto base = si.load_bias;
phdr_table_get_load_size(si.phdr, si.phnum, &si.min_load, &si.max_load);
// Extend the upper load bound by the reader's pad_size_.
si.max_load += elf_reader_->pad_size_;

// Fetch the dynamic section first; without it there is nothing to parse.
elf_reader_->GetDynamicSection(&si.dynamic, &si.dynamic_count, &si.dynamic_flags);
if(si.dynamic == nullptr) {
FLOGE("No valid dynamic phdr data");
return false;
}
// Look up the segment of type PT_ARM_EXIDX.
phdr_table_get_arm_exidx(si.phdr, si.phnum, si.base,
&si.ARM_exidx, (unsigned*)&si.ARM_exidx_count);

// Walk the dynamic entries until DT_NULL and record each one by tag
// (per the original note, this closely follows the Android linker source).
uint32_t needed_count = 0;
for (Elf_Dyn* d = si.dynamic; d->d_tag != DT_NULL; ++d) {
switch(d->d_tag){
case DT_HASH:
// The table at base + d_ptr is read as: word 0 = nbucket, word 1 = nchain,
// buckets start at byte offset 8, chains follow nbucket 4-byte entries.
si.hash = d->d_un.d_ptr + (uint8_t*)base;
si.nbucket = ((unsigned *) (base + d->d_un.d_ptr))[0];
si.nchain = ((unsigned *) (base + d->d_un.d_ptr))[1];
si.bucket = (unsigned *) (base + d->d_un.d_ptr + 8);
si.chain = (unsigned *) (base + d->d_un.d_ptr + 8 + si.nbucket * 4);
break;
case DT_STRTAB:
si.strtab = (const char *) (base + d->d_un.d_ptr);
FLOGD("string table found at %" ADDRESS_FORMAT "x", d->d_un.d_ptr);
break;
case DT_SYMTAB:
si.symtab = (Elf_Sym *) (base + d->d_un.d_ptr);
FLOGD("symbol table found at %" ADDRESS_FORMAT "x", d->d_un.d_ptr);
break;
.........
}
}
FLOGD("=======================ReadSoInfo End=========================");
return true;
}

image-20230901212116597

ElfRebuilder::RebuildShdr

重建section header table(节头表),也是最重要的一个环节

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Rebuilds the section header table from the information collected in `si`.
//
// Each generated section appends its name to `shstrtab`, pushes an Elf_Shdr
// onto `shdrs`, and remembers its index in an s* variable (sDYNSYM, sDYNSTR,
// ...). The headers are then sorted by address, the sh_link fields are wired
// up, missing sizes are derived from the gap to the next header, and
// overlapping sizes are clamped. Always returns true.
// NOTE: this is an excerpt; the generators between .dynstr and .data are elided.
bool ElfRebuilder::RebuildShdr() {
FLOGD("=======================RebuildShdr=========================");
// rebuilding shdr, link information
auto base = si.load_bias;
// The name table starts with a NUL so that name offset 0 is the empty string.
shstrtab.push_back('\0');

// Index 0: an all-zero section header.
if(true) {
Elf_Shdr shdr = {0};
shdrs.push_back(shdr);
}

// gen .dynsym - only when a dynamic symbol table was found
if(si.symtab != nullptr) {
sDYNSYM = shdrs.size();

Elf_Shdr shdr;
shdr.sh_name = shstrtab.length();
shstrtab.append(".dynsym");//append this section's name to the section-header string table
shstrtab.push_back('\0');//NUL-terminate the name

shdr.sh_type = SHT_DYNSYM;//section type: dynamic symbol table
shdr.sh_flags = SHF_ALLOC;
shdr.sh_addr = (uintptr_t)si.symtab - (uintptr_t)base;//offset of the symbol table from base
shdr.sh_offset = shdr.sh_addr;
shdr.sh_size = 0; // placeholder; set after sorting to the distance to the next section header
shdr.sh_link = 0; // link to dynstr later
// shdr.sh_info = 1;
shdr.sh_info = 0;
#ifdef __SO64__//64-bit vs 32-bit
shdr.sh_addralign = 8;
shdr.sh_entsize = 0x18;
#else
shdr.sh_addralign = 4;
shdr.sh_entsize = 0x10;
#endif

shdrs.push_back(shdr);//add to the rebuilt section header list
}

// gen .dynstr
if(si.strtab != nullptr) {
sDYNSTR = shdrs.size();

Elf_Shdr shdr;
shdr.sh_name = shstrtab.length();
shstrtab.append(".dynstr");
shstrtab.push_back('\0');

shdr.sh_type = SHT_STRTAB;
shdr.sh_flags = SHF_ALLOC;
shdr.sh_addr = (uintptr_t)si.strtab - (uintptr_t)base;
shdr.sh_offset = shdr.sh_addr;
shdr.sh_size = si.strtabsize;
shdr.sh_link = 0;
shdr.sh_info = 0;
shdr.sh_addralign = 1;
shdr.sh_entsize = 0x0;

shdrs.push_back(shdr);
}

//the sections in between are generated by near-identical code and are omitted here
........................

// gen .data
if(true) {
sDATA = shdrs.size();
auto sLast = sDATA - 1;

Elf_Shdr shdr;
shdr.sh_name = shstrtab.length();
shstrtab.append(".data");
shstrtab.push_back('\0');

shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_ALLOC | SHF_WRITE;
shdr.sh_addr = shdrs[sLast].sh_addr + shdrs[sLast].sh_size;//starts right after the previously added section (.dynamic, per the original note)
shdr.sh_offset = shdr.sh_addr;
shdr.sh_size = si.max_load - shdr.sh_addr;
shdr.sh_link = 0;
shdr.sh_info = 0;
shdr.sh_addralign = 4;
shdr.sh_entsize = 0x0;

shdrs.push_back(shdr);
}

// .shstrtab goes last, placed at the end of the load range
if(true) {
sSHSTRTAB = shdrs.size();

Elf_Shdr shdr;
shdr.sh_name = shstrtab.length();
shstrtab.append(".shstrtab");
shstrtab.push_back('\0');

shdr.sh_type = SHT_STRTAB;
shdr.sh_flags = 0;
shdr.sh_addr = si.max_load;
shdr.sh_offset = shdr.sh_addr;
shdr.sh_size = shstrtab.length();
shdr.sh_link = 0;
shdr.sh_info = 0;
shdr.sh_addralign = 1;
shdr.sh_entsize = 0x0;

shdrs.push_back(shdr);
}


// Simple O(n^2) exchange sort of the section headers by ascending sh_addr.
// Index 0 (the empty header) is left in place.
for(auto i = 1; i < shdrs.size(); i++) {
for(auto j = i + 1; j < shdrs.size(); j++) {
if(shdrs[i].sh_addr > shdrs[j].sh_addr) {
// exchange i, j
auto tmp = shdrs[i];
shdrs[i] = shdrs[j];
shdrs[j] = tmp;

// Keep every recorded section index in step with the swap just made.
auto chgIdx = [i, j](Elf_Word &t) {
if(t == i) {
t = j;
} else if(t == j) {
t = i;
}
};
chgIdx(sDYNSYM);
chgIdx(sDYNSTR);
chgIdx(sHASH);
chgIdx(sRELDYN);
chgIdx(sRELADYN);
chgIdx(sRELPLT);
chgIdx(sPLT);
chgIdx(sTEXTTAB);
chgIdx(sARMEXIDX);
chgIdx(sFINIARRAY);
chgIdx(sINITARRAY);
chgIdx(sDYNAMIC);
chgIdx(sGOT);
chgIdx(sDATA);
chgIdx(sBSS);
chgIdx(sSHSTRTAB);
}
}
}
// Wire up sh_link using the (possibly re-ordered) indices. An index of 0 is
// treated as "section not generated" and skipped.
if (sHASH != 0) {
shdrs[sHASH].sh_link = sDYNSYM;
}
if (sRELDYN != 0){
shdrs[sRELDYN].sh_link = sDYNSYM;
}
if (sRELADYN != 0){
shdrs[sRELADYN].sh_link = sDYNSYM;
}
if (sRELPLT != 0) {
shdrs[sRELPLT].sh_link = sDYNSYM;
}
if (sARMEXIDX != 0) {
shdrs[sARMEXIDX].sh_link = sTEXTTAB;
}
if (sDYNAMIC != 0) {
shdrs[sDYNAMIC].sh_link = sDYNSTR;
}
if(sDYNSYM != 0) {
shdrs[sDYNSYM].sh_link = sDYNSTR;
}

// .dynsym was created with size 0: use the gap up to the next header in sorted order.
if(sDYNSYM != 0) {
auto sNext = sDYNSYM + 1;
shdrs[sDYNSYM].sh_size = shdrs[sNext].sh_addr - shdrs[sDYNSYM].sh_addr;
}

// Same treatment for the section recorded in sTEXTTAB: size = distance to the next header.
if(sTEXTTAB != 0) {
auto sNext = sTEXTTAB + 1;
shdrs[sTEXTTAB].sh_size = shdrs[sNext].sh_addr - shdrs[sTEXTTAB].sh_addr;
}

// Size fix-up: after sorting, a header's size may run past the next header's
// offset; if so, clamp it to the gap between the two offsets.
for(auto i = 2; i < shdrs.size(); i++) {
if(shdrs[i].sh_offset - shdrs[i-1].sh_offset < shdrs[i-1].sh_size) {
shdrs[i-1].sh_size = shdrs[i].sh_offset - shdrs[i-1].sh_offset;
}
}

FLOGD("=====================RebuildShdr End======================");
return true;
}

ElfRebuilder::RebuildRelocs

将需要重定位的部分进行重定位,就是模拟android源码的重定位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
bool ElfRebuilder::RebuildRelocs() {
if(elf_reader_->dump_so_base_ == 0) return true;
FLOGD("=======================RebuildRelocs=========================");
if (si.plt_type == DT_REL) {
auto rel = si.rel;
for (auto i = 0; i < si.rel_count; i++, rel++){
relocate<false>(si.load_bias, rel, elf_reader_->dump_so_base_);
}
rel = si.plt_rel;
for (auto i = 0; i < si.plt_rel_count; i++, rel++){
relocate<false>(si.load_bias, rel, elf_reader_->dump_so_base_);
}
} else {
auto rel = (Elf_Rela*)si.plt_rela;
for (auto i = 0; i <si.plt_rela_count; i++, rel ++) {
relocate<true>(si.load_bias, (Elf_Rel*)rel, elf_reader_->dump_so_base_);
}
rel = (Elf_Rela*) si.plt_rel;
for (auto i = 0; i < si.plt_rel_count; i++, rel++){
relocate<true>(si.load_bias, (Elf_Rel*)rel, elf_reader_->dump_so_base_);
}
}
auto relocate_address = [](Elf_Addr * pelf, Elf_Addr dump_base){
if (*pelf > dump_base)
*pelf = *pelf - dump_base;
};
// relocate_address(p, elf_reader_->dump_so_base_);
// relocate_address(p, elf_reader_->dump_so_base_);
FLOGD("=======================RebuildRelocs End=======================");
return true;
}

template <bool isRela>
void ElfRebuilder::relocate(uint8_t * base, Elf_Rel* rel, Elf_Addr dump_base) {
if(rel == nullptr) return ;
#ifndef __SO64__
auto type = ELF32_R_TYPE(rel->r_info);
auto sym = ELF32_R_SYM(rel->r_info);
#else
auto type = ELF64_R_TYPE(rel->r_info);
auto sym = ELF64_R_SYM(rel->r_info);
#endif
auto prel = reinterpret_cast<Elf_Addr *>(base + rel->r_offset);
switch (type) {
// 重定位的几个选项
case R_386_RELATIVE:
case R_ARM_RELATIVE:
*prel = *prel - dump_base;
break;
case 0x402:{
auto syminfo = si.symtab[sym];
if (syminfo.st_value != 0) {
*prel = syminfo.st_value;
} else {
auto load_size = si.max_load - si.min_load;
*prel = load_size + external_pointer;
external_pointer += sizeof(*prel);
}
break;
}
default:
break;
}
if (isRela){
Elf_Rela* rela = (Elf_Rela*)rel;
switch (type){
case 0x403:
*prel = rela->r_addend;
break;
default:
break;
}
}
};

ElfRebuilder::RebuildFin

最后就是重构一个完整的文件了,就是简单的复制粘贴

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Assembles the final output buffer in rebuild_data / rebuild_size.
//
// Layout: [loaded image (max_load - min_load bytes)] [shstrtab] [section headers].
// The ELF header copied from the reader is then patched (type, machine,
// section header count/offset, shstrtab index) and written over the first
// sizeof(Elf_Ehdr) bytes of the buffer. Always returns true.
bool ElfRebuilder::RebuildFin() {
    FLOGD("=======================try to finish file rebuild =========================");
    auto load_size = si.max_load - si.min_load;
    const auto name_table_bytes = shstrtab.length();
    const auto shdr_table_bytes = shdrs.size() * sizeof(Elf_Shdr);

    rebuild_size = load_size + name_table_bytes + shdr_table_bytes;
    rebuild_data = new uint8_t[rebuild_size];

    // 1) the loaded image itself
    memcpy(rebuild_data, (void*)si.load_bias, load_size);
    // 2) the section-name string table directly after it
    memcpy(rebuild_data + load_size, shstrtab.c_str(), name_table_bytes);
    // 3) the section header table after the string table
    auto shdr_off = load_size + name_table_bytes;
    memcpy(rebuild_data + (int)shdr_off, (void*)&shdrs[0], shdr_table_bytes);

    // Patch a copy of the recorded ELF header and write it to the front.
    auto header = *elf_reader_->record_ehdr();
    header.e_type = ET_DYN;
#ifdef __SO64__
    header.e_machine = 183;
#else
    header.e_machine = 40;
#endif
    header.e_shnum = shdrs.size();
    header.e_shoff = (Elf_Addr)shdr_off;
    header.e_shstrndx = sSHSTRTAB;
    memcpy(rebuild_data, &header, sizeof(Elf_Ehdr));

    FLOGD("=======================End=========================");
    return true;
}

重要的函数差不多就这些了。整体看下来逻辑也比较清楚。