学习一下从内存中dump出so文件后再进行修复的原理。意识流分析,可能有些乱,敬请谅解。
函数分析 先单独分析函数
ObElfReader::Load 加载so文件的各类结构,以下是用到的函数
ObElfReader::FixDumpSoPhdr 修正dump出来的so的程序头表。这里把每个LOAD段的memsz/filesz扩大到下一个LOAD段的起始地址(最后一个LOAD段则扩大到文件末尾),也就是扩大了修复时覆盖的范围:不仅包含LOAD段本身的内存,还包含两个LOAD段之间非LOAD部分的内存,因为可能有些数据会留存在两个LOAD段之间
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 void ObElfReader::FixDumpSoPhdr () { if (dump_so_base_ != 0 ) { std::vector<Elf_Phdr*> loaded_phdrs; for (auto i = 0 ; i < phdr_num_; i++) { auto phdr = &phdr_table_[i]; if (phdr->p_type != PT_LOAD) continue ; loaded_phdrs.push_back (phdr); } std::sort (loaded_phdrs.begin (), loaded_phdrs.end (), [](Elf_Phdr * first, Elf_Phdr * second) { return first->p_vaddr < second->p_vaddr; }); if (!loaded_phdrs.empty ()) { for (unsigned long i = 0 , total = loaded_phdrs.size (); i < total; i++) { auto phdr = loaded_phdrs[i]; if (i != total - 1 ) { auto nphdr = loaded_phdrs[i+1 ]; phdr->p_memsz = nphdr->p_vaddr - phdr->p_vaddr; } else { phdr->p_memsz = file_size - phdr->p_vaddr; } phdr->p_filesz = phdr->p_memsz; } } } auto phdr = phdr_table_; for (auto i = 0 ; i < phdr_num_; i++) { phdr->p_paddr = phdr->p_vaddr; phdr->p_filesz = phdr->p_memsz; phdr->p_offset = phdr->p_vaddr; phdr++; } }
ObElfReader::haveDynamicSectionInLoadableSegment 判断LOAD段里面有没有dynamic section,和名字描述的一样
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 bool ObElfReader::haveDynamicSectionInLoadableSegment () { Elf_Addr min_vaddr, max_vaddr; phdr_table_get_load_size (phdr_table_, phdr_num_, &min_vaddr, &max_vaddr); const Elf_Phdr* phdr = phdr_table_; const Elf_Phdr* phdr_limit = phdr + phdr_num_; for (phdr = phdr_table_; phdr < phdr_limit; phdr++) { if (phdr->p_type != PT_DYNAMIC) { continue ; } if (phdr->p_vaddr > min_vaddr && (phdr->p_vaddr + phdr->p_memsz) < max_vaddr) { return true ; } break ; } return false ; }
ElfReader::FindPhdr 返回程序头表在内存中的地址,也就是它在已加载的段中出现的位置。这与“phdr_table_”不同:“phdr_table_”是临时的,会在库重定位之前被释放。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 bool ElfReader::FindPhdr () { const Elf_Phdr* phdr_limit = phdr_table_ + phdr_num_; for (const Elf_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) { if (phdr->p_type == PT_PHDR) { return CheckPhdr ((uint8_t *)load_bias_ + phdr->p_vaddr); } } for (const Elf_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) { if (phdr->p_type == PT_LOAD) { if (phdr->p_offset == 0 ) { uint8_t *elf_addr = (uint8_t *)load_bias_ + phdr->p_vaddr; const Elf_Ehdr* ehdr = (const Elf_Ehdr*)(void *)elf_addr; Elf_Addr offset = ehdr->e_phoff; return CheckPhdr ((uint8_t *)ehdr + offset); } break ; } } FLOGE ("can't find loaded phdr for \"%s\"" , name_); return false ; }
ElfRebuilder::Rebuild 开始进行rebuild
1 2 3 4 5 6 7 bool ElfRebuilder::Rebuild () { return RebuildPhdr () && ReadSoInfo () && RebuildShdr () && RebuildRelocs () && RebuildFin (); }
ElfRebuilder::RebuildPhdr 重建phdr,和前面的FixDumpSoPhdr差不多
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 bool ElfRebuilder::RebuildPhdr () { FLOGD ("=============LoadDynamicSectionFromBaseSource==========RebuildPhdr=========================" ); auto phdr = (Elf_Phdr*)elf_reader_->loaded_phdr (); for (auto i = 0 ; i < elf_reader_->phdr_count (); i++) { phdr->p_filesz = phdr->p_memsz; phdr->p_paddr = phdr->p_vaddr; phdr->p_offset = phdr->p_vaddr; phdr++; } FLOGD ("=====================RebuildPhdr End======================" ); return true ; }
ElfRebuilder::ReadSoInfo 实际上感觉就是解析dynamic段的各类信息
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
// NOTE(review): the digit run above is kept verbatim from the source; it
// looks like line-number residue from the original listing, not code.
//
// Populates `si` from the loaded image held by elf_reader_: base/load bias,
// program headers, the load address range, the dynamic section, and the
// per-tag pointers (hash, strtab, symtab, ...) found while walking the
// dynamic entries.
//
// Returns false (after logging) when no dynamic section is available;
// returns true otherwise.
bool ElfRebuilder::ReadSoInfo () {
    FLOGD ("=======================ReadSoInfo=========================" );
    // base and load_bias are both set to the reader's load bias.
    si.base = si.load_bias = elf_reader_->load_bias ();
    si.phdr = elf_reader_->loaded_phdr ();
    si.phnum = elf_reader_->phdr_count ();
    auto base = si.load_bias;
    phdr_table_get_load_size (si.phdr, si.phnum, &si.min_load, &si.max_load);
    // Extend the upper bound by the reader's pad size.
    si.max_load += elf_reader_->pad_size_;
    elf_reader_->GetDynamicSection (&si.dynamic, &si.dynamic_count, &si.dynamic_flags);
    if (si.dynamic == nullptr ) {
        FLOGE ("No valid dynamic phdr data" );
        return false ;
    }
    phdr_table_get_arm_exidx (si.phdr, si.phnum, si.base, &si.ARM_exidx, (unsigned *)&si.ARM_exidx_count);
    // Not referenced in the visible part of the listing; presumably used by
    // the elided cases below — TODO confirm.
    uint32_t needed_count = 0 ;
    // Walk the dynamic entries until the DT_NULL terminator. Every d_ptr is
    // turned into an address by adding `base`.
    for (Elf_Dyn* d = si.dynamic; d->d_tag != DT_NULL; ++d) {
        switch (d->d_tag){
            case DT_HASH:
                // As read here: word 0 is nbucket, word 1 is nchain, the
                // bucket array starts 8 bytes in, and the chain array
                // follows nbucket 4-byte entries.
                si.hash = d->d_un.d_ptr + (uint8_t *)base;
                si.nbucket = ((unsigned *) (base + d->d_un.d_ptr))[0 ];
                si.nchain = ((unsigned *) (base + d->d_un.d_ptr))[1 ];
                si.bucket = (unsigned *) (base + d->d_un.d_ptr + 8 );
                si.chain = (unsigned *) (base + d->d_un.d_ptr + 8 + si.nbucket * 4 );
                break ;
            case DT_STRTAB:
                si.strtab = (const char *) (base + d->d_un.d_ptr);
                FLOGD ("string table found at %" ADDRESS_FORMAT "x" , d->d_un.d_ptr);
                break ;
            case DT_SYMTAB:
                si.symtab = (Elf_Sym *) (base + d->d_un.d_ptr);
                FLOGD ("symbol table found at %" ADDRESS_FORMAT "x" , d->d_un.d_ptr);
                break ;
            // The source listing elides the remaining cases at this point.
            .........
        }
    }
    FLOGD ("=======================ReadSoInfo End=========================" );
    return true ;
}
ElfRebuilder::RebuildShdr 重建section header table,也是最重要的一个环节
// Synthesizes section headers (`shdrs`) and the section-name string table
// (`shstrtab`) from the information gathered in `si`.
//
// Visible steps: push an all-zero header at index 0, add one header per
// known region (.dynsym, .dynstr, ... , .data, .shstrtab), sort the headers
// by address while keeping the saved s* indices in sync, fix up sh_link,
// derive sizes that were left open, and clamp overlapping sizes.
// Always returns true in the visible code.
//
// NOTE(review): the source listing elides part of this function (marked by
// the dotted line below), so the sections built there are not documented.
bool ElfRebuilder::RebuildShdr () {
    FLOGD ("=======================RebuildShdr=========================" );
    auto base = si.load_bias;
    // Offset 0 of the name table is an empty string.
    shstrtab.push_back ('\0' );
    // Index 0: zero-initialized header.
    if (true ) {
        Elf_Shdr shdr = {0 };
        shdrs.push_back (shdr);
    }
    // .dynsym — only when a symbol table pointer was found.
    if (si.symtab != nullptr ) {
        sDYNSYM = shdrs.size ();
        Elf_Shdr shdr;
        shdr.sh_name = shstrtab.length ();
        shstrtab.append (".dynsym" );
        shstrtab.push_back ('\0' );
        shdr.sh_type = SHT_DYNSYM;
        shdr.sh_flags = SHF_ALLOC;
        // Address relative to the load bias; file offset mirrors it.
        shdr.sh_addr = (uintptr_t )si.symtab - (uintptr_t )base;
        shdr.sh_offset = shdr.sh_addr;
        // Size is left at 0 here and computed after sorting, from the start
        // of the header that follows .dynsym.
        shdr.sh_size = 0 ;
        shdr.sh_link = 0 ;
        shdr.sh_info = 0 ;
#ifdef __SO64__
        shdr.sh_addralign = 8 ;
        shdr.sh_entsize = 0x18 ;
#else
        shdr.sh_addralign = 4 ;
        shdr.sh_entsize = 0x10 ;
#endif
        shdrs.push_back (shdr);
    }
    // .dynstr — only when a string table pointer was found.
    if (si.strtab != nullptr ) {
        sDYNSTR = shdrs.size ();
        Elf_Shdr shdr;
        shdr.sh_name = shstrtab.length ();
        shstrtab.append (".dynstr" );
        shstrtab.push_back ('\0' );
        shdr.sh_type = SHT_STRTAB;
        shdr.sh_flags = SHF_ALLOC;
        shdr.sh_addr = (uintptr_t )si.strtab - (uintptr_t )base;
        shdr.sh_offset = shdr.sh_addr;
        shdr.sh_size = si.strtabsize;
        shdr.sh_link = 0 ;
        shdr.sh_info = 0 ;
        shdr.sh_addralign = 1 ;
        shdr.sh_entsize = 0x0 ;
        shdrs.push_back (shdr);
    }
    // The source listing elides further section blocks at this point.
    ........................
    // .data — starts where the most recently pushed header ends and runs
    // up to si.max_load.
    if (true ) {
        sDATA = shdrs.size ();
        auto sLast = sDATA - 1 ;
        Elf_Shdr shdr;
        shdr.sh_name = shstrtab.length ();
        shstrtab.append (".data" );
        shstrtab.push_back ('\0' );
        shdr.sh_type = SHT_PROGBITS;
        shdr.sh_flags = SHF_ALLOC | SHF_WRITE;
        shdr.sh_addr = shdrs[sLast].sh_addr + shdrs[sLast].sh_size;
        shdr.sh_offset = shdr.sh_addr;
        shdr.sh_size = si.max_load - shdr.sh_addr;
        shdr.sh_link = 0 ;
        shdr.sh_info = 0 ;
        shdr.sh_addralign = 4 ;
        shdr.sh_entsize = 0x0 ;
        shdrs.push_back (shdr);
    }
    // .shstrtab — placed at si.max_load; its size is the name table length
    // after its own name has been appended.
    if (true ) {
        sSHSTRTAB = shdrs.size ();
        Elf_Shdr shdr;
        shdr.sh_name = shstrtab.length ();
        shstrtab.append (".shstrtab" );
        shstrtab.push_back ('\0' );
        shdr.sh_type = SHT_STRTAB;
        shdr.sh_flags = 0 ;
        shdr.sh_addr = si.max_load;
        shdr.sh_offset = shdr.sh_addr;
        shdr.sh_size = shstrtab.length ();
        shdr.sh_link = 0 ;
        shdr.sh_info = 0 ;
        shdr.sh_addralign = 1 ;
        shdr.sh_entsize = 0x0 ;
        shdrs.push_back (shdr);
    }
    // Order headers 1..end by ascending sh_addr (index 0 stays in place).
    // Each swap is mirrored onto every saved section index so the s*
    // variables keep pointing at the same section.
    for (auto i = 1 ; i < shdrs.size (); i++) {
        for (auto j = i + 1 ; j < shdrs.size (); j++) {
            if (shdrs[i].sh_addr > shdrs[j].sh_addr) {
                auto tmp = shdrs[i];
                shdrs[i] = shdrs[j];
                shdrs[j] = tmp;
                // Remap an index that referred to either swapped slot.
                auto chgIdx = [i, j](Elf_Word &t) {
                    if (t == i) {
                        t = j;
                    } else if (t == j) {
                        t = i;
                    }
                };
                chgIdx (sDYNSYM);
                chgIdx (sDYNSTR);
                chgIdx (sHASH);
                chgIdx (sRELDYN);
                chgIdx (sRELADYN);
                chgIdx (sRELPLT);
                chgIdx (sPLT);
                chgIdx (sTEXTTAB);
                chgIdx (sARMEXIDX);
                chgIdx (sFINIARRAY);
                chgIdx (sINITARRAY);
                chgIdx (sDYNAMIC);
                chgIdx (sGOT);
                chgIdx (sDATA);
                chgIdx (sBSS);
                chgIdx (sSHSTRTAB);
            }
        }
    }
    // sh_link fix-ups. A saved index of 0 is treated as "section absent".
    if (sHASH != 0 ) {
        shdrs[sHASH].sh_link = sDYNSYM;
    }
    if (sRELDYN != 0 ){
        shdrs[sRELDYN].sh_link = sDYNSYM;
    }
    if (sRELADYN != 0 ){
        shdrs[sRELADYN].sh_link = sDYNSYM;
    }
    if (sRELPLT != 0 ) {
        shdrs[sRELPLT].sh_link = sDYNSYM;
    }
    if (sARMEXIDX != 0 ) {
        shdrs[sARMEXIDX].sh_link = sTEXTTAB;
    }
    if (sDYNAMIC != 0 ) {
        shdrs[sDYNAMIC].sh_link = sDYNSTR;
    }
    if (sDYNSYM != 0 ) {
        shdrs[sDYNSYM].sh_link = sDYNSTR;
    }
    // .dynsym size: distance to the start of the next header.
    // NOTE(review): sNext is not range-checked against shdrs.size() here.
    if (sDYNSYM != 0 ) {
        auto sNext = sDYNSYM + 1 ;
        shdrs[sDYNSYM].sh_size = shdrs[sNext].sh_addr - shdrs[sDYNSYM].sh_addr;
    }
    // Same derivation for the section tracked by sTEXTTAB.
    if (sTEXTTAB != 0 ) {
        auto sNext = sTEXTTAB + 1 ;
        shdrs[sTEXTTAB].sh_size = shdrs[sNext].sh_addr - shdrs[sTEXTTAB].sh_addr;
    }
    // Shrink any header whose size would run past the next header's offset.
    for (auto i = 2 ; i < shdrs.size (); i++) {
        if (shdrs[i].sh_offset - shdrs[i-1 ].sh_offset < shdrs[i-1 ].sh_size) {
            shdrs[i-1 ].sh_size = shdrs[i].sh_offset - shdrs[i-1 ].sh_offset;
        }
    }
    FLOGD ("=====================RebuildShdr End======================" );
    return true ;
}
ElfRebuilder::RebuildRelocs 处理需要重定位的部分:dump出来的内存里这些位置已经被重定位过,这里参照android源码的重定位逻辑反过来处理,例如RELATIVE类型要减去dump时的基址,把值还原成文件中的形式
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 bool ElfRebuilder::RebuildRelocs () { if (elf_reader_->dump_so_base_ == 0 ) return true ; FLOGD ("=======================RebuildRelocs=========================" ); if (si.plt_type == DT_REL) { auto rel = si.rel; for (auto i = 0 ; i < si.rel_count; i++, rel++){ relocate <false >(si.load_bias, rel, elf_reader_->dump_so_base_); } rel = si.plt_rel; for (auto i = 0 ; i < si.plt_rel_count; i++, rel++){ relocate <false >(si.load_bias, rel, elf_reader_->dump_so_base_); } } else { auto rel = (Elf_Rela*)si.plt_rela; for (auto i = 0 ; i <si.plt_rela_count; i++, rel ++) { relocate <true >(si.load_bias, (Elf_Rel*)rel, elf_reader_->dump_so_base_); } rel = (Elf_Rela*) si.plt_rel; for (auto i = 0 ; i < si.plt_rel_count; i++, rel++){ relocate <true >(si.load_bias, (Elf_Rel*)rel, elf_reader_->dump_so_base_); } } auto relocate_address = [](Elf_Addr * pelf, Elf_Addr dump_base){ if (*pelf > dump_base) *pelf = *pelf - dump_base; }; FLOGD ("=======================RebuildRelocs End=======================" ); return true ; } template <bool isRela>void ElfRebuilder::relocate (uint8_t * base, Elf_Rel* rel, Elf_Addr dump_base) { if (rel == nullptr ) return ; #ifndef __SO64__ auto type = ELF32_R_TYPE (rel->r_info); auto sym = ELF32_R_SYM (rel->r_info); #else auto type = ELF64_R_TYPE (rel->r_info); auto sym = ELF64_R_SYM (rel->r_info); #endif auto prel = reinterpret_cast <Elf_Addr *>(base + rel->r_offset); switch (type) { case R_386_RELATIVE: case R_ARM_RELATIVE: *prel = *prel - dump_base; break ; case 0x402 :{ auto syminfo = si.symtab[sym]; if (syminfo.st_value != 0 ) { *prel = syminfo.st_value; } else { auto load_size = si.max_load - si.min_load; *prel = load_size + external_pointer; external_pointer += sizeof (*prel); } break ; } default : break ; } if (isRela){ Elf_Rela* rela = 
(Elf_Rela*)rel; switch (type){ case 0x403 : *prel = rela->r_addend; break ; default : break ; } } };
ElfRebuilder::RebuildFin 最后就是重构一个完整的文件了,就是简单的复制粘贴
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 bool ElfRebuilder::RebuildFin () { FLOGD ("=======================try to finish file rebuild =========================" ); auto load_size = si.max_load - si.min_load; rebuild_size = load_size + shstrtab.length () + shdrs.size () * sizeof (Elf_Shdr); rebuild_data = new uint8_t [rebuild_size]; memcpy (rebuild_data, (void *)si.load_bias, load_size); memcpy (rebuild_data + load_size, shstrtab.c_str (), shstrtab.length ()); auto shdr_off = load_size + shstrtab.length (); memcpy (rebuild_data + (int )shdr_off, (void *)&shdrs[0 ], shdrs.size () * sizeof (Elf_Shdr)); auto ehdr = *elf_reader_->record_ehdr (); ehdr.e_type = ET_DYN; #ifdef __SO64__ ehdr.e_machine = 183 ; #else ehdr.e_machine = 40 ; #endif ehdr.e_shnum = shdrs.size (); ehdr.e_shoff = (Elf_Addr)shdr_off; ehdr.e_shstrndx = sSHSTRTAB; memcpy (rebuild_data, &ehdr, sizeof (Elf_Ehdr)); FLOGD ("=======================End=========================" ); return true ; }
重要的函数差不多就这些了。整体看下来逻辑也比较清楚。