From 9d87c6859646c882b7ef7c631eae707814c0726a Mon Sep 17 00:00:00 2001 From: Redbeanw44602 Date: Sat, 1 Mar 2025 23:44:57 +0800 Subject: [PATCH] refactor: merge symbol-extractor and type-extractor into io/pdb. --- src/data_format/raw_type_data.cpp | 57 ------- src/data_format/raw_type_data.h | 40 ----- src/object_file/pdb.cpp | 274 ++++++++++++++++++++---------- src/object_file/pdb.h | 79 +++++++-- src/tools/extractsym/main.cpp | 72 +++----- src/tools/makepdb/main.cpp | 8 +- 6 files changed, 264 insertions(+), 266 deletions(-) delete mode 100644 src/data_format/raw_type_data.cpp delete mode 100644 src/data_format/raw_type_data.h diff --git a/src/data_format/raw_type_data.cpp b/src/data_format/raw_type_data.cpp deleted file mode 100644 index 5c9985d..0000000 --- a/src/data_format/raw_type_data.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "data_format/raw_type_data.h" - -#include -#include -#include -#include -#include -#include -#include - -using namespace llvm::pdb; - -namespace di::data_format { - -void RawTypeData::read(const fs::path& path) { - std::unique_ptr pdb_session; - if (llvm::pdb::loadDataForPDB( - PDB_ReaderType::Native, - path.string(), - pdb_session - )) { - throw std::runtime_error("Failed to load PDB."); - } - - auto native_session = static_cast(pdb_session.get()); - auto& pdb_file = native_session->getPDBFile(); - - SmallVector type_map; - SmallVector id_map; - - if (auto tpi_stream = pdb_file.getPDBTpiStream()) { - if (codeview::mergeTypeRecords( - m_storaged_TPI, - type_map, - (*tpi_stream).typeArray() - )) { - throw std::runtime_error("Failed to merge type record."); - } - } else { - throw std::runtime_error("TPI is not valid."); - } - - if (auto ipi_stream = pdb_file.getPDBIpiStream()) { - if (codeview::mergeIdRecords( - m_storaged_IPI, - type_map, - id_map, - (*ipi_stream).typeArray() - )) { - throw std::runtime_error("Failed to merge id record."); - } - } else { - throw std::runtime_error("IPI is not valid."); - } -} - -} // namespace di::data_format diff --git a/src/data_format/raw_type_data.h b/src/data_format/raw_type_data.h deleted file mode 100644 index bf1cedf..0000000 --- a/src/data_format/raw_type_data.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "data_format/io/io_base.h" - -#include - -namespace di::data_format { - -class RawTypeData : public IOBase { -public: - using for_each_callback_t = - std::function; - - enum TypedStream { TPI, IPI }; - - RawTypeData() : m_storaged_IPI(m_allocator), m_storaged_TPI(m_allocator) {} - - void read(const fs::path& path) override; - void write(const fs::path& path) const override { - throw std::runtime_error("Unsupported operation."); - } - - template - void for_each(const for_each_callback_t& callback) /*const*/ { - if constexpr (Stream == TPI) { - return m_storaged_TPI.ForEachRecord(callback); - } - if constexpr (Stream == IPI) { - return m_storaged_IPI.ForEachRecord(callback); - } - } - -private: - BumpPtrAllocator m_allocator; - - codeview::MergingTypeTableBuilder m_storaged_IPI; - codeview::MergingTypeTableBuilder m_storaged_TPI; -}; - -} // namespace di::data_format diff --git a/src/object_file/pdb.cpp b/src/object_file/pdb.cpp index dd0de64..9c4e46c 100644 --- a/src/object_file/pdb.cpp +++ b/src/object_file/pdb.cpp @@ -1,133 +1,221 @@ #include "object_file/pdb.h" +#include +#include #include +#include #include #include #include +#include +#include #include +#include +#include #include +#include +#include using namespace llvm::pdb; +using namespace llvm::codeview; namespace di::object_file { -PDB::PDB() : m_builder(m_allocator) { - constexpr auto block_size = 4096; - if (m_builder.initialize(block_size)) { - throw std::runtime_error("Failed to initialize pdb file builder."); +void PDB::read(const fs::path& path) { + if (loadDataForPDB(PDB_ReaderType::Native, path.string(), m_session)) { + throw std::runtime_error("Failed to load PDB."); } - for (uint32_t idx = 0; idx < pdb::kSpecialStreamCount; ++idx) { - if (!m_builder.getMsfBuilder().addStream(0)) { - throw std::runtime_error("Failed to add initial stream."); + std::unique_ptr pdb_session; + if (llvm::pdb::loadDataForPDB( + PDB_ReaderType::Native, + path.string(), + pdb_session + )) { + throw std::runtime_error("Failed to load PDB."); + } + + auto native_session = static_cast(pdb_session.get()); + auto& pdb_file = native_session->getPDBFile(); + + SmallVector type_map; + SmallVector id_map; + + if (auto tpi_stream = pdb_file.getPDBTpiStream()) { + if (codeview::mergeTypeRecords( + *m_storaged_Tpi, + type_map, + (*tpi_stream).typeArray() + )) { + throw std::runtime_error("Failed to merge type record."); } + } else { + throw std::runtime_error("TPI is not valid."); + } + + if (auto ipi_stream = pdb_file.getPDBIpiStream()) { + if (codeview::mergeIdRecords( + *m_storaged_Ipi, + type_map, + id_map, + (*ipi_stream).typeArray() + )) { + throw std::runtime_error("Failed to merge id record."); + } + } else { + throw std::runtime_error("IPI is not valid."); } } -void PDB::write(const fs::path& path) { +void PDB::_write(const fs::path& path) { build(); - codeview::GUID out_guid; - if (m_builder.commit(path.string(), &out_guid)) { + GUID out_guid; + if (m_builder->commit(path.string(), &out_guid)) { throw std::runtime_error("Failed to create pdb!"); } } +NativeSession& PDB::get_native_session() { + return *static_cast(m_session.get()); +} + +pdb::PDBFile& PDB::get_pdb_file() { return get_native_session().getPDBFile(); } + +void PDB::_for_each_public(const for_each_symbol_callback_t& callback) { + using namespace codeview; + auto& file = get_pdb_file(); + auto publics_stream = file.getPDBPublicsStream(); + if (!publics_stream) { + throw std::runtime_error("Failed to get public stream from PDB."); + } + + auto publics_symbol_stream = file.getPDBSymbolStream(); + if (!publics_symbol_stream) { + throw std::runtime_error("Failed to get symbol stream from PDB."); + } + + auto publics_symbols = + publics_symbol_stream->getSymbolArray().getUnderlyingStream(); + for (auto offset : publics_stream->getPublicsTable()) { + auto cv_symbol = readSymbolFromStream(publics_symbols, offset); + auto public_sym32 = + SymbolDeserializer::deserializeAs(cv_symbol.get()); + if (!public_sym32) { + throw std::runtime_error("Unsupported symbol type."); + } + callback(*public_sym32); + } +} + void PDB::build() { - build_Info(); - build_DBI(); - build_TPI(); - build_GSI(); -} + constexpr auto block_size = 4096; -void PDB::build_Info() { - auto pdb_info = m_owning_coff->get_debug_info(); - auto& Info = m_builder.getInfoBuilder(); + m_builder.reset(new PDBFileBuilder{m_Alloc}); - Info.setVersion(PdbRaw_ImplVer::PdbImplVC70); - Info.setAge(pdb_info.Age); - Info.setGuid(*reinterpret_cast(pdb_info.Signature)); - Info.addFeature(PdbRaw_FeatureSig::VC140); -} - -void PDB::build_DBI() { - auto pdb_info = m_owning_coff->get_debug_info(); - auto& DBI = m_builder.getDbiBuilder(); - - DBI.setVersionHeader(PdbRaw_DbiVer::PdbDbiV70); - DBI.setAge(pdb_info.Age); - DBI.setMachineType(PDB_Machine::Amd64); - DBI.setFlags(DbiFlags::FlagStrippedMask); - DBI.setBuildNumber(14, 11); // LLVM is compatible with LINK 14.11 - - // Add sections. - auto section_table = m_owning_coff->get_section_table(); - auto number_of_sections = m_owning_coff->get_number_of_sections(); - - auto section_data_ref = ArrayRef( - (uint8_t*)section_table, - m_owning_coff->get_number_of_sections() * sizeof(object::coff_section) - ); - - auto section_table_ref = ArrayRef( - (const object::coff_section*)section_data_ref.data(), - number_of_sections - ); - - DBI.createSectionMap(section_table_ref); - - // Add COFF section header stream. - if (DBI.addDbgStream(DbgHeaderType::SectionHdr, section_data_ref)) { - throw std::runtime_error("Failed to add dbg stream."); + if (m_builder->initialize(block_size)) { + throw std::runtime_error("Failed to initialize pdb file builder."); } -} -void PDB::build_TPI() { - auto& TPI = m_builder.getTpiBuilder(); - auto& IPI = m_builder.getIpiBuilder(); - - TPI.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80); - IPI.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80); - - if (m_owning_raw_type_data) { - m_owning_raw_type_data->for_each( - [&TPI](codeview::TypeIndex index, const codeview::CVType& type) { - TPI.addTypeRecord(type.RecordData, std::nullopt); - } - ); - m_owning_raw_type_data->for_each( - [&IPI](codeview::TypeIndex index, const codeview::CVType& type) { - IPI.addTypeRecord(type.RecordData, std::nullopt); - } - ); + for (uint32_t idx = 0; idx < pdb::kSpecialStreamCount; ++idx) { + if (!m_builder->getMsfBuilder().addStream(0)) { + throw std::runtime_error("Failed to add initial stream."); + } } -} -void PDB::build_GSI() { - std::vector publics; - m_owning_symbol_data->for_each([&publics, this](const BoundSymbol& entity) { - BulkPublic symbol; + // INFO + { + auto pdb_info = m_owning_coff->get_debug_info(); + auto& Info = m_builder->getInfoBuilder(); - auto section_index = - m_owning_coff->get_section_index(entity.m_rva - m_image_base); - auto section_or_err = - m_owning_coff->get_owning_coff().getSection(section_index + 1); - if (!section_or_err) { - throw std::runtime_error("Invalid section."); + Info.setVersion(PdbRaw_ImplVer::PdbImplVC70); + Info.setAge(pdb_info.Age); + Info.setGuid(*reinterpret_cast(pdb_info.Signature)); + Info.addFeature(PdbRaw_FeatureSig::VC140); + } + + // DBI + { + auto pdb_info = m_owning_coff->get_debug_info(); + auto& Dbi = m_builder->getDbiBuilder(); + + Dbi.setVersionHeader(PdbRaw_DbiVer::PdbDbiV70); + Dbi.setAge(pdb_info.Age); + Dbi.setMachineType(PDB_Machine::Amd64); + Dbi.setFlags(DbiFlags::FlagStrippedMask); + Dbi.setBuildNumber(14, 11); // LLVM is compatible with LINK 14.11 + + // Add sections. + auto section_table = m_owning_coff->get_section_table(); + auto number_of_sections = m_owning_coff->get_number_of_sections(); + + auto section_data_ref = ArrayRef( + (uint8_t*)section_table, + m_owning_coff->get_number_of_sections() + * sizeof(object::coff_section) + ); + + auto section_table_ref = ArrayRef( + (const object::coff_section*)section_data_ref.data(), + number_of_sections + ); + + Dbi.createSectionMap(section_table_ref); + + // Add COFF section header stream. + if (Dbi.addDbgStream(DbgHeaderType::SectionHdr, section_data_ref)) { + throw std::runtime_error("Failed to add dbg stream."); + } + } + + // TPI & IPI + { + auto& Tpi = m_builder->getTpiBuilder(); + auto& Ipi = m_builder->getIpiBuilder(); + + Tpi.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80); + Ipi.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80); + + if (m_storaged_Tpi) { + for_each([&Tpi](TypeIndex index, const CVType& type) { + Tpi.addTypeRecord(type.RecordData, std::nullopt); + }); } - symbol.Name = strdup(entity.m_symbol_name.c_str()); - symbol.NameLen = entity.m_symbol_name.size(); - symbol.Segment = section_index + 1; - symbol.Offset = - entity.m_rva - m_image_base - section_or_err.get()->VirtualAddress; - if (entity.m_is_function) - symbol.setFlags(codeview::PublicSymFlags::Function); + if (m_storaged_Ipi) { + for_each([&Ipi](TypeIndex index, const CVType& type) { + Ipi.addTypeRecord(type.RecordData, std::nullopt); + }); + } + } - publics.emplace_back(symbol); - }); + // PUBLIC + { + std::vector publics; + m_owning_symbol_data->for_each([&publics, + this](const BoundSymbol& entity) { + BulkPublic symbol; - m_builder.getGsiBuilder().addPublicSymbols(std::move(publics)); + auto section_index = + m_owning_coff->get_section_index(entity.m_rva - m_image_base); + auto section_or_err = + m_owning_coff->get_owning_coff().getSection(section_index + 1); + if (!section_or_err) { + throw std::runtime_error("Invalid section."); + } + + symbol.Name = strdup(entity.m_symbol_name.c_str()); + symbol.NameLen = entity.m_symbol_name.size(); + symbol.Segment = section_index + 1; + symbol.Offset = entity.m_rva - m_image_base + - section_or_err.get()->VirtualAddress; + if (entity.m_is_function) symbol.setFlags(PublicSymFlags::Function); + + publics.emplace_back(symbol); + }); + + m_builder->getGsiBuilder().addPublicSymbols(std::move(publics)); + } } } // namespace di::object_file diff --git a/src/object_file/pdb.h b/src/object_file/pdb.h index 94b9fa3..42ca68a 100644 --- a/src/object_file/pdb.h +++ b/src/object_file/pdb.h @@ -1,23 +1,59 @@ #pragma once +#include "data_format/bound_symbol_list.h" +#include "io/io_base.h" #include "object_file/coff.h" -#include "data_format/bound_symbol_list.h" -#include "data_format/raw_type_data.h" - +#include +#include +#include +#include #include #include namespace di::object_file { -class PDB { +class PDB : public IOBase { public: using owning_coff_t = std::unique_ptr; using owning_symbol_data_t = std::unique_ptr; - using owning_type_data_t = std::unique_ptr; - explicit PDB(); + using for_each_symbol_callback_t = + std::function; + using for_each_type_callback_t = + std::function; + enum IterableStream { TPI, IPI, Public }; + + void read(const fs::path& path) override; + void write(const fs::path& path) const override { + const_cast(this)->_write(path); + } + + // r + pdb::NativeSession& get_native_session(); + pdb::PDBFile& get_pdb_file(); + + template + void for_each(const CallbackT& callback) const { + if constexpr (Stream == TPI) { + return const_cast(this)->_for_each_ipi( + *m_storaged_Tpi, + callback + ); + } + if constexpr (Stream == IPI) { + return const_cast(this)->_for_each_ipi( + *m_storaged_Ipi, + callback + ); + } + if constexpr (Stream == Public) { + return const_cast(this)->_for_each_public(callback); + } + } + + // w void set_coff_object(owning_coff_t coff_object) { m_owning_coff = std::move(coff_object); m_image_base = m_owning_coff->get_owning_coff().getImageBase(); @@ -26,28 +62,35 @@ public: void set_symbol_data(owning_symbol_data_t symbol_data) { m_owning_symbol_data = std::move(symbol_data); } - void set_raw_type_data(owning_type_data_t raw_type_data) { - m_owning_raw_type_data = std::move(raw_type_data); - } - - void write(const fs::path& path); private: void build(); - inline void build_Info(); - inline void build_DBI(); - inline void build_TPI(); - inline void build_GSI(); + // helpers + void _write(const fs::path& path); + + void _for_each_public(const for_each_symbol_callback_t& callback); + void _for_each_ipi( + codeview::MergingTypeTableBuilder& type_table, + const for_each_type_callback_t& callback + ) { + type_table.ForEachRecord(callback); + } owning_coff_t m_owning_coff; owning_symbol_data_t m_owning_symbol_data; - owning_type_data_t m_owning_raw_type_data; addr_t m_image_base; - BumpPtrAllocator m_allocator; - pdb::PDBFileBuilder m_builder; + BumpPtrAllocator m_Alloc; + + // w + std::unique_ptr m_builder; + + // r + std::unique_ptr m_session; + std::unique_ptr m_storaged_Ipi; + std::unique_ptr m_storaged_Tpi; }; } // namespace di::object_file diff --git a/src/tools/extractsym/main.cpp b/src/tools/extractsym/main.cpp index 6c85b98..de090c3 100644 --- a/src/tools/extractsym/main.cpp +++ b/src/tools/extractsym/main.cpp @@ -1,17 +1,12 @@ -#include -#include -#include -#include -#include -#include -#include -#include - #include -using namespace llvm; -using namespace llvm::pdb; -using namespace llvm::codeview; +#include + +#include "data_format/typed_symbol_list.h" +#include "object_file/pdb.h" + +using namespace di; +using namespace di::object_file; auto load_args(int argc, char* argv[]) { argparse::ArgumentParser program("extractpdb"); @@ -44,50 +39,23 @@ int main(int argc, char* argv[]) try { auto args = load_args(argc, argv); - std::unique_ptr pdb_session; - if (llvm::pdb::loadDataForPDB( - PDB_ReaderType::Native, - args.m_program_database_path, - pdb_session - )) { - throw std::runtime_error("Failed to load PDB."); - } + PDB pdb; + pdb.read(args.m_program_database_path); - auto native_session = static_cast(pdb_session.get()); - auto& pdb_file = native_session->getPDBFile(); + data_format::TypedSymbolList symbol_list; - auto publics_stream = pdb_file.getPDBPublicsStream(); - if (!publics_stream) { - throw std::runtime_error("Failed to get public stream from PDB."); - } + pdb.for_each([&symbol_list](const codeview::PublicSym32& symbol + ) { + using codeview::PublicSymFlags; - auto publics_symbol_stream = pdb_file.getPDBSymbolStream(); - if (!publics_symbol_stream) { - throw std::runtime_error("Failed to get symbol stream from PDB."); - } + auto is_fun = + (symbol.Flags & PublicSymFlags::Function) != PublicSymFlags::None; + symbol_list.record( + symbol.Name.str(), + is_fun ? DeclType::Function : DeclType::Var + ); + }); - std::ofstream ofs(args.m_output_path); - if (!ofs) { - throw std::runtime_error("Failed to open output file."); - } - - auto publics_symbols = - publics_symbol_stream->getSymbolArray().getUnderlyingStream(); - for (auto offset : publics_stream->getPublicsTable()) { - auto cv_symbol = readSymbolFromStream(publics_symbols, offset); - auto public_sym32 = - SymbolDeserializer::deserializeAs(cv_symbol.get()); - if (!public_sym32) { - throw std::runtime_error("Unsupported symbol type."); - } - - ofs - << ((public_sym32->Flags & PublicSymFlags::Function) - != PublicSymFlags::None - ? "Function, " - : "Var, ") - << public_sym32->Name.str() << "\n"; - } return 0; } catch (const std::exception& e) { diff --git a/src/tools/makepdb/main.cpp b/src/tools/makepdb/main.cpp index d9e1d73..02f97d7 100644 --- a/src/tools/makepdb/main.cpp +++ b/src/tools/makepdb/main.cpp @@ -4,7 +4,6 @@ #include "object_file/pdb.h" #include "data_format/bound_symbol_list.h" -#include "data_format/raw_type_data.h" using namespace di; @@ -58,16 +57,13 @@ int main(int argc, char* argv[]) try { auto symbol_data = std::make_unique(); symbol_data->read(args.symbol_data_path); - std::unique_ptr raw_type_data; + object_file::PDB pdb; if (args.typeinfo_pdb_path) { - raw_type_data = std::make_unique(); - raw_type_data->read(*args.typeinfo_pdb_path); + pdb.read(*args.typeinfo_pdb_path); } - object_file::PDB pdb; pdb.set_coff_object(std::move(server_program)); pdb.set_symbol_data(std::move(symbol_data)); - pdb.set_raw_type_data(std::move(raw_type_data)); pdb.write(args.output_path);