refactor: merge symbol-extractor and type-extractor into io/pdb.

This commit is contained in:
2025-03-01 23:44:57 +08:00
parent c6faa0ae19
commit 9d87c68596
6 changed files with 264 additions and 266 deletions

View File

@@ -1,57 +0,0 @@
#include "data_format/raw_type_data.h"
#include <llvm/DebugInfo/CodeView/TypeStreamMerger.h>
#include <llvm/DebugInfo/PDB/IPDBSession.h>
#include <llvm/DebugInfo/PDB/Native/NativeSession.h>
#include <llvm/DebugInfo/PDB/Native/PDBFile.h>
#include <llvm/DebugInfo/PDB/Native/TpiStream.h>
#include <llvm/DebugInfo/PDB/PDB.h>
#include <llvm/DebugInfo/PDB/PDBTypes.h>
using namespace llvm::pdb;
namespace di::data_format {
/// Loads the PDB at `path` with the native reader and merges its TPI and IPI
/// records into `m_storaged_TPI` / `m_storaged_IPI`.
///
/// @param path  Location of the PDB file to read.
/// @throws std::runtime_error if the PDB cannot be loaded, if the TPI or IPI
///         stream cannot be obtained, or if merging the records fails.
void RawTypeData::read(const fs::path& path) {
    // llvm::Error and llvm::Expected values that hold a failure must be
    // consumed before they are destroyed; otherwise LLVM builds with
    // ABI-breaking checks abort the process before the exception below could
    // ever reach the caller. Every failure is therefore consumed explicitly.
    std::unique_ptr<IPDBSession> pdb_session;
    if (auto load_err = llvm::pdb::loadDataForPDB(
            PDB_ReaderType::Native,
            path.string(),
            pdb_session
        )) {
        llvm::consumeError(std::move(load_err));
        throw std::runtime_error("Failed to load PDB.");
    }

    // The session was requested with PDB_ReaderType::Native above, so it is
    // treated as a NativeSession here.
    auto* native_session = static_cast<NativeSession*>(pdb_session.get());
    auto& pdb_file       = native_session->getPDBFile();

    SmallVector<codeview::TypeIndex, 128> type_map;
    SmallVector<codeview::TypeIndex, 128> id_map;

    // TPI: type records.
    auto tpi_stream = pdb_file.getPDBTpiStream();
    if (!tpi_stream) {
        llvm::consumeError(tpi_stream.takeError());
        throw std::runtime_error("TPI is not valid.");
    }
    if (auto merge_err = codeview::mergeTypeRecords(
            m_storaged_TPI,
            type_map,
            (*tpi_stream).typeArray()
        )) {
        llvm::consumeError(std::move(merge_err));
        throw std::runtime_error("Failed to merge type record.");
    }

    // IPI: id records; merged using the type index map filled by the TPI pass.
    auto ipi_stream = pdb_file.getPDBIpiStream();
    if (!ipi_stream) {
        llvm::consumeError(ipi_stream.takeError());
        throw std::runtime_error("IPI is not valid.");
    }
    if (auto merge_err = codeview::mergeIdRecords(
            m_storaged_IPI,
            type_map,
            id_map,
            (*ipi_stream).typeArray()
        )) {
        llvm::consumeError(std::move(merge_err));
        throw std::runtime_error("Failed to merge id record.");
    }
}
} // namespace di::data_format

View File

@@ -1,40 +0,0 @@
#pragma once
#include "data_format/io/io_base.h"
#include <llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h>
namespace di::data_format {
/// Holds the TPI and IPI type records read from a PDB file and lets callers
/// iterate over either table.
class RawTypeData : public IOBase {
public:
    /// Selects which record table `for_each` walks.
    enum TypedStream { TPI, IPI };

    /// Callback invoked once per stored record.
    using for_each_callback_t =
        std::function<void(codeview::TypeIndex, codeview::CVType)>;

    RawTypeData() : m_storaged_IPI(m_allocator), m_storaged_TPI(m_allocator) {}

    /// Reads the type data from the PDB at `path` (defined out of line).
    void read(const fs::path& path) override;

    /// Writing is not supported; always throws std::runtime_error.
    void write(const fs::path& path) const override {
        throw std::runtime_error("Unsupported operation.");
    }

    /// Calls `callback` for every record in the table chosen by `Stream`.
    template <TypedStream Stream>
    void for_each(const for_each_callback_t& callback) /*const*/ {
        if constexpr (Stream == TPI) {
            m_storaged_TPI.ForEachRecord(callback);
        } else if constexpr (Stream == IPI) {
            m_storaged_IPI.ForEachRecord(callback);
        }
    }

private:
    // Declared before the two builders because both are constructed from it.
    BumpPtrAllocator m_allocator;

    codeview::MergingTypeTableBuilder m_storaged_IPI;
    codeview::MergingTypeTableBuilder m_storaged_TPI;
};
} // namespace di::data_format

View File

@@ -1,64 +1,149 @@
#include "object_file/pdb.h"
#include <llvm/DebugInfo/CodeView/SymbolDeserializer.h>
#include <llvm/DebugInfo/CodeView/TypeStreamMerger.h>
#include <llvm/DebugInfo/MSF/MSFBuilder.h>
#include <llvm/DebugInfo/PDB/IPDBSession.h>
#include <llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h>
#include <llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h>
#include <llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h>
#include <llvm/DebugInfo/PDB/Native/PDBFile.h>
#include <llvm/DebugInfo/PDB/Native/PublicsStream.h>
#include <llvm/DebugInfo/PDB/Native/RawConstants.h>
#include <llvm/DebugInfo/PDB/Native/SymbolStream.h>
#include <llvm/DebugInfo/PDB/Native/TpiStream.h>
#include <llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h>
#include <llvm/DebugInfo/PDB/PDB.h>
#include <llvm/DebugInfo/PDB/PDBTypes.h>
using namespace llvm::pdb;
using namespace llvm::codeview;
namespace di::object_file {
PDB::PDB() : m_builder(m_allocator) {
constexpr auto block_size = 4096;
if (m_builder.initialize(block_size)) {
throw std::runtime_error("Failed to initialize pdb file builder.");
void PDB::read(const fs::path& path) {
if (loadDataForPDB(PDB_ReaderType::Native, path.string(), m_session)) {
throw std::runtime_error("Failed to load PDB.");
}
for (uint32_t idx = 0; idx < pdb::kSpecialStreamCount; ++idx) {
if (!m_builder.getMsfBuilder().addStream(0)) {
throw std::runtime_error("Failed to add initial stream.");
std::unique_ptr<IPDBSession> pdb_session;
if (llvm::pdb::loadDataForPDB(
PDB_ReaderType::Native,
path.string(),
pdb_session
)) {
throw std::runtime_error("Failed to load PDB.");
}
auto native_session = static_cast<NativeSession*>(pdb_session.get());
auto& pdb_file = native_session->getPDBFile();
SmallVector<codeview::TypeIndex, 128> type_map;
SmallVector<codeview::TypeIndex, 128> id_map;
if (auto tpi_stream = pdb_file.getPDBTpiStream()) {
if (codeview::mergeTypeRecords(
*m_storaged_Tpi,
type_map,
(*tpi_stream).typeArray()
)) {
throw std::runtime_error("Failed to merge type record.");
}
} else {
throw std::runtime_error("TPI is not valid.");
}
if (auto ipi_stream = pdb_file.getPDBIpiStream()) {
if (codeview::mergeIdRecords(
*m_storaged_Ipi,
type_map,
id_map,
(*ipi_stream).typeArray()
)) {
throw std::runtime_error("Failed to merge id record.");
}
} else {
throw std::runtime_error("IPI is not valid.");
}
}
void PDB::write(const fs::path& path) {
void PDB::_write(const fs::path& path) {
build();
codeview::GUID out_guid;
if (m_builder.commit(path.string(), &out_guid)) {
GUID out_guid;
if (m_builder->commit(path.string(), &out_guid)) {
throw std::runtime_error("Failed to create pdb!");
}
}
void PDB::build() {
build_Info();
build_DBI();
build_TPI();
build_GSI();
/// Returns the session held in `m_session`, viewed as a NativeSession.
/// NOTE(review): `m_session` is dereferenced without a null check; this
/// presumably relies on read() having been called first — confirm.
NativeSession& PDB::get_native_session() {
    auto* native = static_cast<NativeSession*>(m_session.get());
    return *native;
}
void PDB::build_Info() {
/// Returns the PDBFile owned by the native session.
pdb::PDBFile& PDB::get_pdb_file() {
    auto& session = get_native_session();
    return session.getPDBFile();
}
void PDB::_for_each_public(const for_each_symbol_callback_t& callback) {
using namespace codeview;
auto& file = get_pdb_file();
auto publics_stream = file.getPDBPublicsStream();
if (!publics_stream) {
throw std::runtime_error("Failed to get public stream from PDB.");
}
auto publics_symbol_stream = file.getPDBSymbolStream();
if (!publics_symbol_stream) {
throw std::runtime_error("Failed to get symbol stream from PDB.");
}
auto publics_symbols =
publics_symbol_stream->getSymbolArray().getUnderlyingStream();
for (auto offset : publics_stream->getPublicsTable()) {
auto cv_symbol = readSymbolFromStream(publics_symbols, offset);
auto public_sym32 =
SymbolDeserializer::deserializeAs<PublicSym32>(cv_symbol.get());
if (!public_sym32) {
throw std::runtime_error("Unsupported symbol type.");
}
callback(*public_sym32);
}
}
void PDB::build() {
constexpr auto block_size = 4096;
m_builder.reset(new PDBFileBuilder{m_Alloc});
if (m_builder->initialize(block_size)) {
throw std::runtime_error("Failed to initialize pdb file builder.");
}
for (uint32_t idx = 0; idx < pdb::kSpecialStreamCount; ++idx) {
if (!m_builder->getMsfBuilder().addStream(0)) {
throw std::runtime_error("Failed to add initial stream.");
}
}
// INFO
{
auto pdb_info = m_owning_coff->get_debug_info();
auto& Info = m_builder.getInfoBuilder();
auto& Info = m_builder->getInfoBuilder();
Info.setVersion(PdbRaw_ImplVer::PdbImplVC70);
Info.setAge(pdb_info.Age);
Info.setGuid(*reinterpret_cast<codeview::GUID*>(pdb_info.Signature));
Info.setGuid(*reinterpret_cast<GUID*>(pdb_info.Signature));
Info.addFeature(PdbRaw_FeatureSig::VC140);
}
}
void PDB::build_DBI() {
// DBI
{
auto pdb_info = m_owning_coff->get_debug_info();
auto& DBI = m_builder.getDbiBuilder();
auto& Dbi = m_builder->getDbiBuilder();
DBI.setVersionHeader(PdbRaw_DbiVer::PdbDbiV70);
DBI.setAge(pdb_info.Age);
DBI.setMachineType(PDB_Machine::Amd64);
DBI.setFlags(DbiFlags::FlagStrippedMask);
DBI.setBuildNumber(14, 11); // LLVM is compatible with LINK 14.11
Dbi.setVersionHeader(PdbRaw_DbiVer::PdbDbiV70);
Dbi.setAge(pdb_info.Age);
Dbi.setMachineType(PDB_Machine::Amd64);
Dbi.setFlags(DbiFlags::FlagStrippedMask);
Dbi.setBuildNumber(14, 11); // LLVM is compatible with LINK 14.11
// Add sections.
auto section_table = m_owning_coff->get_section_table();
@@ -66,7 +151,8 @@ void PDB::build_DBI() {
auto section_data_ref = ArrayRef<uint8_t>(
(uint8_t*)section_table,
m_owning_coff->get_number_of_sections() * sizeof(object::coff_section)
m_owning_coff->get_number_of_sections()
* sizeof(object::coff_section)
);
auto section_table_ref = ArrayRef<object::coff_section>(
@@ -74,38 +160,40 @@ void PDB::build_DBI() {
number_of_sections
);
DBI.createSectionMap(section_table_ref);
Dbi.createSectionMap(section_table_ref);
// Add COFF section header stream.
if (DBI.addDbgStream(DbgHeaderType::SectionHdr, section_data_ref)) {
if (Dbi.addDbgStream(DbgHeaderType::SectionHdr, section_data_ref)) {
throw std::runtime_error("Failed to add dbg stream.");
}
}
void PDB::build_TPI() {
auto& TPI = m_builder.getTpiBuilder();
auto& IPI = m_builder.getIpiBuilder();
TPI.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80);
IPI.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80);
if (m_owning_raw_type_data) {
m_owning_raw_type_data->for_each<data_format::RawTypeData::TPI>(
[&TPI](codeview::TypeIndex index, const codeview::CVType& type) {
TPI.addTypeRecord(type.RecordData, std::nullopt);
}
);
m_owning_raw_type_data->for_each<data_format::RawTypeData::IPI>(
[&IPI](codeview::TypeIndex index, const codeview::CVType& type) {
IPI.addTypeRecord(type.RecordData, std::nullopt);
}
);
}
}
void PDB::build_GSI() {
// TPI & IPI
{
auto& Tpi = m_builder->getTpiBuilder();
auto& Ipi = m_builder->getIpiBuilder();
Tpi.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80);
Ipi.setVersionHeader(PdbRaw_TpiVer::PdbTpiV80);
if (m_storaged_Tpi) {
for_each<TPI>([&Tpi](TypeIndex index, const CVType& type) {
Tpi.addTypeRecord(type.RecordData, std::nullopt);
});
}
if (m_storaged_Ipi) {
for_each<IPI>([&Ipi](TypeIndex index, const CVType& type) {
Ipi.addTypeRecord(type.RecordData, std::nullopt);
});
}
}
// PUBLIC
{
std::vector<BulkPublic> publics;
m_owning_symbol_data->for_each([&publics, this](const BoundSymbol& entity) {
m_owning_symbol_data->for_each([&publics,
this](const BoundSymbol& entity) {
BulkPublic symbol;
auto section_index =
@@ -119,15 +207,15 @@ void PDB::build_GSI() {
symbol.Name = strdup(entity.m_symbol_name.c_str());
symbol.NameLen = entity.m_symbol_name.size();
symbol.Segment = section_index + 1;
symbol.Offset =
entity.m_rva - m_image_base - section_or_err.get()->VirtualAddress;
if (entity.m_is_function)
symbol.setFlags(codeview::PublicSymFlags::Function);
symbol.Offset = entity.m_rva - m_image_base
- section_or_err.get()->VirtualAddress;
if (entity.m_is_function) symbol.setFlags(PublicSymFlags::Function);
publics.emplace_back(symbol);
});
m_builder.getGsiBuilder().addPublicSymbols(std::move(publics));
m_builder->getGsiBuilder().addPublicSymbols(std::move(publics));
}
}
} // namespace di::object_file

View File

@@ -1,23 +1,59 @@
#pragma once
#include "data_format/bound_symbol_list.h"
#include "io/io_base.h"
#include "object_file/coff.h"
#include "data_format/bound_symbol_list.h"
#include "data_format/raw_type_data.h"
#include <llvm/DebugInfo/CodeView/CVRecord.h>
#include <llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h>
#include <llvm/DebugInfo/CodeView/SymbolRecord.h>
#include <llvm/DebugInfo/PDB/Native/NativeSession.h>
#include <llvm/DebugInfo/PDB/Native/PDBFileBuilder.h>
#include <llvm/Support/Allocator.h>
namespace di::object_file {
class PDB {
class PDB : public IOBase {
public:
using owning_coff_t = std::unique_ptr<COFF>;
using owning_symbol_data_t = std::unique_ptr<data_format::BoundSymbolList>;
using owning_type_data_t = std::unique_ptr<data_format::RawTypeData>;
explicit PDB();
using for_each_symbol_callback_t =
std::function<void(codeview::PublicSym32 const&)>;
using for_each_type_callback_t =
std::function<void(codeview::TypeIndex, codeview::CVType)>;
enum IterableStream { TPI, IPI, Public };
void read(const fs::path& path) override;
/// Writes the PDB to `path` by forwarding to the non-const `_write` helper.
/// NOTE(review): constness is cast away here, so calling this on an object
/// that was declared const looks unsafe — confirm callers never do that.
void write(const fs::path& path) const override {
    auto* self = const_cast<PDB*>(this);
    self->_write(path);
}
// r
pdb::NativeSession& get_native_session();
pdb::PDBFile& get_pdb_file();
/// Calls `callback` for every entry of the stream selected by `Stream`:
/// the stored TPI records, the stored IPI records, or the public symbols.
///
/// The TPI and IPI branches dereference `m_storaged_Tpi` / `m_storaged_Ipi`
/// without a null check. The `_for_each_ipi` helper serves both tables.
template <IterableStream Stream, typename CallbackT>
void for_each(const CallbackT& callback) const {
    // The iteration helpers are non-const members, so constness is dropped.
    auto* self = const_cast<PDB*>(this);
    if constexpr (Stream == TPI) {
        self->_for_each_ipi(*m_storaged_Tpi, callback);
    } else if constexpr (Stream == IPI) {
        self->_for_each_ipi(*m_storaged_Ipi, callback);
    } else if constexpr (Stream == Public) {
        self->_for_each_public(callback);
    }
}
// w
void set_coff_object(owning_coff_t coff_object) {
m_owning_coff = std::move(coff_object);
m_image_base = m_owning_coff->get_owning_coff().getImageBase();
@@ -26,28 +62,35 @@ public:
/// Takes ownership of `symbol_data` and stores it in `m_owning_symbol_data`,
/// replacing any previously stored list.
void set_symbol_data(owning_symbol_data_t symbol_data) {
m_owning_symbol_data = std::move(symbol_data);
}
/// Takes ownership of `raw_type_data` and stores it in
/// `m_owning_raw_type_data`, replacing any previously stored value.
void set_raw_type_data(owning_type_data_t raw_type_data) {
m_owning_raw_type_data = std::move(raw_type_data);
}
void write(const fs::path& path);
private:
void build();
inline void build_Info();
inline void build_DBI();
inline void build_TPI();
inline void build_GSI();
// helpers
void _write(const fs::path& path);
void _for_each_public(const for_each_symbol_callback_t& callback);
/// Forwards `callback` to `type_table.ForEachRecord`, so it is invoked for
/// the records held by `type_table`.
void _for_each_ipi(
codeview::MergingTypeTableBuilder& type_table,
const for_each_type_callback_t& callback
) {
type_table.ForEachRecord(callback);
}
owning_coff_t m_owning_coff;
owning_symbol_data_t m_owning_symbol_data;
owning_type_data_t m_owning_raw_type_data;
addr_t m_image_base;
BumpPtrAllocator m_allocator;
pdb::PDBFileBuilder m_builder;
BumpPtrAllocator m_Alloc;
// w
std::unique_ptr<pdb::PDBFileBuilder> m_builder;
// r
std::unique_ptr<pdb::IPDBSession> m_session;
std::unique_ptr<codeview::MergingTypeTableBuilder> m_storaged_Ipi;
std::unique_ptr<codeview::MergingTypeTableBuilder> m_storaged_Tpi;
};
} // namespace di::object_file

View File

@@ -1,17 +1,12 @@
#include <llvm/DebugInfo/CodeView/CVRecord.h>
#include <llvm/DebugInfo/CodeView/SymbolDeserializer.h>
#include <llvm/DebugInfo/PDB/IPDBSession.h>
#include <llvm/DebugInfo/PDB/Native/NativeSession.h>
#include <llvm/DebugInfo/PDB/Native/PDBFile.h>
#include <llvm/DebugInfo/PDB/Native/PublicsStream.h>
#include <llvm/DebugInfo/PDB/Native/SymbolStream.h>
#include <llvm/DebugInfo/PDB/PDB.h>
#include <argparse/argparse.hpp>
using namespace llvm;
using namespace llvm::pdb;
using namespace llvm::codeview;
#include <llvm/DebugInfo/CodeView/SymbolRecord.h>
#include "data_format/typed_symbol_list.h"
#include "object_file/pdb.h"
using namespace di;
using namespace di::object_file;
auto load_args(int argc, char* argv[]) {
argparse::ArgumentParser program("extractpdb");
@@ -44,50 +39,23 @@ int main(int argc, char* argv[]) try {
auto args = load_args(argc, argv);
std::unique_ptr<IPDBSession> pdb_session;
if (llvm::pdb::loadDataForPDB(
PDB_ReaderType::Native,
args.m_program_database_path,
pdb_session
)) {
throw std::runtime_error("Failed to load PDB.");
}
PDB pdb;
pdb.read(args.m_program_database_path);
auto native_session = static_cast<NativeSession*>(pdb_session.get());
auto& pdb_file = native_session->getPDBFile();
data_format::TypedSymbolList symbol_list;
auto publics_stream = pdb_file.getPDBPublicsStream();
if (!publics_stream) {
throw std::runtime_error("Failed to get public stream from PDB.");
}
pdb.for_each<PDB::Public>([&symbol_list](const codeview::PublicSym32& symbol
) {
using codeview::PublicSymFlags;
auto publics_symbol_stream = pdb_file.getPDBSymbolStream();
if (!publics_symbol_stream) {
throw std::runtime_error("Failed to get symbol stream from PDB.");
}
auto is_fun =
(symbol.Flags & PublicSymFlags::Function) != PublicSymFlags::None;
symbol_list.record(
symbol.Name.str(),
is_fun ? DeclType::Function : DeclType::Var
);
});
std::ofstream ofs(args.m_output_path);
if (!ofs) {
throw std::runtime_error("Failed to open output file.");
}
auto publics_symbols =
publics_symbol_stream->getSymbolArray().getUnderlyingStream();
for (auto offset : publics_stream->getPublicsTable()) {
auto cv_symbol = readSymbolFromStream(publics_symbols, offset);
auto public_sym32 =
SymbolDeserializer::deserializeAs<PublicSym32>(cv_symbol.get());
if (!public_sym32) {
throw std::runtime_error("Unsupported symbol type.");
}
ofs
<< ((public_sym32->Flags & PublicSymFlags::Function)
!= PublicSymFlags::None
? "Function, "
: "Var, ")
<< public_sym32->Name.str() << "\n";
}
return 0;
} catch (const std::exception& e) {

View File

@@ -4,7 +4,6 @@
#include "object_file/pdb.h"
#include "data_format/bound_symbol_list.h"
#include "data_format/raw_type_data.h"
using namespace di;
@@ -58,16 +57,13 @@ int main(int argc, char* argv[]) try {
auto symbol_data = std::make_unique<data_format::BoundSymbolList>();
symbol_data->read(args.symbol_data_path);
std::unique_ptr<data_format::RawTypeData> raw_type_data;
object_file::PDB pdb;
if (args.typeinfo_pdb_path) {
raw_type_data = std::make_unique<data_format::RawTypeData>();
raw_type_data->read(*args.typeinfo_pdb_path);
pdb.read(*args.typeinfo_pdb_path);
}
object_file::PDB pdb;
pdb.set_coff_object(std::move(server_program));
pdb.set_symbol_data(std::move(symbol_data));
pdb.set_raw_type_data(std::move(raw_type_data));
pdb.write(args.output_path);