refactor: polymorphic di::data_format::MagicBlob.
Some checks failed
Build / build (debug, map[arch:arm64 os:linux runner:ubuntu-24.04-arm toolchain:gcc-14]) (push) Has been cancelled
Build / build (debug, map[arch:arm64 os:macosx runner:macos-latest toolchain:xcode]) (push) Has been cancelled
Build / build (debug, map[arch:x64 os:windows runner:windows-latest toolchain:clang]) (push) Has been cancelled
Build / build (debug, map[arch:x86_64 os:linux runner:ubuntu-latest toolchain:gcc-14]) (push) Has been cancelled
Build / build (release, map[arch:arm64 os:linux runner:ubuntu-24.04-arm toolchain:gcc-14]) (push) Has been cancelled
Build / build (release, map[arch:arm64 os:macosx runner:macos-latest toolchain:xcode]) (push) Has been cancelled
Build / build (release, map[arch:x64 os:windows runner:windows-latest toolchain:clang]) (push) Has been cancelled
Build / build (release, map[arch:x86_64 os:linux runner:ubuntu-latest toolchain:gcc-14]) (push) Has been cancelled

This commit is contained in:
2025-08-17 23:50:40 +08:00
parent a45c22a4a1
commit b2e1427387
11 changed files with 243 additions and 111 deletions

View File

@@ -0,0 +1,74 @@
#include "blob_impl.h"
#include "entry_impl.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
namespace {
// Transforms the seed stored in the blob header into the seed that is passed
// to XXH64 when hashing a symbol name for lookup.
//
// Each 32-bit half of `stored_seed` is run through the same integer mixing
// routine; the two mixed words are then folded together with a fixed mask
// into a single 64-bit value. All 32-bit arithmetic wraps modulo 2^32.
constexpr uint64_t hash_qseed(uint64_t stored_seed) {
    constexpr uint32_t k_add1 = 0x7ED55D16u;
    constexpr uint32_t k_xor2 = 0xC761C23Cu;
    constexpr uint32_t k_add3 = 0x165667B1u;
    constexpr uint32_t k_sub4 = 0x160733E3u;
    constexpr uint32_t k_sub5 = 0x028FB93Bu;
    constexpr uint64_t k_fold = 0xB55A4F09uLL;

    // Mixes one 32-bit word; every step stays in 32-bit unsigned arithmetic.
    constexpr auto mix_word = [](uint32_t word) -> uint32_t {
        const uint32_t step1 = 0x1001u * word + k_add1;
        const uint32_t step2 = 0x21u * (step1 ^ (step1 >> 19) ^ k_xor2);
        const uint32_t step3 = ((step2 + k_add3) << 9) ^ (step2 - k_sub4);
        return 9u * step3 - k_sub5;
    };

    // The narrowing to 32 bits is intentional: high half, then low half.
    const uint32_t mixed_hi = mix_word(static_cast<uint32_t>(stored_seed >> 32));
    const uint32_t mixed_lo = mix_word(static_cast<uint32_t>(stored_seed));

    // From here on the arithmetic is 64-bit; the 32-bit words zero-extend.
    constexpr uint64_t fold_wide = k_fold | 0xFFFFFFFF00000000uLL;
    const uint64_t     hi_top16  = mixed_hi & 0xFFFF0000u;
    const uint64_t     hi_folded = (mixed_hi ^ fold_wide) << 16;
    const uint64_t     upper     = (hi_top16 ^ hi_folded) << 16;

    // Shift binds tighter than xor, so the mask is shifted before the xor.
    const uint64_t lower = mixed_lo ^ ((mixed_lo ^ (k_fold << 16)) >> 16);

    return upper | lower;
}
} // namespace
namespace di::data_format::_pl::v1_12_0 {
// Loads the blob at `path` and populates the entry table.
//
// Layout as consumed here: one uint64_t stored seed, followed by records of
// (varint flags, varint RVA delta, hash_t hash) until next() reports EOF.
void MagicBlobImpl::read(const fs::path& path) {
    StreamedIO::read(path);

    // The header seed is kept as-is; lookups use the transformed seed.
    m_stored_seed = eat<uint64_t>();
    m_query_seed  = hash_qseed(m_stored_seed);

    // The file stores each RVA as the difference from the previous entry
    // (entries are sorted by ascending RVA). Entries keep the absolute RVA,
    // so the deltas are accumulated while reading.
    rva_t absolute_rva{};
    while (next() != EOF) {
        // Field order matters: each call consumes bytes from the stream.
        const auto entry_flags = eat_varint<uint64_t>();
        const auto rva_delta   = eat_varint<rva_t>();
        const auto entry_hash  = eat<hash_t>();

        absolute_rva += rva_delta;

        // emplace keeps the first entry if a hash shows up more than once.
        m_entries.emplace(
            entry_hash,
            std::make_shared<MagicEntryImpl>(entry_hash, absolute_rva, entry_flags)
        );
    }
}
// Looks up the entry for `symbol`.
//
// The symbol name is hashed with XXH64 using the transformed query seed and
// the result is used as the key into the entry table.
//
// Returns the matching entry, or nullptr when no entry has that hash.
MagicBlob::shared_entry_t MagicBlobImpl::query(std::string_view symbol) const {
    const auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed);

    // A single find() replaces contains() followed by at(), which hashed and
    // probed the table twice on every hit.
    if (const auto it = m_entries.find(query_hash); it != m_entries.end()) {
        return it->second;
    }
    return nullptr;
}
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -0,0 +1,31 @@
#pragma once
#include "data_format/magic_blob.h"
namespace di::data_format::_pl::v1_12_0 {
// MagicBlob implementation living in the _pl::v1_12_0 namespace.
//
// Entries are kept in a hash map keyed by the symbol hash read from the file.
//
// NOTE(review): MagicBlob::create hands instances out as
// std::unique_ptr<MagicBlob>, so destruction happens through the base type;
// confirm that MagicBlob or io::StreamedIO declares a virtual destructor.
class MagicBlobImpl : public MagicBlob {
public:
    // Callback signature accepted by for_each: (hash, entry).
    using for_each_callback_t =
        std::function<void(hash_t, shared_entry_t const&)>;

    // Parses the blob file at `path` into the entry table.
    void read(const fs::path& path) override;

    // Returns the entry whose hash matches `symbol`, or nullptr if none does.
    shared_entry_t query(std::string_view symbol) const override;

    // Invokes `callback` once per stored entry. The visiting order is that of
    // the underlying unordered_map.
    void for_each(const for_each_callback_t& callback) const override {
        for (const auto& kv : m_entries) {
            callback(kv.first, kv.second);
        }
    }

    // Number of entries currently stored.
    size_t count() const override { return m_entries.size(); }

private:
    // Symbol hash -> entry.
    std::unordered_map<hash_t, shared_entry_t> m_entries;

    // The seed found in the file is not used for hashing directly: it is run
    // through a custom transform first. Lookups must hash with m_query_seed.
    uint64_t m_stored_seed{};
    uint64_t m_query_seed{};
};
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -0,0 +1,16 @@
#include "entry_impl.h"
#include <nlohmann/json.hpp>
namespace di::data_format::_pl::v1_12_0 {
// Serializes this entry into `json`: the base-class fields are written first,
// followed by one boolean per decoded flag bit.
void MagicEntryImpl::to_json(nlohmann::json& json) const {
    MagicEntry::to_json(json);

    // Key / value table for the flag accessors exposed by this type.
    const std::pair<const char*, bool> flag_fields[] = {
        {"is_function", is_function()},
        {"_unk2",       _unk2()      },
        {"is_verbose",  is_verbose() },
        {"_unk4",       _unk4()      },
    };
    for (const auto& [key, value] : flag_fields) {
        json[key] = value;
    }
}
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -0,0 +1,29 @@
#pragma once
#include "data_format/type/magic_entry.h"
#include <nlohmann/json_fwd.hpp>
namespace di::data_format::_pl::v1_12_0 {
struct MagicEntryImpl : public MagicEntry {
using flags_t = std::bitset<64>;
flags_t flags;
MagicEntryImpl(hash_t hash, rva_t rva, flags_t flags)
: MagicEntry(hash, rva),
flags(flags) {}
void to_json(nlohmann::json& json) const override;
constexpr bool is_function() const { return flags[0]; }
constexpr bool _unk2() const { return flags[1]; }
constexpr bool is_verbose() const { return flags[2]; }
constexpr bool _unk4() const { return flags[3]; }
};
// TODO
// void from_json(const nlohmann::json& json, MagicEntry& entry);
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -1,67 +1,15 @@
#include "magic_blob.h"
#include "data_format/magic_blob.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
namespace _preloader_v_1_12_0 {
constexpr uint64_t hash_qseed(uint64_t stored_seed) {
constexpr auto v1 = 0x7ED55D16u;
constexpr auto v2 = 0xC761C23Cu;
constexpr auto v3 = 0x165667B1u;
constexpr auto v4 = 0x160733E3u;
constexpr auto v5 = 0x028FB93Bu;
constexpr auto v6 = 0xB55A4F09uLL;
constexpr auto al = [](uint32_t a) {
auto c1 = (0x1000 + 1) * a + v1;
auto d1 = (0x20 + 1) * (c1 ^ (c1 >> 19) ^ v2);
auto e1 = ((d1 + v3) << 9) ^ (d1 - v4);
return e1 + 8 * e1 - v5;
};
auto a1 = al(stored_seed >> 32);
auto a2 = al(stored_seed);
auto b1 = v6 | 0xFFFFFFFF00000000uLL;
auto c1 = ((a1 & 0xFFFF0000) ^ ((a1 ^ b1) << 16)) << 16;
auto c2 = a2 ^ ((a2 ^ v6 << 16) >> 16);
return c1 | c2;
}
} // namespace _preloader_v_1_12_0
#include "data_format/_pl/v1_12_0/blob_impl.h"
namespace di::data_format {
void MagicBlob::read(const fs::path& path) {
StreamedIO::read(path);
m_stored_seed = eat<uint64_t>();
m_query_seed = _preloader_v_1_12_0::hash_qseed(m_stored_seed);
rva_t n_rva{};
while (next() != EOF) {
auto flags = eat_varint<uint64_t>();
auto rva = eat_varint<rva_t>();
auto hash = eat<hash_t>();
// see comments in magic_entry.h
n_rva += rva;
rva = n_rva;
m_entries.emplace(hash, std::make_unique<MagicEntry>(flags, rva));
}
}
MagicEntry const* MagicBlob::query(std::string_view symbol) const {
auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed);
if (m_entries.contains(query_hash)) {
return m_entries.at(query_hash).get();
MagicBlob::blob_t MagicBlob::create(FormatVersion version) {
switch (version) {
case V1_12_0:
return std::make_unique<_pl::v1_12_0::MagicBlobImpl>();
}
return nullptr;
}
} // namespace di::data_format
} // namespace di::data_format

View File

@@ -7,25 +7,29 @@ namespace di::data_format {
class MagicBlob : public io::StreamedIO {
public:
using for_each_callback_t = std::function<void(hash_t, MagicEntry const&)>;
enum FormatVersion {
V1_12_0,
};
void read(const fs::path& path) override;
using blob_t = std::unique_ptr<MagicBlob>;
using entry_t = std::unique_ptr<MagicEntry>;
DI_CONSTEXPR void for_each(const for_each_callback_t& callback) const {
for (const auto& [hash, entry] : m_entries) callback(hash, *entry);
}
using shared_blob_t = std::shared_ptr<const MagicBlob>;
using shared_entry_t = std::shared_ptr<const MagicEntry>;
constexpr size_t count() const { return m_entries.size(); }
using for_each_callback_t =
std::function<void(hash_t, shared_entry_t const&)>;
MagicEntry const* query(std::string_view symbol) const;
virtual void read(const fs::path& path) = 0;
private:
std::unordered_map<hash_t, std::unique_ptr<MagicEntry>> m_entries;
virtual shared_entry_t query(std::string_view symbol) const = 0;
virtual void for_each(const for_each_callback_t& callback) const = 0;
virtual size_t count() const = 0;
// MagicBlob uses a custom algorithm to transform the stored seed. When
// querying, you should use m_query_seed.
uint64_t m_stored_seed{};
uint64_t m_query_seed{};
static blob_t create(FormatVersion version);
protected:
MagicBlob() = default;
};
} // namespace di::data_format
} // namespace di::data_format

View File

@@ -4,12 +4,9 @@
namespace di {
void to_json(nlohmann::json& json, const MagicEntry& entry) {
json["rva"] = entry.rva;
json["is_function"] = entry.is_function();
json["_unk2"] = entry._unk2();
json["is_verbose"] = entry.is_verbose();
json["_unk4"] = entry._unk4();
void MagicEntry::to_json(nlohmann::json& json) const {
json["hash"] = hash;
json["rva"] = rva;
}
} // namespace di

View File

@@ -5,28 +5,12 @@
namespace di {
struct MagicEntry {
#if __cpp_aggregate_paren_init < 201902L
MagicEntry(std::bitset<64> flags, rva_t rva) : flags(flags), rva(rva) {}
#endif
hash_t hash; // key
rva_t rva; // value
std::bitset<64> flags;
// What is stored in the original format is not the RVA itself, but the
// difference with the previous entry (in MagicBlob, RVA is sorted from
// small to large)
// But here, we still store the "real" RVA.
rva_t rva;
// Do not put the original hash in the entry yet.
// hash_t hash;
MagicEntry(hash_t hash, rva_t rva) : hash(hash), rva(rva) {}
constexpr bool is_function() const { return flags[0]; }
constexpr bool _unk2() const { return flags[1]; }
constexpr bool is_verbose() const { return flags[2]; }
constexpr bool _unk4() const { return flags[3]; }
virtual void to_json(nlohmann::json& json) const;
};
void to_json(nlohmann::json& json, const MagicEntry& entry);
// TODO
// void from_json(const nlohmann::json& json, MagicEntry& entry);
} // namespace di

View File

@@ -2,14 +2,15 @@
#include "data_format/raw_text.h"
#include "data_format/typed_symbol_list.h"
#include <argparse/argparse.hpp>
#if DI_USE_NATIVE_SYMBOL_RESOLVER
#include <pl/SymbolProvider.h>
#else
#include "data_format/magic_blob.h"
#endif
#include <argparse/argparse.hpp>
#include <magic_enum.hpp>
using namespace di;
using namespace di::data_format;
@@ -20,6 +21,7 @@ using namespace di::data_format;
std::vector<std::string> m_input_paths;
std::string m_output_path;
std::string m_magic_blob_path;
std::string m_format_version;
std::optional<std::string> m_output_failed_path;
} args;
@@ -36,6 +38,15 @@ using namespace di::data_format;
.help("Path to magic blob (for builtin-symbol-resolver only).")
.default_value("bedrock_runtime_data")
.store_into(args.m_magic_blob_path);
std::apply([&](auto&&... xs) {
program.add_argument("--preloader-version")
.help("Choose a compatible PreLoader version. (for builtin-symbol-resolver only).")
.choices(xs...)
.store_into(args.m_format_version)
.required();
}, magic_enum::enum_names<MagicBlob::FormatVersion>());
program.add_argument("--output", "-o")
.help("Path to output.")
@@ -75,10 +86,21 @@ int main(int argc, char* argv[]) try {
);
#if !DI_USE_NATIVE_SYMBOL_RESOLVER
MagicBlob magic_blob;
magic_blob.read(args.m_magic_blob_path);
auto format_version =
magic_enum::enum_cast<MagicBlob::FormatVersion>(args.m_format_version);
assert(format_version.has_value());
std::println("{} entries loaded from magicblob.", magic_blob.count());
auto magic_blob = MagicBlob::create(*format_version);
// MagicBlob::create returns nullptr for a version without an implementation.
// Stop here: continuing would dereference the null pointer in the read() call
// that follows.
if (!magic_blob) {
    std::println(
        "Format version: {} is not yet implemented.",
        args.m_format_version
    );
    return 1;
}
magic_blob->read(args.m_magic_blob_path);
std::println("{} entries loaded from magicblob.", magic_blob->count());
#endif
symlist.for_each([&](const TypedSymbol& symbol) {
@@ -94,7 +116,7 @@ int main(int argc, char* argv[]) try {
));
// TODO: imagebase...
#else
if (auto entry = magic_blob.query(sym)) {
if (auto entry = magic_blob->query(sym)) {
rva = entry->rva;
}
#endif

View File

@@ -1,6 +1,7 @@
#include "data_format/magic_blob.h"
#include <argparse/argparse.hpp>
#include <magic_enum.hpp>
#include <nlohmann/json.hpp>
using namespace di;
@@ -13,6 +14,8 @@ auto load_args(int argc, char* argv[]) {
struct {
std::string m_magic_blob_path;
fs::path m_output_path;
std::string m_format_version;
} args;
// clang-format off
@@ -26,6 +29,14 @@ auto load_args(int argc, char* argv[]) {
.help("Path to output symlist.")
.required();
std::apply([&](auto&&... xs) {
program.add_argument("--preloader-version")
.help("Choose a compatible PreLoader version. (for builtin-symbol-resolver only).")
.choices(xs...)
.store_into(args.m_format_version)
.required();
}, magic_enum::enum_names<MagicBlob::FormatVersion>());
// clang-format on
program.parse_args(argc, argv);
@@ -39,12 +50,23 @@ int main(int argc, char* argv[]) try {
auto args = load_args(argc, argv);
MagicBlob blob;
blob.read(args.m_magic_blob_path);
auto format_version =
magic_enum::enum_cast<MagicBlob::FormatVersion>(args.m_format_version);
assert(format_version.has_value());
auto magic_blob = MagicBlob::create(*format_version);
// MagicBlob::create returns nullptr for a version without an implementation.
// Stop here: continuing would dereference the null pointer in the read() call
// that follows.
if (!magic_blob) {
    std::println(
        "Format version: {} is not yet implemented.",
        args.m_format_version
    );
    return 1;
}
magic_blob->read(args.m_magic_blob_path);
nlohmann::json data;
blob.for_each([&data](hash_t hash, const MagicEntry& entry) {
data[std::format("{:#x}", hash)] = entry;
magic_blob->for_each([&data](hash_t hash, MagicBlob::shared_entry_t entry) {
entry->to_json(data.emplace_back());
});
std::ofstream ofs(args.m_output_path);

View File

@@ -4,6 +4,7 @@ add_requires('argparse 3.2')
add_requires('nlohmann_json 3.12.0')
add_requires('xxhash 0.8.3')
add_requires('libllvm 19.1.7')
add_requires('magic_enum 0.9.7')
add_requires('boost 1.88.0', {
system = false,
configs = {
@@ -97,7 +98,10 @@ target('askrva')
set_pcxxheader('src/pch.h')
add_deps('libdi')
add_packages('argparse')
add_packages(
'argparse',
'magic_enum'
)
if is_config('symbol-resolver', 'native') then
add_packages('preloader')
@@ -112,7 +116,8 @@ target('blob-extractor')
add_deps('libdi')
add_packages(
'argparse',
'nlohmann_json'
'nlohmann_json',
'magic_enum'
)
target('dumpsym')