refactor: polymorphic di::data_format::MagicBlob.
Some checks failed
Build / build (debug, map[arch:arm64 os:linux runner:ubuntu-24.04-arm toolchain:gcc-14]) (push) Has been cancelled
Build / build (debug, map[arch:arm64 os:macosx runner:macos-latest toolchain:xcode]) (push) Has been cancelled
Build / build (debug, map[arch:x64 os:windows runner:windows-latest toolchain:clang]) (push) Has been cancelled
Build / build (debug, map[arch:x86_64 os:linux runner:ubuntu-latest toolchain:gcc-14]) (push) Has been cancelled
Build / build (release, map[arch:arm64 os:linux runner:ubuntu-24.04-arm toolchain:gcc-14]) (push) Has been cancelled
Build / build (release, map[arch:arm64 os:macosx runner:macos-latest toolchain:xcode]) (push) Has been cancelled
Build / build (release, map[arch:x64 os:windows runner:windows-latest toolchain:clang]) (push) Has been cancelled
Build / build (release, map[arch:x86_64 os:linux runner:ubuntu-latest toolchain:gcc-14]) (push) Has been cancelled

This commit is contained in:
2025-08-17 23:50:40 +08:00
parent a45c22a4a1
commit b2e1427387
11 changed files with 243 additions and 111 deletions

View File

@@ -0,0 +1,74 @@
#include "blob_impl.h"
#include "entry_impl.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
namespace {
/// Transforms the seed stored in a MagicBlob header into the seed that is
/// passed to XXH64 when a symbol name is hashed for lookup.
///
/// Both 32-bit halves of `stored_seed` are run through the same integer mixer
/// (unsigned arithmetic, so every step wraps modulo 2^32). The two mixed
/// halves are then combined in 64-bit arithmetic together with a fixed mask.
///
/// @param stored_seed Seed value exactly as read from the file.
/// @return Seed to use for queries.
constexpr uint64_t hash_qseed(uint64_t stored_seed) {
    constexpr auto k_add1 = 0x7ED55D16u;
    constexpr auto k_xor  = 0xC761C23Cu;
    constexpr auto k_add2 = 0x165667B1u;
    constexpr auto k_sub1 = 0x160733E3u;
    constexpr auto k_sub2 = 0x028FB93Bu;
    constexpr auto k_mask = 0xB55A4F09uLL;

    // 32-bit mixer applied independently to each half of the stored seed.
    constexpr auto mix32 = [](uint32_t half) {
        const auto s1 = 0x1001 * half + k_add1;
        const auto s2 = 0x21 * (s1 ^ (s1 >> 19) ^ k_xor);
        const auto s3 = ((s2 + k_add2) << 9) ^ (s2 - k_sub1);
        return 9 * s3 - k_sub2;
    };

    const auto hi = mix32(static_cast<uint32_t>(stored_seed >> 32));
    const auto lo = mix32(static_cast<uint32_t>(stored_seed));

    constexpr auto wide_mask = k_mask | 0xFFFFFFFF00000000uLL;
    const auto upper = ((hi & 0xFFFF0000) ^ ((hi ^ wide_mask) << 16)) << 16;
    // `<<` binds tighter than `^`: the mask is shifted first, then XORed
    // with `lo`, and only then is the result shifted back down.
    const auto lower = lo ^ ((lo ^ (k_mask << 16)) >> 16);

    return upper | lower;
}
} // namespace
namespace di::data_format::_pl::v1_12_0 {
/// Loads a MagicBlob file and fills the entry table.
///
/// The stream starts with a 64-bit seed, from which the query seed is derived.
/// Records are then consumed for as long as `next()` does not report EOF.
/// Each record is read in this order: flags (varint), RVA delta (varint),
/// hash (`hash_t`).
///
/// @param path Location of the blob file; forwarded to `StreamedIO::read`.
void MagicBlobImpl::read(const fs::path& path) {
    StreamedIO::read(path);

    m_stored_seed = eat<uint64_t>();
    m_query_seed  = hash_qseed(m_stored_seed);

    // The file does not store each RVA directly. It stores the distance from
    // the previous entry's RVA (entries are sorted by ascending RVA), so a
    // running sum is kept and the absolute RVA is what goes into the entry.
    rva_t absolute_rva{};
    while (next() != EOF) {
        const auto raw_flags = eat_varint<uint64_t>();
        const auto rva_delta = eat_varint<rva_t>();
        const auto key       = eat<hash_t>();

        absolute_rva += rva_delta;

        auto entry =
            std::make_shared<MagicEntryImpl>(key, absolute_rva, raw_flags);
        // emplace() leaves an already-present hash untouched.
        m_entries.emplace(key, std::move(entry));
    }
}
/// Looks up the entry belonging to `symbol`.
///
/// The symbol name is hashed with XXH64 seeded by `m_query_seed`, and the
/// resulting hash is the key into the entry table.
///
/// @param symbol Symbol name to resolve.
/// @return The matching entry, or nullptr when no entry has that hash.
MagicBlob::shared_entry_t MagicBlobImpl::query(std::string_view symbol) const {
    const auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed);

    // One find() instead of contains() followed by at(): the table is hashed
    // and probed once rather than twice.
    if (const auto it = m_entries.find(query_hash); it != m_entries.end()) {
        return it->second;
    }
    return nullptr;
}
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -0,0 +1,31 @@
#pragma once
#include "data_format/magic_blob.h"
namespace di::data_format::_pl::v1_12_0 {
/// MagicBlob implementation living in the `_pl::v1_12_0` namespace.
///
/// Entries are kept in a hash table keyed by symbol hash.
class MagicBlobImpl : public MagicBlob {
public:
    using for_each_callback_t =
        std::function<void(hash_t, shared_entry_t const&)>;

    /// Loads all entries from the blob file at `path`.
    void read(const fs::path& path) override;

    /// Returns the entry stored for `symbol`'s hash.
    shared_entry_t query(std::string_view symbol) const override;

    /// Calls `callback(hash, entry)` once for every loaded entry, in the
    /// iteration order of the underlying unordered_map.
    void for_each(const for_each_callback_t& callback) const override {
        for (const auto& slot : m_entries) {
            callback(slot.first, slot.second);
        }
    }

    /// Number of entries currently held.
    size_t count() const override { return m_entries.size(); }

private:
    // Symbol hash -> entry.
    std::unordered_map<hash_t, shared_entry_t> m_entries;

    // The seed found in the file is not used for hashing as-is: MagicBlob
    // runs it through a custom transformation first. Always hash queries
    // with m_query_seed; m_stored_seed is only the raw value from the file.
    uint64_t m_stored_seed{};
    uint64_t m_query_seed{};
};
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -0,0 +1,16 @@
#include "entry_impl.h"
#include <nlohmann/json.hpp>
namespace di::data_format::_pl::v1_12_0 {
/// Serializes this entry into `json`: first whatever the base class writes,
/// then one boolean per flag accessor.
void MagicEntryImpl::to_json(nlohmann::json& json) const {
    MagicEntry::to_json(json);

    // Small helper so each flag is a single key/value line below.
    const auto put_flag = [&json](const char* key, bool value) {
        json[key] = value;
    };

    put_flag("is_function", is_function());
    put_flag("_unk2", _unk2());
    put_flag("is_verbose", is_verbose());
    put_flag("_unk4", _unk4());
}
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -0,0 +1,29 @@
#pragma once
#include "data_format/type/magic_entry.h"
#include <nlohmann/json_fwd.hpp>
namespace di::data_format::_pl::v1_12_0 {
struct MagicEntryImpl : public MagicEntry {
using flags_t = std::bitset<64>;
flags_t flags;
MagicEntryImpl(hash_t hash, rva_t rva, flags_t flags)
: MagicEntry(hash, rva),
flags(flags) {}
void to_json(nlohmann::json& json) const override;
constexpr bool is_function() const { return flags[0]; }
constexpr bool _unk2() const { return flags[1]; }
constexpr bool is_verbose() const { return flags[2]; }
constexpr bool _unk4() const { return flags[3]; }
};
// TODO
// void from_json(const nlohmann::json& json, MagicEntry& entry);
} // namespace di::data_format::_pl::v1_12_0

View File

@@ -1,65 +1,13 @@
#include "magic_blob.h" #include "data_format/magic_blob.h"
#define XXH_INLINE_ALL #include "data_format/_pl/v1_12_0/blob_impl.h"
#include "xxhash.h"
namespace _preloader_v_1_12_0 {
constexpr uint64_t hash_qseed(uint64_t stored_seed) {
constexpr auto v1 = 0x7ED55D16u;
constexpr auto v2 = 0xC761C23Cu;
constexpr auto v3 = 0x165667B1u;
constexpr auto v4 = 0x160733E3u;
constexpr auto v5 = 0x028FB93Bu;
constexpr auto v6 = 0xB55A4F09uLL;
constexpr auto al = [](uint32_t a) {
auto c1 = (0x1000 + 1) * a + v1;
auto d1 = (0x20 + 1) * (c1 ^ (c1 >> 19) ^ v2);
auto e1 = ((d1 + v3) << 9) ^ (d1 - v4);
return e1 + 8 * e1 - v5;
};
auto a1 = al(stored_seed >> 32);
auto a2 = al(stored_seed);
auto b1 = v6 | 0xFFFFFFFF00000000uLL;
auto c1 = ((a1 & 0xFFFF0000) ^ ((a1 ^ b1) << 16)) << 16;
auto c2 = a2 ^ ((a2 ^ v6 << 16) >> 16);
return c1 | c2;
}
} // namespace _preloader_v_1_12_0
namespace di::data_format { namespace di::data_format {
void MagicBlob::read(const fs::path& path) { MagicBlob::blob_t MagicBlob::create(FormatVersion version) {
StreamedIO::read(path); switch (version) {
case V1_12_0:
m_stored_seed = eat<uint64_t>(); return std::make_unique<_pl::v1_12_0::MagicBlobImpl>();
m_query_seed = _preloader_v_1_12_0::hash_qseed(m_stored_seed);
rva_t n_rva{};
while (next() != EOF) {
auto flags = eat_varint<uint64_t>();
auto rva = eat_varint<rva_t>();
auto hash = eat<hash_t>();
// see comments in magic_entry.h
n_rva += rva;
rva = n_rva;
m_entries.emplace(hash, std::make_unique<MagicEntry>(flags, rva));
}
}
MagicEntry const* MagicBlob::query(std::string_view symbol) const {
auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed);
if (m_entries.contains(query_hash)) {
return m_entries.at(query_hash).get();
} }
return nullptr; return nullptr;
} }

View File

@@ -7,25 +7,29 @@ namespace di::data_format {
class MagicBlob : public io::StreamedIO { class MagicBlob : public io::StreamedIO {
public: public:
using for_each_callback_t = std::function<void(hash_t, MagicEntry const&)>; enum FormatVersion {
V1_12_0,
};
void read(const fs::path& path) override; using blob_t = std::unique_ptr<MagicBlob>;
using entry_t = std::unique_ptr<MagicEntry>;
DI_CONSTEXPR void for_each(const for_each_callback_t& callback) const { using shared_blob_t = std::shared_ptr<const MagicBlob>;
for (const auto& [hash, entry] : m_entries) callback(hash, *entry); using shared_entry_t = std::shared_ptr<const MagicEntry>;
}
constexpr size_t count() const { return m_entries.size(); } using for_each_callback_t =
std::function<void(hash_t, shared_entry_t const&)>;
MagicEntry const* query(std::string_view symbol) const; virtual void read(const fs::path& path) = 0;
private: virtual shared_entry_t query(std::string_view symbol) const = 0;
std::unordered_map<hash_t, std::unique_ptr<MagicEntry>> m_entries; virtual void for_each(const for_each_callback_t& callback) const = 0;
virtual size_t count() const = 0;
// MagicBlob uses a custom algorithm to transform the stored seed. When static blob_t create(FormatVersion version);
// querying, you should use m_query_seed.
uint64_t m_stored_seed{}; protected:
uint64_t m_query_seed{}; MagicBlob() = default;
}; };
} // namespace di::data_format } // namespace di::data_format

View File

@@ -4,12 +4,9 @@
namespace di { namespace di {
void to_json(nlohmann::json& json, const MagicEntry& entry) { void MagicEntry::to_json(nlohmann::json& json) const {
json["rva"] = entry.rva; json["hash"] = hash;
json["is_function"] = entry.is_function(); json["rva"] = rva;
json["_unk2"] = entry._unk2();
json["is_verbose"] = entry.is_verbose();
json["_unk4"] = entry._unk4();
} }
} // namespace di } // namespace di

View File

@@ -5,28 +5,12 @@
namespace di { namespace di {
struct MagicEntry { struct MagicEntry {
#if __cpp_aggregate_paren_init < 201902L hash_t hash; // key
MagicEntry(std::bitset<64> flags, rva_t rva) : flags(flags), rva(rva) {} rva_t rva; // value
#endif
std::bitset<64> flags; MagicEntry(hash_t hash, rva_t rva) : hash(hash), rva(rva) {}
// What is stored in the original format is not the RVA itself, but the
// difference with the previous entry (in MagicBlob, RVA is sorted from
// small to large)
// But here, we still store the "real" RVA.
rva_t rva;
// Do not put the original hash in the entry yet.
// hash_t hash;
constexpr bool is_function() const { return flags[0]; } virtual void to_json(nlohmann::json& json) const;
constexpr bool _unk2() const { return flags[1]; }
constexpr bool is_verbose() const { return flags[2]; }
constexpr bool _unk4() const { return flags[3]; }
}; };
void to_json(nlohmann::json& json, const MagicEntry& entry);
// TODO
// void from_json(const nlohmann::json& json, MagicEntry& entry);
} // namespace di } // namespace di

View File

@@ -2,14 +2,15 @@
#include "data_format/raw_text.h" #include "data_format/raw_text.h"
#include "data_format/typed_symbol_list.h" #include "data_format/typed_symbol_list.h"
#include <argparse/argparse.hpp>
#if DI_USE_NATIVE_SYMBOL_RESOLVER #if DI_USE_NATIVE_SYMBOL_RESOLVER
#include <pl/SymbolProvider.h> #include <pl/SymbolProvider.h>
#else #else
#include "data_format/magic_blob.h" #include "data_format/magic_blob.h"
#endif #endif
#include <argparse/argparse.hpp>
#include <magic_enum.hpp>
using namespace di; using namespace di;
using namespace di::data_format; using namespace di::data_format;
@@ -20,6 +21,7 @@ using namespace di::data_format;
std::vector<std::string> m_input_paths; std::vector<std::string> m_input_paths;
std::string m_output_path; std::string m_output_path;
std::string m_magic_blob_path; std::string m_magic_blob_path;
std::string m_format_version;
std::optional<std::string> m_output_failed_path; std::optional<std::string> m_output_failed_path;
} args; } args;
@@ -37,6 +39,15 @@ using namespace di::data_format;
.default_value("bedrock_runtime_data") .default_value("bedrock_runtime_data")
.store_into(args.m_magic_blob_path); .store_into(args.m_magic_blob_path);
std::apply([&](auto&&... xs) {
program.add_argument("--preloader-version")
.help("Choose a compatible PreLoader version. (for builtin-symbol-resolver only).")
.choices(xs...)
.store_into(args.m_format_version)
.required();
}, magic_enum::enum_names<MagicBlob::FormatVersion>());
program.add_argument("--output", "-o") program.add_argument("--output", "-o")
.help("Path to output.") .help("Path to output.")
.store_into(args.m_output_path) .store_into(args.m_output_path)
@@ -75,10 +86,21 @@ int main(int argc, char* argv[]) try {
); );
#if !DI_USE_NATIVE_SYMBOL_RESOLVER #if !DI_USE_NATIVE_SYMBOL_RESOLVER
MagicBlob magic_blob; auto format_version =
magic_blob.read(args.m_magic_blob_path); magic_enum::enum_cast<MagicBlob::FormatVersion>(args.m_format_version);
assert(format_version.has_value());
std::println("{} entries loaded from magicblob.", magic_blob.count()); auto magic_blob = MagicBlob::create(*format_version);
if (!magic_blob) {
std::println(
"Format version: {} is not yet implemented.",
args.m_format_version
);
}
magic_blob->read(args.m_magic_blob_path);
std::println("{} entries loaded from magicblob.", magic_blob->count());
#endif #endif
symlist.for_each([&](const TypedSymbol& symbol) { symlist.for_each([&](const TypedSymbol& symbol) {
@@ -94,7 +116,7 @@ int main(int argc, char* argv[]) try {
)); ));
// TODO: imagebase... // TODO: imagebase...
#else #else
if (auto entry = magic_blob.query(sym)) { if (auto entry = magic_blob->query(sym)) {
rva = entry->rva; rva = entry->rva;
} }
#endif #endif

View File

@@ -1,6 +1,7 @@
#include "data_format/magic_blob.h" #include "data_format/magic_blob.h"
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <magic_enum.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using namespace di; using namespace di;
@@ -13,6 +14,8 @@ auto load_args(int argc, char* argv[]) {
struct { struct {
std::string m_magic_blob_path; std::string m_magic_blob_path;
fs::path m_output_path; fs::path m_output_path;
std::string m_format_version;
} args; } args;
// clang-format off // clang-format off
@@ -26,6 +29,14 @@ auto load_args(int argc, char* argv[]) {
.help("Path to output symlist.") .help("Path to output symlist.")
.required(); .required();
std::apply([&](auto&&... xs) {
program.add_argument("--preloader-version")
.help("Choose a compatible PreLoader version. (for builtin-symbol-resolver only).")
.choices(xs...)
.store_into(args.m_format_version)
.required();
}, magic_enum::enum_names<MagicBlob::FormatVersion>());
// clang-format on // clang-format on
program.parse_args(argc, argv); program.parse_args(argc, argv);
@@ -39,12 +50,23 @@ int main(int argc, char* argv[]) try {
auto args = load_args(argc, argv); auto args = load_args(argc, argv);
MagicBlob blob; auto format_version =
blob.read(args.m_magic_blob_path); magic_enum::enum_cast<MagicBlob::FormatVersion>(args.m_format_version);
assert(format_version.has_value());
auto magic_blob = MagicBlob::create(*format_version);
if (!magic_blob) {
std::println(
"Format version: {} is not yet implemented.",
args.m_format_version
);
}
magic_blob->read(args.m_magic_blob_path);
nlohmann::json data; nlohmann::json data;
blob.for_each([&data](hash_t hash, const MagicEntry& entry) { magic_blob->for_each([&data](hash_t hash, MagicBlob::shared_entry_t entry) {
data[std::format("{:#x}", hash)] = entry; entry->to_json(data.emplace_back());
}); });
std::ofstream ofs(args.m_output_path); std::ofstream ofs(args.m_output_path);

View File

@@ -4,6 +4,7 @@ add_requires('argparse 3.2')
add_requires('nlohmann_json 3.12.0') add_requires('nlohmann_json 3.12.0')
add_requires('xxhash 0.8.3') add_requires('xxhash 0.8.3')
add_requires('libllvm 19.1.7') add_requires('libllvm 19.1.7')
add_requires('magic_enum 0.9.7')
add_requires('boost 1.88.0', { add_requires('boost 1.88.0', {
system = false, system = false,
configs = { configs = {
@@ -97,7 +98,10 @@ target('askrva')
set_pcxxheader('src/pch.h') set_pcxxheader('src/pch.h')
add_deps('libdi') add_deps('libdi')
add_packages('argparse') add_packages(
'argparse',
'magic_enum'
)
if is_config('symbol-resolver', 'native') then if is_config('symbol-resolver', 'native') then
add_packages('preloader') add_packages('preloader')
@@ -112,7 +116,8 @@ target('blob-extractor')
add_deps('libdi') add_deps('libdi')
add_packages( add_packages(
'argparse', 'argparse',
'nlohmann_json' 'nlohmann_json',
'magic_enum'
) )
target('dumpsym') target('dumpsym')