diff --git a/src/data_format/_pl/v1_12_0/blob_impl.cpp b/src/data_format/_pl/v1_12_0/blob_impl.cpp new file mode 100644 index 0000000..3c24670 --- /dev/null +++ b/src/data_format/_pl/v1_12_0/blob_impl.cpp @@ -0,0 +1,74 @@ +#include "blob_impl.h" +#include "entry_impl.h" + +#define XXH_INLINE_ALL +#include "xxhash.h" + +namespace { + +constexpr uint64_t hash_qseed(uint64_t stored_seed) { + constexpr auto v1 = 0x7ED55D16u; + constexpr auto v2 = 0xC761C23Cu; + constexpr auto v3 = 0x165667B1u; + constexpr auto v4 = 0x160733E3u; + constexpr auto v5 = 0x028FB93Bu; + constexpr auto v6 = 0xB55A4F09uLL; + + constexpr auto al = [](uint32_t a) { + auto c1 = (0x1000 + 1) * a + v1; + auto d1 = (0x20 + 1) * (c1 ^ (c1 >> 19) ^ v2); + auto e1 = ((d1 + v3) << 9) ^ (d1 - v4); + return e1 + 8 * e1 - v5; + }; + + auto a1 = al(stored_seed >> 32); + auto a2 = al(stored_seed); + + auto b1 = v6 | 0xFFFFFFFF00000000uLL; + + auto c1 = ((a1 & 0xFFFF0000) ^ ((a1 ^ b1) << 16)) << 16; + auto c2 = a2 ^ ((a2 ^ v6 << 16) >> 16); + + return c1 | c2; +} + +} // namespace + +namespace di::data_format::_pl::v1_12_0 { + +void MagicBlobImpl::read(const fs::path& path) { + StreamedIO::read(path); + + m_stored_seed = eat(); + m_query_seed = hash_qseed(m_stored_seed); + + rva_t n_rva{}; + + while (next() != EOF) { + auto flags = eat_varint(); + auto rva = eat_varint(); + auto hash = eat(); + + // The blob does not store each RVA directly: it stores the difference + // from the previous entry's RVA (entries in a MagicBlob are sorted by + // ascending RVA). + // The deltas are summed below so every entry keeps its "real" RVA.
+ n_rva += rva; + rva = n_rva; + + m_entries.emplace( + hash, + std::make_shared(hash, rva, flags) + ); + } +} + +MagicBlob::shared_entry_t MagicBlobImpl::query(std::string_view symbol) const { + // Single lookup: find() both tests for the hash and yields the entry. + auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed); + auto it = m_entries.find(query_hash); + if (it == m_entries.end()) return nullptr; // unknown symbol hash + return it->second; +} + +} // namespace di::data_format::_pl::v1_12_0 diff --git a/src/data_format/_pl/v1_12_0/blob_impl.h b/src/data_format/_pl/v1_12_0/blob_impl.h new file mode 100644 index 0000000..65c1290 --- /dev/null +++ b/src/data_format/_pl/v1_12_0/blob_impl.h @@ -0,0 +1,31 @@ +#pragma once + +#include "data_format/magic_blob.h" + +namespace di::data_format::_pl::v1_12_0 { + +class MagicBlobImpl : public MagicBlob { +public: + using for_each_callback_t = + std::function; + + void read(const fs::path& path) override; + + shared_entry_t query(std::string_view symbol) const override; + + void for_each(const for_each_callback_t& callback) const override { + for (const auto& [hash, entry] : m_entries) callback(hash, entry); + } + + size_t count() const override { return m_entries.size(); } + +private: + std::unordered_map m_entries; + + // MagicBlob uses a custom algorithm to transform the stored seed. When + // querying, you should use m_query_seed.
+ uint64_t m_stored_seed{}; + uint64_t m_query_seed{}; +}; + +} // namespace di::data_format::_pl::v1_12_0 diff --git a/src/data_format/_pl/v1_12_0/entry_impl.cpp b/src/data_format/_pl/v1_12_0/entry_impl.cpp new file mode 100644 index 0000000..4077fcd --- /dev/null +++ b/src/data_format/_pl/v1_12_0/entry_impl.cpp @@ -0,0 +1,16 @@ +#include "entry_impl.h" + +#include + +namespace di::data_format::_pl::v1_12_0 { + +void MagicEntryImpl::to_json(nlohmann::json& json) const { + MagicEntry::to_json(json); + + json["is_function"] = is_function(); + json["_unk2"] = _unk2(); + json["is_verbose"] = is_verbose(); + json["_unk4"] = _unk4(); +} + +} // namespace di::data_format::_pl::v1_12_0 diff --git a/src/data_format/_pl/v1_12_0/entry_impl.h b/src/data_format/_pl/v1_12_0/entry_impl.h new file mode 100644 index 0000000..fc554e5 --- /dev/null +++ b/src/data_format/_pl/v1_12_0/entry_impl.h @@ -0,0 +1,29 @@ +#pragma once + +#include "data_format/type/magic_entry.h" + +#include + +namespace di::data_format::_pl::v1_12_0 { + +struct MagicEntryImpl : public MagicEntry { + using flags_t = std::bitset<64>; + + flags_t flags; + + MagicEntryImpl(hash_t hash, rva_t rva, flags_t flags) + : MagicEntry(hash, rva), + flags(flags) {} + + void to_json(nlohmann::json& json) const override; + + constexpr bool is_function() const { return flags[0]; } + constexpr bool _unk2() const { return flags[1]; } + constexpr bool is_verbose() const { return flags[2]; } + constexpr bool _unk4() const { return flags[3]; } +}; + +// TODO +// void from_json(const nlohmann::json& json, MagicEntry& entry); + +} // namespace di::data_format::_pl::v1_12_0 diff --git a/src/data_format/magic_blob.cpp b/src/data_format/magic_blob.cpp index c17fa54..bbeae99 100644 --- a/src/data_format/magic_blob.cpp +++ b/src/data_format/magic_blob.cpp @@ -1,67 +1,15 @@ -#include "magic_blob.h" +#include "data_format/magic_blob.h" -#define XXH_INLINE_ALL -#include "xxhash.h" - -namespace _preloader_v_1_12_0
{ - -constexpr uint64_t hash_qseed(uint64_t stored_seed) { - constexpr auto v1 = 0x7ED55D16u; - constexpr auto v2 = 0xC761C23Cu; - constexpr auto v3 = 0x165667B1u; - constexpr auto v4 = 0x160733E3u; - constexpr auto v5 = 0x028FB93Bu; - constexpr auto v6 = 0xB55A4F09uLL; - - constexpr auto al = [](uint32_t a) { - auto c1 = (0x1000 + 1) * a + v1; - auto d1 = (0x20 + 1) * (c1 ^ (c1 >> 19) ^ v2); - auto e1 = ((d1 + v3) << 9) ^ (d1 - v4); - return e1 + 8 * e1 - v5; - }; - - auto a1 = al(stored_seed >> 32); - auto a2 = al(stored_seed); - - auto b1 = v6 | 0xFFFFFFFF00000000uLL; - - auto c1 = ((a1 & 0xFFFF0000) ^ ((a1 ^ b1) << 16)) << 16; - auto c2 = a2 ^ ((a2 ^ v6 << 16) >> 16); - - return c1 | c2; -} - -} // namespace _preloader_v_1_12_0 +#include "data_format/_pl/v1_12_0/blob_impl.h" namespace di::data_format { -void MagicBlob::read(const fs::path& path) { - StreamedIO::read(path); - - m_stored_seed = eat(); - m_query_seed = _preloader_v_1_12_0::hash_qseed(m_stored_seed); - - rva_t n_rva{}; - - while (next() != EOF) { - auto flags = eat_varint(); - auto rva = eat_varint(); - auto hash = eat(); - - // see comments in magic_entry.h - n_rva += rva; - rva = n_rva; - - m_entries.emplace(hash, std::make_unique(flags, rva)); - } -} - -MagicEntry const* MagicBlob::query(std::string_view symbol) const { - auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed); - if (m_entries.contains(query_hash)) { - return m_entries.at(query_hash).get(); +MagicBlob::blob_t MagicBlob::create(FormatVersion version) { + switch (version) { + case V1_12_0: + return std::make_unique<_pl::v1_12_0::MagicBlobImpl>(); } return nullptr; } } // namespace di::data_format diff --git a/src/data_format/magic_blob.h b/src/data_format/magic_blob.h index bfa54d3..e70314b 100644 --- a/src/data_format/magic_blob.h +++ b/src/data_format/magic_blob.h @@ -7,25 +7,29 @@ namespace di::data_format { class MagicBlob : public io::StreamedIO {
public: - using for_each_callback_t = std::function; + enum FormatVersion { + V1_12_0, + }; - void read(const fs::path& path) override; + using blob_t = std::unique_ptr; + using entry_t = std::unique_ptr; - DI_CONSTEXPR void for_each(const for_each_callback_t& callback) const { - for (const auto& [hash, entry] : m_entries) callback(hash, *entry); - } + using shared_blob_t = std::shared_ptr; + using shared_entry_t = std::shared_ptr; - constexpr size_t count() const { return m_entries.size(); } + using for_each_callback_t = + std::function; - MagicEntry const* query(std::string_view symbol) const; + virtual void read(const fs::path& path) = 0; -private: - std::unordered_map> m_entries; + virtual shared_entry_t query(std::string_view symbol) const = 0; + virtual void for_each(const for_each_callback_t& callback) const = 0; + virtual size_t count() const = 0; - // MagicBlob uses a custom algorithm to transform the stored seed. When - // querying, you should use m_query_seed. - uint64_t m_stored_seed{}; - uint64_t m_query_seed{}; + static blob_t create(FormatVersion version); + +protected: + MagicBlob() = default; }; } // namespace di::data_format diff --git a/src/data_format/type/magic_entry.cpp b/src/data_format/type/magic_entry.cpp index 745b0c6..ca127ba 100644 --- a/src/data_format/type/magic_entry.cpp +++ b/src/data_format/type/magic_entry.cpp @@ -4,12 +4,9 @@ namespace di { -void to_json(nlohmann::json& json, const MagicEntry& entry) { - json["rva"] = entry.rva; - json["is_function"] = entry.is_function(); - json["_unk2"] = entry._unk2(); - json["is_verbose"] = entry.is_verbose(); - json["_unk4"] = entry._unk4(); +void MagicEntry::to_json(nlohmann::json& json) const { + json["hash"] = hash; + json["rva"] = rva; } } // namespace di \ No newline at end of file diff --git a/src/data_format/type/magic_entry.h b/src/data_format/type/magic_entry.h index 26ad3d3..f85289b 100644 ---
a/src/data_format/type/magic_entry.h +++ b/src/data_format/type/magic_entry.h @@ -5,28 +5,15 @@ namespace di { struct MagicEntry { -#if __cpp_aggregate_paren_init < 201902L - MagicEntry(std::bitset<64> flags, rva_t rva) : flags(flags), rva(rva) {} -#endif + hash_t hash; // key + rva_t rva; // value - std::bitset<64> flags; - // What is stored in the original format is not the RVA itself, but the - // difference with the previous entry (in MagicBlob, RVA is sorted from - // small to large) - // But here, we still store the "real" RVA. - rva_t rva; - // Do not put the original hash in the entry yet. - // hash_t hash; + MagicEntry(hash_t hash, rva_t rva) : hash(hash), rva(rva) {} - constexpr bool is_function() const { return flags[0]; } - constexpr bool _unk2() const { return flags[1]; } - constexpr bool is_verbose() const { return flags[2]; } - constexpr bool _unk4() const { return flags[3]; } + virtual void to_json(nlohmann::json& json) const; + // Entries are handled through base-class pointers (see entry_t / + // shared_entry_t in MagicBlob), so destruction must be virtual. + virtual ~MagicEntry() = default; }; -void to_json(nlohmann::json& json, const MagicEntry& entry); - -// TODO -// void from_json(const nlohmann::json& json, MagicEntry& entry); - } // namespace di diff --git a/src/tools/askrva/main.cpp b/src/tools/askrva/main.cpp index 795ec7d..2ebbc25 100644 --- a/src/tools/askrva/main.cpp +++ b/src/tools/askrva/main.cpp @@ -2,14 +2,15 @@ #include "data_format/raw_text.h" #include "data_format/typed_symbol_list.h" -#include - #if DI_USE_NATIVE_SYMBOL_RESOLVER #include #else #include "data_format/magic_blob.h" #endif +#include +#include + using namespace di; using namespace di::data_format; @@ -20,6 +21,7 @@ using namespace di::data_format; std::vector m_input_paths; std::string m_output_path; std::string m_magic_blob_path; + std::string m_format_version; std::optional m_output_failed_path; } args; @@ -36,6 +38,15 @@ using namespace di::data_format; .help("Path to magic blob (for builtin-symbol-resolver only).") .default_value("bedrock_runtime_data") .store_into(args.m_magic_blob_path); + + std::apply([&](auto&&...
xs) { + program.add_argument("--preloader-version") + .help("Choose a compatible PreLoader version. (for builtin-symbol-resolver only).") + .choices(xs...) + .store_into(args.m_format_version) + .required(); + }, magic_enum::enum_names()); + program.add_argument("--output", "-o") .help("Path to output.") @@ -75,10 +86,24 @@ int main(int argc, char* argv[]) try { ); #if !DI_USE_NATIVE_SYMBOL_RESOLVER - MagicBlob magic_blob; - magic_blob.read(args.m_magic_blob_path); + auto format_version = + magic_enum::enum_cast(args.m_format_version); + assert(format_version.has_value()); - std::println("{} entries loaded from magicblob.", magic_blob.count()); + auto magic_blob = MagicBlob::create(*format_version); + if (!magic_blob) { + std::println( + "Format version: {} is not yet implemented.", + args.m_format_version + ); + // Without a blob there is nothing to read; bail out instead of + // dereferencing the null pointer below. + return 1; + } + + magic_blob->read(args.m_magic_blob_path); + + std::println("{} entries loaded from magicblob.", magic_blob->count()); #endif symlist.for_each([&](const TypedSymbol& symbol) { @@ -94,7 +119,7 @@ int main(int argc, char* argv[]) try { )); // TODO: imagebase... #else - if (auto entry = magic_blob.query(sym)) { + if (auto entry = magic_blob->query(sym)) { rva = entry->rva; } #endif diff --git a/src/tools/blob-extractor/main.cpp b/src/tools/blob-extractor/main.cpp index 77af351..9e2e055 100644 --- a/src/tools/blob-extractor/main.cpp +++ b/src/tools/blob-extractor/main.cpp @@ -1,6 +1,7 @@ #include "data_format/magic_blob.h" #include +#include #include using namespace di; @@ -13,6 +14,8 @@ auto load_args(int argc, char* argv[]) { struct { std::string m_magic_blob_path; fs::path m_output_path; + + std::string m_format_version; } args; // clang-format off @@ -26,6 +29,14 @@ auto load_args(int argc, char* argv[]) { .help("Path to output symlist.") .required(); + std::apply([&](auto&&... xs) { + program.add_argument("--preloader-version") + .help("Choose a compatible PreLoader version. (for builtin-symbol-resolver only).") + .choices(xs...)
+ .store_into(args.m_format_version) + .required(); + }, magic_enum::enum_names()); + // clang-format on program.parse_args(argc, argv); @@ -39,12 +50,26 @@ int main(int argc, char* argv[]) try { auto args = load_args(argc, argv); - MagicBlob blob; - blob.read(args.m_magic_blob_path); + auto format_version = + magic_enum::enum_cast(args.m_format_version); + assert(format_version.has_value()); + + auto magic_blob = MagicBlob::create(*format_version); + if (!magic_blob) { + std::println( + "Format version: {} is not yet implemented.", + args.m_format_version + ); + // Without a blob there is nothing to extract; bail out instead of + // dereferencing the null pointer below. + return 1; + } + + magic_blob->read(args.m_magic_blob_path); nlohmann::json data; - blob.for_each([&data](hash_t hash, const MagicEntry& entry) { - data[std::format("{:#x}", hash)] = entry; + magic_blob->for_each([&data](hash_t, const MagicBlob::shared_entry_t& entry) { + entry->to_json(data.emplace_back()); }); std::ofstream ofs(args.m_output_path); diff --git a/xmake.lua b/xmake.lua index fd08c65..f1e8470 100644 --- a/xmake.lua +++ b/xmake.lua @@ -4,6 +4,7 @@ add_requires('argparse 3.2') add_requires('nlohmann_json 3.12.0') add_requires('xxhash 0.8.3') add_requires('libllvm 19.1.7') +add_requires('magic_enum 0.9.7') add_requires('boost 1.88.0', { system = false, configs = { @@ -97,7 +98,10 @@ target('askrva') set_pcxxheader('src/pch.h') add_deps('libdi') - add_packages('argparse') + add_packages( + 'argparse', + 'magic_enum' + ) if is_config('symbol-resolver', 'native') then add_packages('preloader') @@ -112,7 +116,8 @@ target('blob-extractor') add_deps('libdi') add_packages( 'argparse', - 'nlohmann_json' + 'nlohmann_json', + 'magic_enum' ) target('dumpsym')