diff --git a/src/data_format/magic_blob.cpp b/src/data_format/magic_blob.cpp
new file mode 100644
index 0000000..80900e0
--- /dev/null
+++ b/src/data_format/magic_blob.cpp
@@ -0,0 +1,77 @@
+#include "magic_blob.h"
+
+#include <cstdint>
+#include <memory>
+
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+
+namespace {
+
+// First mixing stage of the seed transform, applied to one 32-bit half.
+// Unsigned arithmetic is used on purpose: every step wraps modulo 2^32,
+// whereas the signed form of the decompiler output overflows (UB).
+constexpr uint32_t mix_half(uint32_t half) {
+    const uint32_t t = 4097u * half + 2127912214u;
+    return 33u * (t ^ (t >> 19) ^ 0xC761C23Cu);
+}
+
+// Second mixing stage: shift/xor of the mixed half, then "x + 8 * x" and a
+// constant offset, again modulo 2^32.
+constexpr uint32_t finish_half(uint32_t mixed) {
+    const uint32_t x = ((mixed + 374761393u) << 9) ^ (mixed - 369570787u);
+    return x + 8u * x - 42973499u;
+}
+
+} // namespace
+
+// Transforms the seed stored in the blob into the seed used for queries.
+// The constants come from IDA (F5) output. The halves of a1 are taken with
+// shifts instead of the HIDWORD pointer cast, which is not permitted in a
+// constant expression and depends on byte order.
+constexpr uint64_t unk_hash(uint64_t a1) {
+    const uint64_t lo = finish_half(mix_half(static_cast<uint32_t>(a1)));
+    const uint32_t hi = finish_half(mix_half(static_cast<uint32_t>(a1 >> 32)));
+
+    return (lo ^ ((lo ^ 0xB55A4F090000uLL) >> 16))
+         | (((hi & 0xFFFF0000u) ^ ((hi ^ 0xFFFFFFFFB55A4F09uLL) << 16)) << 16);
+}
+
+namespace di::data_format {
+
+// Loads the blob at `path`: reads the stored seed, derives the query seed,
+// then reads (flags, rva delta, hash) records until next() reports EOF.
+void MagicBlob::read(const fs::path& path) {
+    StreamedIO::read(path);
+
+    // NOTE(review): eat() / eat_varint() may have lost explicit template
+    // arguments in the reviewed text (std::make_unique below had); the calls
+    // are left untouched -- confirm against the original source.
+    m_stored_seed = eat();
+    m_query_seed  = unk_hash(m_stored_seed);
+
+    rva_t n_rva{};
+
+    while (next() != EOF) {
+        auto flags = eat_varint();
+        auto rva   = eat_varint();
+        auto hash  = eat();
+
+        // see comments in magic_entry.h
+        n_rva += rva;
+        rva    = n_rva;
+
+        m_entities.emplace(hash, std::make_unique<MagicEntry>(flags, rva));
+    }
+}
+
+// Returns the entry whose stored hash equals XXH64(symbol, m_query_seed),
+// or nullptr when there is none.
+MagicEntry const* MagicBlob::query(std::string_view symbol) const {
+    const auto query_hash = XXH64(symbol.data(), symbol.size(), m_query_seed);
+    // Single lookup instead of contains() followed by at().
+    const auto it = m_entities.find(query_hash);
+    return it != m_entities.end() ? it->second.get() : nullptr;
+}
+
+} // namespace di::data_format
\ No newline at end of file
diff --git a/src/data_format/magic_blob.h b/src/data_format/magic_blob.h
new file mode 100644
index 0000000..3e38e48
--- /dev/null
+++ b/src/data_format/magic_blob.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string_view>
+#include <unordered_map>
+
+#include "data_format/io/streamed_io.h"
+#include "data_format/type/magic_entry.h"
+
+namespace di::data_format {
+
+// Hash -> entry table loaded from a "magic blob" file.
+class MagicBlob : public StreamedIO {
+public:
+    // Reads the blob at `path` and fills the entry table.
+    void read(const fs::path& path) override;
+
+    // Number of entries currently loaded. Not constexpr:
+    // std::unordered_map::size() is not a constexpr function in C++20/23.
+    size_t count() const noexcept { return m_entities.size(); }
+
+    // Returns the entry matching `symbol`, or nullptr if there is none.
+    MagicEntry const* query(std::string_view symbol) const;
+
+private:
+    // NOTE(review): the template arguments were missing in the reviewed text
+    // ("std::unordered_map> m_entities"). The mapped type follows from
+    // query() returning `MagicEntry const*` via .get(); the key type is
+    // assumed to be the 64-bit XXH64 hash -- confirm against the original.
+    std::unordered_map<uint64_t, std::unique_ptr<MagicEntry>> m_entities;
+
+    // MagicBlob uses a custom algorithm to transform the stored seed. When
+    // querying, you should use m_query_seed.
+    uint64_t m_stored_seed{};
+    uint64_t m_query_seed{};
+};
+
+} // namespace di::data_format
diff --git a/src/data_format/type/magic_entry.h b/src/data_format/type/magic_entry.h
new file mode 100644
index 0000000..7271194
--- /dev/null
+++ b/src/data_format/type/magic_entry.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <bitset>
+
+namespace di {
+
+// One record of a MagicBlob: a 64-bit flag set and an RVA.
+struct MagicEntry {
+    std::bitset<64> flags;
+    // What is stored in the original format is not the RVA itself, but the
+    // difference from the previous entry (in MagicBlob, RVAs are sorted from
+    // small to large).
+    // But here, we still store the "real" RVA.
+    rva_t rva;
+    // Do not put the original hash in the entry yet.
+    // hash_t hash;
+
+    // Flag accessors. They are const so they can be called through the
+    // `MagicEntry const*` returned by MagicBlob::query().
+    constexpr bool is_function() const { return flags[0]; }
+    constexpr bool _unk2() const { return flags[1]; }
+    constexpr bool is_verbose() const { return flags[2]; }
+    constexpr bool _unk4() const { return flags[3]; }
+};
+
+} // namespace di