From 2ec106b78519db03f2f7909ab390173a1708529a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 23 Nov 2025 19:38:25 -0800 Subject: [PATCH 1/2] Rebuild opcode map whenever ISA/logging changes This facilitates caching less info in the opcode cache. --- riscv/csrs.cc | 1 + riscv/processor.cc | 1 + riscv/processor.h | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/riscv/csrs.cc b/riscv/csrs.cc index fbd914fe..3d5a10aa 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -778,6 +778,7 @@ bool misa_csr_t::unlogged_write(const reg_t val) noexcept { } proc->get_mmu()->flush_tlb(); + proc->build_opcode_map(); return basic_csr_t::unlogged_write(new_misa); } diff --git a/riscv/processor.cc b/riscv/processor.cc index ffdaa190..11a4d6cc 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -146,6 +146,7 @@ void processor_t::enable_log_commits() { log_commits_enabled = true; mmu->flush_tlb(); // the TLB caches this setting + build_opcode_map(); } void processor_t::reset() diff --git a/riscv/processor.h b/riscv/processor.h index 38781b8b..4939ef55 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -347,6 +347,7 @@ public: register_insn(insn, true /* is_custom */); } void register_extension(extension_t*); + void build_opcode_map(); // MMIO slave interface bool load(reg_t addr, size_t len, uint8_t* bytes) override; @@ -429,7 +430,6 @@ private: friend class extension_t; void parse_priv_string(const char*); - void build_opcode_map(); void register_base_instructions(); insn_func_t decode_insn(insn_t insn); From f7616ad7c258b2b313f78a635e2c18e5ca39170c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 24 Nov 2025 16:47:25 -0800 Subject: [PATCH 2/2] Eliminate the opcode cache Just bucket the instruction lists, then search them. Simpler and faster. 
--- riscv/processor.cc | 70 ++++++++++++++++++++++++---------------------- riscv/processor.h | 58 ++++++++------------------------------ 2 files changed, 47 insertions(+), 81 deletions(-) diff --git a/riscv/processor.cc b/riscv/processor.cc index 11a4d6cc..8962d6c1 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -62,13 +62,8 @@ processor_t::processor_t(const char* isa_str, const char* priv_str, VU.vlenb = isa.get_vlen() / 8; VU.vstart_alu = 0; - register_base_instructions(); mmu = new mmu_t(sim, cfg->endianness, this, cfg->cache_blocksz); - disassembler = new disassembler_t(&isa); - for (auto e : isa.get_extensions()) - register_extension(find_extension(e.c_str())()); - set_pmp_granularity(cfg->pmpgranularity); set_pmp_num(cfg->pmpregions); @@ -81,6 +76,12 @@ processor_t::processor_t(const char* isa_str, const char* priv_str, set_impl(IMPL_MMU_VMID, true); reset(); + + register_base_instructions(); + + disassembler = new disassembler_t(&isa); + for (auto e : isa.get_extensions()) + register_extension(find_extension(e.c_str())()); } processor_t::~processor_t() @@ -663,47 +664,48 @@ reg_t processor_t::throw_instruction_address_misaligned(reg_t pc) insn_func_t processor_t::decode_insn(insn_t insn) { - if (!extension_enabled(EXT_ZCA) && insn_length(insn.bits()) % 4) - return &::illegal_instruction; - - // look up opcode in hash table - size_t idx = insn.bits() % OPCODE_CACHE_SIZE; - auto [hit, desc] = opcode_cache[idx].lookup(insn.bits()); + const auto& pool = opcode_map[insn.bits() % std::size(opcode_map)]; - bool rve = extension_enabled('E'); - - if (unlikely(!hit)) { - // fall back to linear search - auto matching = [insn_bits = insn.bits()](const insn_desc_t &d) { - return (insn_bits & d.mask) == d.match; - }; - auto p = std::find_if(custom_instructions.begin(), - custom_instructions.end(), matching); - if (p == custom_instructions.end()) { - p = std::find_if(instructions.begin(), instructions.end(), matching); - assert(p != instructions.end()); + for 
(auto p = pool.begin(); ; ++p) { + if ((insn.bits() & p->mask) == p->match) { + return p->func; } - desc = &*p; - opcode_cache[idx].replace(insn.bits(), desc); } - - return desc->func(xlen, rve, log_commits_enabled); } -void processor_t::register_insn(insn_desc_t desc, bool is_custom) { +void processor_t::register_insn(insn_desc_t desc, std::vector& pool) { assert(desc.fast_rv32i && desc.fast_rv64i && desc.fast_rv32e && desc.fast_rv64e && desc.logged_rv32i && desc.logged_rv64i && desc.logged_rv32e && desc.logged_rv64e); - if (is_custom) - custom_instructions.push_back(desc); - else - instructions.push_back(desc); + pool.push_back(desc); } void processor_t::build_opcode_map() { - for (size_t i = 0; i < OPCODE_CACHE_SIZE; i++) - opcode_cache[i].reset(); + bool rve = extension_enabled('E'); + bool zca = extension_enabled(EXT_ZCA); + const size_t N = std::size(opcode_map); + + auto build_one = [&](const insn_desc_t& desc) { + auto func = desc.func(xlen, rve, log_commits_enabled); + if (!zca && insn_length(desc.match) % 4) + func = &::illegal_instruction; + + auto stride = std::min(N, size_t(1) << ctz(~desc.mask)); + for (size_t i = desc.match & (stride - 1); i < N; i += stride) { + if ((desc.match % N) == (i & desc.mask)) + opcode_map[i].push_back({desc.match, desc.mask, func}); + } + }; + + for (auto& p : opcode_map) + p.clear(); + + for (auto& d : custom_instructions) + build_one(d); + + for (auto& d : instructions) + build_one(d); } void processor_t::register_extension(extension_t *x) { diff --git a/riscv/processor.h b/riscv/processor.h index 4939ef55..d0c5b3f9 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -61,6 +61,13 @@ struct insn_desc_t static const insn_desc_t illegal_instruction; }; +struct opcode_map_entry_t +{ + insn_bits_t match; + insn_bits_t mask; + insn_func_t func; +}; + // regnum, data typedef std::map commit_log_reg_t; @@ -207,47 +214,6 @@ struct state_t void csr_init(processor_t* const proc, reg_t max_isa); }; -class opcode_cache_entry_t 
{ - public: - opcode_cache_entry_t() - { - reset(); - } - - void reset() - { - for (size_t i = 0; i < associativity; i++) { - tag[i] = 0; - contents[i] = &insn_desc_t::illegal_instruction; - } - } - - void replace(insn_bits_t opcode, const insn_desc_t* desc) - { - for (size_t i = associativity - 1; i > 0; i--) { - tag[i] = tag[i-1]; - contents[i] = contents[i-1]; - } - - tag[0] = opcode; - contents[0] = desc; - } - - std::tuple lookup(insn_bits_t opcode) - { - for (size_t i = 0; i < associativity; i++) - if (tag[i] == opcode) - return std::tuple(true, contents[i]); - - return std::tuple(false, nullptr); - } - - private: - static const size_t associativity = 4; - insn_bits_t tag[associativity]; - const insn_desc_t* contents[associativity]; -}; - // this class represents one processor in a RISC-V machine. class processor_t : public abstract_device_t { @@ -341,10 +307,10 @@ public: FILE *get_log_file() { return log_file; } void register_base_insn(insn_desc_t insn) { - register_insn(insn, false /* is_custom */); + register_insn(insn, instructions); } void register_custom_insn(insn_desc_t insn) { - register_insn(insn, true /* is_custom */); + register_insn(insn, custom_instructions); } void register_extension(extension_t*); void build_opcode_map(); @@ -406,19 +372,17 @@ private: std::bitset extension_dynamic; mutable std::bitset extension_assumed_const; + std::vector opcode_map[128]; std::vector instructions; std::vector custom_instructions; std::unordered_map pc_histogram; - static const size_t OPCODE_CACHE_SIZE = 4095; - opcode_cache_entry_t opcode_cache[OPCODE_CACHE_SIZE]; - void take_pending_interrupt() { take_interrupt(state.mip->read() & state.mie->read()); } void take_interrupt(reg_t mask); // take first enabled interrupt in mask void take_trap(trap_t& t, reg_t epc); // take an exception void take_trigger_action(triggers::action_t action, reg_t breakpoint_tval, reg_t epc, bool virt); void disasm(insn_t insn); // disassemble and print an instruction - void 
register_insn(insn_desc_t, bool); + void register_insn(insn_desc_t, std::vector& pool); void enter_debug_mode(uint8_t cause, uint8_t ext_cause);