From 41c31ce4278b91ba2eb361e5e50017d9f11d6c60 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Tue, 14 Apr 2026 09:14:57 -0700 Subject: [PATCH 1/2] [#2273][bulknormdot] fixing spurious invalid flag in bulk_norm_dot_no_mult Bulk normalized dot product is expected to raise the invalid flag if two products with infinite values and opposite signs are added. The Spike function failed to consider the second operand of a product when determining whether the product had an infinite value or not, and missed the exclusion of NaN and zero from this case. The zero case is quiet, because it happens for inf * zero, which is expected to raise the invalid flag. The qNaN case is not quiet and can cause a spurious invalid flag to appear when none was expected. --- riscv/bulknormdot.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/riscv/bulknormdot.h b/riscv/bulknormdot.h index 37981ae8..379e2f43 100644 --- a/riscv/bulknormdot.h +++ b/riscv/bulknormdot.h @@ -203,13 +203,18 @@ template bul a[i].isZero() || b[i].isZero() ? 
(f32_exp_bias - (lhs_bias + rhs_bias)) : // minimalize exp of zero product a[i].expSubFixed() + b[i].expSubFixed() + (f32_exp_bias - (lhs_bias + rhs_bias)); + bool a_is_zero = (a[i].subOrZero() && cfg.flushSub) || a[i].isZero(); + bool b_is_zero = (b[i].subOrZero() && cfg.flushSub) || b[i].isZero(); + bool either_inf = a[i].inf() || b[i].inf(); - any_pos_inf |= either_inf && a[i].sign() == b[i].sign(); - any_neg_inf |= either_inf && a[i].sign() != b[i].sign(); + bool either_nan = a[i].nan() || b[i].nan(); + bool either_zero = a_is_zero || b_is_zero; + any_pos_inf |= either_inf && !either_nan && !either_zero && a[i].sign() == b[i].sign(); + any_neg_inf |= either_inf && !either_nan && !either_zero && a[i].sign() != b[i].sign(); any_invalid_nan |= - (a[i].inf() && ((b[i].subOrZero() && cfg.flushSub) || b[i].isZero())) || - (b[i].inf() && ((a[i].subOrZero() && cfg.flushSub) || a[i].isZero())); + (a[i].inf() && b_is_zero) || + (b[i].inf() && a_is_zero); any_nan |= any_invalid_nan || a[i].nan() || b[i].nan(); From f369386d5af92c573504c235c1ce0eedccafde37 Mon Sep 17 00:00:00 2001 From: Alexander Romanov Date: Wed, 15 Apr 2026 13:44:29 +0300 Subject: [PATCH 2/2] refactor: remove excessive chunk_max_size calls These calls are virtual and cause notable overhead --- fesvr/htif.cc | 4 ++-- fesvr/memif.cc | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fesvr/htif.cc b/fesvr/htif.cc index 267bb498..26c3a3bf 100644 --- a/fesvr/htif.cc +++ b/fesvr/htif.cc @@ -277,8 +277,8 @@ void htif_t::clear_chunk(addr_t taddr, size_t len) { std::vector zeros(chunk_max_size(), 0); - for (size_t pos = 0; pos < len; pos += chunk_max_size()) - write_chunk(taddr + pos, std::min(len - pos, chunk_max_size()), &zeros[0]); + for (size_t pos = 0; pos < len; pos += zeros.size()) + write_chunk(taddr + pos, std::min(len - pos, zeros.size()), &zeros[0]); } int htif_t::run() diff --git a/fesvr/memif.cc b/fesvr/memif.cc index 59938b91..78107ce3 100644 --- a/fesvr/memif.cc +++ 
b/fesvr/memif.cc @@ -35,8 +35,9 @@ void memif_t::read(addr_t addr, size_t len, void* bytes) } // now we're aligned - for (size_t pos = 0; pos < len; pos += cmemif->chunk_max_size()) - cmemif->read_chunk(addr + pos, std::min(cmemif->chunk_max_size(), len - pos), (char*)bytes + pos); + size_t max_chunk = cmemif->chunk_max_size(); + for (size_t pos = 0; pos < len; pos += max_chunk) + cmemif->read_chunk(addr + pos, std::min(max_chunk, len - pos), (char*)bytes + pos); } void memif_t::write(addr_t addr, size_t len, const void* bytes)