WIP adding next_pc: added mock_fetch_decode_pipe

This commit is contained in:
Jacob Lifshay 2025-10-27 22:41:33 -07:00
parent 688732ec4c
commit a927451f8c
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
6 changed files with 12796 additions and 4 deletions

View file

@ -34,6 +34,8 @@ pub struct CpuConfig {
pub units: Vec<UnitConfig>,
pub out_reg_num_width: usize,
pub fetch_width: NonZeroUsize,
pub max_branches_per_fetch: NonZeroUsize,
pub log2_fetch_width_in_bytes: u8,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
@ -47,6 +49,13 @@ impl CpuConfig {
};
v
};
/// Default for [`CpuConfig::max_branches_per_fetch`]: at most one branch per fetch group.
// `let ... else { unreachable!() }` is a const-friendly way to unwrap `NonZeroUsize::new`.
pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(1) else {
unreachable!();
};
v
};
/// Default for [`CpuConfig::log2_fetch_width_in_bytes`]: 2^3 = 8-byte fetch blocks.
pub const DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES: u8 = 3;
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(8) else {
unreachable!();
@ -58,6 +67,8 @@ impl CpuConfig {
units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH,
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
log2_fetch_width_in_bytes: Self::DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
@ -117,4 +128,21 @@ impl CpuConfig {
UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width]
[self.non_const_unit_nums().len()]
}
/// Fetch-block width in bytes, i.e. `1 << self.log2_fetch_width_in_bytes`.
///
/// # Panics
///
/// Panics if `log2_fetch_width_in_bytes >= usize::BITS` (shift would overflow).
pub fn fetch_width_in_bytes(&self) -> usize {
1usize
.checked_shl(self.log2_fetch_width_in_bytes.into())
.expect("log2_fetch_width_in_bytes is too big")
}
}
// Size aliases that expose fields of a `PhantomConst<CpuConfig>` as HDL `DynSize`s,
// so type-level widths can be derived from the runtime config.
/// [`CpuConfig::fetch_width`] as a type-level size.
#[hdl(get(|c| c.fetch_width.get()))]
pub type CpuConfigFetchWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
/// [`CpuConfig::max_branches_per_fetch`] as a type-level size.
#[hdl(get(|c| c.max_branches_per_fetch.get()))]
pub type CpuConfigMaxBranchesPerFetch<C: PhantomConstGet<CpuConfig>> = DynSize;
/// [`CpuConfig::log2_fetch_width_in_bytes`] as a type-level size.
#[hdl(get(|c| c.log2_fetch_width_in_bytes.into()))]
pub type CpuConfigLog2FetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// [`CpuConfig::fetch_width_in_bytes`] as a type-level size.
#[hdl(get(|c| c.fetch_width_in_bytes()))]
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;

View file

@ -2,6 +2,7 @@
// See Notices.txt for copyright information
pub mod config;
pub mod instruction;
pub mod next_pc;
pub mod reg_alloc;
pub mod register;
pub mod unit;

804
crates/cpu/src/next_pc.rs Normal file
View file

@ -0,0 +1,804 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10)
//!
//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage,
//! the `fetch` stage's outputs eventually end up in the `decode` stage,
//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`)
//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds
//! information back to the `fetch` stage to cancel fetches that need to be predicted differently.
use crate::{
config::{CpuConfig, CpuConfigFetchWidth},
util::array_vec::ArrayVec,
};
use fayalite::{
int::{UIntInRange, UIntInRangeInclusive, UIntInRangeType},
prelude::*,
sim::{ForkJoinScope, value::SimOnlyValueTrait},
util::ready_valid::ReadyValid,
};
/// Predicted outcome of a conditional branch.
#[hdl]
pub enum PredictedCond {
Taken,
Fallthrough,
}
/// Marker type for branches (before the last one in a fetch group) that are
/// necessarily predicted as falling through.
#[hdl]
pub struct PredictedFallthrough {}
/// Kind of a predicted control-flow instruction; `CondKind` carries the
/// condition prediction (`HdlNone` means the instruction is unconditional).
#[hdl]
pub enum BranchPredictionKind<CondKind> {
Branch(HdlOption<CondKind>),
IndirectBranch(HdlOption<CondKind>),
Call(HdlOption<CondKind>),
IndirectCall(HdlOption<CondKind>),
Ret(HdlOption<CondKind>),
}
/// Capacity for branches *before* the last one in a fetch group:
/// `max_branches_per_fetch - 1` (the last branch is stored separately).
#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))]
pub type NextPcPredictionMaxBranchesBeforeLast<C: PhantomConstGet<CpuConfig>> = DynSize;
/// One `next_pc` prediction for a fetch group: where to fetch from and which
/// branches inside the group are predicted, ending with the (only possibly
/// taken) last branch and its target.
#[hdl(no_static)]
pub struct NextPcPrediction<C: PhantomConstGet<CpuConfig>> {
pub fetch_pc: UInt<64>,
pub async_interrupt: Bool,
/// branches before the last are always predicted fallthrough
pub branches_before_last: ArrayVec<
BranchPredictionKind<PredictedFallthrough>,
NextPcPredictionMaxBranchesBeforeLast<C>,
>,
pub last_branch: HdlOption<BranchPredictionKind<PredictedCond>>,
pub last_branch_target_pc: UInt<64>,
}
/// Bit width of a fetch-block id (derived from [`FetchBlockIdInt`]).
pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize;
// Host-side integer type used to manipulate fetch-block ids in simulation.
type FetchBlockIdInt = u8;
/// Payload sent from `next_pc` to `fetch` for one fetch request.
#[hdl]
pub struct NextPcToFetchInterfaceInner {
pub next_fetch_pc: UInt<64>,
/// sequence id used to match fetches with later pipeline stages
pub fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
/// number of newest in-progress fetches to cancel (e.g. after re-prediction)
pub in_progress_fetches_to_cancel: UInt<8>,
}
/// Ready/valid channel from `next_pc` to `fetch`, plus the config constant.
#[hdl(no_static)]
pub struct NextPcToFetchInterface<C: PhantomConstGet<CpuConfig>> {
pub inner: ReadyValid<NextPcToFetchInterfaceInner>,
pub config: C,
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
pub enum WipDecodedInsnKind {
NonBranch,
Branch(UInt<64>),
BranchCond(UInt<64>),
IndirectBranch,
IndirectBranchCond,
Call(UInt<64>),
CallCond(UInt<64>),
IndirectCall,
IndirectCallCond,
Ret,
RetCond,
/// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on.
Interrupt(UInt<64>),
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
pub struct WipDecodedInsn {
pub fetch_block_id: UInt<8>,
pub id: UInt<12>,
pub pc: UInt<64>,
pub size_in_bytes: UInt<4>,
pub kind: WipDecodedInsnKind,
}
#[hdl(no_static)]
/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire,
/// as well as updating state when a branch instruction is mis-speculated.
pub struct NextPcToRetireInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
pub config: C,
}
/// Payload sent from `decode` to `post_decode`: the decoded instructions of
/// one fetch block.
#[hdl(no_static)]
pub struct DecodeToPostDecodeInterfaceInner<C: PhantomConstGet<CpuConfig>> {
pub fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
pub insns: ArrayVec<WipDecodedInsn, CpuConfigFetchWidth<C>>,
// TODO: add needed fields
pub config: C,
}
/// Ready/valid channel from `decode` to `post_decode`.
#[hdl(no_static)]
pub struct DecodeToPostDecodeInterface<C: PhantomConstGet<CpuConfig>> {
pub inner: ReadyValid<DecodeToPostDecodeInterfaceInner<C>>,
}
/// Output of the `post_decode` stage (fields not yet defined — WIP).
#[hdl(no_static)]
pub struct PostDecodeOutputInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
pub config: C,
}
/// 2-bit saturating-counter state for one branch-predictor entry.
/// `towards_taken`/`towards_not_taken` saturate at the strong states.
#[hdl]
enum BranchPredictionState {
StronglyNotTaken,
WeaklyNotTaken,
WeaklyTaken,
StronglyTaken,
}
impl BranchPredictionState {
/// Predicted direction for this counter state (taken iff in a *Taken state).
#[must_use]
#[hdl]
fn is_taken(this: &SimValue<Self>) -> bool {
#[hdl(sim)]
match this {
Self::StronglyNotTaken => false,
Self::WeaklyNotTaken => false,
Self::WeaklyTaken => true,
Self::StronglyTaken => true,
}
}
/// Counter moved one step towards `StronglyTaken` (saturating).
#[must_use]
#[hdl]
fn towards_taken(this: &SimValue<Self>) -> SimValue<Self> {
(#[hdl(sim)]
match this {
Self::StronglyNotTaken => BranchPredictionState.WeaklyNotTaken(),
Self::WeaklyNotTaken => BranchPredictionState.WeaklyTaken(),
Self::WeaklyTaken => BranchPredictionState.StronglyTaken(),
Self::StronglyTaken => BranchPredictionState.StronglyTaken(),
})
.to_sim_value()
}
/// Counter moved one step towards `StronglyNotTaken` (saturating).
#[must_use]
#[hdl]
fn towards_not_taken(this: &SimValue<Self>) -> SimValue<Self> {
(#[hdl(sim)]
match this {
Self::StronglyNotTaken => BranchPredictionState.StronglyNotTaken(),
Self::WeaklyNotTaken => BranchPredictionState.StronglyNotTaken(),
Self::WeaklyTaken => BranchPredictionState.WeaklyNotTaken(),
Self::StronglyTaken => BranchPredictionState.WeaklyTaken(),
})
.to_sim_value()
}
}
impl SimValueDefault for BranchPredictionState {
// reset to a weak state so the first observed outcome can flip the prediction
fn sim_value_default(self) -> SimValue<Self> {
self.WeaklyNotTaken().to_sim_value()
}
}
/// Progress of a multi-cycle reset sequence.
#[derive(Copy, Clone, Debug)]
#[must_use]
enum ResetStatus {
    Done,
    Working,
}
impl ResetStatus {
    /// Combines two statuses: the overall reset is [`ResetStatus::Done`] only
    /// once *both* combined parts report `Done`, otherwise it is `Working`.
    fn and(self, other: Self) -> Self {
        if matches!(self, ResetStatus::Done) && matches!(other, ResetStatus::Done) {
            ResetStatus::Done
        } else {
            ResetStatus::Working
        }
    }
}
/// Types that have a well-defined simulation-time reset/default value.
trait SimValueDefault: Type {
fn sim_value_default(self) -> SimValue<Self>;
}
impl<T: SimOnlyValueTrait> SimValueDefault for SimOnly<T> {
fn sim_value_default(self) -> SimValue<Self> {
SimOnlyValue::<T>::default().to_sim_value_with_type(self)
}
}
impl<T: Type> SimValueDefault for HdlOption<T> {
// options default to HdlNone
fn sim_value_default(self) -> SimValue<Self> {
self.HdlNone().to_sim_value_with_type(self)
}
}
impl SimValueDefault for Bool {
fn sim_value_default(self) -> SimValue<Self> {
false.to_sim_value()
}
}
impl<Width: Size> SimValueDefault for UIntType<Width> {
fn sim_value_default(self) -> SimValue<Self> {
self.zero().to_sim_value()
}
}
/// Types whose reset is spread over multiple clock cycles; `reset_step` is
/// called with `step = 0, 1, 2, ...` once per cycle until it returns
/// [`ResetStatus::Done`].
trait ResetSteps: Type {
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus;
}
impl<T: SimValueDefault, Len: Size> ResetSteps for ArrayType<T, Len> {
// reset one array element per step; done once `step` reaches the last index
// (a zero-length array is immediately done)
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
let element = SimValue::ty(this).element();
let len = SimValue::ty(this).len();
if step < len {
this[step] = element.sim_value_default();
}
if step.saturating_add(1) >= len {
ResetStatus::Done
} else {
ResetStatus::Working
}
}
}
/// Return-address predictor stack used for `Call`/`Ret` prediction.
#[hdl]
struct CallStack {
return_addresses: Array<UInt<64>, { CallStack::SIZE }>,
/// number of valid entries (0..=SIZE)
len: UIntInRangeInclusive<0, { CallStack::SIZE }>,
}
impl CallStack {
const SIZE: usize = 16;
}
impl SimValueDefault for CallStack {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
CallStack {
// something other than zero so you can see the values getting reset
return_addresses: [!0u64; Self::SIZE],
len: 0usize.to_sim_value_with_type(self.len),
}
}
}
impl ResetSteps for CallStack {
// single-step reset: clear every slot and the length at once
#[hdl]
fn reset_step(this: &mut SimValue<Self>, _step: usize) -> ResetStatus {
#[hdl(sim)]
let CallStack {
return_addresses,
len,
} = this;
// return_addresses is implemented as a shift register, so it can be all reset at once
return_addresses.fill(0u64.to_sim_value());
**len = 0;
ResetStatus::Done
}
}
/// What kind of control-flow instruction a BTB entry describes.
#[hdl]
enum BTBEntryInsnKind {
Branch,
Call,
Ret,
}
/// How the entry's target address / direction should be interpreted.
#[hdl]
enum BTBEntryAddrKind {
Unconditional,
Indirect,
CondTaken,
CondNotTaken,
}
impl BTBEntryAddrKind {
/// Whether the branch is predicted taken (everything except `CondNotTaken`).
#[hdl]
fn taken(this: &SimValue<Self>) -> bool {
#[hdl(sim)]
match this {
Self::Unconditional | Self::Indirect | Self::CondTaken => true,
Self::CondNotTaken => false,
}
}
}
/// One branch-target-buffer entry, keyed by the fetch block's `start_pc`.
#[hdl]
struct BTBEntry {
/// address of first instruction to run in this fetch block
start_pc: UInt<64>,
target_pc: UInt<64>,
/// when branch is not taken, the next pc to fetch from is `start_pc + fallthrough_offset`.
/// needed because there may be more than one branch in a fetch block
fallthrough_offset: UInt<8>,
insn_kind: BTBEntryInsnKind,
addr_kind: BTBEntryAddrKind,
}
impl BTBEntry {
/// Next fetch PC when the branch is taken.
fn taken_pc(this: &SimValue<Self>) -> u64 {
this.target_pc.as_int()
}
/// Next fetch PC when the branch is not taken (wrapping 64-bit add).
fn not_taken_fetch_pc(this: &SimValue<Self>) -> u64 {
this.start_pc
.as_int()
.wrapping_add(this.fallthrough_offset.as_int().into())
}
}
/// Small fully-associative branch target buffer mapping fetch-block start PCs
/// to predicted targets.
#[hdl]
struct BranchTargetBuffer {
branch_pc_to_target_map: Array<HdlOption<BTBEntry>, { BranchTargetBuffer::SIZE }>,
}
impl BranchTargetBuffer {
const SIZE: usize = 16;
}
impl SimValueDefault for BranchTargetBuffer {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
BranchTargetBuffer {
// something other than zero so you can see the values getting reset
branch_pc_to_target_map: [HdlSome(
#[hdl(sim)]
BTBEntry {
start_pc: !0u64,
target_pc: !0u64,
fallthrough_offset: !0u8,
insn_kind: BTBEntryInsnKind.Call(),
addr_kind: BTBEntryAddrKind.CondNotTaken(),
},
); Self::SIZE],
}
}
}
impl ResetSteps for BranchTargetBuffer {
// delegates to the array impl: one entry cleared to HdlNone per step
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let BranchTargetBuffer {
branch_pc_to_target_map,
} = this;
ResetSteps::reset_step(branch_pc_to_target_map, step)
}
}
/// Ring buffer of recent branch outcomes (taken = true), with two heads so
/// speculatively-recorded outcomes can be confirmed or discarded later.
#[hdl]
struct BranchHistory {
history: Array<Bool, { BranchHistory::SIZE }>,
/// exclusive
tail: UIntInRange<0, { BranchHistory::SIZE }>,
/// inclusive, always at or after tail, always at or before speculative_head
non_speculative_head: UIntInRange<0, { BranchHistory::SIZE }>,
/// inclusive, always at or after both tail and non_speculative_head
speculative_head: UIntInRange<0, { BranchHistory::SIZE }>,
}
impl ResetSteps for BranchHistory {
// pointers are reset immediately; the history array is reset one bit per step
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let Self {
history,
tail,
non_speculative_head,
speculative_head,
} = this;
**tail = 0;
**non_speculative_head = 0;
**speculative_head = 0;
ResetSteps::reset_step(history, step)
}
}
impl SimValueDefault for BranchHistory {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
BranchHistory {
// something other than zero so you can see the values getting reset
history: [true; Self::SIZE],
tail: 0usize.to_sim_value_with_type(self.tail),
non_speculative_head: 0usize.to_sim_value_with_type(self.non_speculative_head),
speculative_head: 0usize.to_sim_value_with_type(self.speculative_head),
}
}
}
/// Error from [`BranchHistory::try_push_speculative`].
enum BranchHistoryTryPushSpeculativeError {
NoSpace,
}
/// Error from [`BranchHistory::try_push_non_speculative`].
enum BranchHistoryTryPushNonSpeculativeError {
NoSpace,
/// the confirmed outcome differs from what was speculatively recorded
Misprediction { speculated: bool },
}
impl BranchHistory {
const LOG2_SIZE: usize = 8;
const SIZE: usize = 1 << Self::LOG2_SIZE;
// ring-buffer index arithmetic, modulo SIZE
fn next_pos(pos: usize) -> usize {
(pos + 1) % Self::SIZE
}
fn prev_pos(pos: usize) -> usize {
(pos + Self::SIZE - 1) % Self::SIZE
}
/// Reads up to `N` outcomes walking backwards (newest first) from `head`
/// until `tail` (exclusive) is reached; remaining slots stay `false`.
fn history_from_head<const N: usize>(this: &SimValue<Self>, head: usize) -> [bool; N] {
let mut retval = [false; N];
let mut pos = head;
for entry in &mut retval {
if pos == *this.tail {
break;
}
*entry = *this.history[pos];
pos = Self::prev_pos(pos);
}
retval
}
/// Discards all unconfirmed speculative outcomes (e.g. on misprediction).
fn delete_speculative_history(this: &mut SimValue<Self>) {
let non_speculative_head = *this.non_speculative_head;
*this.speculative_head = non_speculative_head;
}
fn recent_history_including_speculative<const N: usize>(this: &SimValue<Self>) -> [bool; N] {
let head = *this.speculative_head;
Self::history_from_head(this, head)
}
// full when advancing the head would collide with the tail
// (one slot is left unused to distinguish full from empty)
fn speculative_full(this: &SimValue<Self>) -> bool {
let speculative_head = *this.speculative_head;
Self::next_pos(speculative_head) == *this.tail
}
/// Records a newly predicted (speculative) branch outcome.
fn try_push_speculative(
this: &mut SimValue<Self>,
value: bool,
) -> Result<(), BranchHistoryTryPushSpeculativeError> {
if Self::speculative_full(this) {
Err(BranchHistoryTryPushSpeculativeError::NoSpace)
} else {
let speculative_head = Self::next_pos(*this.speculative_head);
*this.speculative_head = speculative_head;
*this.history[speculative_head] = value;
Ok(())
}
}
/// Confirms the oldest unconfirmed speculative outcome with the actual
/// outcome `value`, reporting a misprediction if they disagree.
fn try_push_non_speculative(
this: &mut SimValue<Self>,
value: bool,
) -> Result<(), BranchHistoryTryPushNonSpeculativeError> {
let speculative_head = *this.speculative_head;
let non_speculative_head = *this.non_speculative_head;
// NOTE(review): equal heads here means there is no speculative entry left
// to confirm; the `NoSpace` error name reads oddly for that — confirm intent.
if speculative_head == non_speculative_head {
Err(BranchHistoryTryPushNonSpeculativeError::NoSpace)
} else {
let pos = Self::next_pos(non_speculative_head);
let speculated = *this.history[pos];
if speculated != value {
Err(BranchHistoryTryPushNonSpeculativeError::Misprediction { speculated })
} else {
*this.non_speculative_head = pos;
Ok(())
}
}
}
}
/// Simple ring-buffer queue; one slot is left unused to distinguish full from
/// empty (`is_full` when advancing `head` would reach `tail`).
#[hdl]
struct Queue<T, Capacity: Size> {
data: ArrayType<T, Capacity>,
/// inclusive
head: UIntInRangeType<ConstUsize<0>, Capacity>,
/// exclusive
tail: UIntInRangeType<ConstUsize<0>, Capacity>,
}
impl<T: Type, Capacity: Size> Queue<T, Capacity> {
fn capacity(self) -> usize {
self.data.len()
}
// ring-buffer index arithmetic, modulo capacity (capacity must be non-zero)
fn next_pos(self, pos: usize) -> usize {
assert_ne!(self.capacity(), 0);
(pos + 1) % self.capacity()
}
fn prev_pos(self, pos: usize) -> usize {
assert_ne!(self.capacity(), 0);
(pos + self.capacity() - 1) % self.capacity()
}
fn is_empty(this: &SimValue<Self>) -> bool {
this.head == this.tail
}
fn is_full(this: &SimValue<Self>) -> bool {
let head = *this.head;
let tail = *this.tail;
SimValue::ty(this).next_pos(head) == tail
}
/// Pushes at the `head` end; fails (returning `Err(())`) when full.
// NOTE(review): pushing advances `head` rather than `tail` — opposite of the
// usual queue convention; no pop is defined in this file, so confirm the
// intended consumer-side convention before relying on element order.
fn try_push(this: &mut SimValue<Self>, value: impl ToSimValueWithType<T>) -> Result<(), ()> {
if Self::is_full(this) {
Err(())
} else {
let head = *this.head;
let head = SimValue::ty(this).next_pos(head);
*this.head = head;
let data = &mut this.data[head];
*data = value.to_sim_value_with_type(SimValue::ty(data));
Ok(())
}
}
}
impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity> {
// empty queue with every slot holding the element default
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
let Self { data, head, tail } = self;
#[hdl(sim)]
Queue::<T, Capacity> {
data: repeat(
data.element().sim_value_default(),
Capacity::from_usize(data.len()),
),
head: 0usize.to_sim_value_with_type(head),
tail: 0usize.to_sim_value_with_type(tail),
}
}
}
impl<T: SimValueDefault, Capacity: Size> ResetSteps for Queue<T, Capacity> {
// pointers reset immediately; data resets one element per step
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let Queue::<T, Capacity> { data, head, tail } = this;
**head = 0;
**tail = 0;
ResetSteps::reset_step(data, step)
}
}
/// Bookkeeping entry for a fetch that has been issued but not yet retired from
/// the fetch pipeline.
#[hdl]
struct FetchQueueEntry {
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
}
impl SimValueDefault for FetchQueueEntry {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
FetchQueueEntry {
fetch_block_id: 0 as FetchBlockIdInt,
}
}
}
// branch predictor table: 2^8 = 256 two-bit counters, indexed by a hash of pc
// and branch history (see `NextPcState::branch_predictor_index`)
const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8;
const BRANCH_PREDICTOR_SIZE: usize = 1 << BRANCH_PREDICTOR_LOG2_SIZE;
/// All architectural/predictor state owned by the `next_pc` stage; exposed as a
/// module output for debugging/tracing.
#[hdl]
pub struct NextPcState<C: PhantomConstGet<CpuConfig>> {
speculative_call_stack: CallStack,
non_speculative_call_stack: CallStack,
branch_target_buffer: BranchTargetBuffer,
branch_history: BranchHistory,
branch_predictor: Array<BranchPredictionState, { BRANCH_PREDICTOR_SIZE }>,
/// ids of fetches issued but not yet completed; sized so every distinct
/// fetch_block_id value can be in flight
fetching_queue: Queue<FetchQueueEntry, ConstUsize<{ 1 << FETCH_BLOCK_ID_WIDTH }>>,
pc: UInt<64>,
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
config: C,
}
impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
    /// Returns the fetch-block-aligned PC to fetch from next: `pc` with the low
    /// `log2_fetch_width_in_bytes` bits cleared.
    fn next_fetch_pc(this: &SimValue<Self>) -> u64 {
        let pc = u64::try_from(this.pc.to_bigint()).expect("in range");
        pc & (!0u64 << SimValue::ty(&this.config).get().log2_fetch_width_in_bytes)
    }
    /// Hashes `pc` together with the recent branch history (including
    /// speculative outcomes) into an index into `branch_predictor`.
    fn branch_predictor_index(this: &SimValue<Self>, pc: u64) -> usize {
        // pack the newest BRANCH_PREDICTOR_LOG2_SIZE history bits into an integer
        let mut history = 0u64;
        let history_bits: [bool; BRANCH_PREDICTOR_LOG2_SIZE] =
            BranchHistory::recent_history_including_speculative(&this.branch_history);
        for history_bit in history_bits {
            history <<= 1;
            if history_bit {
                history |= 1;
            }
        }
        // cheap non-cryptographic mixing of history and pc
        let mut t = history;
        t ^= t.rotate_left(5) & !pc.rotate_right(3);
        t ^= pc;
        t ^= !t.rotate_left(2) & t.rotate_left(4);
        // XOR-fold all 64 bits of `t` down into the low BRANCH_PREDICTOR_LOG2_SIZE
        // bits. Bug fix: this previously iterated
        // `(0..BRANCH_PREDICTOR_LOG2_SIZE).step_by(BRANCH_PREDICTOR_LOG2_SIZE)`,
        // which runs exactly once with `i == 0`, so `retval` was just `t` and the
        // high 56 bits never influenced the index (the final `%` discarded them).
        let mut retval = 0;
        for i in (0..u64::BITS as usize).step_by(BRANCH_PREDICTOR_LOG2_SIZE) {
            retval ^= t >> i;
        }
        retval as usize % BRANCH_PREDICTOR_SIZE
    }
}
impl SimValueDefault for NextPcState<PhantomConst<CpuConfig>> {
// pre-reset value: sub-states use their own defaults, scalars use all-ones
// so the per-cycle reset progress is visible in traces
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
let Self {
speculative_call_stack,
non_speculative_call_stack,
branch_target_buffer,
branch_history,
branch_predictor: _,
fetching_queue,
pc: _,
fetch_block_id: _,
config,
} = self;
#[hdl(sim)]
Self {
speculative_call_stack: speculative_call_stack.sim_value_default(),
non_speculative_call_stack: non_speculative_call_stack.sim_value_default(),
branch_target_buffer: branch_target_buffer.sim_value_default(),
branch_history: branch_history.sim_value_default(),
// use something other than the default so you can see the reset progress
branch_predictor: std::array::from_fn(|_| {
BranchPredictionState::towards_not_taken(&BranchPredictionState.sim_value_default())
}),
fetching_queue: fetching_queue.sim_value_default(),
// use something other than the default so you can see the reset progress
pc: !0u64,
// use something other than the default so you can see the reset progress
fetch_block_id: !0u8,
config,
}
}
}
impl<C: Type + PhantomConstGet<CpuConfig>> ResetSteps for NextPcState<C> {
// scalars reset immediately; array-backed sub-states step in parallel and the
// overall status is Done only once every sub-state is Done
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let NextPcState::<C> {
speculative_call_stack,
non_speculative_call_stack,
branch_target_buffer,
branch_history,
branch_predictor,
fetching_queue,
pc,
fetch_block_id,
config: _,
} = this;
**pc = 0u64.into(); // match Microwatt's reset PC
**fetch_block_id = 0u8.into();
let speculative_call_stack = ResetSteps::reset_step(speculative_call_stack, step);
let non_speculative_call_stack = ResetSteps::reset_step(non_speculative_call_stack, step);
let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, step);
let branch_history = ResetSteps::reset_step(branch_history, step);
let branch_predictor = ResetSteps::reset_step(branch_predictor, step);
let fetching_queue = ResetSteps::reset_step(fetching_queue, step);
speculative_call_stack
.and(non_speculative_call_stack)
.and(branch_target_buffer)
.and(branch_history)
.and(branch_predictor)
.and(fetching_queue)
}
}
/// The `next_pc` stage as an extern (simulation-only) module: issues predicted
/// fetch PCs to `fetch` and (eventually) consumes `post_decode` feedback.
/// Still WIP — see TODOs below.
#[hdl_module(extern)]
pub fn next_pc(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let to_fetch: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
m.output(NextPcToFetchInterface[config]);
#[hdl]
let from_decode: DecodeToPostDecodeInterface<PhantomConst<CpuConfig>> =
m.input(DecodeToPostDecodeInterface[config]);
// internal state is exposed as an output purely for debugging/tracing
#[hdl]
let state_for_debug: NextPcState<PhantomConst<CpuConfig>> = m.output(NextPcState[config]);
m.register_clock_for_past(cd.clk);
/// Simulation body: runs the multi-cycle reset sequence, then spawns the
/// steady-state processes.
#[hdl]
async fn run(
scope: ForkJoinScope<'_>,
mut sim: ExternModuleSimulationState,
cd: Expr<ClockDomain>,
to_fetch: Expr<NextPcToFetchInterface<PhantomConst<CpuConfig>>>,
from_decode: Expr<DecodeToPostDecodeInterface<PhantomConst<CpuConfig>>>,
state_expr: Expr<NextPcState<PhantomConst<CpuConfig>>>,
) {
let config = state_expr.config.ty();
let mut state = sim.read(state_expr).await;
// reset phase: apply one ResetSteps step per clock edge until all
// sub-states report Done
for step in 0usize.. {
sim.write(state_expr, state).await;
sim.wait_for_clock_edge(cd.clk).await;
state = sim.read_past(state_expr, cd.clk).await;
let reset_status = ResetSteps::reset_step(&mut state, step);
match reset_status {
ResetStatus::Done => break,
ResetStatus::Working => {}
}
}
// output process: combinationally drive to_fetch.data from the current
// state whenever it changes (no data while the fetching queue is full)
scope.spawn_detached(|_, mut sim: ExternModuleSimulationState| async move {
loop {
let state = sim.read(state_expr).await;
if Queue::is_full(&state.fetching_queue) {
sim.write(to_fetch.inner.data, HdlNone()).await;
} else {
sim.write(
to_fetch.inner.data,
HdlSome(
#[hdl(sim)]
NextPcToFetchInterfaceInner {
next_fetch_pc: NextPcState::next_fetch_pc(&state),
fetch_block_id: state.fetch_block_id,
in_progress_fetches_to_cancel: 0u8, // TODO: implement
},
),
)
.await;
}
sim.wait_for_changes([state_expr], None).await;
}
});
// main per-clock process: when fetch accepted a request, record it in the
// fetching queue and compute the next pc (BTB hit => predicted target or
// fallthrough, miss => sequential next fetch block)
scope.spawn_detached(|_, mut sim: ExternModuleSimulationState| async move {
loop {
sim.write(state_expr, state).await;
sim.wait_for_clock_edge(cd.clk).await;
state = sim.read_past(state_expr, cd.clk).await;
let next_fetch_pc = NextPcState::next_fetch_pc(&state);
if Queue::is_full(&state.fetching_queue) {
continue;
}
if sim.read_past_bool(to_fetch.inner.ready, cd.clk).await {
let fetch_block_id =
FetchBlockIdInt::try_from(state.fetch_block_id.to_bigint())
.expect("in range");
// TODO: handle instructions not aligned with fetch blocks
// default: sequential fetch (next fetch block)
let mut new_pc =
next_fetch_pc.wrapping_add(config.get().fetch_width_in_bytes() as u64);
// linear scan of the (small, fully-associative) BTB
for entry in &state.branch_target_buffer.branch_pc_to_target_map {
#[hdl(sim)]
match entry {
HdlNone => continue,
HdlSome(entry) => {
if entry.start_pc == state.pc {
new_pc = if BTBEntryAddrKind::taken(&entry.addr_kind) {
BTBEntry::taken_pc(entry)
} else {
BTBEntry::not_taken_fetch_pc(entry)
};
break;
}
}
}
}
Queue::try_push(
&mut state.fetching_queue,
#[hdl(sim)]
FetchQueueEntry { fetch_block_id },
)
.expect("checked is_full above");
// TODO: insert pipeline stage between BTB and branch predictor
*state.pc = new_pc.into();
*state.fetch_block_id = fetch_block_id.wrapping_add(1).into();
}
}
});
// TODO: finish
}
m.extern_module_simulation_fn(
(cd, to_fetch, from_decode, state_for_debug),
|(cd, to_fetch, from_decode, state_for_debug), mut sim| async move {
sim.write(state_for_debug, state_for_debug.ty().sim_value_default())
.await;
// `resettable` re-runs the body whenever reset asserts; the first
// closure drives the outputs' reset values
sim.resettable(
cd,
|mut sim: ExternModuleSimulationState| async move {
sim.write(to_fetch.inner.data, HdlNone()).await;
sim.write(from_decode.inner.ready, false).await;
},
|mut sim: ExternModuleSimulationState, ()| async move {
sim.fork_join_scope(|scope, sim| {
run(scope, sim, cd, to_fetch, from_decode, state_for_debug)
})
.await
},
)
.await;
},
);
}

View file

@ -22,6 +22,18 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
len: 0u8.cast_to(self.len),
}
}
/// Creates an empty (len = 0) simulation value, filling every backing slot
/// with `uninit_element` so unused slots hold a defined value.
#[hdl]
pub fn new_sim(self, uninit_element: impl ToSimValueWithType<T>) -> SimValue<Self> {
let uninit_element = uninit_element.into_sim_value_with_type(self.element());
#[hdl(sim)]
ArrayVec::<_, _> {
elements: SimValue::from_array_elements(
self.elements,
(0..self.elements.len()).map(|_| uninit_element.clone()),
),
len: 0u8.cast_to(self.len),
}
}
pub fn element(self) -> T {
self.elements.element()
}
@ -52,6 +64,9 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
pub fn len(this: impl ToExpr<Type = Self>) -> Expr<Length<N>> {
this.to_expr().len
}
/// Borrows the current length of a simulation-time `ArrayVec` value.
pub fn len_sim(this: &SimValue<Self>) -> &SimValue<Length<N>> {
&this.len
}
pub fn is_empty(this: impl ToExpr<Type = Self>) -> Expr<Bool> {
let len = Self::len(this);
len.cmp_eq(0u8)
@ -75,6 +90,62 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
}
}
}
/// Calls `f(index, element)` for each of the first `len` elements, consuming
/// the value. Elements at or beyond `len` are skipped.
#[hdl]
pub async fn for_each_sim(
this: impl ToSimValue<Type = Self>,
mut f: impl AsyncFnMut(usize, SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this.into_sim_value();
for (index, element) in elements.into_iter().enumerate() {
if index.cmp_lt(*len) {
f(index, element).await;
}
}
}
/// Like [`Self::for_each_sim`], but borrows the elements immutably.
#[hdl]
pub async fn for_each_sim_ref<'a>(
this: &'a SimValue<Self>,
mut f: impl AsyncFnMut(usize, &'a SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
for (index, element) in elements.iter().enumerate() {
if index.cmp_lt(**len) {
f(index, element).await;
}
}
}
/// Like [`Self::for_each_sim`], but borrows the elements mutably.
#[hdl]
pub async fn for_each_sim_mut<'a>(
this: &'a mut SimValue<Self>,
mut f: impl AsyncFnMut(usize, &'a mut SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
for (index, element) in elements.iter_mut().enumerate() {
if index.cmp_lt(**len) {
f(index, element).await;
}
}
}
/// Appends `value` at simulation time, returning it back as `Err` when the
/// vector is already at capacity.
#[hdl]
pub fn try_push_sim(
this: &mut SimValue<Self>,
value: impl ToSimValueWithType<T>,
) -> Result<(), SimValue<T>> {
let value = value.into_sim_value_with_type(this.ty().element());
let capacity = this.ty().capacity();
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
if **len < capacity {
elements[**len] = value;
**len += 1;
Ok(())
} else {
Err(value)
}
}
pub fn mapped_ty<U: Type>(self, new_element_ty: U) -> ArrayVec<U, N> {
ArrayVec {
elements: ArrayType[new_element_ty][N::from_usize(self.elements.len())],
@ -100,10 +171,8 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
pub fn as_array_of_options(this: impl ToExpr<Type = Self>) -> Expr<ArrayType<HdlOption<T>, N>> {
let this = this.to_expr();
#[hdl]
let array_vec_as_array_of_options = wire(
ArrayType[HdlOption[this.ty().element()]]
[N::from_usize(this.ty().capacity())],
);
let array_vec_as_array_of_options =
wire(ArrayType[HdlOption[this.ty().element()]][N::from_usize(this.ty().capacity())]);
for element in array_vec_as_array_of_options {
connect(element, element.ty().HdlNone());
}

File diff suppressed because it is too large Load diff

338
crates/cpu/tests/next_pc.rs Normal file
View file

@ -0,0 +1,338 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use cpu::{
config::{CpuConfig, UnitConfig},
next_pc::{
DecodeToPostDecodeInterface, DecodeToPostDecodeInterfaceInner, FETCH_BLOCK_ID_WIDTH,
NextPcToFetchInterface, NextPcToFetchInterfaceInner, WipDecodedInsn, WipDecodedInsnKind,
next_pc,
},
unit::UnitKind,
util::array_vec::ArrayVec,
};
use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter};
use std::{
cell::Cell,
collections::{BTreeMap, VecDeque},
num::NonZeroUsize,
};
/// A fake instruction used to drive `next_pc` in tests.
#[derive(Copy, Clone, Debug)]
enum MockInsn {
    Nop4,
    Jump { target: u64 },
    CondBranch { target: u64 },
    Call { target: u64 },
    Ret,
}
impl MockInsn {
    /// Encoded size in bytes — every mock instruction is 4 bytes long.
    fn byte_len(self) -> u64 {
        match self {
            MockInsn::Nop4
            | MockInsn::Jump { .. }
            | MockInsn::CondBranch { .. }
            | MockInsn::Call { .. }
            | MockInsn::Ret => 4,
        }
    }
}
/// A tiny fixed mock program, keyed by instruction PC.
#[derive(Debug)]
struct MockInsns {
    insns: BTreeMap<u64, MockInsn>,
}
impl MockInsns {
    /// Builds the fixed test program (a nop prologue, a backwards conditional
    /// branch, a call, some jumps, and a return).
    fn new() -> Self {
        let insns = BTreeMap::from_iter([
            (0x0, MockInsn::Nop4),
            (0x4, MockInsn::Nop4),
            (0x8, MockInsn::CondBranch { target: 0x4 }),
            (0xC, MockInsn::Call { target: 0x18 }),
            (0x10, MockInsn::Jump { target: 0x10 }),
            (0x14, MockInsn::Jump { target: 0x10 }),
            (0x18, MockInsn::Jump { target: 0x1C }),
            (0x1C, MockInsn::Ret),
        ]);
        Self { insns }
    }
    /// Iterates the instructions that lie entirely inside `pc_range`
    /// (an instruction whose end would cross `pc_range.end` is excluded).
    fn fetch_block(&self, pc_range: std::ops::Range<u64>) -> impl Iterator<Item = (u64, MockInsn)> {
        let end = pc_range.end;
        self.insns
            .range(pc_range)
            .map(|(&pc, &insn)| (pc, insn))
            .filter(move |&(pc, insn)| pc + insn.byte_len() <= end)
    }
}
// maximum number of fetches the mock pipe holds in flight at once
const FETCH_PIPE_QUEUE_SIZE: usize = 5;
// trap "vector" reported when a fetch block decodes to zero instructions
const DEMO_ILLEGAL_INSN_TRAP: u64 = 0xFF000000u64;
/// One in-flight fetch inside the mock fetch pipe, with a countdown of cycles
/// until its decoded instructions become available.
#[hdl]
struct FetchPipeQueueEntry {
fetch_pc: UInt<64>,
cycles_left: UInt<8>,
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
}
impl FetchPipeQueueEntry {
/// All-zero simulation value (used to fill unused debug-queue slots).
#[hdl]
fn default_sim(self) -> SimValue<Self> {
#[hdl(sim)]
FetchPipeQueueEntry {
fetch_pc: 0u64,
cycles_left: 0u8,
fetch_block_id: 0u8,
}
}
/// Deterministic pseudo-random fetch latency (0..8 cycles) derived from and
/// advancing `delay_sequence_index`.
fn get_next_delay(delay_sequence_index: &Cell<u64>) -> u8 {
let index = delay_sequence_index.get();
delay_sequence_index.set(delay_sequence_index.get().wrapping_add(1));
// make a pseudo-random number deterministically based on index
let random = index
.wrapping_add(1)
.wrapping_mul(0x18C49126EABE7A0D) // random prime
.rotate_left(32)
.wrapping_mul(0x92B38C197608A6B) // random prime
.rotate_right(60);
(random % 8) as u8
}
}
/// Mock fetch+decode pipeline: accepts fetch requests from `next_pc`, holds
/// each for a pseudo-random number of cycles, then emits decoded instructions
/// from the [`MockInsns`] program to the post-decode interface.
#[hdl_module(extern)]
fn mock_fetch_pipe(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let from_fetch: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
m.input(NextPcToFetchInterface[config]);
#[hdl]
let to_post_decode: DecodeToPostDecodeInterface<PhantomConst<CpuConfig>> =
m.output(DecodeToPostDecodeInterface[config]);
// in-flight fetches, mirrored out as a module output for waveform debugging
#[hdl]
let queue_debug: ArrayVec<FetchPipeQueueEntry, ConstUsize<{ FETCH_PIPE_QUEUE_SIZE }>> =
m.output();
m.register_clock_for_past(cd.clk);
m.extern_module_simulation_fn(
(cd, from_fetch, to_post_decode, queue_debug),
|(cd, from_fetch, to_post_decode, queue_debug), mut sim| async move {
// intentionally have a different sequence each time we're reset
let delay_sequence_index = Cell::new(0);
sim.resettable(
cd,
// reset values for all driven outputs
async |mut sim| {
sim.write(from_fetch.inner.ready, false).await;
sim.write(
to_post_decode.inner.data,
to_post_decode.ty().inner.data.HdlNone(),
)
.await;
sim.write(
queue_debug,
queue_debug.ty().new_sim(FetchPipeQueueEntry.default_sim()),
)
.await;
},
|sim, ()| {
run_fn(
cd,
from_fetch,
to_post_decode,
queue_debug,
&delay_sequence_index,
sim,
)
},
)
.await;
},
);
/// Steady-state simulation loop; one iteration per clock cycle.
#[hdl]
async fn run_fn(
cd: Expr<ClockDomain>,
from_fetch: Expr<NextPcToFetchInterface<PhantomConst<CpuConfig>>>,
to_post_decode: Expr<DecodeToPostDecodeInterface<PhantomConst<CpuConfig>>>,
queue_debug: Expr<ArrayVec<FetchPipeQueueEntry, ConstUsize<{ FETCH_PIPE_QUEUE_SIZE }>>>,
delay_sequence_index: &Cell<u64>,
mut sim: ExternModuleSimulationState,
) {
let config = from_fetch.config.ty();
let mock_insns = MockInsns::new();
// host-side model of the in-flight fetch queue
let mut queue: VecDeque<SimValue<FetchPipeQueueEntry>> = VecDeque::new();
let mut next_id = 0u32;
loop {
// mirror the host-side queue into the debug output
let mut sim_queue = queue_debug.ty().new_sim(FetchPipeQueueEntry.default_sim());
for entry in &queue {
ArrayVec::try_push_sim(&mut sim_queue, entry)
.ok()
.expect("queue is known to be small enough");
}
sim.write(queue_debug, sim_queue).await;
// when the oldest fetch has finished its delay, decode its fetch block
if let Some(front) = queue.front().filter(|v| v.cycles_left.as_int() == 0) {
#[hdl(sim)]
let FetchPipeQueueEntry {
fetch_pc,
cycles_left: _,
fetch_block_id,
} = front;
let fetch_pc = fetch_pc.as_int();
// end of the fetch block containing fetch_pc
let fetch_end =
(fetch_pc + 1).next_multiple_of(config.get().fetch_width_in_bytes() as u64);
let insns = to_post_decode.ty().inner.data.HdlSome.insns;
let zeroed_insn = UInt[insns.element().canonical().bit_width()]
.zero()
.cast_bits_to(insns.element());
let mut insns = insns.new_sim(zeroed_insn);
// TODO: handle instructions that go past the end of a fetch block
for (pc, insn) in mock_insns.fetch_block(fetch_pc..fetch_end) {
let kind = match insn {
MockInsn::Nop4 => WipDecodedInsnKind.NonBranch(),
MockInsn::Jump { target } => WipDecodedInsnKind.Branch(target),
MockInsn::CondBranch { target } => WipDecodedInsnKind.BranchCond(target),
MockInsn::Call { target } => WipDecodedInsnKind.Call(target),
MockInsn::Ret => WipDecodedInsnKind.Ret(),
};
let insn = #[hdl(sim)]
WipDecodedInsn {
fetch_block_id,
id: next_id.cast_to_static::<UInt<_>>(),
pc,
size_in_bytes: insn.byte_len().cast_to_static::<UInt<_>>(),
kind,
};
match ArrayVec::try_push_sim(&mut insns, insn) {
Ok(()) => next_id = next_id.wrapping_add(1),
Err(_) => break,
}
}
// a fetch block with no decodable instructions produces a synthetic
// illegal-instruction trap entry instead of an empty block
if **ArrayVec::len_sim(&insns) == 0 {
let Ok(()) = ArrayVec::try_push_sim(
&mut insns,
#[hdl(sim)]
WipDecodedInsn {
fetch_block_id,
id: next_id.cast_to_static::<UInt<_>>(),
pc: fetch_pc,
size_in_bytes: 0u8.cast_to_static::<UInt<_>>(),
kind: WipDecodedInsnKind.Interrupt(DEMO_ILLEGAL_INSN_TRAP),
},
) else {
unreachable!();
};
next_id = next_id.wrapping_add(1);
}
sim.write(
to_post_decode.inner.data,
HdlSome(
#[hdl(sim)]
DecodeToPostDecodeInterfaceInner::<_> {
fetch_block_id,
insns,
config,
},
),
)
.await;
} else {
sim.write(
to_post_decode.inner.data,
to_post_decode.ty().inner.data.HdlNone(),
)
.await;
}
// accept new fetch requests only while there's queue space
sim.write(from_fetch.inner.ready, queue.len() < FETCH_PIPE_QUEUE_SIZE)
.await;
sim.wait_for_clock_edge(cd.clk).await;
// retire the front entry once post_decode consumed its data
if sim.read_past_bool(to_post_decode.inner.ready, cd.clk).await {
#[hdl(sim)]
if let HdlSome(_) = sim.read_past(to_post_decode.inner.data, cd.clk).await {
queue.pop_front();
}
}
// advance every in-flight fetch's delay countdown
for entry in &mut queue {
if entry.cycles_left.as_int() > 0 {
entry.cycles_left = (entry.cycles_left.as_int() - 1u8).to_sim_value();
}
}
if !sim.read_past_bool(from_fetch.inner.ready, cd.clk).await {
continue;
}
// accept a new fetch request, honoring any requested cancellations
#[hdl(sim)]
if let HdlSome(inner) = sim.read_past(from_fetch.inner.data, cd.clk).await {
#[hdl(sim)]
let NextPcToFetchInterfaceInner {
next_fetch_pc,
fetch_block_id,
in_progress_fetches_to_cancel,
} = &inner;
// cancel in-progress fetches from newest to oldest
for _ in 0..in_progress_fetches_to_cancel.as_int() {
let _ = queue.pop_back();
}
queue.push_back(
#[hdl(sim)]
FetchPipeQueueEntry {
fetch_pc: next_fetch_pc,
cycles_left: FetchPipeQueueEntry::get_next_delay(delay_sequence_index),
fetch_block_id,
},
);
}
}
}
}
/// Device-under-test: `next_pc` wired back-to-back with the mock fetch pipe,
/// sharing one clock domain.
#[hdl_module]
fn dut(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let next_pc = instance(next_pc(config));
connect(next_pc.cd, cd);
#[hdl]
let mock_fetch_pipe = instance(mock_fetch_pipe(config));
connect(mock_fetch_pipe.cd, cd);
// next_pc -> fetch pipe -> back into next_pc's post-decode input
connect(mock_fetch_pipe.from_fetch, next_pc.to_fetch);
connect(next_pc.from_decode, mock_fetch_pipe.to_post_decode);
}
/// Golden-trace test: simulate 300 clock cycles of the DUT and compare the
/// generated VCD against a checked-in expected trace.
#[hdl]
#[test]
fn test_next_pc() {
let _n = SourceLocation::normalize_files_for_tests();
let mut config = CpuConfig::new(
vec![
UnitConfig::new(UnitKind::AluBranch),
UnitConfig::new(UnitKind::AluBranch),
],
NonZeroUsize::new(20).unwrap(),
);
// two instructions per fetch group for this test
config.fetch_width = NonZeroUsize::new(2).unwrap();
let m = dut(PhantomConst::new_sized(config));
let mut sim = Simulation::new(m);
let mut writer = RcWriter::default();
sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
sim.write_clock(sim.io().cd.clk, false);
// hold reset through the first rising edge, then release
sim.write_reset(sim.io().cd.rst, true);
for _cycle in 0..300 {
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, true);
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, false);
}
// FIXME: vcd is just whatever next_pc does now, which isn't known to be correct
let vcd = String::from_utf8(writer.take()).unwrap();
println!("####### VCD:\n{vcd}\n#######");
if vcd != include_str!("expected/next_pc.vcd") {
panic!();
}
}