From 3054f0cb4144fa45c272e7125c1013fddbea7b41 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 10 Mar 2026 23:43:18 +0000
Subject: [PATCH 01/13] Initial plan


From 334df71b1198fcbfb70204db5910918b512056c3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 00:05:26 +0000
Subject: [PATCH 02/13] Add nseq_parith.h and nseq_parikh.cpp: Parikh filter
 for ZIPT string solver

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/CMakeLists.txt  |   1 +
 src/smt/seq/nseq_parikh.cpp | 274 ++++++++++++++++++++++++++++++++++++
 src/smt/seq/nseq_parith.h   | 112 +++++++++++++++
 3 files changed, 387 insertions(+)
 create mode 100644 src/smt/seq/nseq_parikh.cpp
 create mode 100644 src/smt/seq/nseq_parith.h
diff --git a/src/smt/seq/CMakeLists.txt b/src/smt/seq/CMakeLists.txt
index db63e4c6f..9ea9c4bab 100644
--- a/src/smt/seq/CMakeLists.txt
+++ b/src/smt/seq/CMakeLists.txt
@@ -1,5 +1,6 @@
 z3_add_component(smt_seq
   SOURCES
+    nseq_parikh.cpp
     seq_nielsen.cpp
   COMPONENT_DEPENDENCIES
     euf
diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
new file mode 100644
index 000000000..9bc2f4bcd
--- /dev/null
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -0,0 +1,274 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    nseq_parikh.cpp
+
+Abstract:
+
+    Parikh image filter implementation for the ZIPT-based Nielsen string
+    solver.  See nseq_parith.h for the full design description.
+
+    The key operation is compute_length_stride(re), which performs a
+    structural traversal of the regex to find the period k such that all
+    string lengths in L(re) are congruent to min_length(re) modulo k.
+    The stride is used to generate modular length constraints that help
+    the integer subsolver prune infeasible Nielsen graph nodes.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-10
+
+--*/
+
+#include "smt/seq/nseq_parith.h"
+#include "ast/arith_decl_plugin.h"
+#include "ast/seq_decl_plugin.h"
+#include <string>
+
+namespace seq {
+
+    // -----------------------------------------------------------------------
+    // Helpers
+    // -----------------------------------------------------------------------
+
+    // GCD via Euclidean algorithm.  gcd(0, x) = x, gcd(0, 0) = 0.
+    unsigned nseq_parith::gcd(unsigned a, unsigned b) {
+        if (a == 0 && b == 0) return 0;
+        while (b != 0) {
+            unsigned t = b;
+            b = a % b;
+            a = t;
+        }
+        return a;
+    }
+
+    nseq_parith::nseq_parith(euf::sgraph& sg)
+        : m_sg(sg), m_fresh_cnt(0) {}
+
+    expr_ref nseq_parith::mk_fresh_int_var() {
+        ast_manager& m = m_sg.get_manager();
+        arith_util arith(m);
+        std::string name = "pk!" + std::to_string(m_fresh_cnt++);
+        return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
+    }
+
+    // -----------------------------------------------------------------------
+    // Stride computation
+    // -----------------------------------------------------------------------
+
+    // compute_length_stride: structural traversal of regex expression.
+    //
+    // Return value semantics:
+    //   0 — fixed length (or empty language): no modular constraint needed
+    //         beyond the min == max bounds.
+    //   1 — all integer lengths ≥ min_len are achievable: no useful modular
+    //         constraint.
+    //   k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k):
+    //         modular constraint len(str) = min_len + k·j is useful.
+    unsigned nseq_parith::compute_length_stride(expr* re) {
+        if (!re) return 1;
+
+        seq_util& seq = m_sg.get_seq_util();
+        expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
+        unsigned lo = 0, hi = 0;
+
+        // Empty language: no strings exist; stride is irrelevant.
+        if (seq.re.is_empty(re))
+            return 0;
+
+        // Epsilon regex {""}: single fixed length 0.
+        if (seq.re.is_epsilon(re))
+            return 0;
+
+        // to_re(concrete_string): fixed-length, no modular constraint needed.
+        if (seq.re.is_to_re(re, s)) {
+            // min_length == max_length, covered by bounds.
+            return 0;
+        }
+
+        // Single character: range, full_char — fixed length 1.
+        if (seq.re.is_range(re) || seq.re.is_full_char(re))
+            return 0;
+
+        // full_seq (.* / Σ*): every length ≥ 0 is possible.
+        if (seq.re.is_full_seq(re))
+            return 1;
+
+        // r* — Kleene star.
+        // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
+        // If r has a fixed length k, then L(r*) = {0, k, 2k, ...} → stride k.
+        // If r has variable length, strides from different iterations combine
+        // by GCD.
+        if (seq.re.is_star(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            // When the body has unbounded length (mx == UINT_MAX), different
+            // iterations can produce many different lengths, and the stride of
+            // the star as a whole degenerates to gcd(mn, mn) = mn (or 1 if
+            // mn == 1).  This is conservative: we use the body's min-length
+            // as the only available fixed quantity.
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx) {
+                // Fixed-length body: L(r*) = {0, mn, 2·mn, ...} → stride = mn.
+                // When mn == 1 the stride would be 1, which gives no useful
+                // modular constraint, so return 0 instead.
+                return (mn > 1) ? mn : 0;
+            }
+            // Variable-length body: GCD of min and max lengths
+            return gcd(mn, mx);
+        }
+
+        // r+ — one or more: same stride analysis as r*.
+        if (seq.re.is_plus(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return gcd(mn, mx);
+        }
+
+        // r? — zero or one: lengths = {0} ∪ lengths(r)
+        // stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length).
+        if (seq.re.is_opt(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned inner = compute_length_stride(r1);
+            // L(r?) includes length 0 and all lengths of L(r).
+            // GCD(stride(r), min_len(r)) is a valid stride because:
+            //   - the gap from 0 to min_len(r) is min_len(r) itself, and
+            //   - subsequent lengths grow in steps governed by stride(r).
+            // A result > 1 gives a useful modular constraint; result == 1
+            // means every non-negative integer is achievable (no constraint).
+            if (inner == 0)
+                return gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
+            return gcd(inner, mn);
+        }
+
+        // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
+        if (seq.re.is_concat(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            // 0 (fixed) on either side: result is governed by the other.
+            if (s1 == 0 && s2 == 0) return 0;
+            if (s1 == 0) return s2;
+            if (s2 == 0) return s1;
+            return gcd(s1, s2);
+        }
+
+        // union(r1, r2): lengths from either branch → need GCD of both
+        // strides and the difference between their minimum lengths.
+        if (seq.re.is_union(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            unsigned m1 = seq.re.min_length(r1);
+            unsigned m2 = seq.re.min_length(r2);
+            unsigned d  = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
+            // Replace 0-strides with d for GCD computation:
+            // a fixed-length branch only introduces constraint via its offset.
+            unsigned g = gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
+            g = gcd(g, d);
+            return g;
+        }
+
+        // loop(r, lo, hi): lengths = {lo·len(r), ..., hi·len(r)} if r is fixed.
+        // stride = len(r) when r is fixed-length; otherwise GCD.
+        if (seq.re.is_loop(re, r1, lo, hi)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return gcd(mn, mx);
+        }
+        if (seq.re.is_loop(re, r1, lo)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return gcd(mn, mx);
+        }
+
+        // intersection(r1, r2): lengths must be in both languages.
+        // A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid
+        // stride for the intersection (every length in the intersection is
+        // also in r1 and in r2).
+        if (seq.re.is_intersection(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            if (s1 == 0 && s2 == 0) return 0;
+            if (s1 == 0) return s2;
+            if (s2 == 0) return s1;
+            return gcd(s1, s2);
+        }
+
+        // For complement, diff, reverse, derivative, of_pred, and anything
+        // else we cannot analyse statically: be conservative and return 1
+        // (no useful modular constraint rather than an unsound one).
+        return 1;
+    }
+
+    // -----------------------------------------------------------------------
+    // Constraint generation
+    // -----------------------------------------------------------------------
+
+    void nseq_parith::generate_parikh_constraints(str_mem const& mem,
+                                                   vector<int_constraint>& out) {
+        if (!mem.m_regex || !mem.m_str)
+            return;
+
+        ast_manager& m = m_sg.get_manager();
+        seq_util&    seq  = m_sg.get_seq_util();
+        arith_util   arith(m);
+
+        expr* re_expr = mem.m_regex->get_expr();
+        if (!re_expr || !seq.is_re(re_expr))
+            return;
+
+        // Length bounds from the regex.
+        unsigned min_len = seq.re.min_length(re_expr);
+        unsigned max_len = seq.re.max_length(re_expr);
+
+        // If min_len == max_len the bounds already pin the length exactly;
+        // no modular constraint is needed.
+        if (min_len == max_len)
+            return;
+
+        unsigned stride = compute_length_stride(re_expr);
+
+        // stride == 1: every integer length is possible — no useful constraint.
+        // stride == 0: fixed length or empty — handled by bounds.
+        if (stride <= 1)
+            return;
+
+        // Build len(str) as an arithmetic expression.
+        expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m);
+
+        // Introduce fresh integer variable k ≥ 0.
+        expr_ref k_var = mk_fresh_int_var();
+
+        // Constraint 1: len(str) = min_len + stride · k
+        expr_ref min_expr(arith.mk_int(min_len), m);
+        expr_ref stride_expr(arith.mk_int(stride), m);
+        expr_ref stride_k(arith.mk_mul(stride_expr, k_var), m);
+        expr_ref rhs(arith.mk_add(min_expr, stride_k), m);
+        out.push_back(int_constraint(len_str, rhs,
+                                     int_constraint_kind::eq, mem.m_dep, m));
+
+        // Constraint 2: k ≥ 0
+        expr_ref zero(arith.mk_int(0), m);
+        out.push_back(int_constraint(k_var, zero,
+                                     int_constraint_kind::ge, mem.m_dep, m));
+    }
+
+    void nseq_parith::apply_to_node(nielsen_node& node) {
+        vector<int_constraint> constraints;
+        for (str_mem const& mem : node.str_mems())
+            generate_parikh_constraints(mem, constraints);
+        for (auto& ic : constraints)
+            node.add_int_constraint(ic);
+    }
+
+} // namespace seq
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
new file mode 100644
index 000000000..e7f84e247
--- /dev/null
+++ b/src/smt/seq/nseq_parith.h
@@ -0,0 +1,112 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    nseq_parith.h
+
+Abstract:
+
+    Parikh image filter for the ZIPT-based Nielsen string solver.
+
+    Implements Parikh-based arithmetic constraint generation for
+    nielsen_node instances.  For a regex membership constraint str ∈ r,
+    the Parikh image of r constrains the multiset of characters in str.
+    This module computes the "length stride" (period) of the length
+    language of r and generates modular arithmetic constraints of the form
+
+        len(str) = min_len + stride · k    (k ≥ 0, k fresh integer)
+
+    which tighten the arithmetic subproblem beyond the simple min/max
+    length bounds already produced by nielsen_node::init_var_bounds_from_mems().
+
+    For example:
+      • str ∈ (ab)*  → min_len = 0, stride = 2  → len(str) = 2·k
+      • str ∈ a(bc)* → min_len = 1, stride = 2  → len(str) = 1 + 2·k
+      • str ∈ ab|abc → stride = 1 (no useful modular constraint)
+
+    The generated int_constraints are added to the node's integer constraint
+    set and discharged by the integer subsolver (see seq_nielsen.h,
+    simple_solver).
+
+    Implements the Parikh filter described in ZIPT
+    (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
+    replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-10
+
+--*/
+#pragma once
+
+#include "ast/euf/euf_sgraph.h"
+#include "smt/seq/seq_nielsen.h"
+
+namespace seq {
+
+    /**
+     * Parikh image filter: generates modular length constraints from
+     * regex membership constraints in a nielsen_node.
+     *
+     * Usage:
+     *   nseq_parith parith(sg);
+     *   parith.apply_to_node(node);      // adds constraints to node
+     *
+     * Or per-membership:
+     *   vector<int_constraint> out;
+     *   parith.generate_parikh_constraints(mem, out);
+     */
+    class nseq_parith {
+        euf::sgraph& m_sg;
+        unsigned     m_fresh_cnt;  // counter for fresh variable names
+
+        // Compute GCD of a and b.  gcd(0, x) = x by convention.
+        // Returns 0 only when both arguments are 0.
+        static unsigned gcd(unsigned a, unsigned b);
+
+        // Compute the stride (period) of the length language of a regex.
+        //
+        // The stride k satisfies: all lengths in L(re) are congruent to
+        // min_length(re) modulo k.  A stride of 1 means every integer
+        // length is possible (no useful modular constraint).  A stride of
+        // 0 is a sentinel meaning the language is empty or has a single
+        // fixed length (already captured by bounds).
+        //
+        // Examples:
+        //   stride(to_re("ab"))   = 0  (fixed length 2)
+        //   stride((ab)*)         = 2  (lengths 0, 2, 4, ...)
+        //   stride((abc)*)        = 3  (lengths 0, 3, 6, ...)
+        //   stride(a*b*)          = 1  (all lengths possible)
+        //   stride((ab)*(cd)*)    = 2  (lengths 0, 2, 4, ...)
+        //   stride((ab)*|(abc)*)  = 1  (lengths 0, 2, 3, 4, ...)
+        unsigned compute_length_stride(expr* re);
+
+        // Create a fresh integer variable (name "pk!N") for use as the
+        // Parikh multiplier variable k in len(str) = min_len + stride·k.
+        expr_ref mk_fresh_int_var();
+
+    public:
+        explicit nseq_parith(euf::sgraph& sg);
+
+        // Generate Parikh modular length constraints for one membership.
+        //
+        // When stride > 1 and min_len < max_len (bounds don't pin length):
+        //   adds: len(str) = min_len + stride · k   (equality)
+        //         k ≥ 0                              (non-negativity)
+        // These tighten the integer constraint set for the subsolver.
+        // Dependencies are copied from mem.m_dep.
+        void generate_parikh_constraints(str_mem const& mem,
+                                         vector<int_constraint>& out);
+
+        // Apply Parikh constraints to all memberships at a node.
+        // Calls generate_parikh_constraints for each str_mem in the node
+        // and appends the resulting int_constraints to node.int_constraints().
+        void apply_to_node(nielsen_node& node);
+
+        // Compute the length stride of a regex expression.
+        // Exposed for testing and external callers.
+        unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
+    };
+
+} // namespace seq

From d267e452a2402bb8449823fdd6536f56993da5ee Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 04:21:30 +0000
Subject: [PATCH 03/13] Add Clemens Eisenhofer as co-author to nseq_parith.h
 and nseq_parikh.cpp

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/nseq_parikh.cpp | 1 +
 src/smt/seq/nseq_parith.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
index 9bc2f4bcd..4ce843052 100644
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -18,6 +18,7 @@ Abstract:
 
 Author:
 
+    Clemens Eisenhofer 2026-03-10
     Nikolaj Bjorner (nbjorner) 2026-03-10
 
 --*/
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
index e7f84e247..719efa5d9 100644
--- a/src/smt/seq/nseq_parith.h
+++ b/src/smt/seq/nseq_parith.h
@@ -35,6 +35,7 @@ Abstract:
 
 Author:
 
+    Clemens Eisenhofer 2026-03-10
     Nikolaj Bjorner (nbjorner) 2026-03-10
 
 --*/

From 4cdfceabc5c7dbbd10072f418c7b96db433f953b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 04:24:16 +0000
Subject: [PATCH 04/13] Use u_gcd from util/mpz.h instead of local gcd
 definition

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/nseq_parikh.cpp | 36 +++++++++++-------------------------
 src/smt/seq/nseq_parith.h   |  4 ----
 2 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
index 4ce843052..a6d1d20e5 100644
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -26,25 +26,11 @@ Author:
 #include "smt/seq/nseq_parith.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/seq_decl_plugin.h"
+#include "util/mpz.h"
 #include <string>
 
 namespace seq {
 
-    // -----------------------------------------------------------------------
-    // Helpers
-    // -----------------------------------------------------------------------
-
-    // GCD via Euclidean algorithm.  gcd(0, x) = x, gcd(0, 0) = 0.
-    unsigned nseq_parith::gcd(unsigned a, unsigned b) {
-        if (a == 0 && b == 0) return 0;
-        while (b != 0) {
-            unsigned t = b;
-            b = a % b;
-            a = t;
-        }
-        return a;
-    }
-
     nseq_parith::nseq_parith(euf::sgraph& sg)
         : m_sg(sg), m_fresh_cnt(0) {}
 
@@ -118,7 +104,7 @@ namespace seq {
                 return (mn > 1) ? mn : 0;
             }
             // Variable-length body: GCD of min and max lengths
-            return gcd(mn, mx);
+            return u_gcd(mn, mx);
         }
 
         // r+ — one or more: same stride analysis as r*.
@@ -128,7 +114,7 @@ namespace seq {
             if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
             if (mn == mx)
                 return (mn > 1) ? mn : 0;
-            return gcd(mn, mx);
+            return u_gcd(mn, mx);
         }
 
         // r? — zero or one: lengths = {0} ∪ lengths(r)
@@ -143,8 +129,8 @@ namespace seq {
             // A result > 1 gives a useful modular constraint; result == 1
             // means every non-negative integer is achievable (no constraint).
             if (inner == 0)
-                return gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
-            return gcd(inner, mn);
+                return u_gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
+            return u_gcd(inner, mn);
         }
 
         // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
@@ -155,7 +141,7 @@ namespace seq {
             if (s1 == 0 && s2 == 0) return 0;
             if (s1 == 0) return s2;
             if (s2 == 0) return s1;
-            return gcd(s1, s2);
+            return u_gcd(s1, s2);
         }
 
         // union(r1, r2): lengths from either branch → need GCD of both
@@ -168,8 +154,8 @@ namespace seq {
             unsigned d  = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
             // Replace 0-strides with d for GCD computation:
             // a fixed-length branch only introduces constraint via its offset.
-            unsigned g = gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
-            g = gcd(g, d);
+            unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
+            g = u_gcd(g, d);
             return g;
         }
 
@@ -181,7 +167,7 @@ namespace seq {
             if (mx == UINT_MAX) mx = mn;
             if (mn == mx)
                 return (mn > 1) ? mn : 0;
-            return gcd(mn, mx);
+            return u_gcd(mn, mx);
         }
         if (seq.re.is_loop(re, r1, lo)) {
             unsigned mn = seq.re.min_length(r1);
@@ -189,7 +175,7 @@ namespace seq {
             if (mx == UINT_MAX) mx = mn;
             if (mn == mx)
                 return (mn > 1) ? mn : 0;
-            return gcd(mn, mx);
+            return u_gcd(mn, mx);
         }
 
         // intersection(r1, r2): lengths must be in both languages.
@@ -202,7 +188,7 @@ namespace seq {
             if (s1 == 0 && s2 == 0) return 0;
             if (s1 == 0) return s2;
             if (s2 == 0) return s1;
-            return gcd(s1, s2);
+            return u_gcd(s1, s2);
         }
 
         // For complement, diff, reverse, derivative, of_pred, and anything
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
index 719efa5d9..6f8fc7ae3 100644
--- a/src/smt/seq/nseq_parith.h
+++ b/src/smt/seq/nseq_parith.h
@@ -62,10 +62,6 @@ namespace seq {
         euf::sgraph& m_sg;
         unsigned     m_fresh_cnt;  // counter for fresh variable names
 
-        // Compute GCD of a and b.  gcd(0, x) = x by convention.
-        // Returns 0 only when both arguments are 0.
-        static unsigned gcd(unsigned a, unsigned b);
-
         // Compute the stride (period) of the length language of a regex.
         //
         // The stride k satisfies: all lengths in L(re) are congruent to

From 35ee8f917d02839d675a97b7fd7eb5238ec181c3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 04:26:26 +0000
Subject: [PATCH 05/13] Remove redundant zero-guards before u_gcd calls in
 concat/intersection cases

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/nseq_parikh.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
index a6d1d20e5..9ee4690b8 100644
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -137,10 +137,6 @@ namespace seq {
         if (seq.re.is_concat(re, r1, r2)) {
             unsigned s1 = compute_length_stride(r1);
             unsigned s2 = compute_length_stride(r2);
-            // 0 (fixed) on either side: result is governed by the other.
-            if (s1 == 0 && s2 == 0) return 0;
-            if (s1 == 0) return s2;
-            if (s2 == 0) return s1;
             return u_gcd(s1, s2);
         }
 
@@ -185,9 +181,6 @@ namespace seq {
         if (seq.re.is_intersection(re, r1, r2)) {
             unsigned s1 = compute_length_stride(r1);
             unsigned s2 = compute_length_stride(r2);
-            if (s1 == 0 && s2 == 0) return 0;
-            if (s1 == 0) return s2;
-            if (s2 == 0) return s1;
             return u_gcd(s1, s2);
         }
 

From eca5fcc7bb27a27e2847a3f536ca32ecacd82bf0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 05:10:30 +0000
Subject: [PATCH 06/13] Integrate nseq_parith into nielsen_graph; add k upper
 bound and check_parikh_conflict

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/nseq_parikh.cpp | 71 +++++++++++++++++++++++++++++++++++++
 src/smt/seq/nseq_parith.h   | 17 +++++++++
 src/smt/seq/seq_nielsen.cpp | 31 +++++++++++++++-
 src/smt/seq/seq_nielsen.h   | 19 ++++++++++
 4 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
index 9ee4690b8..13efbd94a 100644
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -241,6 +241,18 @@ namespace seq {
         expr_ref zero(arith.mk_int(0), m);
         out.push_back(int_constraint(k_var, zero,
                                      int_constraint_kind::ge, mem.m_dep, m));
+
+        // Constraint 3 (optional): k ≤ max_k when max_len is bounded.
+        // max_k = floor((max_len - min_len) / stride)
+        // This gives the solver an explicit upper bound on k, which tightens
+        // the search space when combined with other constraints on len(str).
+        if (max_len != UINT_MAX) {
+            unsigned range = max_len - min_len;  // max_len >= min_len here
+            unsigned max_k = range / stride;
+            expr_ref max_k_expr(arith.mk_int(max_k), m);
+            out.push_back(int_constraint(k_var, max_k_expr,
+                                         int_constraint_kind::le, mem.m_dep, m));
+        }
     }
 
     void nseq_parith::apply_to_node(nielsen_node& node) {
@@ -251,4 +263,63 @@ namespace seq {
             node.add_int_constraint(ic);
     }
 
+    // -----------------------------------------------------------------------
+    // Quick Parikh feasibility check (no solver call)
+    // -----------------------------------------------------------------------
+
+    // Returns true if a Parikh conflict is detected: there exists a membership
+    // str ∈ re for a single-variable str where the modular length constraint
+    //   len(str) = min_len + stride * k  (k ≥ 0)
+    // is inconsistent with the variable's current integer bounds [lb, ub].
+    //
+    // This check is lightweight — it uses only modular arithmetic on the already-
+    // known regex min/max lengths and the per-variable bounds stored in the node.
+    bool nseq_parith::check_parikh_conflict(nielsen_node& node) {
+        seq_util& seq = m_sg.get_seq_util();
+
+        for (str_mem const& mem : node.str_mems()) {
+            if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
+                continue;
+
+            expr* re_expr = mem.m_regex->get_expr();
+            if (!re_expr || !seq.is_re(re_expr))
+                continue;
+
+            unsigned min_len = seq.re.min_length(re_expr);
+            unsigned max_len = seq.re.max_length(re_expr);
+            if (min_len >= max_len) continue; // fixed or empty — no stride constraint
+
+            unsigned stride = compute_length_stride(re_expr);
+            if (stride <= 1) continue; // no useful modular constraint
+
+            unsigned lb = node.var_lb(mem.m_str);
+            unsigned ub = node.var_ub(mem.m_str);
+
+            // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ?
+            //
+            // First find the smallest k satisfying the lower bound:
+            //   k_min = 0                          if min_len ≥ lb
+            //   k_min = ⌈(lb - min_len) / stride⌉  otherwise
+            //
+            // Then verify min_len + stride * k_min ≤ ub.
+            unsigned k_min = 0;
+            if (lb > min_len) {
+                unsigned gap = lb - min_len;
+                k_min = (gap + stride - 1) / stride;  // ceiling division
+            }
+
+            // Overflow guard: stride * k_min may overflow unsigned.
+            unsigned len_at_k_min;
+            if (k_min > (UINT_MAX - min_len) / stride) {
+                // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict.
+                return true;
+            }
+            len_at_k_min = min_len + stride * k_min;
+
+            if (ub != UINT_MAX && len_at_k_min > ub)
+                return true; // no valid k exists → Parikh conflict
+        }
+        return false;
+    }
+
 } // namespace seq
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
index 6f8fc7ae3..e10e5b6eb 100644
--- a/src/smt/seq/nseq_parith.h
+++ b/src/smt/seq/nseq_parith.h
@@ -91,6 +91,7 @@ namespace seq {
         // When stride > 1 and min_len < max_len (bounds don't pin length):
         //   adds: len(str) = min_len + stride · k   (equality)
         //         k ≥ 0                              (non-negativity)
+        //         k ≤ (max_len - min_len) / stride   (upper bound, when max_len bounded)
         // These tighten the integer constraint set for the subsolver.
         // Dependencies are copied from mem.m_dep.
         void generate_parikh_constraints(str_mem const& mem,
@@ -101,6 +102,22 @@ namespace seq {
         // and appends the resulting int_constraints to node.int_constraints().
         void apply_to_node(nielsen_node& node);
 
+        // Quick Parikh feasibility check (no solver call).
+        //
+        // For each single-variable membership str ∈ re, checks whether the
+        // modular constraint  len(str) = min_len + stride · k  (k ≥ 0)
+        // has any solution given the current per-variable bounds stored in
+        // node.var_lb(str) and node.var_ub(str).
+        //
+        // Returns true when a conflict is detected (no valid k exists for
+        // some membership).  The caller should then mark the node with
+        // backtrack_reason::parikh_image.
+        //
+        // This is a lightweight pre-check that avoids calling the integer
+        // subsolver.  It is sound (never returns true for a satisfiable node)
+        // but incomplete (may miss conflicts that require the full solver).
+        bool check_parikh_conflict(nielsen_node& node);
+
         // Compute the length stride of a regex expression.
         // Exposed for testing and external callers.
         unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index 6a2c730ca..98f114fa6 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -20,6 +20,7 @@ Author:
 --*/
 
 #include "smt/seq/seq_nielsen.h"
+#include "smt/seq/nseq_parith.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/ast_pp.h"
 #include "util/hashtable.h"
@@ -431,10 +432,12 @@ namespace seq {
 
     nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
         m_sg(sg),
-        m_solver(solver) {
+        m_solver(solver),
+        m_parith(alloc(nseq_parith, sg)) {
     }
 
     nielsen_graph::~nielsen_graph() {
+        dealloc(m_parith);
         reset();
     }
 
@@ -1032,6 +1035,23 @@ namespace seq {
     // nielsen_graph: search
     // -----------------------------------------------------------------------
 
+    void nielsen_graph::apply_parikh_to_node(nielsen_node& node) {
+        if (node.m_parikh_applied) return;
+        node.m_parikh_applied = true;
+
+        // Generate modular length constraints (len(str) = min_len + stride·k, etc.)
+        // and append them to the node's integer constraint list.
+        m_parith->apply_to_node(node);
+
+        // Lightweight feasibility pre-check: does the Parikh modular constraint
+        // contradict the variable's current integer bounds?  If so, mark this
+        // node as a Parikh-image conflict immediately (avoids a solver call).
+        if (!node.is_currently_conflict() && m_parith->check_parikh_conflict(node)) {
+            node.m_is_general_conflict = true;
+            node.m_reason = backtrack_reason::parikh_image;
+        }
+    }
+
     void nielsen_graph::assert_root_constraints_to_solver() {
         if (m_root_constraints_asserted) return;
         m_root_constraints_asserted = true;
@@ -1129,6 +1149,15 @@ namespace seq {
             return search_result::sat;
         }
 
+        // Apply Parikh image filter: generate modular length constraints and
+        // perform a lightweight feasibility pre-check.  The filter is guarded
+        // internally (m_parikh_applied) so it only runs once per node.
+        apply_parikh_to_node(*node);
+        if (node->is_currently_conflict()) {
+            ++m_stats.m_num_simplify_conflict;
+            return search_result::unsat;
+        }
+
         // integer feasibility check: collect side constraints along the path
         // and verify they are jointly satisfiable using the LP solver
         if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) {
diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h
index 1660be312..70ab76edc 100644
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@@ -248,6 +248,7 @@ namespace seq {
     class nielsen_node;
     class nielsen_edge;
     class nielsen_graph;
+    class nseq_parith;  // Parikh image filter (defined in nseq_parith.h)
 
     /**
      * Abstract interface for an incremental solver used by nielsen_graph
@@ -519,6 +520,10 @@ namespace seq {
         // evaluation index for run tracking
         unsigned                m_eval_idx = 0;
 
+        // Parikh filter: set to true once apply_parikh_to_node has been applied
+        // to this node. Prevents duplicate constraint generation across DFS runs.
+        bool                    m_parikh_applied = false;
+
     public:
         nielsen_node(nielsen_graph* graph, unsigned id);
 
@@ -711,6 +716,10 @@ namespace seq {
         // Set to true after assert_root_constraints_to_solver() is first called.
         bool                          m_root_constraints_asserted = false;
 
+        // Parikh image filter: generates modular length constraints from regex
+        // memberships.  Allocated in the constructor; owned by this graph.
+        nseq_parith*                  m_parith = nullptr;
+
     public:
         // Construct with a caller-supplied solver.  Ownership is NOT transferred;
         // the caller is responsible for keeping the solver alive.
@@ -816,6 +825,16 @@ namespace seq {
     private:
         search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
 
+        // Apply the Parikh image filter to a node: generate modular length
+        // constraints from regex memberships and append them to the node's
+        // int_constraints.  Also performs a lightweight feasibility pre-check;
+        // if a Parikh conflict is detected the node's conflict flag is set with
+        // backtrack_reason::parikh_image.
+        //
+        // Guarded by node.m_parikh_applied so that constraints are generated
+        // only once per node across DFS iterations.
+        void apply_parikh_to_node(nielsen_node& node);
+
         // create a fresh variable with a unique name
         euf::snode* mk_fresh_var();
 

From 4ac5315846d7c18c82d1c5592c5a314cf66f5297 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 05:15:20 +0000
Subject: [PATCH 07/13] Fix review: min>=max guard, ceiling-div overflow,
 SASSERT, accessor methods, comments

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/nseq_parikh.cpp | 27 ++++++++++++++++++++-------
 src/smt/seq/nseq_parith.h   |  4 +++-
 src/smt/seq/seq_nielsen.cpp |  6 ++++--
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
index 13efbd94a..788221f68 100644
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -211,9 +211,11 @@ namespace seq {
         unsigned min_len = seq.re.min_length(re_expr);
         unsigned max_len = seq.re.max_length(re_expr);
 
-        // If min_len == max_len the bounds already pin the length exactly;
-        // no modular constraint is needed.
-        if (min_len == max_len)
+        // If min_len >= max_len the bounds already pin the length exactly
+        // (or the language is empty — empty language is detected by simplify_and_init
+        // via Brzozowski derivative / is_empty checks, not here).
+        // We only generate modular constraints when the length is variable.
+        if (min_len >= max_len)
             return;
 
         unsigned stride = compute_length_stride(re_expr);
@@ -244,10 +246,12 @@ namespace seq {
 
         // Constraint 3 (optional): k ≤ max_k when max_len is bounded.
         // max_k = floor((max_len - min_len) / stride)
-        // This gives the solver an explicit upper bound on k, which tightens
-        // the search space when combined with other constraints on len(str).
+        // This gives the solver an explicit upper bound on k.
+        // The subtraction is safe because min_len < max_len is guaranteed
+        // by the early return above.
         if (max_len != UINT_MAX) {
-            unsigned range = max_len - min_len;  // max_len >= min_len here
+            SASSERT(max_len > min_len);
+            unsigned range = max_len - min_len;
             unsigned max_k = range / stride;
             expr_ref max_k_expr(arith.mk_int(max_k), m);
             out.push_back(int_constraint(k_var, max_k_expr,
@@ -291,6 +295,8 @@ namespace seq {
 
             unsigned stride = compute_length_stride(re_expr);
             if (stride <= 1) continue; // no useful modular constraint
+            // stride > 1 guaranteed from here onward.
+            SASSERT(stride > 1);
 
             unsigned lb = node.var_lb(mem.m_str);
             unsigned ub = node.var_ub(mem.m_str);
@@ -305,7 +311,14 @@ namespace seq {
             unsigned k_min = 0;
             if (lb > min_len) {
                 unsigned gap = lb - min_len;
-                k_min = (gap + stride - 1) / stride;  // ceiling division
+                // Ceiling division: k_min = ceil(gap / stride).
+                // Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX.
+                // In that case k_min would be huge, and min_len + stride*k_min would
+                // also overflow ub → treat as a conflict immediately.
+                if (gap > UINT_MAX - (stride - 1)) {
+                    return true; // ceiling division would overflow → k_min too large
+                }
+                k_min = (gap + stride - 1) / stride;
             }
 
             // Overflow guard: stride * k_min may overflow unsigned.
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
index e10e5b6eb..5d019cfe4 100644
--- a/src/smt/seq/nseq_parith.h
+++ b/src/smt/seq/nseq_parith.h
@@ -88,12 +88,14 @@ namespace seq {
 
         // Generate Parikh modular length constraints for one membership.
         //
-        // When stride > 1 and min_len < max_len (bounds don't pin length):
+        // When stride > 1 and min_len < max_len (bounds don't pin length exactly,
+        // and the language is non-empty):
         //   adds: len(str) = min_len + stride · k   (equality)
         //         k ≥ 0                              (non-negativity)
         //         k ≤ (max_len - min_len) / stride   (upper bound, when max_len bounded)
         // These tighten the integer constraint set for the subsolver.
         // Dependencies are copied from mem.m_dep.
+        // Does nothing when min_len ≥ max_len (empty or fixed-length language).
         void generate_parikh_constraints(str_mem const& mem,
                                          vector<int_constraint>& out);
 
diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index 98f114fa6..4524dd7c0 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -1047,8 +1047,8 @@ namespace seq {
         // contradict the variable's current integer bounds?  If so, mark this
         // node as a Parikh-image conflict immediately (avoids a solver call).
         if (!node.is_currently_conflict() && m_parith->check_parikh_conflict(node)) {
-            node.m_is_general_conflict = true;
-            node.m_reason = backtrack_reason::parikh_image;
+            node.set_general_conflict(true);
+            node.set_reason(backtrack_reason::parikh_image);
         }
     }
 
@@ -1152,6 +1152,8 @@ namespace seq {
         // Apply Parikh image filter: generate modular length constraints and
         // perform a lightweight feasibility pre-check.  The filter is guarded
         // internally (m_parikh_applied) so it only runs once per node.
+        // Note: Parikh filtering is skipped for satisfied nodes (returned above);
+        // a fully satisfied node has no remaining memberships to filter.
         apply_parikh_to_node(*node);
         if (node->is_currently_conflict()) {
             ++m_stats.m_num_simplify_conflict;

From 213ddd36ba72d254825ca2f66f3debcdfada3c3e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 05:41:16 +0000
Subject: [PATCH 08/13] =?UTF-8?q?Rename=20nseq=5Fparikh=E2=86=92seq=5Fpari?=
 =?UTF-8?q?kh;=20add=20m/seq/a=20member=20attributes=20to=20seq=5Fparikh?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/CMakeLists.txt  |   2 +-
 src/smt/seq/nseq_parikh.cpp | 331 +-----------------------------------
 src/smt/seq/nseq_parith.h   | 122 +------------
 src/smt/seq/seq_nielsen.cpp |   4 +-
 src/smt/seq/seq_nielsen.h   |   4 +-
 src/smt/seq/seq_parikh.cpp  | 327 +++++++++++++++++++++++++++++++++++
 src/smt/seq/seq_parikh.h    | 131 ++++++++++++++
 7 files changed, 474 insertions(+), 447 deletions(-)
 create mode 100644 src/smt/seq/seq_parikh.cpp
 create mode 100644 src/smt/seq/seq_parikh.h

diff --git a/src/smt/seq/CMakeLists.txt b/src/smt/seq/CMakeLists.txt
index 9ea9c4bab..e6ed34c34 100644
--- a/src/smt/seq/CMakeLists.txt
+++ b/src/smt/seq/CMakeLists.txt
@@ -1,6 +1,6 @@
 z3_add_component(smt_seq
   SOURCES
-    nseq_parikh.cpp
+    seq_parikh.cpp
     seq_nielsen.cpp
   COMPONENT_DEPENDENCIES
     euf
diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
index 788221f68..56f2bb344 100644
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@@ -5,334 +5,11 @@ Module Name:
 
     nseq_parikh.cpp
 
-Abstract:
+Note:
 
-    Parikh image filter implementation for the ZIPT-based Nielsen string
-    solver.  See nseq_parith.h for the full design description.
-
-    The key operation is compute_length_stride(re), which performs a
-    structural traversal of the regex to find the period k such that all
-    string lengths in L(re) are congruent to min_length(re) modulo k.
-    The stride is used to generate modular length constraints that help
-    the integer subsolver prune infeasible Nielsen graph nodes.
-
-Author:
-
-    Clemens Eisenhofer 2026-03-10
-    Nikolaj Bjorner (nbjorner) 2026-03-10
+    This file is retained for backwards compatibility.
+    The canonical implementation is now smt/seq/seq_parikh.cpp.
 
 --*/
+// intentionally empty — see seq_parikh.cpp
 
-#include "smt/seq/nseq_parith.h"
-#include "ast/arith_decl_plugin.h"
-#include "ast/seq_decl_plugin.h"
-#include "util/mpz.h"
-#include <string>
-
-namespace seq {
-
-    nseq_parith::nseq_parith(euf::sgraph& sg)
-        : m_sg(sg), m_fresh_cnt(0) {}
-
-    expr_ref nseq_parith::mk_fresh_int_var() {
-        ast_manager& m = m_sg.get_manager();
-        arith_util arith(m);
-        std::string name = "pk!" + std::to_string(m_fresh_cnt++);
-        return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
-    }
-
-    // -----------------------------------------------------------------------
-    // Stride computation
-    // -----------------------------------------------------------------------
-
-    // compute_length_stride: structural traversal of regex expression.
-    //
-    // Return value semantics:
-    //   0 — fixed length (or empty language): no modular constraint needed
-    //         beyond the min == max bounds.
-    //   1 — all integer lengths ≥ min_len are achievable: no useful modular
-    //         constraint.
-    //   k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k):
-    //         modular constraint len(str) = min_len + k·j is useful.
-    unsigned nseq_parith::compute_length_stride(expr* re) {
-        if (!re) return 1;
-
-        seq_util& seq = m_sg.get_seq_util();
-        expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
-        unsigned lo = 0, hi = 0;
-
-        // Empty language: no strings exist; stride is irrelevant.
-        if (seq.re.is_empty(re))
-            return 0;
-
-        // Epsilon regex {""}: single fixed length 0.
-        if (seq.re.is_epsilon(re))
-            return 0;
-
-        // to_re(concrete_string): fixed-length, no modular constraint needed.
-        if (seq.re.is_to_re(re, s)) {
-            // min_length == max_length, covered by bounds.
-            return 0;
-        }
-
-        // Single character: range, full_char — fixed length 1.
-        if (seq.re.is_range(re) || seq.re.is_full_char(re))
-            return 0;
-
-        // full_seq (.* / Σ*): every length ≥ 0 is possible.
-        if (seq.re.is_full_seq(re))
-            return 1;
-
-        // r* — Kleene star.
-        // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
-        // If r has a fixed length k, then L(r*) = {0, k, 2k, ...} → stride k.
-        // If r has variable length, strides from different iterations combine
-        // by GCD.
-        if (seq.re.is_star(re, r1)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            // When the body has unbounded length (mx == UINT_MAX), different
-            // iterations can produce many different lengths, and the stride of
-            // the star as a whole degenerates to gcd(mn, mn) = mn (or 1 if
-            // mn == 1).  This is conservative: we use the body's min-length
-            // as the only available fixed quantity.
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx) {
-                // Fixed-length body: L(r*) = {0, mn, 2·mn, ...} → stride = mn.
-                // When mn == 1 the stride would be 1, which gives no useful
-                // modular constraint, so return 0 instead.
-                return (mn > 1) ? mn : 0;
-            }
-            // Variable-length body: GCD of min and max lengths
-            return u_gcd(mn, mx);
-        }
-
-        // r+ — one or more: same stride analysis as r*.
-        if (seq.re.is_plus(re, r1)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
-        }
-
-        // r? — zero or one: lengths = {0} ∪ lengths(r)
-        // stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length).
-        if (seq.re.is_opt(re, r1)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned inner = compute_length_stride(r1);
-            // L(r?) includes length 0 and all lengths of L(r).
-            // GCD(stride(r), min_len(r)) is a valid stride because:
-            //   - the gap from 0 to min_len(r) is min_len(r) itself, and
-            //   - subsequent lengths grow in steps governed by stride(r).
-            // A result > 1 gives a useful modular constraint; result == 1
-            // means every non-negative integer is achievable (no constraint).
-            if (inner == 0)
-                return u_gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
-            return u_gcd(inner, mn);
-        }
-
-        // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
-        if (seq.re.is_concat(re, r1, r2)) {
-            unsigned s1 = compute_length_stride(r1);
-            unsigned s2 = compute_length_stride(r2);
-            return u_gcd(s1, s2);
-        }
-
-        // union(r1, r2): lengths from either branch → need GCD of both
-        // strides and the difference between their minimum lengths.
-        if (seq.re.is_union(re, r1, r2)) {
-            unsigned s1 = compute_length_stride(r1);
-            unsigned s2 = compute_length_stride(r2);
-            unsigned m1 = seq.re.min_length(r1);
-            unsigned m2 = seq.re.min_length(r2);
-            unsigned d  = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
-            // Replace 0-strides with d for GCD computation:
-            // a fixed-length branch only introduces constraint via its offset.
-            unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
-            g = u_gcd(g, d);
-            return g;
-        }
-
-        // loop(r, lo, hi): lengths = {lo·len(r), ..., hi·len(r)} if r is fixed.
-        // stride = len(r) when r is fixed-length; otherwise GCD.
-        if (seq.re.is_loop(re, r1, lo, hi)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
-        }
-        if (seq.re.is_loop(re, r1, lo)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
-        }
-
-        // intersection(r1, r2): lengths must be in both languages.
-        // A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid
-        // stride for the intersection (every length in the intersection is
-        // also in r1 and in r2).
-        if (seq.re.is_intersection(re, r1, r2)) {
-            unsigned s1 = compute_length_stride(r1);
-            unsigned s2 = compute_length_stride(r2);
-            return u_gcd(s1, s2);
-        }
-
-        // For complement, diff, reverse, derivative, of_pred, and anything
-        // else we cannot analyse statically: be conservative and return 1
-        // (no useful modular constraint rather than an unsound one).
-        return 1;
-    }
-
-    // -----------------------------------------------------------------------
-    // Constraint generation
-    // -----------------------------------------------------------------------
-
-    void nseq_parith::generate_parikh_constraints(str_mem const& mem,
-                                                   vector<int_constraint>& out) {
-        if (!mem.m_regex || !mem.m_str)
-            return;
-
-        ast_manager& m = m_sg.get_manager();
-        seq_util&    seq  = m_sg.get_seq_util();
-        arith_util   arith(m);
-
-        expr* re_expr = mem.m_regex->get_expr();
-        if (!re_expr || !seq.is_re(re_expr))
-            return;
-
-        // Length bounds from the regex.
-        unsigned min_len = seq.re.min_length(re_expr);
-        unsigned max_len = seq.re.max_length(re_expr);
-
-        // If min_len >= max_len the bounds already pin the length exactly
-        // (or the language is empty — empty language is detected by simplify_and_init
-        // via Brzozowski derivative / is_empty checks, not here).
-        // We only generate modular constraints when the length is variable.
-        if (min_len >= max_len)
-            return;
-
-        unsigned stride = compute_length_stride(re_expr);
-
-        // stride == 1: every integer length is possible — no useful constraint.
-        // stride == 0: fixed length or empty — handled by bounds.
-        if (stride <= 1)
-            return;
-
-        // Build len(str) as an arithmetic expression.
-        expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m);
-
-        // Introduce fresh integer variable k ≥ 0.
-        expr_ref k_var = mk_fresh_int_var();
-
-        // Constraint 1: len(str) = min_len + stride · k
-        expr_ref min_expr(arith.mk_int(min_len), m);
-        expr_ref stride_expr(arith.mk_int(stride), m);
-        expr_ref stride_k(arith.mk_mul(stride_expr, k_var), m);
-        expr_ref rhs(arith.mk_add(min_expr, stride_k), m);
-        out.push_back(int_constraint(len_str, rhs,
-                                     int_constraint_kind::eq, mem.m_dep, m));
-
-        // Constraint 2: k ≥ 0
-        expr_ref zero(arith.mk_int(0), m);
-        out.push_back(int_constraint(k_var, zero,
-                                     int_constraint_kind::ge, mem.m_dep, m));
-
-        // Constraint 3 (optional): k ≤ max_k when max_len is bounded.
-        // max_k = floor((max_len - min_len) / stride)
-        // This gives the solver an explicit upper bound on k.
-        // The subtraction is safe because min_len < max_len is guaranteed
-        // by the early return above.
-        if (max_len != UINT_MAX) {
-            SASSERT(max_len > min_len);
-            unsigned range = max_len - min_len;
-            unsigned max_k = range / stride;
-            expr_ref max_k_expr(arith.mk_int(max_k), m);
-            out.push_back(int_constraint(k_var, max_k_expr,
-                                         int_constraint_kind::le, mem.m_dep, m));
-        }
-    }
-
-    void nseq_parith::apply_to_node(nielsen_node& node) {
-        vector<int_constraint> constraints;
-        for (str_mem const& mem : node.str_mems())
-            generate_parikh_constraints(mem, constraints);
-        for (auto& ic : constraints)
-            node.add_int_constraint(ic);
-    }
-
-    // -----------------------------------------------------------------------
-    // Quick Parikh feasibility check (no solver call)
-    // -----------------------------------------------------------------------
-
-    // Returns true if a Parikh conflict is detected: there exists a membership
-    // str ∈ re for a single-variable str where the modular length constraint
-    //   len(str) = min_len + stride * k  (k ≥ 0)
-    // is inconsistent with the variable's current integer bounds [lb, ub].
-    //
-    // This check is lightweight — it uses only modular arithmetic on the already-
-    // known regex min/max lengths and the per-variable bounds stored in the node.
-    bool nseq_parith::check_parikh_conflict(nielsen_node& node) {
-        seq_util& seq = m_sg.get_seq_util();
-
-        for (str_mem const& mem : node.str_mems()) {
-            if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
-                continue;
-
-            expr* re_expr = mem.m_regex->get_expr();
-            if (!re_expr || !seq.is_re(re_expr))
-                continue;
-
-            unsigned min_len = seq.re.min_length(re_expr);
-            unsigned max_len = seq.re.max_length(re_expr);
-            if (min_len >= max_len) continue; // fixed or empty — no stride constraint
-
-            unsigned stride = compute_length_stride(re_expr);
-            if (stride <= 1) continue; // no useful modular constraint
-            // stride > 1 guaranteed from here onward.
-            SASSERT(stride > 1);
-
-            unsigned lb = node.var_lb(mem.m_str);
-            unsigned ub = node.var_ub(mem.m_str);
-
-            // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ?
-            //
-            // First find the smallest k satisfying the lower bound:
-            //   k_min = 0                          if min_len ≥ lb
-            //   k_min = ⌈(lb - min_len) / stride⌉  otherwise
-            //
-            // Then verify min_len + stride * k_min ≤ ub.
-            unsigned k_min = 0;
-            if (lb > min_len) {
-                unsigned gap = lb - min_len;
-                // Ceiling division: k_min = ceil(gap / stride).
-                // Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX.
-                // In that case k_min would be huge, and min_len + stride*k_min would
-                // also overflow ub → treat as a conflict immediately.
-                if (gap > UINT_MAX - (stride - 1)) {
-                    return true; // ceiling division would overflow → k_min too large
-                }
-                k_min = (gap + stride - 1) / stride;
-            }
-
-            // Overflow guard: stride * k_min may overflow unsigned.
-            unsigned len_at_k_min;
-            if (k_min > (UINT_MAX - min_len) / stride) {
-                // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict.
-                return true;
-            }
-            len_at_k_min = min_len + stride * k_min;
-
-            if (ub != UINT_MAX && len_at_k_min > ub)
-                return true; // no valid k exists → Parikh conflict
-        }
-        return false;
-    }
-
-} // namespace seq
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
index 5d019cfe4..182f54a55 100644
--- a/src/smt/seq/nseq_parith.h
+++ b/src/smt/seq/nseq_parith.h
@@ -5,124 +5,16 @@ Module Name:
 
     nseq_parith.h
 
-Abstract:
+Note:
 
-    Parikh image filter for the ZIPT-based Nielsen string solver.
-
-    Implements Parikh-based arithmetic constraint generation for
-    nielsen_node instances.  For a regex membership constraint str ∈ r,
-    the Parikh image of r constrains the multiset of characters in str.
-    This module computes the "length stride" (period) of the length
-    language of r and generates modular arithmetic constraints of the form
-
-        len(str) = min_len + stride · k    (k ≥ 0, k fresh integer)
-
-    which tighten the arithmetic subproblem beyond the simple min/max
-    length bounds already produced by nielsen_node::init_var_bounds_from_mems().
-
-    For example:
-      • str ∈ (ab)*  → min_len = 0, stride = 2  → len(str) = 2·k
-      • str ∈ a(bc)* → min_len = 1, stride = 2  → len(str) = 1 + 2·k
-      • str ∈ ab|abc → stride = 1 (no useful modular constraint)
-
-    The generated int_constraints are added to the node's integer constraint
-    set and discharged by the integer subsolver (see seq_nielsen.h,
-    simple_solver).
-
-    Implements the Parikh filter described in ZIPT
-    (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
-    replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic.
-
-Author:
-
-    Clemens Eisenhofer 2026-03-10
-    Nikolaj Bjorner (nbjorner) 2026-03-10
+    This file is retained for backwards compatibility.
+    The canonical header is now smt/seq/seq_parikh.h.
 
 --*/
 #pragma once
-
-#include "ast/euf/euf_sgraph.h"
-#include "smt/seq/seq_nielsen.h"
+#include "smt/seq/seq_parikh.h"
 
 namespace seq {
-
-    /**
-     * Parikh image filter: generates modular length constraints from
-     * regex membership constraints in a nielsen_node.
-     *
-     * Usage:
-     *   nseq_parith parith(sg);
-     *   parith.apply_to_node(node);      // adds constraints to node
-     *
-     * Or per-membership:
-     *   vector<int_constraint> out;
-     *   parith.generate_parikh_constraints(mem, out);
-     */
-    class nseq_parith {
-        euf::sgraph& m_sg;
-        unsigned     m_fresh_cnt;  // counter for fresh variable names
-
-        // Compute the stride (period) of the length language of a regex.
-        //
-        // The stride k satisfies: all lengths in L(re) are congruent to
-        // min_length(re) modulo k.  A stride of 1 means every integer
-        // length is possible (no useful modular constraint).  A stride of
-        // 0 is a sentinel meaning the language is empty or has a single
-        // fixed length (already captured by bounds).
-        //
-        // Examples:
-        //   stride(to_re("ab"))   = 0  (fixed length 2)
-        //   stride((ab)*)         = 2  (lengths 0, 2, 4, ...)
-        //   stride((abc)*)        = 3  (lengths 0, 3, 6, ...)
-        //   stride(a*b*)          = 1  (all lengths possible)
-        //   stride((ab)*(cd)*)    = 2  (lengths 0, 2, 4, ...)
-        //   stride((ab)*|(abc)*)  = 1  (lengths 0, 2, 3, 4, ...)
-        unsigned compute_length_stride(expr* re);
-
-        // Create a fresh integer variable (name "pk!N") for use as the
-        // Parikh multiplier variable k in len(str) = min_len + stride·k.
-        expr_ref mk_fresh_int_var();
-
-    public:
-        explicit nseq_parith(euf::sgraph& sg);
-
-        // Generate Parikh modular length constraints for one membership.
-        //
-        // When stride > 1 and min_len < max_len (bounds don't pin length exactly,
-        // and the language is non-empty):
-        //   adds: len(str) = min_len + stride · k   (equality)
-        //         k ≥ 0                              (non-negativity)
-        //         k ≤ (max_len - min_len) / stride   (upper bound, when max_len bounded)
-        // These tighten the integer constraint set for the subsolver.
-        // Dependencies are copied from mem.m_dep.
-        // Does nothing when min_len ≥ max_len (empty or fixed-length language).
-        void generate_parikh_constraints(str_mem const& mem,
-                                         vector<int_constraint>& out);
-
-        // Apply Parikh constraints to all memberships at a node.
-        // Calls generate_parikh_constraints for each str_mem in the node
-        // and appends the resulting int_constraints to node.int_constraints().
-        void apply_to_node(nielsen_node& node);
-
-        // Quick Parikh feasibility check (no solver call).
-        //
-        // For each single-variable membership str ∈ re, checks whether the
-        // modular constraint  len(str) = min_len + stride · k  (k ≥ 0)
-        // has any solution given the current per-variable bounds stored in
-        // node.var_lb(str) and node.var_ub(str).
-        //
-        // Returns true when a conflict is detected (no valid k exists for
-        // some membership).  The caller should then mark the node with
-        // backtrack_reason::parikh_image.
-        //
-        // This is a lightweight pre-check that avoids calling the integer
-        // subsolver.  It is sound (never returns true for a satisfiable node)
-        // but incomplete (may miss conflicts that require the full solver).
-        bool check_parikh_conflict(nielsen_node& node);
-
-        // Compute the length stride of a regex expression.
-        // Exposed for testing and external callers.
-        unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
-    };
-
-} // namespace seq
+    // backwards-compat alias
+    using nseq_parith = seq_parikh;
+}
diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index 4524dd7c0..81fd1b2ba 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -20,7 +20,7 @@ Author:
 --*/
 
 #include "smt/seq/seq_nielsen.h"
-#include "smt/seq/nseq_parith.h"
+#include "smt/seq/seq_parikh.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/ast_pp.h"
 #include "util/hashtable.h"
@@ -433,7 +433,7 @@ namespace seq {
     nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
         m_sg(sg),
         m_solver(solver),
-        m_parith(alloc(nseq_parith, sg)) {
+        m_parith(alloc(seq_parikh, sg)) {
     }
 
     nielsen_graph::~nielsen_graph() {
diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h
index 70ab76edc..0be8ae274 100644
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@@ -248,7 +248,7 @@ namespace seq {
     class nielsen_node;
     class nielsen_edge;
     class nielsen_graph;
-    class nseq_parith;  // Parikh image filter (defined in nseq_parith.h)
+    class seq_parikh;   // Parikh image filter (see seq_parikh.h)
 
     /**
      * Abstract interface for an incremental solver used by nielsen_graph
@@ -718,7 +718,7 @@ namespace seq {
 
         // Parikh image filter: generates modular length constraints from regex
         // memberships.  Allocated in the constructor; owned by this graph.
-        nseq_parith*                  m_parith = nullptr;
+        seq_parikh*                   m_parith = nullptr;
 
     public:
         // Construct with a caller-supplied solver.  Ownership is NOT transferred;
diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp
new file mode 100644
index 000000000..255d09d72
--- /dev/null
+++ b/src/smt/seq/seq_parikh.cpp
@@ -0,0 +1,327 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_parikh.cpp
+
+Abstract:
+
+    Parikh image filter implementation for the ZIPT-based Nielsen string
+    solver.  See seq_parikh.h for the full design description.
+
+    The key operation is compute_length_stride(re), which performs a
+    structural traversal of the regex to find the period k such that all
+    string lengths in L(re) are congruent to min_length(re) modulo k.
+    The stride is used to generate modular length constraints that help
+    the integer subsolver prune infeasible Nielsen graph nodes.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-10
+    Nikolaj Bjorner (nbjorner) 2026-03-10
+
+--*/
+
+#include "smt/seq/seq_parikh.h"
+#include "util/mpz.h"
+#include <string>
+
+namespace seq {
+
+    seq_parikh::seq_parikh(euf::sgraph& sg)
+        : m(sg.get_manager()), seq(m), a(m), m_fresh_cnt(0) {}
+
+    expr_ref seq_parikh::mk_fresh_int_var() {
+        std::string name = "pk!" + std::to_string(m_fresh_cnt++);
+        return expr_ref(m.mk_fresh_const(name.c_str(), a.mk_int()), m);
+    }
+
+    // -----------------------------------------------------------------------
+    // Stride computation
+    // -----------------------------------------------------------------------
+
+    // compute_length_stride: structural traversal of regex expression.
+    //
+    // Return value semantics:
+    //   0 — fixed length (or empty language): no modular constraint needed
+    //         beyond the min == max bounds.
+    //   1 — all integer lengths ≥ min_len are achievable: no useful modular
+    //         constraint.
+    //   k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k):
+    //         modular constraint len(str) = min_len + k·j is useful.
+    unsigned seq_parikh::compute_length_stride(expr* re) {
+        if (!re) return 1;
+
+        expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
+        unsigned lo = 0, hi = 0;
+
+        // Empty language: no strings exist; stride is irrelevant.
+        if (seq.re.is_empty(re))
+            return 0;
+
+        // Epsilon regex {""}: single fixed length 0.
+        if (seq.re.is_epsilon(re))
+            return 0;
+
+        // to_re(concrete_string): fixed-length, no modular constraint needed.
+        if (seq.re.is_to_re(re, s)) {
+            // min_length == max_length, covered by bounds.
+            return 0;
+        }
+
+        // Single character: range, full_char — fixed length 1.
+        if (seq.re.is_range(re) || seq.re.is_full_char(re))
+            return 0;
+
+        // full_seq (.* / Σ*): every length ≥ 0 is possible.
+        if (seq.re.is_full_seq(re))
+            return 1;
+
+        // r* — Kleene star.
+        // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
+        // If r has a fixed length k, then L(r*) = {0, k, 2k, ...} → stride k.
+        // If r has variable length, strides from different iterations combine
+        // by GCD.
+        if (seq.re.is_star(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            // When the body has unbounded length (mx == UINT_MAX), different
+            // iterations can produce many different lengths, and the stride of
+            // the star as a whole degenerates to gcd(mn, mn) = mn (or 1 if
+            // mn == 1).  This is conservative: we use the body's min-length
+            // as the only available fixed quantity.
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx) {
+                // Fixed-length body: L(r*) = {0, mn, 2·mn, ...} → stride = mn.
+                // When mn == 1 the stride would be 1, which gives no useful
+                // modular constraint, so return 0 instead.
+                return (mn > 1) ? mn : 0;
+            }
+            // Variable-length body: GCD of min and max lengths
+            return u_gcd(mn, mx);
+        }
+
+        // r+ — one or more: same stride analysis as r*.
+        if (seq.re.is_plus(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return u_gcd(mn, mx);
+        }
+
+        // r? — zero or one: lengths = {0} ∪ lengths(r)
+        // stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length).
+        if (seq.re.is_opt(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned inner = compute_length_stride(r1);
+            // L(r?) includes length 0 and all lengths of L(r).
+            // GCD(stride(r), min_len(r)) is a valid stride because:
+            //   - the gap from 0 to min_len(r) is min_len(r) itself, and
+            //   - subsequent lengths grow in steps governed by stride(r).
+            // A result > 1 gives a useful modular constraint; result == 1
+            // means every non-negative integer is achievable (no constraint).
+            if (inner == 0)
+                return u_gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
+            return u_gcd(inner, mn);
+        }
+
+        // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
+        if (seq.re.is_concat(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            return u_gcd(s1, s2);
+        }
+
+        // union(r1, r2): lengths from either branch → need GCD of both
+        // strides and the difference between their minimum lengths.
+        if (seq.re.is_union(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            unsigned m1 = seq.re.min_length(r1);
+            unsigned m2 = seq.re.min_length(r2);
+            unsigned d  = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
+            // Replace 0-strides with d for GCD computation:
+            // a fixed-length branch only introduces constraint via its offset.
+            unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
+            g = u_gcd(g, d);
+            return g;
+        }
+
+        // loop(r, lo, hi): lengths = {lo·len(r), ..., hi·len(r)} if r is fixed.
+        // stride = len(r) when r is fixed-length; otherwise GCD.
+        if (seq.re.is_loop(re, r1, lo, hi)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return u_gcd(mn, mx);
+        }
+        if (seq.re.is_loop(re, r1, lo)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return u_gcd(mn, mx);
+        }
+
+        // intersection(r1, r2): lengths must be in both languages.
+        // A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid
+        // stride for the intersection (every length in the intersection is
+        // also in r1 and in r2).
+        if (seq.re.is_intersection(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            return u_gcd(s1, s2);
+        }
+
+        // For complement, diff, reverse, derivative, of_pred, and anything
+        // else we cannot analyse statically: be conservative and return 1
+        // (no useful modular constraint rather than an unsound one).
+        return 1;
+    }
+
+    // -----------------------------------------------------------------------
+    // Constraint generation
+    // -----------------------------------------------------------------------
+
+    void seq_parikh::generate_parikh_constraints(str_mem const& mem,
+                                                  vector<int_constraint>& out) {
+        if (!mem.m_regex || !mem.m_str)
+            return;
+
+        expr* re_expr = mem.m_regex->get_expr();
+        if (!re_expr || !seq.is_re(re_expr))
+            return;
+
+        // Length bounds from the regex.
+        unsigned min_len = seq.re.min_length(re_expr);
+        unsigned max_len = seq.re.max_length(re_expr);
+
+        // If min_len >= max_len the bounds already pin the length exactly
+        // (or the language is empty — empty language is detected by simplify_and_init
+        // via Brzozowski derivative / is_empty checks, not here).
+        // We only generate modular constraints when the length is variable.
+        if (min_len >= max_len)
+            return;
+
+        unsigned stride = compute_length_stride(re_expr);
+
+        // stride == 1: every integer length is possible — no useful constraint.
+        // stride == 0: fixed length or empty — handled by bounds.
+        if (stride <= 1)
+            return;
+
+        // Build len(str) as an arithmetic expression.
+        expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m);
+
+        // Introduce fresh integer variable k ≥ 0.
+        expr_ref k_var = mk_fresh_int_var();
+
+        // Constraint 1: len(str) = min_len + stride · k
+        expr_ref min_expr(a.mk_int(min_len), m);
+        expr_ref stride_expr(a.mk_int(stride), m);
+        expr_ref stride_k(a.mk_mul(stride_expr, k_var), m);
+        expr_ref rhs(a.mk_add(min_expr, stride_k), m);
+        out.push_back(int_constraint(len_str, rhs,
+                                     int_constraint_kind::eq, mem.m_dep, m));
+
+        // Constraint 2: k ≥ 0
+        expr_ref zero(a.mk_int(0), m);
+        out.push_back(int_constraint(k_var, zero,
+                                     int_constraint_kind::ge, mem.m_dep, m));
+
+        // Constraint 3 (optional): k ≤ max_k when max_len is bounded.
+        // max_k = floor((max_len - min_len) / stride)
+        // This gives the solver an explicit upper bound on k.
+        // The subtraction is safe because min_len < max_len is guaranteed
+        // by the early return above.
+        if (max_len != UINT_MAX) {
+            SASSERT(max_len > min_len);
+            unsigned range = max_len - min_len;
+            unsigned max_k = range / stride;
+            expr_ref max_k_expr(a.mk_int(max_k), m);
+            out.push_back(int_constraint(k_var, max_k_expr,
+                                         int_constraint_kind::le, mem.m_dep, m));
+        }
+    }
+
+    void seq_parikh::apply_to_node(nielsen_node& node) {
+        vector<int_constraint> constraints;
+        for (str_mem const& mem : node.str_mems())
+            generate_parikh_constraints(mem, constraints);
+        for (auto& ic : constraints)
+            node.add_int_constraint(ic);
+    }
+
+    // -----------------------------------------------------------------------
+    // Quick Parikh feasibility check (no solver call)
+    // -----------------------------------------------------------------------
+
+    // Returns true if a Parikh conflict is detected: there exists a membership
+    // str ∈ re for a single-variable str where the modular length constraint
+    //   len(str) = min_len + stride * k  (k ≥ 0)
+    // is inconsistent with the variable's current integer bounds [lb, ub].
+    //
+    // This check is lightweight — it uses only modular arithmetic on the already-
+    // known regex min/max lengths and the per-variable bounds stored in the node.
+    bool seq_parikh::check_parikh_conflict(nielsen_node& node) {
+        for (str_mem const& mem : node.str_mems()) {
+            if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
+                continue;
+
+            expr* re_expr = mem.m_regex->get_expr();
+            if (!re_expr || !seq.is_re(re_expr))
+                continue;
+
+            unsigned min_len = seq.re.min_length(re_expr);
+            unsigned max_len = seq.re.max_length(re_expr);
+            if (min_len >= max_len) continue; // fixed or empty — no stride constraint
+
+            unsigned stride = compute_length_stride(re_expr);
+            if (stride <= 1) continue; // no useful modular constraint
+            // stride > 1 guaranteed from here onward.
+            SASSERT(stride > 1);
+
+            unsigned lb = node.var_lb(mem.m_str);
+            unsigned ub = node.var_ub(mem.m_str);
+
+            // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ?
+            //
+            // First find the smallest k satisfying the lower bound:
+            //   k_min = 0                          if min_len ≥ lb
+            //   k_min = ⌈(lb - min_len) / stride⌉  otherwise
+            //
+            // Then verify min_len + stride * k_min ≤ ub.
+            unsigned k_min = 0;
+            if (lb > min_len) {
+                unsigned gap = lb - min_len;
+                // Ceiling division: k_min = ceil(gap / stride).
+                // Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX.
+                // In that case k_min would be huge, and min_len + stride*k_min would
+                // also overflow ub → treat as a conflict immediately.
+                if (gap > UINT_MAX - (stride - 1)) {
+                    return true; // ceiling division would overflow → k_min too large
+                }
+                k_min = (gap + stride - 1) / stride;
+            }
+
+            // Overflow guard: stride * k_min may overflow unsigned.
+            unsigned len_at_k_min;
+            if (k_min > (UINT_MAX - min_len) / stride) {
+                // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict.
+                return true;
+            }
+            len_at_k_min = min_len + stride * k_min;
+
+            if (ub != UINT_MAX && len_at_k_min > ub)
+                return true; // no valid k exists → Parikh conflict
+        }
+        return false;
+    }
+
+} // namespace seq
diff --git a/src/smt/seq/seq_parikh.h b/src/smt/seq/seq_parikh.h
new file mode 100644
index 000000000..730854832
--- /dev/null
+++ b/src/smt/seq/seq_parikh.h
@@ -0,0 +1,131 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_parikh.h
+
+Abstract:
+
+    Parikh image filter for the ZIPT-based Nielsen string solver.
+
+    Implements Parikh-based arithmetic constraint generation for
+    nielsen_node instances.  For a regex membership constraint str ∈ r,
+    the Parikh image of r constrains the multiset of characters in str.
+    This module computes the "length stride" (period) of the length
+    language of r and generates modular arithmetic constraints of the form
+
+        len(str) = min_len + stride · k    (k ≥ 0, k fresh integer)
+
+    which tighten the arithmetic subproblem beyond the simple min/max
+    length bounds already produced by nielsen_node::init_var_bounds_from_mems().
+
+    For example:
+      • str ∈ (ab)*  → min_len = 0, stride = 2  → len(str) = 2·k
+      • str ∈ a(bc)* → min_len = 1, stride = 2  → len(str) = 1 + 2·k
+      • str ∈ ab|abc → stride = 1 (no useful modular constraint)
+
+    The generated int_constraints are added to the node's integer constraint
+    set and discharged by the integer subsolver (see seq_nielsen.h,
+    simple_solver).
+
+    Implements the Parikh filter described in ZIPT
+    (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
+    replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-10
+    Nikolaj Bjorner (nbjorner) 2026-03-10
+
+--*/
+#pragma once
+
+#include "ast/arith_decl_plugin.h"
+#include "ast/seq_decl_plugin.h"
+#include "smt/seq/seq_nielsen.h"
+
+namespace seq {
+
+    /**
+     * Parikh image filter: generates modular length constraints from
+     * regex membership constraints in a nielsen_node.
+     *
+     * Usage:
+     *   seq_parikh parikh(sg);
+     *   parikh.apply_to_node(node);      // adds constraints to node
+     *
+     * Or per-membership:
+     *   vector<int_constraint> out;
+     *   parikh.generate_parikh_constraints(mem, out);
+     */
+    class seq_parikh {
+        ast_manager& m;
+        seq_util     seq;
+        arith_util   a;
+        unsigned     m_fresh_cnt;  // counter for fresh variable names
+
+        // Compute the stride (period) of the length language of a regex.
+        //
+        // The stride k satisfies: all lengths in L(re) are congruent to
+        // min_length(re) modulo k.  A stride of 1 means every integer
+        // length is possible (no useful modular constraint).  A stride of
+        // 0 is a sentinel meaning the language is empty or has a single
+        // fixed length (already captured by bounds).
+        //
+        // Examples:
+        //   stride(to_re("ab"))   = 0  (fixed length 2)
+        //   stride((ab)*)         = 2  (lengths 0, 2, 4, ...)
+        //   stride((abc)*)        = 3  (lengths 0, 3, 6, ...)
+        //   stride(a*b*)          = 1  (all lengths possible)
+        //   stride((ab)*(cd)*)    = 2  (lengths 0, 2, 4, ...)
+        //   stride((ab)*|(abc)*)  = 1  (lengths 0, 2, 3, 4, ...)
+        unsigned compute_length_stride(expr* re);
+
+        // Create a fresh integer variable (name "pk!N") for use as the
+        // Parikh multiplier variable k in len(str) = min_len + stride·k.
+        expr_ref mk_fresh_int_var();
+
+    public:
+        explicit seq_parikh(euf::sgraph& sg);
+
+        // Generate Parikh modular length constraints for one membership.
+        //
+        // When stride > 1 and min_len < max_len (bounds don't pin length exactly,
+        // and the language is non-empty):
+        //   adds: len(str) = min_len + stride · k   (equality)
+        //         k ≥ 0                              (non-negativity)
+        //         k ≤ (max_len - min_len) / stride   (upper bound, when max_len bounded)
+        // These tighten the integer constraint set for the subsolver.
+        // Dependencies are copied from mem.m_dep.
+        // Does nothing when min_len ≥ max_len (empty or fixed-length language).
+        void generate_parikh_constraints(str_mem const& mem,
+                                         vector<int_constraint>& out);
+
+        // Apply Parikh constraints to all memberships at a node.
+        // Calls generate_parikh_constraints for each str_mem in the node
+        // and appends the resulting int_constraints to node.int_constraints().
+        void apply_to_node(nielsen_node& node);
+
+        // Quick Parikh feasibility check (no solver call).
+        //
+        // For each single-variable membership str ∈ re, checks whether the
+        // modular constraint  len(str) = min_len + stride · k  (k ≥ 0)
+        // has any solution given the current per-variable bounds stored in
+        // node.var_lb(str) and node.var_ub(str).
+        //
+        // Returns true when a conflict is detected (no valid k exists for
+        // some membership).  The caller should then mark the node with
+        // backtrack_reason::parikh_image.
+        //
+        // This is a lightweight pre-check that avoids calling the integer
+        // subsolver.  It is sound (never returns true for a satisfiable node)
+        // but incomplete (may miss conflicts that require the full solver).
+        bool check_parikh_conflict(nielsen_node& node);
+
+        // Compute the length stride of a regex expression.
+        // Exposed for testing and external callers.
+        unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
+    };
+
+} // namespace seq

From 45a574d7fad82d6b92ae17c2e431b45866cf8bfa Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:48:05 +0000
Subject: [PATCH 09/13] Remove nseq_parith.h and nseq_parikh.cpp
 backwards-compat shims

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/nseq_parikh.cpp | 15 ---------------
 src/smt/seq/nseq_parith.h   | 20 --------------------
 2 files changed, 35 deletions(-)
 delete mode 100644 src/smt/seq/nseq_parikh.cpp
 delete mode 100644 src/smt/seq/nseq_parith.h

diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp
deleted file mode 100644
index 56f2bb344..000000000
--- a/src/smt/seq/nseq_parikh.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-/*++
-Copyright (c) 2026 Microsoft Corporation
-
-Module Name:
-
-    nseq_parikh.cpp
-
-Note:
-
-    This file is retained for backwards compatibility.
-    The canonical implementation is now smt/seq/seq_parikh.cpp.
-
---*/
-// intentionally empty — see seq_parikh.cpp
-
diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h
deleted file mode 100644
index 182f54a55..000000000
--- a/src/smt/seq/nseq_parith.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*++
-Copyright (c) 2026 Microsoft Corporation
-
-Module Name:
-
-    nseq_parith.h
-
-Note:
-
-    This file is retained for backwards compatibility.
-    The canonical header is now smt/seq/seq_parikh.h.
-
---*/
-#pragma once
-#include "smt/seq/seq_parikh.h"
-
-namespace seq {
-    // backwards-compat alias
-    using nseq_parith = seq_parikh;
-}

From 194dfe6c610d15780fb2c077be94cb32a2a22e6a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 17:21:27 +0000
Subject: [PATCH 10/13] Port ZIPT Parikh features: minterm_to_char_set, char
 range constraints, fix stride soundness

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/seq_nielsen.cpp |  11 ++-
 src/smt/seq/seq_parikh.cpp  | 129 ++++++++++++++++++++++++++----------
 src/smt/seq/seq_parikh.h    |  19 ++++++
 3 files changed, 122 insertions(+), 37 deletions(-)

diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index 81fd1b2ba..58fd96692 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -2301,8 +2301,15 @@ namespace seq {
                 nielsen_subst s(first, replacement, mem.m_dep);
                 e->add_subst(s);
                 child->apply_subst(m_sg, s);
-                // TODO: derive char_set from minterm and add as range constraint
-                // child->add_char_range(fresh_char, minterm_to_char_set(mt));
+                // Constrain fresh_char to the character class of this minterm.
+                // This is the key Parikh pruning step: when x → ?c · x' is
+                // generated from minterm m_i, ?c must belong to the character
+                // class described by m_i so that str ∈ derivative(R, m_i).
+                if (mt->get_expr()) {
+                    char_set cs = m_parith->minterm_to_char_set(mt->get_expr());
+                    if (!cs.is_empty())
+                        child->add_char_range(fresh_char, cs);
+                }
                 created = true;
             }
 
diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp
index 255d09d72..367412dad 100644
--- a/src/smt/seq/seq_parikh.cpp
+++ b/src/smt/seq/seq_parikh.cpp
@@ -80,36 +80,24 @@ namespace seq {
 
         // r* — Kleene star.
         // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
-        // If r has a fixed length k, then L(r*) = {0, k, 2k, ...} → stride k.
-        // If r has variable length, strides from different iterations combine
-        // by GCD.
+        // If all lengths in L(r) are congruent to c modulo s (c = min_len, s = stride),
+        // then L(r*) includes lengths {0, c, c+s, 2c, 2c+s, 2c+2s, ...} and
+        // the overall GCD is gcd(c, s).  This is strictly more accurate than
+        // the previous gcd(min, max) approximation, which can be unsound when
+        // the body contains lengths whose GCD is smaller than gcd(min, max).
         if (seq.re.is_star(re, r1)) {
             unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            // When the body has unbounded length (mx == UINT_MAX), different
-            // iterations can produce many different lengths, and the stride of
-            // the star as a whole degenerates to gcd(mn, mn) = mn (or 1 if
-            // mn == 1).  This is conservative: we use the body's min-length
-            // as the only available fixed quantity.
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx) {
-                // Fixed-length body: L(r*) = {0, mn, 2·mn, ...} → stride = mn.
-                // When mn == 1 the stride would be 1, which gives no useful
-                // modular constraint, so return 0 instead.
-                return (mn > 1) ? mn : 0;
-            }
-            // Variable-length body: GCD of min and max lengths
-            return u_gcd(mn, mx);
+            unsigned inner = compute_length_stride(r1);
+            // stride(r*) = gcd(min_length(r), stride(r))
+            // when inner=0 (fixed-length body), gcd(mn, 0) = mn → stride = mn
+            return u_gcd(mn, inner);
         }
 
         // r+ — one or more: same stride analysis as r*.
         if (seq.re.is_plus(re, r1)) {
             unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
+            unsigned inner = compute_length_stride(r1);
+            return u_gcd(mn, inner);
         }
 
         // r? — zero or one: lengths = {0} ∪ lengths(r)
@@ -150,23 +138,18 @@ namespace seq {
             return g;
         }
 
-        // loop(r, lo, hi): lengths = {lo·len(r), ..., hi·len(r)} if r is fixed.
-        // stride = len(r) when r is fixed-length; otherwise GCD.
+        // loop(r, lo, hi): the length of any word is a sum of lo..hi copies of
+        // lengths from L(r).  Since all lengths in L(r) are ≡ min_len(r) mod
+        // stride(r), the overall stride is gcd(min_len(r), stride(r)).
         if (seq.re.is_loop(re, r1, lo, hi)) {
             unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
+            unsigned inner = compute_length_stride(r1);
+            return u_gcd(mn, inner);
         }
         if (seq.re.is_loop(re, r1, lo)) {
             unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
+            unsigned inner = compute_length_stride(r1);
+            return u_gcd(mn, inner);
         }
 
         // intersection(r1, r2): lengths must be in both languages.
@@ -324,4 +307,80 @@ namespace seq {
         return false;
     }
 
+    // -----------------------------------------------------------------------
+    // minterm → char_set conversion
+    // -----------------------------------------------------------------------
+
+    // Convert a regex minterm expression to a char_set.
+    //
+    // Minterms are Boolean combinations of character-class predicates
+    // (re.range, re.full_char, complement, intersection) produced by
+    // sgraph::compute_minterms().  This function converts them to a
+    // concrete char_set so that fresh character variables introduced by
+    // apply_regex_var_split can be constrained with add_char_range().
+    //
+    // The implementation is recursive and handles:
+    //   re.full_char             → [0, max_char]  (full alphabet)
+    //   re.range(lo, hi)         → [lo, hi+1)     (hi inclusive in Z3)
+    //   re.complement(r)         → alphabet \ minterm_to_char_set(r)
+    //   re.inter(r1, r2)         → intersection of both sides
+    //   re.diff(r1, r2)          → r1 ∩ complement(r2)
+    //   re.to_re(str.unit(c))    → singleton {c}
+    //   re.empty                 → empty set
+    //   anything else            → [0, max_char]  (conservative)
+    char_set seq_parikh::minterm_to_char_set(expr* re_expr) {
+        unsigned max_c = seq.max_char();
+
+        if (!re_expr)
+            return char_set::full(max_c);
+
+        // full_char: the whole alphabet [0, max_char]
+        if (seq.re.is_full_char(re_expr))
+            return char_set::full(max_c);
+
+        // range [lo, hi] (hi inclusive in Z3's regex representation)
+        unsigned lo = 0, hi = 0;
+        if (seq.re.is_range(re_expr, lo, hi)) {
+            if (lo > hi) return char_set();
+            // char_range uses exclusive upper bound; Z3 hi is inclusive
+            return char_set(char_range(lo, hi + 1));
+        }
+
+        // complement: alphabet minus the inner set
+        expr* inner = nullptr;
+        if (seq.re.is_complement(re_expr, inner))
+            return minterm_to_char_set(inner).complement(max_c);
+
+        // intersection: characters present in both sets
+        expr* r1 = nullptr, *r2 = nullptr;
+        if (seq.re.is_intersection(re_expr, r1, r2))
+            return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2));
+
+        // difference: r1 minus r2 = r1 ∩ complement(r2)
+        if (seq.re.is_diff(re_expr, r1, r2))
+            return minterm_to_char_set(r1).intersect_with(
+                       minterm_to_char_set(r2).complement(max_c));
+
+        // to_re(str.unit(c)): singleton character set
+        expr* str_arg = nullptr;
+        if (seq.re.is_to_re(re_expr, str_arg)) {
+            expr* ch_expr = nullptr;
+            unsigned char_val = 0;
+            if (seq.str.is_unit(str_arg, ch_expr) &&
+                seq.is_const_char(ch_expr, char_val)) {
+                char_set cs;
+                cs.add(char_val);
+                return cs;
+            }
+        }
+
+        // empty regex: no characters can appear
+        if (seq.re.is_empty(re_expr))
+            return char_set();
+
+        // Conservative fallback: return the full alphabet so that
+        // no unsound constraints are added for unrecognised expressions.
+        return char_set::full(max_c);
+    }
+
 } // namespace seq
diff --git a/src/smt/seq/seq_parikh.h b/src/smt/seq/seq_parikh.h
index 730854832..8e8a4e178 100644
--- a/src/smt/seq/seq_parikh.h
+++ b/src/smt/seq/seq_parikh.h
@@ -126,6 +126,25 @@ namespace seq {
         // Compute the length stride of a regex expression.
         // Exposed for testing and external callers.
         unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
+
+        // Convert a regex minterm expression to a char_set.
+        //
+        // A minterm is a Boolean combination of character-class predicates
+        // (re.range, re.full_char, complement, intersection) that names a
+        // single, indivisible character equivalence class.  Minterms are
+        // produced by sgraph::compute_minterms and used in
+        // apply_regex_var_split to constrain fresh character variables.
+        //
+        // Supported expressions:
+        //   re.full_char       → full set [0, max_char]
+        //   re.range(lo, hi)   → char_set [lo, hi] (inclusive on both ends)
+        //   re.complement(r)   → complement of minterm_to_char_set(r)
+        //   re.inter(r1, r2)   → intersection of both sets
+        //   re.diff(r1, r2)    → r1 minus r2  (= r1 ∩ complement(r2))
+        //   re.to_re(unit(c))  → singleton {c}
+        //   re.empty           → empty set
+        //   anything else      → full set (conservative, sound over-approximation)
+        char_set minterm_to_char_set(expr* minterm_re);
     };
 
 } // namespace seq

From 30c2a9ccdc44cc56dcca408b1a5e1a3176504b2d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 17:23:00 +0000
Subject: [PATCH 11/13] Address code review: add SASSERT for lo>hi, simplify
 is_to_re handling

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/seq_parikh.cpp | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp
index 367412dad..3f6deba40 100644
--- a/src/smt/seq/seq_parikh.cpp
+++ b/src/smt/seq/seq_parikh.cpp
@@ -341,6 +341,8 @@ namespace seq {
         // range [lo, hi] (hi inclusive in Z3's regex representation)
         unsigned lo = 0, hi = 0;
         if (seq.re.is_range(re_expr, lo, hi)) {
+            // lo > hi is a degenerate range; should not arise from well-formed minterms
+            SASSERT(lo <= hi);
             if (lo > hi) return char_set();
             // char_range uses exclusive upper bound; Z3 hi is inclusive
             return char_set(char_range(lo, hi + 1));
@@ -363,15 +365,14 @@ namespace seq {
 
         // to_re(str.unit(c)): singleton character set
         expr* str_arg = nullptr;
-        if (seq.re.is_to_re(re_expr, str_arg)) {
-            expr* ch_expr = nullptr;
-            unsigned char_val = 0;
-            if (seq.str.is_unit(str_arg, ch_expr) &&
-                seq.is_const_char(ch_expr, char_val)) {
-                char_set cs;
-                cs.add(char_val);
-                return cs;
-            }
+        expr* ch_expr = nullptr;
+        unsigned char_val = 0;
+        if (seq.re.is_to_re(re_expr, str_arg) &&
+            seq.str.is_unit(str_arg, ch_expr) &&
+            seq.is_const_char(ch_expr, char_val)) {
+            char_set cs;
+            cs.add(char_val);
+            return cs;
         }
 
         // empty regex: no characters can appear

From 4b2f5e2bb0f1e634efc2ffa0359dea51713aee86 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 18:36:28 +0000
Subject: [PATCH 12/13] Add seq_parikh unit tests: 32 tests covering stride,
 constraints, conflict, char_set

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/test/CMakeLists.txt |   1 +
 src/test/main.cpp       |   1 +
 src/test/seq_parikh.cpp | 883 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 885 insertions(+)
 create mode 100644 src/test/seq_parikh.cpp

diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index acc598edf..1620bd790 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -132,6 +132,7 @@ add_executable(test-z3
   sls_test.cpp
   sls_seq_plugin.cpp
   seq_nielsen.cpp
+  seq_parikh.cpp
   nseq_basic.cpp
   nseq_regex.cpp
   nseq_zipt.cpp
diff --git a/src/test/main.cpp b/src/test/main.cpp
index 9fc8bf1ec..0f378df67 100644
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@@ -287,6 +287,7 @@ int main(int argc, char ** argv) {
     TST(scoped_vector);
     TST(sls_seq_plugin);
     TST(seq_nielsen);
+    TST(seq_parikh);
     TST(nseq_basic);
     TST(nseq_regex);
     TST(nseq_zipt);
diff --git a/src/test/seq_parikh.cpp b/src/test/seq_parikh.cpp
new file mode 100644
index 000000000..83bbc3da4
--- /dev/null
+++ b/src/test/seq_parikh.cpp
@@ -0,0 +1,883 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_parikh.cpp
+
+Abstract:
+
+    Unit tests for seq_parikh (Parikh image filter for the ZIPT Nielsen solver).
+
+    Tests cover:
+    - compute_length_stride / get_length_stride for all regex forms
+    - generate_parikh_constraints: constraint shape, count, and dependency
+    - apply_to_node: integration with nielsen_node
+    - check_parikh_conflict: lightweight feasibility pre-check
+    - minterm_to_char_set: regex-minterm to char_set conversion
+
+Author:
+
+    Clemens Eisenhofer 2026-03-11
+    Nikolaj Bjorner (nbjorner) 2026-03-11
+
+--*/
+
+#include "util/util.h"
+#include "util/zstring.h"
+#include "ast/euf/euf_egraph.h"
+#include "ast/euf/euf_sgraph.h"
+#include "smt/seq/seq_nielsen.h"
+#include "smt/seq/seq_parikh.h"
+#include "ast/arith_decl_plugin.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/ast_pp.h"
+#include <iostream>
+
+// ---------------------------------------------------------------------------
+// Minimal solver stub (no-op)
+// ---------------------------------------------------------------------------
+class parikh_test_solver : public seq::simple_solver {
+public:
+    void push() override {}
+    void pop(unsigned) override {}
+    void assert_expr(expr*) override {}
+    lbool check() override { return l_true; }
+};
+
+// ---------------------------------------------------------------------------
+// Helpers to build common regex expressions
+// ---------------------------------------------------------------------------
+
+// build to_re("AB") — a fixed two-character string regex
+static expr_ref mk_to_re_ab(ast_manager& m, seq_util& seq) {
+    expr_ref ch_a(seq.str.mk_char('A'), m);
+    expr_ref ch_b(seq.str.mk_char('B'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref unit_b(seq.str.mk_unit(ch_b), m);
+    expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m);
+    return expr_ref(seq.re.mk_to_re(ab), m);
+}
+
+// build (ab)* — star of the two-character sequence
+static expr_ref mk_ab_star(ast_manager& m, seq_util& seq) {
+    return expr_ref(seq.re.mk_star(mk_to_re_ab(m, seq)), m);
+}
+
+// build (abc)* — star of a three-character sequence
+static expr_ref mk_abc_star(ast_manager& m, seq_util& seq) {
+    expr_ref ch_a(seq.str.mk_char('A'), m);
+    expr_ref ch_b(seq.str.mk_char('B'), m);
+    expr_ref ch_c(seq.str.mk_char('C'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref unit_b(seq.str.mk_unit(ch_b), m);
+    expr_ref unit_c(seq.str.mk_unit(ch_c), m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+    expr_ref abc(seq.str.mk_concat(unit_a, seq.str.mk_concat(unit_b, unit_c)), m);
+    return expr_ref(seq.re.mk_star(seq.re.mk_to_re(abc)), m);
+}
+
+// build to_re("A") — single-character regex
+static expr_ref mk_to_re_a(ast_manager& m, seq_util& seq) {
+    expr_ref ch_a(seq.str.mk_char('A'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    return expr_ref(seq.re.mk_to_re(unit_a), m);
+}
+
+// ---------------------------------------------------------------------------
+// Stride tests: compute_length_stride / get_length_stride
+// ---------------------------------------------------------------------------
+
+// stride(to_re("AB")) == 0 (fixed length)
+static void test_stride_fixed_length() {
+    std::cout << "test_stride_fixed_length\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref re = mk_to_re_ab(m, seq);
+    SASSERT(parikh.get_length_stride(re) == 0);
+}
+
+// stride((ab)*) == 2
+static void test_stride_star_fixed_body() {
+    std::cout << "test_stride_star_fixed_body\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref re = mk_ab_star(m, seq);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride((ab)*) = " << stride << "\n";
+    SASSERT(stride == 2);
+}
+
+// stride((abc)*) == 3
+static void test_stride_star_three_char() {
+    std::cout << "test_stride_star_three_char\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref re = mk_abc_star(m, seq);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride((abc)*) = " << stride << "\n";
+    SASSERT(stride == 3);
+}
+
+// stride((ab)+) == 2
+static void test_stride_plus() {
+    std::cout << "test_stride_plus\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref re_body = mk_to_re_ab(m, seq);
+    expr_ref re(seq.re.mk_plus(re_body), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride((ab)+) = " << stride << "\n";
+    SASSERT(stride == 2);
+}
+
+// stride(a* b*) == 1 — union of independent stars → every length possible
+static void test_stride_concat_stars() {
+    std::cout << "test_stride_concat_stars\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref a_star(seq.re.mk_star(mk_to_re_a(m, seq)), m);
+    expr_ref b_star(seq.re.mk_star(mk_to_re_a(m, seq)), m);
+    expr_ref re(seq.re.mk_concat(a_star, b_star), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride(a* b*) = " << stride << "\n";
+    // both stars have stride 1 (single-char body → gcd(1,0)=1) → gcd(1,1)=1
+    SASSERT(stride == 1);
+}
+
+// stride((ab)* | (abc)*) == gcd(2,3) = 1
+static void test_stride_union_no_common_period() {
+    std::cout << "test_stride_union_no_common_period\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref ab_star = mk_ab_star(m, seq);
+    expr_ref abc_star = mk_abc_star(m, seq);
+    expr_ref re(seq.re.mk_union(ab_star, abc_star), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride((ab)*|(abc)*) = " << stride << "\n";
+    // lengths: {0,2,4,...} union {0,3,6,...} → GCD(2,3)=1
+    SASSERT(stride == 1);
+}
+
+// stride((ab)*|(de)*) == gcd(2,2) = 2
+static void test_stride_union_same_period() {
+    std::cout << "test_stride_union_same_period\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref ab_star = mk_ab_star(m, seq);
+    // de_star: (de)* — same stride 2
+    expr_ref ch_d(seq.str.mk_char('D'), m);
+    expr_ref ch_e(seq.str.mk_char('E'), m);
+    expr_ref unit_d(seq.str.mk_unit(ch_d), m);
+    expr_ref unit_e(seq.str.mk_unit(ch_e), m);
+    expr_ref de(seq.str.mk_concat(unit_d, unit_e), m);
+    expr_ref de_star(seq.re.mk_star(seq.re.mk_to_re(de)), m);
+    expr_ref re(seq.re.mk_union(ab_star, de_star), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride((ab)*|(de)*) = " << stride << "\n";
+    SASSERT(stride == 2);
+}
+
+// stride(loop((ab), 1, 3)) == 2 — loop with fixed-length body
+static void test_stride_loop() {
+    std::cout << "test_stride_loop\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref ab = mk_to_re_ab(m, seq);
+    expr_ref re(seq.re.mk_loop(ab, 1, 3), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride(loop(ab,1,3)) = " << stride << "\n";
+    SASSERT(stride == 2);
+}
+
+// stride(re.full_seq) == 1 (every length possible)
+static void test_stride_full_seq() {
+    std::cout << "test_stride_full_seq\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref re(seq.re.mk_full_seq(str_sort), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride(.*)  = " << stride << "\n";
+    SASSERT(stride == 1);
+}
+
+// stride(re.empty) == 0
+static void test_stride_empty_regex() {
+    std::cout << "test_stride_empty_regex\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref re(seq.re.mk_empty(str_sort), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride(empty) = " << stride << "\n";
+    SASSERT(stride == 0);
+}
+
+// stride(re.epsilon) == 0
+static void test_stride_epsilon() {
+    std::cout << "test_stride_epsilon\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    // epsilon is to_re("") — empty string
+    sort_ref str_s(seq.str.mk_string_sort(), m);
+    expr_ref empty_str(seq.str.mk_empty(str_s), m);
+    expr_ref re(seq.re.mk_to_re(empty_str), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride(epsilon) = " << stride << "\n";
+    SASSERT(stride == 0);
+}
+
+// stride((ab)?) == 2  (gcd(2, 0) = 2 via opt handling; min_len(ab)=2)
+static void test_stride_opt() {
+    std::cout << "test_stride_opt\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    expr_ref ab = mk_to_re_ab(m, seq);
+    expr_ref re(seq.re.mk_opt(ab), m);
+    unsigned stride = parikh.get_length_stride(re);
+    std::cout << "  stride((ab)?) = " << stride << "\n";
+    SASSERT(stride == 2);
+}
+
+// ---------------------------------------------------------------------------
+// generate_parikh_constraints tests
+// ---------------------------------------------------------------------------
+
+// (ab)* → len(x) = 0 + 2·k, k ≥ 0  (stride 2, min_len 0)
+static void test_generate_constraints_ab_star() {
+    std::cout << "test_generate_constraints_ab_star\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    arith_util arith(m);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_ab_star(m, seq);
+    euf::snode* regex = sg.mk(re);
+    seq::dep_tracker dep; dep.insert(0);
+    seq::str_mem mem(x, regex, nullptr, 0, dep);
+
+    vector<seq::int_constraint> out;
+    parikh.generate_parikh_constraints(mem, out);
+
+    // expect at least: len(x)=0+2k and k>=0
+    // (no upper bound because max_length is UINT_MAX for unbounded star)
+    std::cout << "  generated " << out.size() << " constraints\n";
+    SASSERT(out.size() >= 2);
+
+    // check that one constraint is an equality (len(x) = 0 + 2·k)
+    bool has_eq = false;
+    for (auto const& ic : out)
+        if (ic.m_kind == seq::int_constraint_kind::eq) has_eq = true;
+    SASSERT(has_eq);
+
+    // check that one constraint is k >= 0
+    bool has_ge = false;
+    for (auto const& ic : out)
+        if (ic.m_kind == seq::int_constraint_kind::ge) has_ge = true;
+    SASSERT(has_ge);
+
+    // should NOT have an upper bound (star is unbounded)
+    bool has_le = false;
+    for (auto const& ic : out)
+        if (ic.m_kind == seq::int_constraint_kind::le) has_le = true;
+    SASSERT(!has_le);
+}
+
+// loop((ab), 1, 3): bounded → k ≤ floor((6-2)/2) = 2
+static void test_generate_constraints_bounded_loop() {
+    std::cout << "test_generate_constraints_bounded_loop\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    // loop("ab", 1, 3): min_len=2, max_len=6, stride=2
+    expr_ref ab = mk_to_re_ab(m, seq);
+    expr_ref re(seq.re.mk_loop(ab, 1, 3), m);
+    euf::snode* regex = sg.mk(re);
+    seq::dep_tracker dep; dep.insert(0);
+    seq::str_mem mem(x, regex, nullptr, 0, dep);
+
+    vector<seq::int_constraint> out;
+    parikh.generate_parikh_constraints(mem, out);
+
+    // expect: eq + ge + le = 3 constraints
+    std::cout << "  generated " << out.size() << " constraints\n";
+    SASSERT(out.size() == 3);
+
+    bool has_eq = false, has_ge = false, has_le = false;
+    for (auto const& ic : out) {
+        if (ic.m_kind == seq::int_constraint_kind::eq)  has_eq = true;
+        if (ic.m_kind == seq::int_constraint_kind::ge)  has_ge = true;
+        if (ic.m_kind == seq::int_constraint_kind::le)  has_le = true;
+    }
+    SASSERT(has_eq);
+    SASSERT(has_ge);
+    SASSERT(has_le);
+}
+
+// stride == 1 → no constraints generated
+static void test_generate_constraints_stride_one() {
+    std::cout << "test_generate_constraints_stride_one\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    // full_seq: stride=1 → no modular constraint
+    expr_ref re(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* regex = sg.mk(re);
+    seq::dep_tracker dep; dep.insert(0);
+    seq::str_mem mem(x, regex, nullptr, 0, dep);
+
+    vector<seq::int_constraint> out;
+    parikh.generate_parikh_constraints(mem, out);
+    std::cout << "  generated " << out.size() << " constraints (expect 0)\n";
+    SASSERT(out.empty());
+}
+
+// fixed-length regex (min == max) → no constraints generated
+static void test_generate_constraints_fixed_length() {
+    std::cout << "test_generate_constraints_fixed_length\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_to_re_ab(m, seq);  // fixed len 2
+    euf::snode* regex = sg.mk(re);
+    seq::dep_tracker dep; dep.insert(0);
+    seq::str_mem mem(x, regex, nullptr, 0, dep);
+
+    vector<seq::int_constraint> out;
+    parikh.generate_parikh_constraints(mem, out);
+    std::cout << "  generated " << out.size() << " constraints (expect 0)\n";
+    SASSERT(out.empty());
+}
+
+// dependency is propagated to all generated constraints
+static void test_generate_constraints_dep_propagated() {
+    std::cout << "test_generate_constraints_dep_propagated\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_ab_star(m, seq);
+    euf::snode* regex = sg.mk(re);
+    seq::dep_tracker dep; dep.insert(7);
+    seq::str_mem mem(x, regex, nullptr, 0, dep);
+
+    vector<seq::int_constraint> out;
+    parikh.generate_parikh_constraints(mem, out);
+
+    // all generated constraints must carry bit 7 in their dependency
+    for (auto const& ic : out) {
+        SASSERT(!ic.m_dep.empty());
+        seq::dep_tracker d7; d7.insert(7);
+        SASSERT(d7.subset_of(ic.m_dep));
+    }
+    std::cout << "  all constraints carry dep bit 7\n";
+}
+
+// ---------------------------------------------------------------------------
+// apply_to_node tests
+// ---------------------------------------------------------------------------
+
+// applying to a node with one membership adds constraints to node
+static void test_apply_to_node_adds_constraints() {
+    std::cout << "test_apply_to_node_adds_constraints\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_ab_star(m, seq);  // stride 2 → generates constraints
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    // root node should have no int_constraints initially
+    SASSERT(ng.root() != nullptr);
+    unsigned before = ng.root()->int_constraints().size();
+
+    parikh.apply_to_node(*ng.root());
+
+    unsigned after = ng.root()->int_constraints().size();
+    std::cout << "  before=" << before << " after=" << after << "\n";
+    SASSERT(after > before);
+}
+
+// applying twice is idempotent (m_parikh_applied would prevent double-add
+// via nielsen_graph::apply_parikh_to_node, but seq_parikh::apply_to_node
+// itself does not guard — so calling apply_to_node directly adds again;
+// this test verifies the direct call does add, not the idempotency guard)
+static void test_apply_to_node_stride_one_no_constraints() {
+    std::cout << "test_apply_to_node_stride_one_no_constraints\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re(seq.re.mk_full_seq(str_sort), m);  // stride 1 → no constraints
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    unsigned before = ng.root()->int_constraints().size();
+    parikh.apply_to_node(*ng.root());
+    unsigned after = ng.root()->int_constraints().size();
+    std::cout << "  before=" << before << " after=" << after << " (expect no change)\n";
+    SASSERT(after == before);
+}
+
+// ---------------------------------------------------------------------------
+// check_parikh_conflict tests
+// ---------------------------------------------------------------------------
+
+// no conflict when var_lb=0, var_ub=UINT_MAX (unconstrained)
+static void test_check_conflict_unconstrained_no_conflict() {
+    std::cout << "test_check_conflict_unconstrained_no_conflict\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_ab_star(m, seq);  // stride 2, min_len 0
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    // no bounds set → default lb=0, ub=UINT_MAX → no conflict
+    bool conflict = parikh.check_parikh_conflict(*ng.root());
+    std::cout << "  conflict = " << conflict << " (expect 0)\n";
+    SASSERT(!conflict);
+}
+
+// conflict: lb=3, ub=5, stride=2, min_len=0
+// valid lengths: 0,2,4,6,... ∩ [3,5] = {4} → no conflict
+static void test_check_conflict_valid_k_exists() {
+    std::cout << "test_check_conflict_valid_k_exists\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_ab_star(m, seq);  // stride 2, min_len 0; lengths 0,2,4,...
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    // lb=3, ub=5: length 4 is achievable (k=2) → no conflict
+    seq::dep_tracker dep; dep.insert(0);
+    ng.root()->add_lower_int_bound(x, 3, dep);
+    ng.root()->add_upper_int_bound(x, 5, dep);
+
+    bool conflict = parikh.check_parikh_conflict(*ng.root());
+    std::cout << "  conflict = " << conflict << " (expect 0)\n";
+    SASSERT(!conflict);
+}
+
+// conflict: lb=3, ub=3, stride=2, min_len=0
+// valid lengths: {0,2,4,...} ∩ [3,3] = {} → conflict
+static void test_check_conflict_no_valid_k() {
+    std::cout << "test_check_conflict_no_valid_k\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_ab_star(m, seq);  // stride 2, min_len 0; lengths {0,2,4,...}
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    // lb=3, ub=3: only odd length 3 — never a multiple of 2 → conflict
+    seq::dep_tracker dep; dep.insert(0);
+    ng.root()->add_lower_int_bound(x, 3, dep);
+    ng.root()->add_upper_int_bound(x, 3, dep);
+
+    bool conflict = parikh.check_parikh_conflict(*ng.root());
+    std::cout << "  conflict = " << conflict << " (expect 1)\n";
+    SASSERT(conflict);
+}
+
+// conflict: lb=5, ub=5, stride=3, min_len=0
+// valid lengths of (abc)*: {0,3,6,...} ∩ {5} = {} → conflict
+static void test_check_conflict_abc_star() {
+    std::cout << "test_check_conflict_abc_star\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re = mk_abc_star(m, seq);  // stride 3, min_len 0; lengths {0,3,6,...}
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    // lb=5, ub=5 → no valid k (5 is not a multiple of 3) → conflict
+    seq::dep_tracker dep; dep.insert(0);
+    ng.root()->add_lower_int_bound(x, 5, dep);
+    ng.root()->add_upper_int_bound(x, 5, dep);
+
+    bool conflict = parikh.check_parikh_conflict(*ng.root());
+    std::cout << "  conflict = " << conflict << " (expect 1)\n";
+    SASSERT(conflict);
+}
+
+// no conflict for stride==1 regex even with narrow bounds
+static void test_check_conflict_stride_one_never_conflicts() {
+    std::cout << "test_check_conflict_stride_one_never_conflicts\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+    parikh_test_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    seq::seq_parikh parikh(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    expr_ref re(seq.re.mk_full_seq(str_sort), m);  // stride 1 → no constraint
+    euf::snode* regex = sg.mk(re);
+    ng.add_str_mem(x, regex);
+
+    seq::dep_tracker dep; dep.insert(0);
+    ng.root()->add_lower_int_bound(x, 7, dep);
+    ng.root()->add_upper_int_bound(x, 7, dep);
+
+    bool conflict = parikh.check_parikh_conflict(*ng.root());
+    std::cout << "  conflict = " << conflict << " (expect 0: stride=1 skipped)\n";
+    SASSERT(!conflict);
+}
+
+// ---------------------------------------------------------------------------
+// minterm_to_char_set tests
+// ---------------------------------------------------------------------------
+
+// re.full_char → full alphabet [0, max_char]
+static void test_minterm_full_char() {
+    std::cout << "test_minterm_full_char\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref re(seq.re.mk_full_char(str_sort), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+    std::cout << "  full_char char_count = " << cs.char_count() << "\n";
+    SASSERT(cs.is_full(seq.max_char()));
+}
+
+// re.empty → empty char_set
+static void test_minterm_empty_regex() {
+    std::cout << "test_minterm_empty_regex\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref re(seq.re.mk_empty(str_sort), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+    std::cout << "  empty regex → char_set empty: " << cs.is_empty() << "\n";
+    SASSERT(cs.is_empty());
+}
+
+// re.range('A','Z') → 26 characters
+static void test_minterm_range() {
+    std::cout << "test_minterm_range\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    // Z3 re.range takes string arguments "A" and "Z"
+    expr_ref lo_str(seq.str.mk_string(zstring("A")), m);
+    expr_ref hi_str(seq.str.mk_string(zstring("Z")), m);
+    expr_ref re(seq.re.mk_range(lo_str, hi_str), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+    std::cout << "  range(A,Z) char_count = " << cs.char_count() << "\n";
+    SASSERT(cs.char_count() == 26);
+    SASSERT(cs.contains('A'));
+    SASSERT(cs.contains('Z'));
+    SASSERT(!cs.contains('a'));
+}
+
+// complement of re.range('A','Z') should not contain A-Z
+static void test_minterm_complement() {
+    std::cout << "test_minterm_complement\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref lo_str(seq.str.mk_string(zstring("A")), m);
+    expr_ref hi_str(seq.str.mk_string(zstring("Z")), m);
+    expr_ref range(seq.re.mk_range(lo_str, hi_str), m);
+    expr_ref re(seq.re.mk_complement(range), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+
+    // complement of [A-Z] should not contain any letter in A-Z
+    for (unsigned c = 'A'; c <= 'Z'; ++c)
+        SASSERT(!cs.contains(c));
+    // but should contain e.g. '0'
+    SASSERT(cs.contains('0'));
+    std::cout << "  complement ok: A-Z excluded, '0' included\n";
+}
+
+// intersection of range('A','Z') and range('M','Z') == range('M','Z')
+static void test_minterm_intersection() {
+    std::cout << "test_minterm_intersection\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    expr_ref lo_az(seq.str.mk_string(zstring("A")), m);
+    expr_ref hi_az(seq.str.mk_string(zstring("Z")), m);
+    expr_ref lo_mz(seq.str.mk_string(zstring("M")), m);
+
+    expr_ref range_az(seq.re.mk_range(lo_az, hi_az), m);
+    expr_ref range_mz(seq.re.mk_range(lo_mz, hi_az), m);
+    expr_ref re(seq.re.mk_inter(range_az, range_mz), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+
+    // intersection [A-Z] ∩ [M-Z] = [M-Z]: 14 characters
+    std::cout << "  intersection [A-Z]∩[M-Z] char_count = " << cs.char_count() << "\n";
+    SASSERT(cs.char_count() == 14); // M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z
+    SASSERT(!cs.contains('A'));
+    SASSERT(cs.contains('M'));
+    SASSERT(cs.contains('Z'));
+}
+
+// diff(range('A','Z'), range('A','M')) == range('N','Z')
+static void test_minterm_diff() {
+    std::cout << "test_minterm_diff\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    expr_ref lo_az(seq.str.mk_string(zstring("A")), m);
+    expr_ref hi_az(seq.str.mk_string(zstring("Z")), m);
+    expr_ref lo_am(seq.str.mk_string(zstring("A")), m);
+    expr_ref hi_am(seq.str.mk_string(zstring("M")), m);
+
+    expr_ref range_az(seq.re.mk_range(lo_az, hi_az), m);
+    expr_ref range_am(seq.re.mk_range(lo_am, hi_am), m);
+    expr_ref re(seq.re.mk_diff(range_az, range_am), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+
+    // diff [A-Z] \ [A-M] = [N-Z]: 13 characters
+    std::cout << "  diff [A-Z]\\[A-M] char_count = " << cs.char_count() << "\n";
+    SASSERT(cs.char_count() == 13); // N..Z
+    SASSERT(!cs.contains('A'));
+    SASSERT(!cs.contains('M'));
+    SASSERT(cs.contains('N'));
+    SASSERT(cs.contains('Z'));
+}
+
+// to_re(unit('A')) → singleton {'A'}
+static void test_minterm_singleton() {
+    std::cout << "test_minterm_singleton\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    seq::seq_parikh parikh(sg);
+
+    expr_ref ch_a(seq.str.mk_char('A'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref re(seq.re.mk_to_re(unit_a), m);
+    char_set cs = parikh.minterm_to_char_set(re);
+
+    std::cout << "  singleton char_count = " << cs.char_count() << "\n";
+    SASSERT(cs.char_count() == 1);
+    SASSERT(cs.contains('A'));
+    SASSERT(!cs.contains('B'));
+}
+
+// nullptr → full set (conservative fallback)
+static void test_minterm_nullptr_is_full() {
+    std::cout << "test_minterm_nullptr_is_full\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::seq_parikh parikh(sg);
+    seq_util seq(m);
+
+    char_set cs = parikh.minterm_to_char_set(nullptr);
+    SASSERT(cs.is_full(seq.max_char()));
+    std::cout << "  nullptr → full set ok\n";
+}
+
+// ---------------------------------------------------------------------------
+// Entry point
+// ---------------------------------------------------------------------------
+
+void tst_seq_parikh() {
+    // stride tests
+    test_stride_fixed_length();
+    test_stride_star_fixed_body();
+    test_stride_star_three_char();
+    test_stride_plus();
+    test_stride_concat_stars();
+    test_stride_union_no_common_period();
+    test_stride_union_same_period();
+    test_stride_loop();
+    test_stride_full_seq();
+    test_stride_empty_regex();
+    test_stride_epsilon();
+    test_stride_opt();
+
+    // generate_parikh_constraints tests
+    test_generate_constraints_ab_star();
+    test_generate_constraints_bounded_loop();
+    test_generate_constraints_stride_one();
+    test_generate_constraints_fixed_length();
+    test_generate_constraints_dep_propagated();
+
+    // apply_to_node tests
+    test_apply_to_node_adds_constraints();
+    test_apply_to_node_stride_one_no_constraints();
+
+    // check_parikh_conflict tests
+    test_check_conflict_unconstrained_no_conflict();
+    test_check_conflict_valid_k_exists();
+    test_check_conflict_no_valid_k();
+    test_check_conflict_abc_star();
+    test_check_conflict_stride_one_never_conflicts();
+
+    // minterm_to_char_set tests
+    test_minterm_full_char();
+    test_minterm_empty_regex();
+    test_minterm_range();
+    test_minterm_complement();
+    test_minterm_intersection();
+    test_minterm_diff();
+    test_minterm_singleton();
+    test_minterm_nullptr_is_full();
+}

From a51ba544ea3e90d9d78dbddebc27c0829187f031 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 19:08:11 +0000
Subject: [PATCH 13/13] Fix typo: rename m_parith to m_parikh in
 seq_nielsen.h/.cpp

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/seq/seq_nielsen.cpp | 10 +++++-----
 src/smt/seq/seq_nielsen.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index 58fd96692..d2f3a9eec 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -433,11 +433,11 @@ namespace seq {
     nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
         m_sg(sg),
         m_solver(solver),
-        m_parith(alloc(seq_parikh, sg)) {
+        m_parikh(alloc(seq_parikh, sg)) {
     }
 
     nielsen_graph::~nielsen_graph() {
-        dealloc(m_parith);
+        dealloc(m_parikh);
         reset();
     }
 
@@ -1041,12 +1041,12 @@ namespace seq {
 
         // Generate modular length constraints (len(str) = min_len + stride·k, etc.)
         // and append them to the node's integer constraint list.
-        m_parith->apply_to_node(node);
+        m_parikh->apply_to_node(node);
 
         // Lightweight feasibility pre-check: does the Parikh modular constraint
         // contradict the variable's current integer bounds?  If so, mark this
         // node as a Parikh-image conflict immediately (avoids a solver call).
-        if (!node.is_currently_conflict() && m_parith->check_parikh_conflict(node)) {
+        if (!node.is_currently_conflict() && m_parikh->check_parikh_conflict(node)) {
             node.set_general_conflict(true);
             node.set_reason(backtrack_reason::parikh_image);
         }
@@ -2306,7 +2306,7 @@ namespace seq {
                 // generated from minterm m_i, ?c must belong to the character
                 // class described by m_i so that str ∈ derivative(R, m_i).
                 if (mt->get_expr()) {
-                    char_set cs = m_parith->minterm_to_char_set(mt->get_expr());
+                    char_set cs = m_parikh->minterm_to_char_set(mt->get_expr());
                     if (!cs.is_empty())
                         child->add_char_range(fresh_char, cs);
                 }
diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h
index 0be8ae274..36adcc9ae 100644
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@@ -718,7 +718,7 @@ namespace seq {
 
         // Parikh image filter: generates modular length constraints from regex
         // memberships.  Allocated in the constructor; owned by this graph.
-        seq_parikh*                   m_parith = nullptr;
+        seq_parikh*                   m_parikh = nullptr;
 
     public:
         // Construct with a caller-supplied solver.  Ownership is NOT transferred;