Rename nseq_parikh→seq_parikh; add m/seq/a member attributes to seq_parikh

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
2026-06-17 06:06:22 +00:00 · 2026-03-11 05:41:16 +00:00 · 2026-03-11 05:41:16 +00:00 · 213ddd36ba
commit 213ddd36ba
parent 4ac5315846
7 changed files with 474 additions and 447 deletions
--- a/src/smt/seq/CMakeLists.txt
+++ b/src/smt/seq/CMakeLists.txt
@ -1,6 +1,6 @@
 z3_add_component(smt_seq
  SOURCES
-    nseq_parikh.cpp
+    seq_parikh.cpp
    seq_nielsen.cpp
  COMPONENT_DEPENDENCIES
    euf
--- a/src/smt/seq/nseq_parikh.cpp
+++ b/src/smt/seq/nseq_parikh.cpp
@ -5,334 +5,11 @@ Module Name:

    nseq_parikh.cpp

-Abstract:
+Note:

-    Parikh image filter implementation for the ZIPT-based Nielsen string
-    solver.  See nseq_parith.h for the full design description.
-
-    The key operation is compute_length_stride(re), which performs a
-    structural traversal of the regex to find the period k such that all
-    string lengths in L(re) are congruent to min_length(re) modulo k.
-    The stride is used to generate modular length constraints that help
-    the integer subsolver prune infeasible Nielsen graph nodes.
-
-Author:
-
-    Clemens Eisenhofer 2026-03-10
-    Nikolaj Bjorner (nbjorner) 2026-03-10
+    This file is retained for backwards compatibility.
+    The canonical implementation is now smt/seq/seq_parikh.cpp.

 --*/
+// intentionally empty — see seq_parikh.cpp

-#include "smt/seq/nseq_parith.h"
-#include "ast/arith_decl_plugin.h"
-#include "ast/seq_decl_plugin.h"
-#include "util/mpz.h"
-#include <string>
-
-namespace seq {
-
-    nseq_parith::nseq_parith(euf::sgraph& sg)
-        : m_sg(sg), m_fresh_cnt(0) {}
-
-    expr_ref nseq_parith::mk_fresh_int_var() {
-        ast_manager& m = m_sg.get_manager();
-        arith_util arith(m);
-        std::string name = "pk!" + std::to_string(m_fresh_cnt++);
-        return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
-    }
-
-    // -----------------------------------------------------------------------
-    // Stride computation
-    // -----------------------------------------------------------------------
-
-    // compute_length_stride: structural traversal of regex expression.
-    //
-    // Return value semantics:
-    //   0 — fixed length (or empty language): no modular constraint needed
-    //         beyond the min == max bounds.
-    //   1 — all integer lengths ≥ min_len are achievable: no useful modular
-    //         constraint.
-    //   k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k):
-    //         modular constraint len(str) = min_len + k·j is useful.
-    unsigned nseq_parith::compute_length_stride(expr* re) {
-        if (!re) return 1;
-
-        seq_util& seq = m_sg.get_seq_util();
-        expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
-        unsigned lo = 0, hi = 0;
-
-        // Empty language: no strings exist; stride is irrelevant.
-        if (seq.re.is_empty(re))
-            return 0;
-
-        // Epsilon regex {""}: single fixed length 0.
-        if (seq.re.is_epsilon(re))
-            return 0;
-
-        // to_re(concrete_string): fixed-length, no modular constraint needed.
-        if (seq.re.is_to_re(re, s)) {
-            // min_length == max_length, covered by bounds.
-            return 0;
-        }
-
-        // Single character: range, full_char — fixed length 1.
-        if (seq.re.is_range(re) || seq.re.is_full_char(re))
-            return 0;
-
-        // full_seq (.* / Σ*): every length ≥ 0 is possible.
-        if (seq.re.is_full_seq(re))
-            return 1;
-
-        // r* — Kleene star.
-        // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
-        // If r has a fixed length k, then L(r*) = {0, k, 2k, ...} → stride k.
-        // If r has variable length, strides from different iterations combine
-        // by GCD.
-        if (seq.re.is_star(re, r1)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            // When the body has unbounded length (mx == UINT_MAX), different
-            // iterations can produce many different lengths, and the stride of
-            // the star as a whole degenerates to gcd(mn, mn) = mn (or 1 if
-            // mn == 1).  This is conservative: we use the body's min-length
-            // as the only available fixed quantity.
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx) {
-                // Fixed-length body: L(r*) = {0, mn, 2·mn, ...} → stride = mn.
-                // When mn == 1 the stride would be 1, which gives no useful
-                // modular constraint, so return 0 instead.
-                return (mn > 1) ? mn : 0;
-            }
-            // Variable-length body: GCD of min and max lengths
-            return u_gcd(mn, mx);
-        }
-
-        // r+ — one or more: same stride analysis as r*.
-        if (seq.re.is_plus(re, r1)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
-        }
-
-        // r? — zero or one: lengths = {0} ∪ lengths(r)
-        // stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length).
-        if (seq.re.is_opt(re, r1)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned inner = compute_length_stride(r1);
-            // L(r?) includes length 0 and all lengths of L(r).
-            // GCD(stride(r), min_len(r)) is a valid stride because:
-            //   - the gap from 0 to min_len(r) is min_len(r) itself, and
-            //   - subsequent lengths grow in steps governed by stride(r).
-            // A result > 1 gives a useful modular constraint; result == 1
-            // means every non-negative integer is achievable (no constraint).
-            if (inner == 0)
-                return u_gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
-            return u_gcd(inner, mn);
-        }
-
-        // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
-        if (seq.re.is_concat(re, r1, r2)) {
-            unsigned s1 = compute_length_stride(r1);
-            unsigned s2 = compute_length_stride(r2);
-            return u_gcd(s1, s2);
-        }
-
-        // union(r1, r2): lengths from either branch → need GCD of both
-        // strides and the difference between their minimum lengths.
-        if (seq.re.is_union(re, r1, r2)) {
-            unsigned s1 = compute_length_stride(r1);
-            unsigned s2 = compute_length_stride(r2);
-            unsigned m1 = seq.re.min_length(r1);
-            unsigned m2 = seq.re.min_length(r2);
-            unsigned d  = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
-            // Replace 0-strides with d for GCD computation:
-            // a fixed-length branch only introduces constraint via its offset.
-            unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
-            g = u_gcd(g, d);
-            return g;
-        }
-
-        // loop(r, lo, hi): lengths = {lo·len(r), ..., hi·len(r)} if r is fixed.
-        // stride = len(r) when r is fixed-length; otherwise GCD.
-        if (seq.re.is_loop(re, r1, lo, hi)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
-        }
-        if (seq.re.is_loop(re, r1, lo)) {
-            unsigned mn = seq.re.min_length(r1);
-            unsigned mx = seq.re.max_length(r1);
-            if (mx == UINT_MAX) mx = mn;
-            if (mn == mx)
-                return (mn > 1) ? mn : 0;
-            return u_gcd(mn, mx);
-        }
-
-        // intersection(r1, r2): lengths must be in both languages.
-        // A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid
-        // stride for the intersection (every length in the intersection is
-        // also in r1 and in r2).
-        if (seq.re.is_intersection(re, r1, r2)) {
-            unsigned s1 = compute_length_stride(r1);
-            unsigned s2 = compute_length_stride(r2);
-            return u_gcd(s1, s2);
-        }
-
-        // For complement, diff, reverse, derivative, of_pred, and anything
-        // else we cannot analyse statically: be conservative and return 1
-        // (no useful modular constraint rather than an unsound one).
-        return 1;
-    }
-
-    // -----------------------------------------------------------------------
-    // Constraint generation
-    // -----------------------------------------------------------------------
-
-    void nseq_parith::generate_parikh_constraints(str_mem const& mem,
-                                                   vector<int_constraint>& out) {
-        if (!mem.m_regex || !mem.m_str)
-            return;
-
-        ast_manager& m = m_sg.get_manager();
-        seq_util&    seq  = m_sg.get_seq_util();
-        arith_util   arith(m);
-
-        expr* re_expr = mem.m_regex->get_expr();
-        if (!re_expr || !seq.is_re(re_expr))
-            return;
-
-        // Length bounds from the regex.
-        unsigned min_len = seq.re.min_length(re_expr);
-        unsigned max_len = seq.re.max_length(re_expr);
-
-        // If min_len >= max_len the bounds already pin the length exactly
-        // (or the language is empty — empty language is detected by simplify_and_init
-        // via Brzozowski derivative / is_empty checks, not here).
-        // We only generate modular constraints when the length is variable.
-        if (min_len >= max_len)
-            return;
-
-        unsigned stride = compute_length_stride(re_expr);
-
-        // stride == 1: every integer length is possible — no useful constraint.
-        // stride == 0: fixed length or empty — handled by bounds.
-        if (stride <= 1)
-            return;
-
-        // Build len(str) as an arithmetic expression.
-        expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m);
-
-        // Introduce fresh integer variable k ≥ 0.
-        expr_ref k_var = mk_fresh_int_var();
-
-        // Constraint 1: len(str) = min_len + stride · k
-        expr_ref min_expr(arith.mk_int(min_len), m);
-        expr_ref stride_expr(arith.mk_int(stride), m);
-        expr_ref stride_k(arith.mk_mul(stride_expr, k_var), m);
-        expr_ref rhs(arith.mk_add(min_expr, stride_k), m);
-        out.push_back(int_constraint(len_str, rhs,
-                                     int_constraint_kind::eq, mem.m_dep, m));
-
-        // Constraint 2: k ≥ 0
-        expr_ref zero(arith.mk_int(0), m);
-        out.push_back(int_constraint(k_var, zero,
-                                     int_constraint_kind::ge, mem.m_dep, m));
-
-        // Constraint 3 (optional): k ≤ max_k when max_len is bounded.
-        // max_k = floor((max_len - min_len) / stride)
-        // This gives the solver an explicit upper bound on k.
-        // The subtraction is safe because min_len < max_len is guaranteed
-        // by the early return above.
-        if (max_len != UINT_MAX) {
-            SASSERT(max_len > min_len);
-            unsigned range = max_len - min_len;
-            unsigned max_k = range / stride;
-            expr_ref max_k_expr(arith.mk_int(max_k), m);
-            out.push_back(int_constraint(k_var, max_k_expr,
-                                         int_constraint_kind::le, mem.m_dep, m));
-        }
-    }
-
-    void nseq_parith::apply_to_node(nielsen_node& node) {
-        vector<int_constraint> constraints;
-        for (str_mem const& mem : node.str_mems())
-            generate_parikh_constraints(mem, constraints);
-        for (auto& ic : constraints)
-            node.add_int_constraint(ic);
-    }
-
-    // -----------------------------------------------------------------------
-    // Quick Parikh feasibility check (no solver call)
-    // -----------------------------------------------------------------------
-
-    // Returns true if a Parikh conflict is detected: there exists a membership
-    // str ∈ re for a single-variable str where the modular length constraint
-    //   len(str) = min_len + stride * k  (k ≥ 0)
-    // is inconsistent with the variable's current integer bounds [lb, ub].
-    //
-    // This check is lightweight — it uses only modular arithmetic on the already-
-    // known regex min/max lengths and the per-variable bounds stored in the node.
-    bool nseq_parith::check_parikh_conflict(nielsen_node& node) {
-        seq_util& seq = m_sg.get_seq_util();
-
-        for (str_mem const& mem : node.str_mems()) {
-            if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
-                continue;
-
-            expr* re_expr = mem.m_regex->get_expr();
-            if (!re_expr || !seq.is_re(re_expr))
-                continue;
-
-            unsigned min_len = seq.re.min_length(re_expr);
-            unsigned max_len = seq.re.max_length(re_expr);
-            if (min_len >= max_len) continue; // fixed or empty — no stride constraint
-
-            unsigned stride = compute_length_stride(re_expr);
-            if (stride <= 1) continue; // no useful modular constraint
-            // stride > 1 guaranteed from here onward.
-            SASSERT(stride > 1);
-
-            unsigned lb = node.var_lb(mem.m_str);
-            unsigned ub = node.var_ub(mem.m_str);
-
-            // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ?
-            //
-            // First find the smallest k satisfying the lower bound:
-            //   k_min = 0                          if min_len ≥ lb
-            //   k_min = ⌈(lb - min_len) / stride⌉  otherwise
-            //
-            // Then verify min_len + stride * k_min ≤ ub.
-            unsigned k_min = 0;
-            if (lb > min_len) {
-                unsigned gap = lb - min_len;
-                // Ceiling division: k_min = ceil(gap / stride).
-                // Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX.
-                // In that case k_min would be huge, and min_len + stride*k_min would
-                // also overflow ub → treat as a conflict immediately.
-                if (gap > UINT_MAX - (stride - 1)) {
-                    return true; // ceiling division would overflow → k_min too large
-                }
-                k_min = (gap + stride - 1) / stride;
-            }
-
-            // Overflow guard: stride * k_min may overflow unsigned.
-            unsigned len_at_k_min;
-            if (k_min > (UINT_MAX - min_len) / stride) {
-                // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict.
-                return true;
-            }
-            len_at_k_min = min_len + stride * k_min;
-
-            if (ub != UINT_MAX && len_at_k_min > ub)
-                return true; // no valid k exists → Parikh conflict
-        }
-        return false;
-    }
-
-} // namespace seq
--- a/src/smt/seq/nseq_parith.h
+++ b/src/smt/seq/nseq_parith.h
@ -5,124 +5,16 @@ Module Name:

    nseq_parith.h

-Abstract:
+Note:

-    Parikh image filter for the ZIPT-based Nielsen string solver.
-
-    Implements Parikh-based arithmetic constraint generation for
-    nielsen_node instances.  For a regex membership constraint str ∈ r,
-    the Parikh image of r constrains the multiset of characters in str.
-    This module computes the "length stride" (period) of the length
-    language of r and generates modular arithmetic constraints of the form
-
-        len(str) = min_len + stride · k    (k ≥ 0, k fresh integer)
-
-    which tighten the arithmetic subproblem beyond the simple min/max
-    length bounds already produced by nielsen_node::init_var_bounds_from_mems().
-
-    For example:
-      • str ∈ (ab)*  → min_len = 0, stride = 2  → len(str) = 2·k
-      • str ∈ a(bc)* → min_len = 1, stride = 2  → len(str) = 1 + 2·k
-      • str ∈ ab|abc → stride = 1 (no useful modular constraint)
-
-    The generated int_constraints are added to the node's integer constraint
-    set and discharged by the integer subsolver (see seq_nielsen.h,
-    simple_solver).
-
-    Implements the Parikh filter described in ZIPT
-    (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
-    replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic.
-
-Author:
-
-    Clemens Eisenhofer 2026-03-10
-    Nikolaj Bjorner (nbjorner) 2026-03-10
+    This file is retained for backwards compatibility.
+    The canonical header is now smt/seq/seq_parikh.h.

 --*/
 #pragma once
-
-#include "ast/euf/euf_sgraph.h"
-#include "smt/seq/seq_nielsen.h"
+#include "smt/seq/seq_parikh.h"

 namespace seq {
-
-    /**
-     * Parikh image filter: generates modular length constraints from
-     * regex membership constraints in a nielsen_node.
-     *
-     * Usage:
-     *   nseq_parith parith(sg);
-     *   parith.apply_to_node(node);      // adds constraints to node
-     *
-     * Or per-membership:
-     *   vector<int_constraint> out;
-     *   parith.generate_parikh_constraints(mem, out);
-     */
-    class nseq_parith {
-        euf::sgraph& m_sg;
-        unsigned     m_fresh_cnt;  // counter for fresh variable names
-
-        // Compute the stride (period) of the length language of a regex.
-        //
-        // The stride k satisfies: all lengths in L(re) are congruent to
-        // min_length(re) modulo k.  A stride of 1 means every integer
-        // length is possible (no useful modular constraint).  A stride of
-        // 0 is a sentinel meaning the language is empty or has a single
-        // fixed length (already captured by bounds).
-        //
-        // Examples:
-        //   stride(to_re("ab"))   = 0  (fixed length 2)
-        //   stride((ab)*)         = 2  (lengths 0, 2, 4, ...)
-        //   stride((abc)*)        = 3  (lengths 0, 3, 6, ...)
-        //   stride(a*b*)          = 1  (all lengths possible)
-        //   stride((ab)*(cd)*)    = 2  (lengths 0, 2, 4, ...)
-        //   stride((ab)*|(abc)*)  = 1  (lengths 0, 2, 3, 4, ...)
-        unsigned compute_length_stride(expr* re);
-
-        // Create a fresh integer variable (name "pk!N") for use as the
-        // Parikh multiplier variable k in len(str) = min_len + stride·k.
-        expr_ref mk_fresh_int_var();
-
-    public:
-        explicit nseq_parith(euf::sgraph& sg);
-
-        // Generate Parikh modular length constraints for one membership.
-        //
-        // When stride > 1 and min_len < max_len (bounds don't pin length exactly,
-        // and the language is non-empty):
-        //   adds: len(str) = min_len + stride · k   (equality)
-        //         k ≥ 0                              (non-negativity)
-        //         k ≤ (max_len - min_len) / stride   (upper bound, when max_len bounded)
-        // These tighten the integer constraint set for the subsolver.
-        // Dependencies are copied from mem.m_dep.
-        // Does nothing when min_len ≥ max_len (empty or fixed-length language).
-        void generate_parikh_constraints(str_mem const& mem,
-                                         vector<int_constraint>& out);
-
-        // Apply Parikh constraints to all memberships at a node.
-        // Calls generate_parikh_constraints for each str_mem in the node
-        // and appends the resulting int_constraints to node.int_constraints().
-        void apply_to_node(nielsen_node& node);
-
-        // Quick Parikh feasibility check (no solver call).
-        //
-        // For each single-variable membership str ∈ re, checks whether the
-        // modular constraint  len(str) = min_len + stride · k  (k ≥ 0)
-        // has any solution given the current per-variable bounds stored in
-        // node.var_lb(str) and node.var_ub(str).
-        //
-        // Returns true when a conflict is detected (no valid k exists for
-        // some membership).  The caller should then mark the node with
-        // backtrack_reason::parikh_image.
-        //
-        // This is a lightweight pre-check that avoids calling the integer
-        // subsolver.  It is sound (never returns true for a satisfiable node)
-        // but incomplete (may miss conflicts that require the full solver).
-        bool check_parikh_conflict(nielsen_node& node);
-
-        // Compute the length stride of a regex expression.
-        // Exposed for testing and external callers.
-        unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
-    };
-
-} // namespace seq
+    // backwards-compat alias
+    using nseq_parith = seq_parikh;
+}
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -20,7 +20,7 @@ Author:
 --*/

 #include "smt/seq/seq_nielsen.h"
-#include "smt/seq/nseq_parith.h"
+#include "smt/seq/seq_parikh.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/ast_pp.h"
 #include "util/hashtable.h"
@ -433,7 +433,7 @@ namespace seq {
    nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
        m_sg(sg),
        m_solver(solver),
-        m_parith(alloc(nseq_parith, sg)) {
+        m_parith(alloc(seq_parikh, sg)) {
    }

    nielsen_graph::~nielsen_graph() {
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -248,7 +248,7 @@ namespace seq {
    class nielsen_node;
    class nielsen_edge;
    class nielsen_graph;
-    class nseq_parith;  // Parikh image filter (defined in nseq_parith.h)
+    class seq_parikh;   // Parikh image filter (see seq_parikh.h)

    /**
     * Abstract interface for an incremental solver used by nielsen_graph
@ -718,7 +718,7 @@ namespace seq {

        // Parikh image filter: generates modular length constraints from regex
        // memberships.  Allocated in the constructor; owned by this graph.
-        nseq_parith*                  m_parith = nullptr;
+        seq_parikh*                   m_parith = nullptr;

    public:
        // Construct with a caller-supplied solver.  Ownership is NOT transferred;
--- a/src/smt/seq/seq_parikh.cpp
+++ b/src/smt/seq/seq_parikh.cpp
@ -0,0 +1,327 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_parikh.cpp
+
+Abstract:
+
+    Parikh image filter implementation for the ZIPT-based Nielsen string
+    solver.  See seq_parikh.h for the full design description.
+
+    The key operation is compute_length_stride(re), which performs a
+    structural traversal of the regex to find the period k such that all
+    string lengths in L(re) are congruent to min_length(re) modulo k.
+    The stride is used to generate modular length constraints that help
+    the integer subsolver prune infeasible Nielsen graph nodes.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-10
+    Nikolaj Bjorner (nbjorner) 2026-03-10
+
+--*/
+
+#include "smt/seq/seq_parikh.h"
+#include "util/mpz.h"
+#include <string>
+
+namespace seq {
+
+    seq_parikh::seq_parikh(euf::sgraph& sg)
+        : m(sg.get_manager()), seq(m), a(m), m_fresh_cnt(0) {}
+
+    expr_ref seq_parikh::mk_fresh_int_var() {
+        std::string name = "pk!" + std::to_string(m_fresh_cnt++);
+        return expr_ref(m.mk_fresh_const(name.c_str(), a.mk_int()), m);
+    }
+
+    // -----------------------------------------------------------------------
+    // Stride computation
+    // -----------------------------------------------------------------------
+
+    // compute_length_stride: structural traversal of regex expression.
+    //
+    // Return value semantics:
+    //   0 — fixed length (or empty language): no modular constraint needed
+    //         beyond the min == max bounds.
+    //   1 — all integer lengths ≥ min_len are achievable: no useful modular
+    //         constraint.
+    //   k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k):
+    //         modular constraint len(str) = min_len + k·j is useful.
+    unsigned seq_parikh::compute_length_stride(expr* re) {
+        if (!re) return 1;
+
+        expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
+        unsigned lo = 0, hi = 0;
+
+        // Empty language: no strings exist; stride is irrelevant.
+        if (seq.re.is_empty(re))
+            return 0;
+
+        // Epsilon regex {""}: single fixed length 0.
+        if (seq.re.is_epsilon(re))
+            return 0;
+
+        // to_re(concrete_string): fixed-length, no modular constraint needed.
+        if (seq.re.is_to_re(re, s)) {
+            // min_length == max_length, covered by bounds.
+            return 0;
+        }
+
+        // Single character: range, full_char — fixed length 1.
+        if (seq.re.is_range(re) || seq.re.is_full_char(re))
+            return 0;
+
+        // full_seq (.* / Σ*): every length ≥ 0 is possible.
+        if (seq.re.is_full_seq(re))
+            return 1;
+
+        // r* — Kleene star.
+        // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
+        // If r has a fixed length k, then L(r*) = {0, k, 2k, ...} → stride k.
+        // If r has variable length, strides from different iterations combine
+        // by GCD.
+        if (seq.re.is_star(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            // When the body has unbounded length (mx == UINT_MAX), different
+            // iterations can produce many different lengths, and the stride of
+            // the star as a whole degenerates to gcd(mn, mn) = mn (or 1 if
+            // mn == 1).  This is conservative: we use the body's min-length
+            // as the only available fixed quantity.
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx) {
+                // Fixed-length body: L(r*) = {0, mn, 2·mn, ...} → stride = mn.
+                // When mn == 1 the stride would be 1, which gives no useful
+                // modular constraint, so return 0 instead.
+                return (mn > 1) ? mn : 0;
+            }
+            // Variable-length body: GCD of min and max lengths
+            return u_gcd(mn, mx);
+        }
+
+        // r+ — one or more: same stride analysis as r*.
+        if (seq.re.is_plus(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;  // same conservative treatment as star
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return u_gcd(mn, mx);
+        }
+
+        // r? — zero or one: lengths = {0} ∪ lengths(r)
+        // stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length).
+        if (seq.re.is_opt(re, r1)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned inner = compute_length_stride(r1);
+            // L(r?) includes length 0 and all lengths of L(r).
+            // GCD(stride(r), min_len(r)) is a valid stride because:
+            //   - the gap from 0 to min_len(r) is min_len(r) itself, and
+            //   - subsequent lengths grow in steps governed by stride(r).
+            // A result > 1 gives a useful modular constraint; result == 1
+            // means every non-negative integer is achievable (no constraint).
+            if (inner == 0)
+                return u_gcd(mn, 0);   // gcd(mn, 0) = mn; useful when mn > 1
+            return u_gcd(inner, mn);
+        }
+
+        // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
+        if (seq.re.is_concat(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            return u_gcd(s1, s2);
+        }
+
+        // union(r1, r2): lengths from either branch → need GCD of both
+        // strides and the difference between their minimum lengths.
+        if (seq.re.is_union(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            unsigned m1 = seq.re.min_length(r1);
+            unsigned m2 = seq.re.min_length(r2);
+            unsigned d  = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
+            // Replace 0-strides with d for GCD computation:
+            // a fixed-length branch only introduces constraint via its offset.
+            unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
+            g = u_gcd(g, d);
+            return g;
+        }
+
+        // loop(r, lo, hi): lengths = {lo·len(r), ..., hi·len(r)} if r is fixed.
+        // stride = len(r) when r is fixed-length; otherwise GCD.
+        if (seq.re.is_loop(re, r1, lo, hi)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return u_gcd(mn, mx);
+        }
+        if (seq.re.is_loop(re, r1, lo)) {
+            unsigned mn = seq.re.min_length(r1);
+            unsigned mx = seq.re.max_length(r1);
+            if (mx == UINT_MAX) mx = mn;
+            if (mn == mx)
+                return (mn > 1) ? mn : 0;
+            return u_gcd(mn, mx);
+        }
+
+        // intersection(r1, r2): lengths must be in both languages.
+        // A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid
+        // stride for the intersection (every length in the intersection is
+        // also in r1 and in r2).
+        if (seq.re.is_intersection(re, r1, r2)) {
+            unsigned s1 = compute_length_stride(r1);
+            unsigned s2 = compute_length_stride(r2);
+            return u_gcd(s1, s2);
+        }
+
+        // For complement, diff, reverse, derivative, of_pred, and anything
+        // else we cannot analyse statically: be conservative and return 1
+        // (no useful modular constraint rather than an unsound one).
+        return 1;
+    }
+
+    // -----------------------------------------------------------------------
+    // Constraint generation
+    // -----------------------------------------------------------------------
+
+    void seq_parikh::generate_parikh_constraints(str_mem const& mem,
+                                                  vector<int_constraint>& out) {
+        if (!mem.m_regex || !mem.m_str)
+            return;
+
+        expr* re_expr = mem.m_regex->get_expr();
+        if (!re_expr || !seq.is_re(re_expr))
+            return;
+
+        // Length bounds from the regex.
+        unsigned min_len = seq.re.min_length(re_expr);
+        unsigned max_len = seq.re.max_length(re_expr);
+
+        // If min_len >= max_len the bounds already pin the length exactly
+        // (or the language is empty — empty language is detected by simplify_and_init
+        // via Brzozowski derivative / is_empty checks, not here).
+        // We only generate modular constraints when the length is variable.
+        if (min_len >= max_len)
+            return;
+
+        unsigned stride = compute_length_stride(re_expr);
+
+        // stride == 1: every integer length is possible — no useful constraint.
+        // stride == 0: fixed length or empty — handled by bounds.
+        if (stride <= 1)
+            return;
+
+        // Build len(str) as an arithmetic expression.
+        expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m);
+
+        // Introduce fresh integer variable k ≥ 0.
+        expr_ref k_var = mk_fresh_int_var();
+
+        // Constraint 1: len(str) = min_len + stride · k
+        expr_ref min_expr(a.mk_int(min_len), m);
+        expr_ref stride_expr(a.mk_int(stride), m);
+        expr_ref stride_k(a.mk_mul(stride_expr, k_var), m);
+        expr_ref rhs(a.mk_add(min_expr, stride_k), m);
+        out.push_back(int_constraint(len_str, rhs,
+                                     int_constraint_kind::eq, mem.m_dep, m));
+
+        // Constraint 2: k ≥ 0
+        expr_ref zero(a.mk_int(0), m);
+        out.push_back(int_constraint(k_var, zero,
+                                     int_constraint_kind::ge, mem.m_dep, m));
+
+        // Constraint 3 (optional): k ≤ max_k when max_len is bounded.
+        // max_k = floor((max_len - min_len) / stride)
+        // This gives the solver an explicit upper bound on k.
+        // The subtraction is safe because min_len < max_len is guaranteed
+        // by the early return above.
+        if (max_len != UINT_MAX) {
+            SASSERT(max_len > min_len);
+            unsigned range = max_len - min_len;
+            unsigned max_k = range / stride;
+            expr_ref max_k_expr(a.mk_int(max_k), m);
+            out.push_back(int_constraint(k_var, max_k_expr,
+                                         int_constraint_kind::le, mem.m_dep, m));
+        }
+    }
+
+    void seq_parikh::apply_to_node(nielsen_node& node) {
+        vector<int_constraint> constraints;
+        for (str_mem const& mem : node.str_mems())
+            generate_parikh_constraints(mem, constraints);
+        for (auto& ic : constraints)
+            node.add_int_constraint(ic);
+    }
+
+    // -----------------------------------------------------------------------
+    // Quick Parikh feasibility check (no solver call)
+    // -----------------------------------------------------------------------
+
+    // Returns true if a Parikh conflict is detected: there exists a membership
+    // str ∈ re for a single-variable str where the modular length constraint
+    //   len(str) = min_len + stride * k  (k ≥ 0)
+    // is inconsistent with the variable's current integer bounds [lb, ub].
+    //
+    // This check is lightweight — it uses only modular arithmetic on the already-
+    // known regex min/max lengths and the per-variable bounds stored in the node.
+    bool seq_parikh::check_parikh_conflict(nielsen_node& node) {
+        for (str_mem const& mem : node.str_mems()) {
+            if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
+                continue;
+
+            expr* re_expr = mem.m_regex->get_expr();
+            if (!re_expr || !seq.is_re(re_expr))
+                continue;
+
+            unsigned min_len = seq.re.min_length(re_expr);
+            unsigned max_len = seq.re.max_length(re_expr);
+            if (min_len >= max_len) continue; // fixed or empty — no stride constraint
+
+            unsigned stride = compute_length_stride(re_expr);
+            if (stride <= 1) continue; // no useful modular constraint
+            // stride > 1 guaranteed from here onward.
+            SASSERT(stride > 1);
+
+            unsigned lb = node.var_lb(mem.m_str);
+            unsigned ub = node.var_ub(mem.m_str);
+
+            // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ?
+            //
+            // First find the smallest k satisfying the lower bound:
+            //   k_min = 0                          if min_len ≥ lb
+            //   k_min = ⌈(lb - min_len) / stride⌉  otherwise
+            //
+            // Then verify min_len + stride * k_min ≤ ub.
+            unsigned k_min = 0;
+            if (lb > min_len) {
+                unsigned gap = lb - min_len;
+                // Ceiling division: k_min = ceil(gap / stride).
+                // Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX.
+                // In that case k_min would be huge, and min_len + stride*k_min would
+                // also overflow ub → treat as a conflict immediately.
+                if (gap > UINT_MAX - (stride - 1)) {
+                    return true; // ceiling division would overflow → k_min too large
+                }
+                k_min = (gap + stride - 1) / stride;
+            }
+
+            // Overflow guard: stride * k_min may overflow unsigned.
+            unsigned len_at_k_min;
+            if (k_min > (UINT_MAX - min_len) / stride) {
+                // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict.
+                return true;
+            }
+            len_at_k_min = min_len + stride * k_min;
+
+            if (ub != UINT_MAX && len_at_k_min > ub)
+                return true; // no valid k exists → Parikh conflict
+        }
+        return false;
+    }
+
+} // namespace seq
--- a/src/smt/seq/seq_parikh.h
+++ b/src/smt/seq/seq_parikh.h
@ -0,0 +1,131 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_parikh.h
+
+Abstract:
+
+    Parikh image filter for the ZIPT-based Nielsen string solver.
+
+    Implements Parikh-based arithmetic constraint generation for
+    nielsen_node instances.  For a regex membership constraint str ∈ r,
+    the Parikh image of r constrains the multiset of characters in str.
+    This module computes the "length stride" (period) of the length
+    language of r and generates modular arithmetic constraints of the form
+
+        len(str) = min_len + stride · k    (k ≥ 0, k fresh integer)
+
+    which tighten the arithmetic subproblem beyond the simple min/max
+    length bounds already produced by nielsen_node::init_var_bounds_from_mems().
+
+    For example:
+      • str ∈ (ab)*  → min_len = 0, stride = 2  → len(str) = 2·k
+      • str ∈ a(bc)* → min_len = 1, stride = 2  → len(str) = 1 + 2·k
+      • str ∈ ab|abc → stride = 1 (no useful modular constraint)
+
+    The generated int_constraints are added to the node's integer constraint
+    set and discharged by the integer subsolver (see seq_nielsen.h,
+    simple_solver).
+
+    Implements the Parikh filter described in ZIPT
+    (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
+    replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-10
+    Nikolaj Bjorner (nbjorner) 2026-03-10
+
+--*/
+#pragma once
+
+#include "ast/arith_decl_plugin.h"
+#include "ast/seq_decl_plugin.h"
+#include "smt/seq/seq_nielsen.h"
+
+namespace seq {
+
+    /**
+     * Parikh image filter: generates modular length constraints from
+     * regex membership constraints in a nielsen_node.
+     *
+     * Usage:
+     *   seq_parikh parikh(sg);
+     *   parikh.apply_to_node(node);      // adds constraints to node
+     *
+     * Or per-membership:
+     *   vector<int_constraint> out;
+     *   parikh.generate_parikh_constraints(mem, out);
+     */
+    class seq_parikh {
+        ast_manager& m;
+        seq_util     seq;
+        arith_util   a;
+        unsigned     m_fresh_cnt;  // counter for fresh variable names
+
+        // Compute the stride (period) of the length language of a regex.
+        //
+        // The stride k satisfies: all lengths in L(re) are congruent to
+        // min_length(re) modulo k.  A stride of 1 means every integer
+        // length is possible (no useful modular constraint).  A stride of
+        // 0 is a sentinel meaning the language is empty or has a single
+        // fixed length (already captured by bounds).
+        //
+        // Examples:
+        //   stride(to_re("ab"))   = 0  (fixed length 2)
+        //   stride((ab)*)         = 2  (lengths 0, 2, 4, ...)
+        //   stride((abc)*)        = 3  (lengths 0, 3, 6, ...)
+        //   stride(a*b*)          = 1  (all lengths possible)
+        //   stride((ab)*(cd)*)    = 2  (lengths 0, 2, 4, ...)
+        //   stride((ab)*|(abc)*)  = 1  (lengths 0, 2, 3, 4, ...)
+        unsigned compute_length_stride(expr* re);
+
+        // Create a fresh integer variable (name "pk!N") for use as the
+        // Parikh multiplier variable k in len(str) = min_len + stride·k.
+        expr_ref mk_fresh_int_var();
+
+    public:
+        explicit seq_parikh(euf::sgraph& sg);
+
+        // Generate Parikh modular length constraints for one membership.
+        //
+        // When stride > 1 and min_len < max_len (bounds don't pin length exactly,
+        // and the language is non-empty):
+        //   adds: len(str) = min_len + stride · k   (equality)
+        //         k ≥ 0                              (non-negativity)
+        //         k ≤ (max_len - min_len) / stride   (upper bound, when max_len bounded)
+        // These tighten the integer constraint set for the subsolver.
+        // Dependencies are copied from mem.m_dep.
+        // Does nothing when min_len ≥ max_len (empty or fixed-length language).
+        void generate_parikh_constraints(str_mem const& mem,
+                                         vector<int_constraint>& out);
+
+        // Apply Parikh constraints to all memberships at a node.
+        // Calls generate_parikh_constraints for each str_mem in the node
+        // and appends the resulting int_constraints to node.int_constraints().
+        void apply_to_node(nielsen_node& node);
+
+        // Quick Parikh feasibility check (no solver call).
+        //
+        // For each single-variable membership str ∈ re, checks whether the
+        // modular constraint  len(str) = min_len + stride · k  (k ≥ 0)
+        // has any solution given the current per-variable bounds stored in
+        // node.var_lb(str) and node.var_ub(str).
+        //
+        // Returns true when a conflict is detected (no valid k exists for
+        // some membership).  The caller should then mark the node with
+        // backtrack_reason::parikh_image.
+        //
+        // This is a lightweight pre-check that avoids calling the integer
+        // subsolver.  It is sound (never returns true for a satisfiable node)
+        // but incomplete (may miss conflicts that require the full solver).
+        bool check_parikh_conflict(nielsen_node& node);
+
+        // Compute the length stride of a regex expression.
+        // Exposed for testing and external callers.
+        unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
+    };
+
+} // namespace seq