Merge branch 'c3' into copilot/add-parikh-filter-implementation-again

2026-05-26 03:46:22 +00:00 · 2026-03-11 19:27:03 -07:00 · 2026-03-11 19:27:03 -07:00 · 6fa3c7eabb
commit 6fa3c7eabb
parent a51ba544ea db7db05eff
6 changed files with 1157 additions and 230 deletions
--- a/src/smt/nseq_context_solver.h
+++ b/src/smt/nseq_context_solver.h
@ -59,6 +59,10 @@ namespace smt {
        void pop(unsigned num_scopes) override {
            m_kernel.pop(num_scopes);
        }
+
+        void get_model(model_ref& mdl) override {
+            m_kernel.get_model(mdl);
+        }
    };

 }
--- a/src/smt/nseq_model.cpp
+++ b/src/smt/nseq_model.cpp
@ -35,6 +35,8 @@ namespace smt {
        m_var_values.reset();
        m_var_regex.reset();
        m_trail.reset();
+        m_int_model = nullptr;
+        m_mg = &mg;

        m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
        mg.register_factory(m_factory);
@ -42,12 +44,12 @@ namespace smt {
        register_existing_values(nielsen);
        collect_var_regex_constraints(state);

+        // solve integer constraints from the sat_path FIRST so that
+        // m_int_model is available when snode_to_value evaluates power exponents
+        nielsen.solve_sat_path_ints(m_int_model);
+
        // extract variable assignments from the satisfying leaf's substitution path
-        seq::nielsen_node const* sat = nielsen.sat_node();
-        IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null")
-            << " path_len=" << nielsen.sat_path().size() << "\n";);
        extract_assignments(nielsen.sat_path());
-        IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";);
    }

    model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
@ -103,6 +105,8 @@ namespace smt {
        m_var_values.reset();
        m_var_regex.reset();
        m_trail.reset();
+        m_int_model = nullptr;
+        m_mg = nullptr;
        m_factory = nullptr;
    }

@ -175,6 +179,68 @@ namespace smt {
            return expr_ref(m);
        }

+        if (n->is_power()) {
+            SASSERT(n->num_args() == 2);
+            // Evaluate the base and exponent to produce a concrete string.
+            // The base is a string snode; the exponent is an integer expression
+            // whose value comes from the sat_path integer model.
+            expr_ref base_val = snode_to_value(n->arg(0));
+            if (!base_val)
+                return expr_ref(m);
+
+            euf::snode* exp_snode = n->arg(1);
+            expr* exp_expr = exp_snode ? exp_snode->get_expr() : nullptr;
+            rational exp_val;
+            arith_util arith(m);
+
+            // Try to evaluate exponent: first check if it's a numeral,
+            // then try the int model from sat_path constraints,
+            // finally fall back to the proto_model from model_generator.
+            if (exp_expr && arith.is_numeral(exp_expr, exp_val)) {
+                // already concrete
+            } else if (exp_expr && m_int_model.get()) {
+                expr_ref result(m);
+                if (m_int_model->eval_expr(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
+                    // evaluated from int model
+                } else if (m_mg) {
+                    proto_model& pm = m_mg->get_model();
+                    if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
+                        // evaluated from proto_model
+                    } else {
+                        exp_val = rational(0);
+                    }
+                } else {
+                    exp_val = rational(0);
+                }
+            } else if (exp_expr && m_mg) {
+                expr_ref result(m);
+                proto_model& pm = m_mg->get_model();
+                if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
+                    // evaluated from proto_model
+                } else {
+                    exp_val = rational(0);
+                }
+            } else {
+                exp_val = rational(0);
+            }
+
+            if (exp_val.is_neg())
+                exp_val = rational(0);
+
+            // Build the repeated string: base^exp_val
+            if (exp_val.is_zero())
+                return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m);
+            if (exp_val.is_one())
+                return base_val;
+
+            // For small exponents, concatenate directly
+            unsigned n_val = exp_val.get_unsigned();
+            expr_ref acc(base_val);
+            for (unsigned i = 1; i < n_val; ++i)
+                acc = m_seq.str.mk_concat(acc, base_val);
+            return acc;
+        }
+
        // fallback: use the underlying expression
        expr* e = n->get_expr();
        return e ? expr_ref(e, m) : expr_ref(m);
--- a/src/smt/nseq_model.h
+++ b/src/smt/nseq_model.h
@ -61,6 +61,10 @@ namespace smt {
        // trail for GC protection of generated expressions
        expr_ref_vector m_trail;

+        // integer variable model from sat_path constraints
+        model_ref m_int_model;
+        model_generator* m_mg = nullptr;
+
        // per-variable regex constraints: maps snode id -> intersected regex snode.
        // collected during init() from the state's str_mem list.
        u_map<euf::snode*> m_var_regex;
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -241,6 +241,7 @@ Author:
 #include "ast/seq_decl_plugin.h"
 #include "ast/euf/euf_sgraph.h"
 #include <functional>
+#include "model/model.h"

 namespace seq {

@ -265,6 +266,7 @@ namespace seq {
        virtual void    assert_expr(expr* e) = 0;
        virtual void    push() = 0;
        virtual void    pop(unsigned num_scopes) = 0;
+        virtual void    get_model(model_ref& mdl) { mdl = nullptr; }
    };

    // simplification result for constraint processing
@ -373,6 +375,7 @@ namespace seq {
    };

    // string variable substitution: var -> replacement
+    // (can be used as well to substitute arbitrary nodes - like powers)
    // mirrors ZIPT's Subst
    struct nielsen_subst {
        euf::snode* m_var;
@ -523,6 +526,11 @@ namespace seq {
        // Parikh filter: set to true once apply_parikh_to_node has been applied
        // to this node. Prevents duplicate constraint generation across DFS runs.
        bool                    m_parikh_applied = false;
+        // number of int_constraints inherited from the parent node at clone time.
+        // int_constraints[0..m_parent_ic_count) are already asserted at the
+        // parent's solver scope; only [m_parent_ic_count..end) need to be
+        // asserted when this node's solver scope is entered.
+        unsigned                m_parent_ic_count = 0;

    public:
        nielsen_node(nielsen_graph* graph, unsigned id);
@ -618,8 +626,10 @@ namespace seq {
        void apply_subst(euf::sgraph& sg, nielsen_subst const& s);

        // simplify all constraints at this node and initialize status.
+        // cur_path provides the path from root to this node so that the
+        // LP solver can be queried for deterministic power cancellation.
        // Returns proceed, conflict, satisfied, or restart.
-        simplify_result simplify_and_init(nielsen_graph& g);
+        simplify_result simplify_and_init(nielsen_graph& g, svector<nielsen_edge*> const& cur_path = svector<nielsen_edge*>());

        // true if all str_eqs are trivial and there are no str_mems
        bool is_satisfied() const;
@ -665,11 +675,13 @@ namespace seq {
        unsigned m_num_simplify_conflict = 0;
        unsigned m_num_extensions      = 0;
        unsigned m_num_fresh_vars      = 0;
+        unsigned m_num_arith_infeasible = 0;
        unsigned m_max_depth           = 0;
        // modifier application counts
        unsigned m_mod_det             = 0;
        unsigned m_mod_power_epsilon   = 0;
        unsigned m_mod_num_cmp         = 0;
+        unsigned m_mod_split_power_elim = 0;
        unsigned m_mod_const_num_unwinding = 0;
        unsigned m_mod_eq_split        = 0;
        unsigned m_mod_star_intr       = 0;
@ -686,6 +698,7 @@ namespace seq {
    // the overall Nielsen transformation graph
    // mirrors ZIPT's NielsenGraph
    class nielsen_graph {
+        friend class nielsen_node;
        euf::sgraph&                  m_sg;
        region                        m_region;
        ptr_vector<nielsen_node>      m_nodes;
@ -822,6 +835,13 @@ namespace seq {
        // max_len == UINT_MAX means unbounded.
        void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);

+        // solve all integer constraints along the sat_path and return
+        // a model mapping integer variables to concrete values.
+        // Must be called after solve() returns sat.
+        // Returns true if a satisfying model was found.
+        // Caller takes ownership of the returned model pointer.
+        bool solve_sat_path_ints(model_ref& mdl);
+
    private:
        search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);

@ -890,6 +910,15 @@ namespace seq {
        // mirrors ZIPT's NumCmpModifier
        bool apply_num_cmp(nielsen_node* node);

+        // CommPower-based power elimination split: when one side starts with
+        // a power w^p and CommPower finds c base-pattern occurrences on the
+        // other side but the ordering between p and c is unknown, branch:
+        //   (1) p < c   (2) c ≤ p
+        // After branching, simplify_and_init's CommPower pass resolves the
+        // cancellation deterministically.
+        // mirrors ZIPT's SplitPowerElim + NumCmpModifier
+        bool apply_split_power_elim(nielsen_node* node);
+
        // constant numeric unwinding: for a power token u^n vs a constant
        // (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
        // mirrors ZIPT's ConstNumUnwindingModifier
@ -933,7 +962,7 @@ namespace seq {
        euf::snode* find_power_token(nielsen_node* node) const;

        // find a power token facing a constant (char) head
-        bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
+        bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;

        // find a power token facing a variable head
        bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
@ -949,16 +978,26 @@ namespace seq {
        // Mirrors ZIPT's Constraint.Shared forwarding mechanism.
        void assert_root_constraints_to_solver();

-        // collect int_constraints along the path from root to the given node,
-        // including constraints from edges and nodes.
-        void collect_path_int_constraints(nielsen_node* node,
-                                          svector<nielsen_edge*> const& cur_path,
-                                          vector<int_constraint>& out);
+        // Assert the int_constraints of `node` that are new relative to its
+        // parent (indices [m_parent_ic_count..end)) into the current solver scope.
+        // Called by search_dfs after simplify_and_init so that the newly derived
+        // bounds become visible to subsequent check() and check_lp_le() calls.
+        void assert_node_new_int_constraints(nielsen_node* node);

        // check integer feasibility of the constraints along the current path.
-        // returns true if feasible, false if infeasible.
+        // returns true if feasible (including unknown), false only if l_false.
+        // Precondition: all path constraints have been incrementally asserted to
+        // m_solver by search_dfs via push/pop, so a plain check() suffices.
+        // l_undef (resource limit / timeout) is treated as feasible so that the
+        // search continues rather than reporting a false unsatisfiability.
        bool check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path);

+        // check whether lhs <= rhs is implied by the path constraints.
+        // mirrors ZIPT's NielsenNode.IsLe(): temporarily asserts NOT(lhs <= rhs)
+        // and returns true iff the result is unsatisfiable (i.e., lhs <= rhs is
+        // entailed).  Path constraints are already in the solver incrementally.
+        bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* node, svector<nielsen_edge*> const& cur_path);
+
        // create an integer constraint: lhs <kind> rhs
        int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep);

--- a/src/test/nseq_zipt.cpp
+++ b/src/test/nseq_zipt.cpp
@ -598,75 +598,83 @@ static void test_zipt_parikh() {
 static void test_tricky_str_equations() {
    std::cout << "test_tricky_str_equations\n";

-    // --- SAT: commutativity / rotation / symmetry ---
+    // ---------------------------------------------------------------
+    // SAT — Conjugacy equations: Xw₂ = w₁X
+    // SAT iff w₂ is a rotation of w₁.  In that case w₁ = qp, w₂ = pq
+    // and X = (qp)^n · q is a family of solutions (n ≥ 0).
+    // All of these are UNSAT for X = ε (the ground parts don't match),
+    // so the solver must find a non-trivial witness.
+    // ---------------------------------------------------------------

-    // XY = YX   (classic commutativity; witness: X="ab", Y="abab")
-    VERIFY(eq_sat("XY", "YX"));
+    // "ba" is a rotation of "ab" (p="b", q="a").  X = "a".
+    VERIFY(eq_sat("Xba",  "abX"));

-    // Xab = abX   (X commutes with the word "ab"; witness: X="ab")
-    VERIFY(eq_sat("Xab", "abX"));
+    // "cb" is a rotation of "bc" (p="c", q="b").  X = "b".
+    VERIFY(eq_sat("Xcb",  "bcX"));

-    // XaY = YaX   (swap-symmetric; witness: X=Y=any, e.g. X=Y="b")
-    VERIFY(eq_sat("XaY", "YaX"));
+    // "bca" is a rotation of "abc" (p="bc", q="a").  X = "a".
+    VERIFY(eq_sat("Xbca", "abcX"));

-    // XYX = YXY   (Markov-type; witness: X=Y)
-    VERIFY(eq_sat("XYX", "YXY"));
+    // "cab" is a rotation of "abc" (p="c", q="ab").  X = "ab".
+    VERIFY(eq_sat("Xcab", "abcX"));

-    // XYZ = ZYX   (reverse-palindrome; witness: X="a",Y="b",Z="a")
-    VERIFY(eq_sat("XYZ", "ZYX"));
+    // ---------------------------------------------------------------
+    // SAT — Power decomposition (ε NOT a solution)
+    // ---------------------------------------------------------------

-    // XabY = YabX   (rotation-like; witness: X="",Y="ab")
-    VERIFY(eq_sat("XabY", "YabX"));
+    // XX = aa  ⇒  X = "a".  (ε gives "" ≠ "aa")
+    VERIFY(eq_sat("XX", "aa"));

-    // aXYa = aYXa   (cancel outer 'a'; reduces to XY=YX; witness: X=Y="")
-    VERIFY(eq_sat("aXYa", "aYXa"));
+    // XbX = aba  ⇒  2|X| + 1 = 3 forces |X| = 1; X = "a".
+    //   (ε gives "b" ≠ "aba")
+    VERIFY(eq_sat("XbX", "aba"));

-    // XaXb = YaYb   (both halves share variable; witness: X=Y)
-    VERIFY(eq_sat("XaXb", "YaYb"));
+    // ---------------------------------------------------------------
+    // SAT — Multi-variable (ε NOT a solution)
+    // ---------------------------------------------------------------

-    // abXba = Xabba   (witness: X="" gives "abba"="abba")
-    VERIFY(eq_sat("abXba", "Xabba"));
+    // X = "b", Y = "a" gives "baab" = "baab".  (ε gives "ab" ≠ "ba")
+    VERIFY(eq_sat("XaYb", "bYaX"));

-    // --- UNSAT: first-character mismatch ---
+    // X = "b", Y = "a" gives "abba" = "abba".  (ε gives "ab" ≠ "ba")
+    VERIFY(eq_sat("abXY", "YXba"));

-    // abXba = baXab   (LHS starts 'a', RHS starts 'b')
-    VERIFY(eq_unsat("abXba", "baXab"));
+    // ---------------------------------------------------------------
+    // UNSAT — Non-conjugate rotation
+    // Xw₂ = w₁X where w₂ is NOT a rotation of w₁.
+    // Heads are variable vs char, so never a trivial first-char clash.
+    // GPowerIntr introduces periodicity, then the period boundaries
+    // give a character mismatch.
+    // ---------------------------------------------------------------

-    // XabX = XbaX   (cancel X prefix/suffix → "ab"="ba"; 'a'≠'b')
-    VERIFY(eq_unsat("XabX",  "XbaX"));
+    // "cba" is the reverse of "abc", NOT a rotation.
+    // Rotations of "abc" are: abc, bca, cab.
+    VERIFY(eq_unsat("Xcba", "abcX"));

-    // --- UNSAT: mismatch exposed after cancellation ---
+    // "acb" is a transposition of "abc", NOT a rotation.
+    VERIFY(eq_unsat("Xacb", "abcX"));

-    // XaYb = XbYa   (cancel X prefix → aYb=bYa; first char 'a'≠'b')
-    VERIFY(eq_unsat("XaYb",  "XbYa"));
+    // ---------------------------------------------------------------
+    // UNSAT — Induction via GPowerIntr
+    // One side starts with a variable that also appears on the other
+    // side behind a ground prefix → self-cycle.  GPowerIntr forces
+    // periodicity; all period branches yield character mismatches.
+    // None of these has a trivial first-char or last-char clash.
+    // ---------------------------------------------------------------

-    // XaYbX = XbYaX   (cancel X prefix+suffix → aYb=bYa; first char 'a'≠'b')
-    VERIFY(eq_unsat("XaYbX", "XbYaX"));
+    // Xa = bX:  LHS head = X.  Scan RHS: [b], X → self-cycle, base "b".
+    // X = b^n ⇒ b^n·a = b^{n+1} ⇒ last chars a ≠ b ⊥.
+    VERIFY(eq_unsat("Xa", "bX"));

-    // XaXbX = XbXaX   (cancel X prefix+suffix → aXb=bXa; first char 'a'≠'b')
-    VERIFY(eq_unsat("XaXbX", "XbXaX"));
+    // acX = Xbc:  RHS head = X.  Scan LHS: [a,c], X → self-cycle, base "ac".
+    // X = (ac)^n   ⇒ ac = bc ⊥ (a ≠ b).
+    // X = (ac)^n·a ⇒ aca = abc ⊥ (c ≠ b).
+    VERIFY(eq_unsat("acX", "Xbc"));

-    // --- UNSAT: induction ---
-
-    // aXb = Xba   (forces X=a^n; final step requires a=b)
-    VERIFY(eq_unsat("aXb",  "Xba"));
-
-    // XaY = YbX   (a≠b; recursive unrolling forces a=b)
-    VERIFY(eq_unsat("XaY",  "YbX"));
-
-    // --- UNSAT: length parity ---
-
-    // XaX = YY   (|XaX|=2|X|+1 is odd; |YY|=2|Y| is even)
-    VERIFY(eq_unsat("XaX",  "YY"));
-
-    // XaaX = YbY   (|XaaX|=2|X|+2 is even; |YbY|=2|Y|+1 is odd)
-    VERIFY(eq_unsat("XaaX", "YbY"));
-
-    // --- UNSAT: midpoint argument ---
-
-    // XaX = YbY   (equal length forces |X|=|Y|; midpoint position |X|
-    //              holds 'a' in LHS and 'b' in RHS, but 'a'≠'b')
-    VERIFY(eq_unsat("XaX",  "YbY"));
+    // bcX = Xab:  RHS head = X.  Scan LHS: [b,c], X → self-cycle, base "bc".
+    // X = (bc)^n   ⇒ bc = ab ⊥ (b ≠ a).
+    // X = (bc)^n·b ⇒ bcb = bab ⊥ (c ≠ a).
+    VERIFY(eq_unsat("bcX", "Xab"));

    std::cout << "  ok\n";
 }