Model reconstruction

2026-07-02 13:26:10 +00:00 · 2026-03-11 18:13:16 +01:00 · 2026-03-11 18:13:16 +01:00 · 99727faf70
commit 99727faf70
parent d23f376b39
5 changed files with 164 additions and 14 deletions
--- a/src/smt/nseq_context_solver.h
+++ b/src/smt/nseq_context_solver.h
@ -59,6 +59,10 @@ namespace smt {
        void pop(unsigned num_scopes) override {
            m_kernel.pop(num_scopes);
        }
+
+        void get_model(model_ref& mdl) override {
+            m_kernel.get_model(mdl);
+        }
    };

 }
--- a/src/smt/nseq_model.cpp
+++ b/src/smt/nseq_model.cpp
@ -35,6 +35,8 @@ namespace smt {
        m_var_values.reset();
        m_var_regex.reset();
        m_trail.reset();
+        m_int_model = nullptr;
+        m_mg = &mg;

        m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
        mg.register_factory(m_factory);
@ -42,12 +44,12 @@ namespace smt {
        register_existing_values(nielsen);
        collect_var_regex_constraints(state);

+        // solve integer constraints from the sat_path FIRST so that
+        // m_int_model is available when snode_to_value evaluates power exponents
+        nielsen.solve_sat_path_ints(m_int_model);
+
        // extract variable assignments from the satisfying leaf's substitution path
-        seq::nielsen_node const* sat = nielsen.sat_node();
-        IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null")
-            << " path_len=" << nielsen.sat_path().size() << "\n";);
        extract_assignments(nielsen.sat_path());
-        IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";);
    }

    model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
@ -103,6 +105,8 @@ namespace smt {
        m_var_values.reset();
        m_var_regex.reset();
        m_trail.reset();
+        m_int_model = nullptr;
+        m_mg = nullptr;
        m_factory = nullptr;
    }

@ -175,6 +179,68 @@ namespace smt {
            return expr_ref(m);
        }

+        if (n->is_power()) {
+            SASSERT(n->num_args() == 2);
+            // Evaluate the base and exponent to produce a concrete string.
+            // The base is a string snode; the exponent is an integer expression
+            // whose value comes from the sat_path integer model.
+            expr_ref base_val = snode_to_value(n->arg(0));
+            if (!base_val)
+                return expr_ref(m);
+
+            euf::snode* exp_snode = n->arg(1);
+            expr* exp_expr = exp_snode ? exp_snode->get_expr() : nullptr;
+            rational exp_val;
+            arith_util arith(m);
+
+            // Try to evaluate exponent: first check if it's a numeral,
+            // then try the int model from sat_path constraints,
+            // finally fall back to the proto_model from model_generator.
+            if (exp_expr && arith.is_numeral(exp_expr, exp_val)) {
+                // already concrete
+            } else if (exp_expr && m_int_model.get()) {
+                expr_ref result(m);
+                if (m_int_model->eval_expr(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
+                    // evaluated from int model
+                } else if (m_mg) {
+                    proto_model& pm = m_mg->get_model();
+                    if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
+                        // evaluated from proto_model
+                    } else {
+                        exp_val = rational(0);
+                    }
+                } else {
+                    exp_val = rational(0);
+                }
+            } else if (exp_expr && m_mg) {
+                expr_ref result(m);
+                proto_model& pm = m_mg->get_model();
+                if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
+                    // evaluated from proto_model
+                } else {
+                    exp_val = rational(0);
+                }
+            } else {
+                exp_val = rational(0);
+            }
+
+            if (exp_val.is_neg())
+                exp_val = rational(0);
+
+            // Build the repeated string: base^exp_val
+            if (exp_val.is_zero())
+                return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m);
+            if (exp_val.is_one())
+                return base_val;
+
+            // For small exponents, concatenate directly
+            unsigned n_val = exp_val.get_unsigned();
+            expr_ref acc(base_val);
+            for (unsigned i = 1; i < n_val; ++i)
+                acc = m_seq.str.mk_concat(acc, base_val);
+            return acc;
+        }
+
        // fallback: use the underlying expression
        expr* e = n->get_expr();
        return e ? expr_ref(e, m) : expr_ref(m);
--- a/src/smt/nseq_model.h
+++ b/src/smt/nseq_model.h
@ -61,6 +61,10 @@ namespace smt {
        // trail for GC protection of generated expressions
        expr_ref_vector m_trail;

+        // integer variable model from sat_path constraints
+        model_ref m_int_model;
+        model_generator* m_mg = nullptr;
+
        // per-variable regex constraints: maps snode id -> intersected regex snode.
        // collected during init() from the state's str_mem list.
        u_map<euf::snode*> m_var_regex;
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -23,6 +23,8 @@ Author:
 #include "ast/arith_decl_plugin.h"
 #include "ast/ast_pp.h"
 #include "ast/rewriter/th_rewriter.h"
+#include "smt/smt_kernel.h"
+#include "params/smt_params.h"
 #include "util/hashtable.h"
 #include <algorithm>
 #include <cstdlib>
@ -976,25 +978,41 @@ namespace seq {
                                         dep_tracker const& dep, bool& changed) {
        euf::snode_vector tokens;
        non_empty_side->collect_tokens(tokens);
-        bool all_vars_or_opaque = true;
+        bool all_eliminable = true;
        bool has_char = false;
        for (euf::snode* t : tokens) {
            if (t->is_char()) has_char = true;
-            else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) {
-                all_vars_or_opaque = false; break;
+            else if (!t->is_var() && !t->is_power() && t->kind() != euf::snode_kind::s_other) {
+                all_eliminable = false; break;
            }
        }
-        if (has_char || !all_vars_or_opaque) {
+        if (has_char || !all_eliminable) {
            m_is_general_conflict = true;
            m_reason = backtrack_reason::symbol_clash;
            return true;
        }
+        ast_manager& m = sg.get_manager();
+        arith_util arith(m);
        for (euf::snode* t : tokens) {
            if (t->is_var()) {
                nielsen_subst s(t, sg.mk_empty(), dep);
                apply_subst(sg, s);
                changed = true;
            }
+            else if (t->is_power()) {
+                // Power equated to empty → exponent must be 0.
+                expr* e = t->get_expr();
+                expr* pow_exp = (e && is_app(e) && to_app(e)->get_num_args() >= 2)
+                    ? to_app(e)->get_arg(1) : nullptr;
+                if (pow_exp) {
+                    expr* zero = arith.mk_numeral(rational(0), true);
+                    m_int_constraints.push_back(
+                        int_constraint(pow_exp, zero, int_constraint_kind::eq, dep, m));
+                }
+                nielsen_subst s(t, sg.mk_empty(), dep);
+                apply_subst(sg, s);
+                changed = true;
+            }
        }
        return false;
    }
@ -1488,6 +1506,8 @@ namespace seq {
                                eq.m_rhs = sg.drop_first(eq.m_rhs);
                                if (lp_le_rp && rp_le_lp) {
                                    // both ≤ → equal → both cancel completely
+                                    // Record the equality constraint so the model knows lp = rp.
+                                    add_int_constraint(g.mk_int_constraint(lp, rp, int_constraint_kind::eq, eq.m_dep));
                                } else {
                                    // strictly less: create diff power d = larger - smaller ≥ 1
                                    expr_ref d(g.mk_fresh_int_var());
@ -1699,20 +1719,23 @@ namespace seq {
            node->set_general_conflict(true);
            return search_result::unsat;
        }
-        if (sr == simplify_result::satisfied || node->is_satisfied()) {
-            m_sat_node = node;
-            m_sat_path = cur_path;
-            return search_result::sat;
-        }

        // integer feasibility check: collect side constraints along the path
-        // and verify they are jointly satisfiable using the LP solver
+        // and verify they are jointly satisfiable using the LP solver.
+        // Must run AFTER simplify_and_init (which may add int_constraints)
+        // and BEFORE the SAT check (equation satisfied doesn't imply ints are feasible).
        if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) {
            node->set_reason(backtrack_reason::arithmetic);
            ++m_stats.m_num_arith_infeasible;
            return search_result::unsat;
        }

+        if (sr == simplify_result::satisfied || node->is_satisfied()) {
+            m_sat_node = node;
+            m_sat_path = cur_path;
+            return search_result::sat;
+        }
+
        // depth bound check
        if (depth >= m_depth_bound)
            return search_result::unknown;
@ -3383,5 +3406,49 @@ namespace seq {
        return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
    }

+    bool nielsen_graph::solve_sat_path_ints(model_ref& mdl) {
+        mdl = nullptr;
+        if (m_sat_path.empty() && (!m_sat_node || m_sat_node->int_constraints().empty()))
+            return false;
+
+        vector<int_constraint> constraints;
+        collect_path_int_constraints(m_sat_node, m_sat_path, constraints);
+        if (constraints.empty())
+            return false;
+
+        // Use a fresh smt::kernel to solve the integer constraints.
+        // Add constraints incrementally, skipping any that would make the system UNSAT
+        // (the search may have taken contradictory branches).
+        ast_manager& m = m_sg.get_manager();
+        smt_params params;
+        smt::kernel fresh_solver(m, params);
+        IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints: " << constraints.size() << " constraints\n";);
+        for (auto const& ic : constraints) {
+            expr_ref e = int_constraint_to_expr(ic);
+            IF_VERBOSE(2, verbose_stream() << "  constraint: " << mk_bounded_pp(e, m, 5) << "\n";);
+            fresh_solver.push();
+            fresh_solver.assert_expr(e);
+            if (fresh_solver.check() == l_false) {
+                IF_VERBOSE(1, verbose_stream() << "  SKIPPED (infeasible): " << mk_bounded_pp(e, m, 5) << "\n";);
+                fresh_solver.pop(1);
+            }
+        }
+
+        lbool result = fresh_solver.check();
+        IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints result: " << result << "\n";);
+        if (result == l_true) {
+            fresh_solver.get_model(mdl);
+            IF_VERBOSE(1, {
+                verbose_stream() << "  int_model:\n";
+                for (unsigned i = 0; i < mdl->get_num_constants(); ++i) {
+                    func_decl* fd = mdl->get_constant(i);
+                    expr* val = mdl->get_const_interp(fd);
+                    if (val) verbose_stream() << "    " << fd->get_name() << " = " << mk_bounded_pp(val, m, 3) << "\n";
+                }
+            });
+        }
+        return mdl.get() != nullptr;
+    }
+
 }

--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -241,6 +241,7 @@ Author:
 #include "ast/seq_decl_plugin.h"
 #include "ast/euf/euf_sgraph.h"
 #include <functional>
+#include "model/model.h"

 namespace seq {

@ -264,6 +265,7 @@ namespace seq {
        virtual void    assert_expr(expr* e) = 0;
        virtual void    push() = 0;
        virtual void    pop(unsigned num_scopes) = 0;
+        virtual void    get_model(model_ref& mdl) { mdl = nullptr; }
    };

    // simplification result for constraint processing
@ -819,6 +821,13 @@ namespace seq {
        // max_len == UINT_MAX means unbounded.
        void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);

+        // solve all integer constraints along the sat_path and return
+        // a model mapping integer variables to concrete values.
+        // Must be called after solve() returns sat.
+        // Returns true if a satisfying model was found.
+        // Caller takes ownership of the returned model pointer.
+        bool solve_sat_path_ints(model_ref& mdl);
+
    private:
        search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);