diff --git a/src/smt/seq/nseq_parikh.cpp b/src/smt/seq/nseq_parikh.cpp index 9ee4690b8..13efbd94a 100644 --- a/src/smt/seq/nseq_parikh.cpp +++ b/src/smt/seq/nseq_parikh.cpp @@ -241,6 +241,18 @@ namespace seq { expr_ref zero(arith.mk_int(0), m); out.push_back(int_constraint(k_var, zero, int_constraint_kind::ge, mem.m_dep, m)); + + // Constraint 3 (optional): k ≤ max_k when max_len is bounded. + // max_k = floor((max_len - min_len) / stride) + // This gives the solver an explicit upper bound on k, which tightens + // the search space when combined with other constraints on len(str). + if (max_len != UINT_MAX) { + unsigned range = max_len - min_len; // max_len >= min_len here + unsigned max_k = range / stride; + expr_ref max_k_expr(arith.mk_int(max_k), m); + out.push_back(int_constraint(k_var, max_k_expr, + int_constraint_kind::le, mem.m_dep, m)); + } } void nseq_parith::apply_to_node(nielsen_node& node) { @@ -251,4 +263,63 @@ namespace seq { node.add_int_constraint(ic); } + // ----------------------------------------------------------------------- + // Quick Parikh feasibility check (no solver call) + // ----------------------------------------------------------------------- + + // Returns true if a Parikh conflict is detected: there exists a membership + // str ∈ re for a single-variable str where the modular length constraint + // len(str) = min_len + stride * k (k ≥ 0) + // is inconsistent with the variable's current integer bounds [lb, ub]. + // + // This check is lightweight — it uses only modular arithmetic on the already- + // known regex min/max lengths and the per-variable bounds stored in the node. + bool nseq_parith::check_parikh_conflict(nielsen_node& node) { + seq_util& seq = m_sg.get_seq_util(); + + for (str_mem const& mem : node.str_mems()) { + if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var()) + continue; + + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr || !seq.is_re(re_expr)) + continue; + + unsigned min_len = seq.re.min_length(re_expr); + unsigned max_len = seq.re.max_length(re_expr); + if (min_len >= max_len) continue; // fixed or empty — no stride constraint + + unsigned stride = compute_length_stride(re_expr); + if (stride <= 1) continue; // no useful modular constraint + + unsigned lb = node.var_lb(mem.m_str); + unsigned ub = node.var_ub(mem.m_str); + + // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ? + // + // First find the smallest k satisfying the lower bound: + // k_min = 0 if min_len ≥ lb + // k_min = ⌈(lb - min_len) / stride⌉ otherwise + // + // Then verify min_len + stride * k_min ≤ ub. + unsigned k_min = 0; + if (lb > min_len) { + unsigned gap = lb - min_len; + k_min = (gap + stride - 1) / stride; // ceiling division + } + + // Overflow guard: stride * k_min may overflow unsigned. + unsigned len_at_k_min; + if (k_min > (UINT_MAX - min_len) / stride) { + // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict. + return true; + } + len_at_k_min = min_len + stride * k_min; + + if (ub != UINT_MAX && len_at_k_min > ub) + return true; // no valid k exists → Parikh conflict + } + return false; + } + } // namespace seq diff --git a/src/smt/seq/nseq_parith.h b/src/smt/seq/nseq_parith.h index 6f8fc7ae3..e10e5b6eb 100644 --- a/src/smt/seq/nseq_parith.h +++ b/src/smt/seq/nseq_parith.h @@ -91,6 +91,7 @@ namespace seq { // When stride > 1 and min_len < max_len (bounds don't pin length): // adds: len(str) = min_len + stride · k (equality) // k ≥ 0 (non-negativity) + // k ≤ (max_len - min_len) / stride (upper bound, when max_len bounded) // These tighten the integer constraint set for the subsolver. // Dependencies are copied from mem.m_dep. void generate_parikh_constraints(str_mem const& mem, @@ -101,6 +102,22 @@ namespace seq { // and appends the resulting int_constraints to node.int_constraints(). void apply_to_node(nielsen_node& node); + // Quick Parikh feasibility check (no solver call). + // + // For each single-variable membership str ∈ re, checks whether the + // modular constraint len(str) = min_len + stride · k (k ≥ 0) + // has any solution given the current per-variable bounds stored in + // node.var_lb(str) and node.var_ub(str). + // + // Returns true when a conflict is detected (no valid k exists for + // some membership). The caller should then mark the node with + // backtrack_reason::parikh_image. + // + // This is a lightweight pre-check that avoids calling the integer + // subsolver. It is sound (never returns true for a satisfiable node) + // but incomplete (may miss conflicts that require the full solver). + bool check_parikh_conflict(nielsen_node& node); + // Compute the length stride of a regex expression. // Exposed for testing and external callers. unsigned get_length_stride(expr* re) { return compute_length_stride(re); } diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 6a2c730ca..98f114fa6 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -20,6 +20,7 @@ Author: --*/ #include "smt/seq/seq_nielsen.h" +#include "smt/seq/nseq_parith.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" #include "util/hashtable.h" @@ -431,10 +432,12 @@ namespace seq { nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver): m_sg(sg), - m_solver(solver) { + m_solver(solver), + m_parith(alloc(nseq_parith, sg)) { } nielsen_graph::~nielsen_graph() { + dealloc(m_parith); reset(); } @@ -1032,6 +1035,23 @@ namespace seq { // nielsen_graph: search // ----------------------------------------------------------------------- + void nielsen_graph::apply_parikh_to_node(nielsen_node& node) { + if (node.m_parikh_applied) return; + node.m_parikh_applied = true; + + // Generate modular length constraints (len(str) = min_len + stride·k, etc.) + // and append them to the node's integer constraint list. + m_parith->apply_to_node(node); + + // Lightweight feasibility pre-check: does the Parikh modular constraint + // contradict the variable's current integer bounds? If so, mark this + // node as a Parikh-image conflict immediately (avoids a solver call). + if (!node.is_currently_conflict() && m_parith->check_parikh_conflict(node)) { + node.m_is_general_conflict = true; + node.m_reason = backtrack_reason::parikh_image; + } + } + void nielsen_graph::assert_root_constraints_to_solver() { if (m_root_constraints_asserted) return; m_root_constraints_asserted = true; @@ -1129,6 +1149,15 @@ namespace seq { return search_result::sat; } + // Apply Parikh image filter: generate modular length constraints and + // perform a lightweight feasibility pre-check. The filter is guarded + // internally (m_parikh_applied) so it only runs once per node. + apply_parikh_to_node(*node); + if (node->is_currently_conflict()) { + ++m_stats.m_num_simplify_conflict; + return search_result::unsat; + } + // integer feasibility check: collect side constraints along the path // and verify they are jointly satisfiable using the LP solver if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 1660be312..70ab76edc 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -248,6 +248,7 @@ namespace seq { class nielsen_node; class nielsen_edge; class nielsen_graph; + class nseq_parith; // Parikh image filter (defined in nseq_parith.h) /** * Abstract interface for an incremental solver used by nielsen_graph @@ -519,6 +520,10 @@ namespace seq { // evaluation index for run tracking unsigned m_eval_idx = 0; + // Parikh filter: set to true once apply_parikh_to_node has been applied + // to this node. Prevents duplicate constraint generation across DFS runs. + bool m_parikh_applied = false; + public: nielsen_node(nielsen_graph* graph, unsigned id); @@ -711,6 +716,10 @@ namespace seq { // Set to true after assert_root_constraints_to_solver() is first called. bool m_root_constraints_asserted = false; + // Parikh image filter: generates modular length constraints from regex + // memberships. Allocated in the constructor; owned by this graph. + nseq_parith* m_parith = nullptr; + public: // Construct with a caller-supplied solver. Ownership is NOT transferred; // the caller is responsible for keeping the solver alive. @@ -816,6 +825,16 @@ namespace seq { private: search_result search_dfs(nielsen_node* node, unsigned depth, svector& cur_path); + // Apply the Parikh image filter to a node: generate modular length + // constraints from regex memberships and append them to the node's + // int_constraints. Also performs a lightweight feasibility pre-check; + // if a Parikh conflict is detected the node's conflict flag is set with + // backtrack_reason::parikh_image. + // + // Guarded by node.m_parikh_applied so that constraints are generated + // only once per node across DFS iterations. + void apply_parikh_to_node(nielsen_node& node); + // create a fresh variable with a unique name euf::snode* mk_fresh_var();