diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index c20fe2175..7d31dfdb4 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -687,6 +687,10 @@ void seq_decl_plugin::get_op_names(svector & op_names, symbol cons op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS)); op_names.push_back(builtin_name("re.nostr", _OP_REGEXP_EMPTY)); op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT)); + // Non-greedy quantifiers: semantically identical to greedy for satisfiability + op_names.push_back(builtin_name("re.+?", OP_RE_PLUS)); + op_names.push_back(builtin_name("re.*?", OP_RE_STAR)); + op_names.push_back(builtin_name("re.??", OP_RE_OPTION)); op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS)); op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS)); } diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 7f2376fef..bde789e05 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -2453,7 +2453,13 @@ namespace seq { if (!node->is_extended()) { bool ext = generate_extensions(node); IF_VERBOSE(0, display(verbose_stream(), node)); - VERIFY(ext); + if (!ext) { + // No modifier applies: the node is stuck (unsupported equation form). + // Mark as extended with no children to avoid redundant re-exploration, + // and return unknown so the DFS will report FC_GIVEUP. + node->set_extended(true); + return search_result::unknown; + } node->set_extended(true); ++m_stats.m_num_extensions; } diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index 626e49311..448ef4037 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -612,6 +612,37 @@ namespace seq { return is_empty_bfs(result, max_states); } + // ----------------------------------------------------------------------- + // Check if the language of re accepts at least one non-empty string. + // Returns true if a non-empty string is definitely accepted. + // Returns false conservatively (also false when inconclusive). + // Used by the regex precheck SAT shortcut: only shortcut to SAT when the + // intersection language contains a non-empty witness, avoiding false SAT + // for intersections like [a-u]* ∩ v* = {""} when var ≠ "" is also asserted. + // ----------------------------------------------------------------------- + + bool seq_regex::accepts_nonempty_string(euf::snode* re) { + if (!re || re->is_fail()) return false; + // Quick structural checks: full-seq, full-char, and range always + // accept non-empty strings (they require at least one character). + if (re->is_full_seq() || re->is_full_char()) return true; + // Only explore ground regexes; non-ground could expand unpredictably. + if (!re->is_ground()) return true; // conservative: assume non-empty exists + // For s_other snodes (unrecognized kinds), be conservative. + if (re->kind() == euf::snode_kind::s_other) return true; + // Compute one level of derivatives: if any derivative is non-fail and + // non-empty, then the regex accepts some string starting with that char. + euf::snode_vector reps; + if (!get_alphabet_representatives(re, reps)) + return true; // conservative + for (euf::snode* ch : reps) { + euf::snode* deriv = m_sg.brzozowski_deriv(re, ch); + if (deriv && !deriv->is_fail() && !is_empty_regex(deriv)) + return true; + } + return false; + } + // ----------------------------------------------------------------------- // Language subset check: L(A) ⊆ L(B) // via intersection(A, complement(B)) = ∅ diff --git a/src/smt/seq/seq_regex.h b/src/smt/seq/seq_regex.h index 72f4b71f5..d99fb1439 100644 --- a/src/smt/seq/seq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -191,6 +191,13 @@ namespace seq { lbool check_intersection_emptiness(ptr_vector const& regexes, unsigned max_states = 10000); + // Check if the language of re contains at least one non-empty string. + // Returns true if a non-empty string is definitely accepted. + // Returns false if no non-empty string is accepted, or inconclusive. + // Used by the regex precheck to avoid false SAT shortcuts for languages + // that only contain the empty string, e.g. [a-u]* ∩ v* = {""}. + bool accepts_nonempty_string(euf::snode* re); + // Check if L(subset_re) ⊆ L(superset_re). // Computed as: subset_re ∩ complement(superset_re) = ∅. // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index b1854be09..cde94afbb 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -200,6 +200,19 @@ namespace smt { expr* e = ctx.bool_var2expr(v); expr* s = nullptr, *re = nullptr; TRACE(seq, tout << (is_true ? "" : "¬") << mk_bounded_pp(e, m, 3) << "\n";); + // Normalize (= bool_seq_expr true/false) to a direct assignment. + // Handles patterns like (not (= (str.contains "A" x) true)). + { + expr* eq_lhs = nullptr, *eq_rhs = nullptr; + if (m.is_eq(e, eq_lhs, eq_rhs)) { + if (m.is_true(eq_rhs)) + e = eq_lhs; + else if (m.is_false(eq_rhs)) { e = eq_lhs; is_true = !is_true; } + else if (m.is_true(eq_lhs)) + e = eq_rhs; + else if (m.is_false(eq_lhs)) { e = eq_rhs; is_true = !is_true; } + } + } if (m_seq.str.is_in_re(e, s, re)) { euf::snode* sn_str = get_snode(s); euf::snode* sn_re = get_snode(re); @@ -953,6 +966,10 @@ namespace smt { if (std::holds_alternative(item)) { has_eqs = true; break; } bool any_undef = false; + // Track whether any variable's intersection only accepts "" (nullable-only). + // When true, we must not shortcut to SAT: the only witness is "", which may + // be excluded by a disequality (e.g. var ≠ ""). + bool any_nullable_only = false; // Check intersection emptiness for each variable. for (auto& kv : var_to_mems) { @@ -985,19 +1002,35 @@ namespace smt { set_conflict(eqs, lits); return l_true; // conflict asserted } - if (result == l_undef) + if (result == l_undef) { any_undef = true; - // l_false = non-empty intersection, this variable's constraints are satisfiable + continue; + } + // l_false = intersection is non-empty; check if it has a non-empty witness. + // Build the intersection snode to call accepts_nonempty_string. + euf::snode* inter = nullptr; + for (euf::snode* re : regexes) { + if (!inter) { inter = re; continue; } + expr* r1 = inter->get_expr(); + expr* r2 = re->get_expr(); + if (!r1 || !r2) { inter = nullptr; break; } + expr_ref intersection(m_seq.re.mk_inter(r1, r2), m); + inter = m_sgraph.mk(intersection); + if (!inter) break; + } + if (!inter || !m_regex.accepts_nonempty_string(inter)) + any_nullable_only = true; } if (any_undef) return l_undef; // cannot fully determine; let DFS decide // All variables' regex intersections are non-empty. - // If there are no word equations, variables are independent and - // each can be assigned a witness string → SAT. - if (all_primitive && !has_eqs && !has_unhandled_preds()) { - TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, " + // Only shortcut to SAT when every variable's intersection accepts a non-empty + // witness string. If any intersection is nullable-only (= {""}) we cannot + // guarantee a witness consistent with disequalities such as var ≠ "". + if (all_primitive && !has_eqs && !has_unhandled_preds() && !any_nullable_only) { + TRACE(seq, tout << "nseq regex precheck: all intersections have non-empty witnesses, " << "no word eqs → SAT\n";); return l_false; // signals SAT (non-empty / satisfiable) }