From 67906da97ae3cab2242849ce79e0d09093bd3d28 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Fri, 5 Jun 2026 19:57:00 +0200 Subject: [PATCH] Corrected string extraction --- src/ast/euf/euf_snode.h | 4 ++- src/smt/seq/seq_nielsen.cpp | 47 +++++++++++++------------------ src/smt/seq/seq_nielsen.h | 12 ++++---- src/smt/theory_nseq.cpp | 55 +++++++++++++++++++------------------ 4 files changed, 56 insertions(+), 62 deletions(-) diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 614d9f225..ddb54670b 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -244,8 +244,10 @@ namespace euf { for (unsigned i = 0; i < cnt; i++) { const snode* const c = arg(i); unsigned val; - if (!seq.is_const_char(c->get_expr(), val)) + if (!c->is_char()) return false; + expr* d = to_app(c->get_expr())->get_arg(0); + VERIFY(seq.is_const_char(d, val)); str += zstring(val); } return true; diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 13f37e73d..05d4d8efa 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -522,8 +522,6 @@ namespace seq { } void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs, smt::enode* l, smt::enode* r) { - if (!root()) - create_root(); const dep_tracker dep = m_dep_mgr.mk_leaf(enode_pair(l, r)); str_eq eq(lhs, rhs, dep); eq.sort(); @@ -537,8 +535,6 @@ namespace seq { } void nielsen_graph::add_str_deq(euf::snode* lhs, euf::snode* rhs, smt::enode* l, smt::enode* r) { - if (!root()) - create_root(); const dep_tracker dep = m_dep_mgr.mk_leaf(enode_pair(l, r)); str_deq deq(lhs, rhs, dep); // check if root node contains this equation already @@ -551,8 +547,6 @@ namespace seq { } void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex, sat::literal l) { - if (!root()) - create_root(); // check if root node contains this membership constraint already if (std::ranges::any_of(m_root->str_mems(), [&](const str_mem& e) { return e.m_regex == regex && e.m_str == str; @@ -565,8 +559,6 @@ namespace seq { // test-friendly overloads (no external dependency tracking) void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) { - if (!root()) - create_root(); const dep_tracker dep = m_dep_mgr.mk_leaf(enode_pair(nullptr, nullptr)); str_eq eq(lhs, rhs, dep); eq.sort(); @@ -574,25 +566,23 @@ namespace seq { } void nielsen_graph::add_str_deq(euf::snode* lhs, euf::snode* rhs) { - if (!root()) - create_root(); const dep_tracker dep = m_dep_mgr.mk_leaf(enode_pair(nullptr, nullptr)); str_deq deq(lhs, rhs, dep); m_root->add_str_deq(deq); } void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) { - if (!root()) - create_root(); const dep_tracker dep = nullptr; m_root->add_str_mem(str_mem(str, regex, dep)); } void nielsen_graph::reset() { - for (nielsen_node* n : m_nodes) + for (nielsen_node* n : m_nodes) { dealloc(n); - for (nielsen_edge* e : m_edges) + } + for (nielsen_edge* e : m_edges) { dealloc(e); + } m_nodes.reset(); m_edges.reset(); m_root = nullptr; @@ -1863,6 +1853,7 @@ namespace seq { break; ptr_vector cur_path; // scoped_push _scoped_push(m_dep_mgr); // gc dependencies after search + SASSERT(!m_root->is_currently_conflict()); const search_result r = search_dfs(m_root, cur_path); // the main search loop IF_VERBOSE(1, verbose_stream() << " depth_bound=" << m_depth_bound << " dfs_nodes=" << m_stats.m_num_dfs_nodes @@ -2039,12 +2030,15 @@ namespace seq { display(std::cout, node); } VERIFY(ext); + + if (node->is_currently_conflict()) + // in rare cases, trying to extend can make a complicated conflict visible + return search_result::unsat; + node->set_extended(true); ++m_stats.m_num_extensions; } - SASSERT(!node->is_currently_conflict()); - // explore children bool any_unknown = false; bool all_general_conflict = true; @@ -3637,7 +3631,7 @@ namespace seq { sort* str_sort = nullptr; if (!seq.is_re(r->get_expr(), str_sort)) return false; - std::cout << "Computing sigma of " << snode_label_html(r, m, false) << std::endl; + // std::cout << "Computing sigma of " << snode_label_html(r, m, false) << std::endl; if (r->is_empty()) { const expr_ref eps(seq.re.mk_epsilon(str_sort), m); @@ -3669,15 +3663,12 @@ namespace seq { result.push_back(sigma_pair(ex, eps, m)); return true; } - if (c->is_string()) { - const euf::snode * arg = r->arg0(); - zstring s; - if (arg->is_string(s, seq) && s.length() > 1) { - for (unsigned i = 0; i <= s.length(); ++i) { - expr_ref p(seq.re.mk_to_re(seq.str.mk_string(s.extract(0, i))), m); - expr_ref q(seq.re.mk_to_re(seq.str.mk_string(s.extract(i, s.length() - i))), m); - result.push_back(sigma_pair(p, q, m)); - } + zstring s; + if (c->is_string(s, seq)) { + for (unsigned i = 0; i <= s.length(); ++i) { + expr_ref p(seq.re.mk_to_re(seq.str.mk_string(s.extract(0, i))), m); + expr_ref q(seq.re.mk_to_re(seq.str.mk_string(s.extract(i, s.length() - i))), m); + result.push_back(sigma_pair(p, q, m)); } return true; } @@ -3795,8 +3786,8 @@ namespace seq { } return true; } - // the simplifier should have eliminated everything else already - UNREACHABLE(); + // the simplifier should have eliminated most of them already + // TODO: so far, we are, however, still missing bounded repetitions and ite return false; } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index dd24d21a0..eb08c110f 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -1074,18 +1074,18 @@ namespace seq { unsigned num_nodes() const { return m_nodes.size(); } // maximum overall search depth (0 = unlimited) - void set_max_search_depth(const unsigned d) { m_max_search_depth = d; } + void set_max_search_depth(unsigned d) { m_max_search_depth = d; } // maximum total DFS nodes per solve() call (0 = unlimited) - void set_max_nodes(const unsigned n) { m_max_nodes = n; } + void set_max_nodes(unsigned n) { m_max_nodes = n; } // enable/disable Parikh image verification constraints - void set_parikh_enabled(const bool e) { m_parikh_enabled = e; } + void set_parikh_enabled(bool e) { m_parikh_enabled = e; } - void set_signature_split(const bool e) { m_signature_split = e; } + void set_signature_split(bool e) { m_signature_split = e; } - void set_regex_factorization_threshold(const unsigned max) { m_regex_factorization_threshold = max; } - void set_regex_factorization_eager(const bool e) { m_regex_factorization_eager = e; } + void set_regex_factorization_threshold(unsigned max) { m_regex_factorization_threshold = max; } + void set_regex_factorization_eager(bool e) { m_regex_factorization_eager = e; } // display for debugging std::ostream& display(std::ostream& out) const; diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 06bc02bd0..e7555a42a 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -285,7 +285,7 @@ namespace smt { void theory_nseq::assign_eh(bool_var v, bool is_true) { try { expr* e = ctx.bool_var2expr(v); - std::cout << "assigned [" << sat::literal(v, is_true) << "] " << mk_pp(e, m) << " = " << is_true << std::endl; + // std::cout << "assigned [" << sat::literal(v, is_true) << "] " << mk_pp(e, m) << " = " << is_true << std::endl; expr *s = nullptr, *re = nullptr, *a = nullptr, *b = nullptr; TRACE(seq, tout << (is_true ? "" : "¬") << mk_bounded_pp(e, m, 3) << "\n";); if (m_seq.str.is_in_re(e, s, re)) { @@ -496,32 +496,32 @@ namespace smt { if (!get_fparams().m_nseq_regex_factorization_threshold) return; - if (mem.m_regex->is_intersect()) { - // u \in r1 & r_2 → u \in r1 && u \in r2 - for (const euf::snode* const arg : *mem.m_regex) { - expr_ref in_r(m_seq.re.mk_in_re(s, arg->get_expr()), m); - literal_vector lits; - lits.push_back(~mem.lit); - lits.push_back(mk_literal(in_r)); - ctx.mk_th_axiom(get_id(), lits.size(), lits.data()); - } - m_ignored_mem.insert(mem.lit); - ctx.push_trail(insert_map(m_ignored_mem, mem.lit)); - return; - } - if (mem.m_regex->is_union()) { - // u \in r1 | r_2 → u \in r1 || u \in r2 - literal_vector lits; - lits.push_back(~mem.lit); - for (const euf::snode* const arg : *mem.m_regex) { - expr_ref in_r(m_seq.re.mk_in_re(s, arg->get_expr()), m); - lits.push_back(mk_literal(in_r)); - } - ctx.mk_th_axiom(get_id(), lits.size(), lits.data()); - m_ignored_mem.insert(mem.lit); - ctx.push_trail(insert_map(m_ignored_mem, mem.lit)); - return; - } + //if (mem.m_regex->is_intersect()) { + // // u \in r1 & r_2 → u \in r1 && u \in r2 + // for (const euf::snode* const arg : *mem.m_regex) { + // expr_ref in_r(m_seq.re.mk_in_re(s, arg->get_expr()), m); + // literal_vector lits; + // lits.push_back(~mem.lit); + // lits.push_back(mk_literal(in_r)); + // ctx.mk_th_axiom(get_id(), lits.size(), lits.data()); + // } + // m_ignored_mem.insert(mem.lit); + // ctx.push_trail(insert_map(m_ignored_mem, mem.lit)); + // return; + //} + //if (mem.m_regex->is_union()) { + // // u \in r1 | r_2 → u \in r1 || u \in r2 + // literal_vector lits; + // lits.push_back(~mem.lit); + // for (const euf::snode* const arg : *mem.m_regex) { + // expr_ref in_r(m_seq.re.mk_in_re(s, arg->get_expr()), m); + // lits.push_back(mk_literal(in_r)); + // } + // ctx.mk_th_axiom(get_id(), lits.size(), lits.data()); + // m_ignored_mem.insert(mem.lit); + // ctx.push_trail(insert_map(m_ignored_mem, mem.lit)); + // return; + //} if (mem.m_regex->is_to_re()) { // u \in v (with v is constant) → u = v @@ -677,6 +677,7 @@ namespace smt { void theory_nseq::populate_nielsen_graph() { m_nielsen.reset(); m_can_hot_restart = true; + m_nielsen.create_root(); unsigned num_eqs = 0, num_deqs = 0, num_mems = 0;