diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index b52201da6..02eaf4006 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -137,6 +137,10 @@ namespace seq { return m_str && m_str->length() == 1 && m_str->is_var(); } + bool str_mem::is_trivial() const { + return m_str && m_regex && m_str->is_empty() && m_regex->is_nullable(); + } + bool str_mem::contains_var(euf::snode* var) const { if (!var) return false; if (m_str) { @@ -1489,7 +1493,7 @@ namespace seq { while (changed) { changed = false; - // pass 1: remove trivially satisfied equalities + // pass 1: remove trivially satisfied equalities and memberships unsigned wi = 0; for (unsigned i = 0; i < m_str_eq.size(); ++i) { str_eq& eq = m_str_eq[i]; @@ -1502,6 +1506,18 @@ namespace seq { changed = true; } + unsigned wj = 0; + for (unsigned j = 0; j < m_str_mem.size(); ++j) { + str_mem& mem = m_str_mem[j]; + if (mem.is_trivial()) + continue; + m_str_mem[wj++] = mem; + } + if (wj < m_str_mem.size()) { + m_str_mem.shrink(wj); + changed = true; + } + // pass 2: detect symbol clashes, empty-propagation, and prefix cancellation for (str_eq& eq : m_str_eq) { if (!eq.m_lhs || !eq.m_rhs) @@ -1785,8 +1801,10 @@ namespace seq { for (str_mem& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) continue; + if (mem.is_primitive()) + continue; for (unsigned od = 0; od < 2; ++od) { - bool fwd = (od == 0); + bool fwd = od == 0; while (mem.m_str && !mem.m_str->is_empty()) { euf::snode* tok = dir_token(mem.m_str, fwd); if (!tok || !tok->is_char()) @@ -1824,6 +1842,8 @@ namespace seq { for (str_mem& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) continue; + if (mem.is_primitive()) + continue; while (mem.m_str && !mem.m_str->is_empty()) { euf::snode* tok = mem.m_str->first(); if (!tok || !tok->is_unit()) @@ -1929,6 +1949,8 @@ namespace seq { for (str_mem const& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) continue; + if (mem.is_primitive()) + continue; if (m_graph.check_regex_widening(*this, mem.m_str, mem.m_regex)) { m_is_general_conflict = true; m_reason = backtrack_reason::regex; @@ -2142,7 +2164,11 @@ namespace seq { if (!eq.is_trivial()) return false; } - return m_str_mem.empty(); + for (str_mem const& mem : m_str_mem) { + if (!mem.is_trivial() && !mem.is_primitive()) + return false; + } + return true; } bool nielsen_node::has_opaque_terms() const { @@ -3310,6 +3336,7 @@ namespace seq { for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) { str_mem const& mem = node->str_mems()[mi]; if (!mem.m_str || !mem.m_regex) continue; + if (mem.is_primitive()) continue; euf::snode* first = mem.m_str->first(); if (!first || !first->is_var()) continue; @@ -3617,6 +3644,8 @@ namespace seq { for (str_mem const& mem : node->str_mems()) { if (!mem.m_str || !mem.m_regex) continue; + if (mem.is_primitive()) + continue; euf::snode* first = mem.m_str->first(); if (!first || !first->is_var()) continue; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index fcffb8dd5..9dce0ffec 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -395,6 +395,8 @@ namespace seq { // check if the constraint has the form x in R with x a single variable bool is_primitive() const; + bool is_trivial() const; + // check if the constraint contains a given variable bool contains_var(euf::snode* var) const; }; diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp index 80450da55..199777864 100644 --- a/src/smt/seq/seq_parikh.cpp +++ b/src/smt/seq/seq_parikh.cpp @@ -306,18 +306,4 @@ namespace seq { } return false; } - - // ----------------------------------------------------------------------- - // minterm → char_set conversion - // ----------------------------------------------------------------------- - - // Convert a regex minterm expression to a char_set. - // - // Minterms are Boolean combinations of character-class predicates - // (re.range, re.full_char, complement, intersection) produced by - // sgraph::compute_minterms(). This function converts them to a - // concrete char_set so that fresh character variables introduced by - // apply_regex_var_split can be constrained with add_char_range(). - - } // namespace seq diff --git a/src/smt/seq/seq_regex.h b/src/smt/seq/seq_regex.h index 58a77d64d..662478d2d 100644 --- a/src/smt/seq/seq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -72,6 +72,7 @@ namespace seq { void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps); public: + // Convert a regex minterm expression to a char_set. // Used to transform symbolic boundaries (re.range, re.complement, etc.) // into exact character sets (char_set). @@ -85,6 +86,9 @@ namespace seq { // re.to_re(unit(c)) → singleton {c} // re.empty → empty set // anything else → full set (conservative, sound over-approximation) + // ----------------------------------------------------------------------- + // minterm → char_set conversion + // ----------------------------------------------------------------------- char_set minterm_to_char_set(expr* minterm_re); seq_regex(euf::sgraph& sg) : m_sg(sg) {} diff --git a/src/smt/seq_model.cpp b/src/smt/seq_model.cpp index ed10c531d..e0204e8c2 100644 --- a/src/smt/seq_model.cpp +++ b/src/smt/seq_model.cpp @@ -41,7 +41,7 @@ namespace smt { mg.register_factory(m_factory); register_existing_values(nielsen); - collect_var_regex_constraints(state); + collect_var_regex_constraints(nielsen.sat_node()); // solve integer constraints from the sat_path FIRST so that // m_int_model is available when snode_to_value evaluates power exponents @@ -288,14 +288,15 @@ namespace smt { euf::snode* re = nullptr; if (m_var_regex.find(var->id(), re) && re) { expr* re_expr = re->get_expr(); - if (re_expr) { - expr_ref witness(m); - if (m_rewriter.some_seq_in_re(re_expr, witness) == l_true && witness) { - m_trail.push_back(witness); - m_factory->register_value(witness); - return witness; - } - } + SASSERT(re_expr); + expr_ref witness(m); + // We checked non-emptiness during Nielsen already + VERIFY(m_rewriter.some_seq_in_re(re_expr, witness) == l_true && witness); + // std::cout << "Witness for " << mk_pp(var->get_expr(), m) << " in " << + // mk_pp(re_expr, m) << ": " << mk_pp(witness, m) << std::endl; + m_trail.push_back(witness); + m_factory->register_value(witness); + return witness; } // no regex constraint or witness generation failed: use empty string @@ -305,14 +306,13 @@ namespace smt { return m_seq.str.mk_empty(srt); } - void seq_model::collect_var_regex_constraints(seq_state const& state) { - for (auto const& mem : state.str_mems()) { - if (!mem.m_str || !mem.m_regex) - continue; - // only collect for variable snodes (leaf variables needing assignment) - if (!mem.m_str->is_var()) - continue; - unsigned id = mem.m_str->id(); + void seq_model::collect_var_regex_constraints(seq::nielsen_node const* sat_node) { + SASSERT(sat_node); + for (auto const& mem : sat_node->str_mems()) { + SASSERT(mem.m_str && mem.m_regex); + VERIFY(mem.is_primitive()); // everything else should have been eliminated already + euf::snode* first = mem.m_str->first(); + unsigned id = first->id(); euf::snode* existing = nullptr; if (m_var_regex.find(id, existing) && existing) { // intersect with existing constraint: @@ -322,13 +322,12 @@ namespace smt { if (e1 && e2) { expr_ref inter(m_seq.re.mk_inter(e1, e2), m); euf::snode* inter_sn = m_sg.mk(inter); - if (inter_sn) - m_var_regex.insert(id, inter_sn); + SASSERT(inter_sn); + m_var_regex.insert(id, inter_sn); } } - else { + else m_var_regex.insert(id, mem.m_regex); - } } } diff --git a/src/smt/seq_model.h b/src/smt/seq_model.h index 51bd8365b..5702a70ff 100644 --- a/src/smt/seq_model.h +++ b/src/smt/seq_model.h @@ -116,7 +116,7 @@ namespace smt { // collect per-variable regex constraints from the state. // For each positive str_mem, records the regex (or intersects // with existing) into m_var_regex keyed by the string snode id. - void collect_var_regex_constraints(seq_state const& state); + void collect_var_regex_constraints(seq::nielsen_node const* sat_node); }; }