mirror of
https://github.com/Z3Prover/z3
synced 2026-03-18 11:04:09 +00:00
Regex intersection bug fixed
This commit is contained in:
parent
256f1bdf1a
commit
16f693b09a
7 changed files with 420 additions and 137 deletions
|
|
@ -762,11 +762,109 @@ namespace seq {
|
|||
return r;
|
||||
}
|
||||
|
||||
// Helper: render an snode as an HTML label for DOT output.
|
||||
static std::string regex_expr_html(expr* e, ast_manager& m, seq_util& seq) {
|
||||
if (!e) return "null";
|
||||
expr* a = nullptr, * b = nullptr;
|
||||
|
||||
if (seq.re.is_to_re(e, a)) {
|
||||
zstring s;
|
||||
if (seq.str.is_string(a, s)) {
|
||||
return "\"" + dot_html_escape(s.encode()) + "\"";
|
||||
}
|
||||
std::ostringstream os;
|
||||
os << mk_pp(a, m);
|
||||
return dot_html_escape(os.str());
|
||||
}
|
||||
if (seq.re.is_concat(e)) {
|
||||
app* ap = to_app(e);
|
||||
std::string res;
|
||||
if (ap->get_num_args() == 0) return "()";
|
||||
for (unsigned i = 0; i < ap->get_num_args(); ++i) {
|
||||
if (i > 0) res += " ";
|
||||
bool needs_parens = seq.re.is_union(ap->get_arg(i));
|
||||
if (needs_parens) res += "(";
|
||||
res += regex_expr_html(ap->get_arg(i), m, seq);
|
||||
if (needs_parens) res += ")";
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_union(e)) {
|
||||
app* ap = to_app(e);
|
||||
std::string res;
|
||||
if (ap->get_num_args() == 0) return "∅";
|
||||
for (unsigned i = 0; i < ap->get_num_args(); ++i) {
|
||||
if (i > 0) res += " | ";
|
||||
res += regex_expr_html(ap->get_arg(i), m, seq);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_intersection(e)) {
|
||||
app* ap = to_app(e);
|
||||
std::string res;
|
||||
for (unsigned i = 0; i < ap->get_num_args(); ++i) {
|
||||
if (i > 0) res += " & ";
|
||||
bool needs_parens = seq.re.is_union(ap->get_arg(i)) || seq.re.is_concat(ap->get_arg(i));
|
||||
if (needs_parens) res += "(";
|
||||
res += regex_expr_html(ap->get_arg(i), m, seq);
|
||||
if (needs_parens) res += ")";
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_star(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, m, seq);
|
||||
res += needs_parens ? ")<SUP>*</SUP>" : "<SUP>*</SUP>";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_plus(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, m, seq);
|
||||
res += needs_parens ? ")<SUP>+</SUP>" : "<SUP>+</SUP>";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_opt(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, m, seq);
|
||||
res += needs_parens ? ")?" : "?";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_complement(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = "~";
|
||||
res += needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, m, seq);
|
||||
res += needs_parens ? ")" : "";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_range(e, a, b)) {
|
||||
zstring s1, s2;
|
||||
std::string c1 = seq.str.is_string(a, s1) ? dot_html_escape(s1.encode()) : arith_expr_html(a, m);
|
||||
std::string c2 = seq.str.is_string(b, s2) ? dot_html_escape(s2.encode()) : arith_expr_html(b, m);
|
||||
return "[" + c1 + "-" + c2 + "]";
|
||||
}
|
||||
if (seq.re.is_full_char(e)) {
|
||||
return "Σ"; // Sigma
|
||||
}
|
||||
if (seq.re.is_full_seq(e)) {
|
||||
return "Σ<SUP>*</SUP>"; // Sigma*
|
||||
}
|
||||
if (seq.re.is_empty(e)) {
|
||||
return "∅"; // empty set
|
||||
}
|
||||
|
||||
std::ostringstream os;
|
||||
os << mk_pp(e, m);
|
||||
return dot_html_escape(os.str());
|
||||
}
|
||||
|
||||
// Helper: render a snode as an HTML label for DOT output.
|
||||
// Groups consecutive s_char tokens into quoted strings, renders s_var by name,
|
||||
// shows s_power with superscripts, s_unit by its inner expression,
|
||||
// and falls back to mk_pp (HTML-escaped) for other token kinds.
|
||||
static std::string snode_label_html(euf::snode const* n, ast_manager& m) {
|
||||
std::string snode_label_html(euf::snode const* n, ast_manager& m) {
|
||||
if (!n) return "null";
|
||||
seq_util seq(m);
|
||||
|
||||
|
|
@ -840,6 +938,8 @@ namespace seq {
|
|||
expr* exp_expr = to_app(e)->get_arg(1);
|
||||
result += arith_expr_html(exp_expr, m);
|
||||
result += "</SUP>";
|
||||
} else if (e && seq.is_re(e)) {
|
||||
result += regex_expr_html(e, m, seq);
|
||||
} else {
|
||||
std::ostringstream os;
|
||||
os << mk_pp(e, m);
|
||||
|
|
@ -3397,9 +3497,6 @@ namespace seq {
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nielsen_graph::apply_gpower_intr(nielsen_node* node) {
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
arith_util arith(m);
|
||||
|
||||
for (str_eq const& eq : node->str_eqs()) {
|
||||
if (eq.is_trivial()) continue;
|
||||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||||
|
|
@ -3468,11 +3565,12 @@ namespace seq {
|
|||
if (n % p != 0) continue;
|
||||
bool match = true;
|
||||
for (unsigned i = p; i < n && match; ++i)
|
||||
match = (ground_prefix_orig[i]->id() == ground_prefix_orig[i % p]->id());
|
||||
match = ground_prefix_orig[i]->id() == ground_prefix_orig[i % p]->id();
|
||||
if (match) { period = p; break; }
|
||||
}
|
||||
for (unsigned i = 0; i < period; ++i)
|
||||
for (unsigned i = 0; i < period; ++i) {
|
||||
ground_prefix.push_back(ground_prefix_orig[i]);
|
||||
}
|
||||
|
||||
// If the compressed prefix is a single power snode, unwrap it to use
|
||||
// its base tokens, avoiding nested powers.
|
||||
|
|
|
|||
|
|
@ -494,6 +494,8 @@ namespace seq {
|
|||
}
|
||||
};
|
||||
|
||||
std::string snode_label_html(euf::snode const* n, ast_manager& m);
|
||||
|
||||
// node in the Nielsen graph
|
||||
// mirrors ZIPT's NielsenNode
|
||||
class nielsen_node {
|
||||
|
|
|
|||
|
|
@ -519,135 +519,25 @@ namespace seq {
|
|||
if (regexes.empty())
|
||||
return l_false; // empty intersection = full language (vacuously non-empty)
|
||||
|
||||
// Quick checks: if any regex is fail/empty, intersection is empty
|
||||
for (euf::snode* re : regexes) {
|
||||
if (!re || !re->get_expr())
|
||||
return l_undef;
|
||||
if (re->is_fail() || is_empty_regex(re))
|
||||
return l_true;
|
||||
}
|
||||
|
||||
// Check if all are nullable (intersection accepts ε)
|
||||
bool all_nullable = true;
|
||||
for (euf::snode* re : regexes) {
|
||||
if (!re->is_nullable()) { all_nullable = false; break; }
|
||||
}
|
||||
if (all_nullable)
|
||||
return l_false;
|
||||
|
||||
// Single regex: delegate to is_empty_bfs
|
||||
if (regexes.size() == 1)
|
||||
return is_empty_bfs(regexes[0], max_states);
|
||||
|
||||
// Build product BFS. State = tuple of regex snode ids.
|
||||
// Use a map from state hash to visited set.
|
||||
using state_t = svector<unsigned>;
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& mgr = m_sg.get_manager();
|
||||
|
||||
auto state_hash = [](state_t const& s) -> unsigned {
|
||||
unsigned h = 0;
|
||||
for (unsigned id : s)
|
||||
h = h * 31 + id;
|
||||
return h;
|
||||
};
|
||||
|
||||
auto state_eq = [](state_t const& a, state_t const& b) -> bool {
|
||||
if (a.size() != b.size()) return false;
|
||||
for (unsigned i = 0; i < a.size(); ++i)
|
||||
if (a[i] != b[i]) return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
// Use simple set via sorted vector of hashes (good enough for bounded BFS)
|
||||
std::unordered_set<unsigned> visited_hashes;
|
||||
|
||||
struct bfs_state {
|
||||
ptr_vector<euf::snode> regexes;
|
||||
};
|
||||
|
||||
std::vector<bfs_state> worklist;
|
||||
bfs_state initial;
|
||||
initial.regexes.append(regexes);
|
||||
worklist.push_back(std::move(initial));
|
||||
|
||||
state_t init_ids;
|
||||
for (euf::snode* re : regexes)
|
||||
init_ids.push_back(re->id());
|
||||
visited_hashes.insert(state_hash(init_ids));
|
||||
|
||||
unsigned states_explored = 0;
|
||||
bool had_failed = false;
|
||||
|
||||
// Collect alphabet representatives from the intersection of all regexes
|
||||
// (merge boundaries from all)
|
||||
unsigned_vector all_bounds;
|
||||
all_bounds.push_back(0);
|
||||
for (euf::snode* re : regexes)
|
||||
collect_char_boundaries(re, all_bounds);
|
||||
std::sort(all_bounds.begin(), all_bounds.end());
|
||||
|
||||
euf::snode_vector reps;
|
||||
unsigned prev = UINT_MAX;
|
||||
for (unsigned b : all_bounds) {
|
||||
if (b != prev) {
|
||||
reps.push_back(m_sg.mk_char(b));
|
||||
prev = b;
|
||||
}
|
||||
}
|
||||
if (reps.empty())
|
||||
reps.push_back(m_sg.mk_char('a'));
|
||||
|
||||
while (!worklist.empty()) {
|
||||
if (states_explored >= max_states)
|
||||
euf::snode* result = regexes[0];
|
||||
for (unsigned i = 1; i < regexes.size(); ++i) {
|
||||
expr* r1 = result->get_expr();
|
||||
expr* r2 = regexes[i]->get_expr();
|
||||
if (!r1 || !r2) return l_undef;
|
||||
expr_ref inter(seq.re.mk_inter(r1, r2), mgr);
|
||||
result = m_sg.mk(inter);
|
||||
if (!result)
|
||||
return l_undef;
|
||||
|
||||
bfs_state current = std::move(worklist.back());
|
||||
worklist.pop_back();
|
||||
++states_explored;
|
||||
|
||||
for (euf::snode* ch : reps) {
|
||||
ptr_vector<euf::snode> derivs;
|
||||
bool any_fail = false;
|
||||
bool all_null = true;
|
||||
bool deriv_failed = false;
|
||||
|
||||
for (euf::snode* re : current.regexes) {
|
||||
euf::snode* d = m_sg.brzozowski_deriv(re, ch);
|
||||
if (!d) { deriv_failed = true; break; }
|
||||
if (d->is_fail()) { any_fail = true; break; }
|
||||
if (!d->is_nullable()) all_null = false;
|
||||
derivs.push_back(d);
|
||||
}
|
||||
|
||||
if (deriv_failed) { had_failed = true; continue; }
|
||||
if (any_fail) continue; // this character leads to empty intersection
|
||||
|
||||
if (all_null)
|
||||
return l_false; // found an accepting state in the product
|
||||
|
||||
// Check if any component is structurally empty
|
||||
bool any_empty = false;
|
||||
for (euf::snode* d : derivs) {
|
||||
if (is_empty_regex(d)) { any_empty = true; break; }
|
||||
}
|
||||
if (any_empty) continue;
|
||||
|
||||
// Compute state hash and check visited
|
||||
state_t ids;
|
||||
for (euf::snode* d : derivs)
|
||||
ids.push_back(d->id());
|
||||
unsigned h = state_hash(ids);
|
||||
if (visited_hashes.count(h) == 0) {
|
||||
visited_hashes.insert(h);
|
||||
bfs_state next;
|
||||
next.regexes.append(derivs);
|
||||
worklist.push_back(std::move(next));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (had_failed)
|
||||
return l_undef;
|
||||
return l_true; // exhausted all states, intersection is empty
|
||||
return is_empty_bfs(result, max_states);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -425,7 +425,12 @@ namespace smt {
|
|||
// here the actual Nielsen solving happens
|
||||
auto result = m_nielsen.solve();
|
||||
|
||||
|
||||
#ifdef Z3DEBUG
|
||||
// Examining the Nielsen graph is probably the best way of debugging
|
||||
std::string dot = m_nielsen.to_dot();
|
||||
IF_VERBOSE(1, verbose_stream() << dot << "\n";);
|
||||
#endif
|
||||
|
||||
if (result == seq::nielsen_graph::search_result::unsat) {
|
||||
IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve UNSAT\n";);
|
||||
explain_nielsen_conflict();
|
||||
|
|
@ -464,8 +469,8 @@ namespace smt {
|
|||
if (mem_idx < m_nielsen_to_state_mem.size()) {
|
||||
unsigned state_mem_idx = m_nielsen_to_state_mem[mem_idx];
|
||||
mem_source const& src = m_state.get_mem_source(state_mem_idx);
|
||||
if (ctx.get_assignment(src.m_lit) == l_true)
|
||||
lits.push_back(src.m_lit);
|
||||
SASSERT(ctx.get_assignment(src.m_lit) == l_true);
|
||||
lits.push_back(src.m_lit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -823,9 +828,8 @@ namespace smt {
|
|||
auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector());
|
||||
vec.push_back(i);
|
||||
}
|
||||
else {
|
||||
else
|
||||
all_var_str = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (var_to_mems.empty())
|
||||
|
|
@ -855,19 +859,23 @@ namespace smt {
|
|||
// jointly unsatisfiable. Assert a conflict from all their literals.
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
std::cout << "CONFLICT:\n";
|
||||
for (unsigned i : mem_indices) {
|
||||
mem_source const& src = m_state.get_mem_source(i);
|
||||
if (ctx.get_assignment(src.m_lit) == l_true)
|
||||
lits.push_back(src.m_lit);
|
||||
SASSERT(ctx.get_assignment(src.m_lit) == l_true); // we already stored the polarity of the literal
|
||||
lits.push_back(src.m_lit);
|
||||
std::cout << "\t\t";
|
||||
std::cout << mk_pp(ctx.literal2expr(src.m_lit), m) << std::endl;
|
||||
std::cout << "\t\t";
|
||||
std::cout << src.m_lit << std::endl;
|
||||
}
|
||||
TRACE(seq, tout << "nseq regex precheck: empty intersection for var "
|
||||
<< var_id << ", conflict with " << lits.size() << " lits\n";);
|
||||
set_conflict(eqs, lits);
|
||||
return l_true; // conflict asserted
|
||||
}
|
||||
else if (result == l_undef) {
|
||||
if (result == l_undef)
|
||||
any_undef = true;
|
||||
}
|
||||
// l_false = non-empty intersection, this variable's constraints are satisfiable
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue