3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-20 11:55:49 +00:00

Some more bug fixes

This commit is contained in:
CEisenhofer 2026-03-18 14:54:12 +01:00
parent ab53889c10
commit b288c2e7dc
6 changed files with 96 additions and 92 deletions

View file

@ -1872,7 +1872,7 @@ namespace seq {
for (euf::snode* mt : minterms) {
if (!mt || mt->is_fail()) continue;
if (!mt->get_expr()) continue;
char_set mt_cs = m_graph.m_parikh->minterm_to_char_set(mt->get_expr());
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
if (cs.is_subset(mt_cs)) {
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
if (!deriv) { found = false; break; }
@ -3628,6 +3628,7 @@ namespace seq {
VERIFY(!minterms.empty());
bool created = false;
std::cout << "Considering regex: " << mk_pp(mem.m_regex->get_expr(), m_sg.get_manager()) << std::endl;
// Branch 1: x → ε (progress)
{
@ -3642,7 +3643,9 @@ namespace seq {
// Branch 2+: for each minterm m_i, x → ?c · x
// where ?c is a symbolic char constrained by the minterm
for (euf::snode* mt : minterms) {
// std::cout << "Processing minterm: " << mk_pp(mt->get_expr(), m_sg.get_manager()) << std::endl;
std::cout << "Processing minterm: " << mk_pp(mt->get_expr(), m_sg.get_manager()) << std::endl;
SASSERT(mt);
SASSERT(mt->get_expr());
SASSERT(!mt->is_fail());
// Try Brzozowski derivative with respect to this minterm
@ -3651,19 +3654,22 @@ namespace seq {
SASSERT(deriv);
if (deriv->is_fail())
continue;
// std::cout << "Result: " << mk_pp(deriv->get_expr(), m_sg.get_manager()) << std::endl;
std::cout << "Result: " << mk_pp(deriv->get_expr(), m_sg.get_manager()) << std::endl;
char_set cs;
if (m_parikh && mt->get_expr())
cs = m_parikh->minterm_to_char_set(mt->get_expr());
SASSERT(m_seq_regex);
char_set cs = m_seq_regex->minterm_to_char_set(mt->get_expr());
std::cout << "char_set:\n";
for (auto& r : cs.ranges()) {
std::cout << "\t[" << r.m_lo << "; " << r.m_hi - 1 << "]" << std::endl;
}
euf::snode* fresh_char = nullptr;
if (cs.is_unit()) {
expr_ref char_expr(m_sg.get_seq_util().str.mk_string(zstring(cs.first_char())), m_sg.get_manager());
fresh_char = m_sg.mk(char_expr);
} else {
fresh_char = mk_fresh_char_var();
}
else
fresh_char = mk_fresh_char_var();
euf::snode* replacement = m_sg.mk_concat(fresh_char, first);
nielsen_node* child = mk_child(node);
@ -3675,9 +3681,8 @@ namespace seq {
// This is the key Parikh pruning step: when x → ?c · x' is
// generated from minterm m_i, ?c must belong to the character
// class described by m_i so that str ∈ derivative(R, m_i).
if (!cs.is_unit() && !cs.is_empty()) {
if (!cs.is_unit() && !cs.is_empty())
child->add_char_range(fresh_char, cs);
}
created = true;
}

View file

@ -318,77 +318,6 @@ namespace seq {
// sgraph::compute_minterms(). This function converts them to a
// concrete char_set so that fresh character variables introduced by
// apply_regex_var_split can be constrained with add_char_range().
//
// The implementation is recursive and handles:
// re.full_char → [0, max_char] (full alphabet)
// re.range(lo, hi) → [lo, hi+1) (hi inclusive in Z3)
// re.complement(r) → alphabet \ minterm_to_char_set(r)
// re.inter(r1, r2) → intersection of both sides
// re.diff(r1, r2) → r1 ∩ complement(r2)
// re.to_re(str.unit(c)) → singleton {c}
// re.empty → empty set
// anything else → [0, max_char] (conservative)
char_set seq_parikh::minterm_to_char_set(expr* re_expr) {
unsigned max_c = seq.max_char();
if (!re_expr)
return char_set::full(max_c);
// full_char: the whole alphabet [0, max_char]
if (seq.re.is_full_char(re_expr))
return char_set::full(max_c);
// range [lo, hi] (hi inclusive in Z3's regex representation)
unsigned lo = 0, hi = 0;
if (seq.re.is_range(re_expr, lo, hi)) {
// lo > hi is a degenerate range; should not arise from well-formed minterms
SASSERT(lo <= hi);
if (lo > hi) return char_set();
// char_range uses exclusive upper bound; Z3 hi is inclusive
return char_set(char_range(lo, hi + 1));
}
// complement: alphabet minus the inner set
expr* inner = nullptr;
if (seq.re.is_complement(re_expr, inner))
return minterm_to_char_set(inner).complement(max_c);
// union: characters present in either set
expr* r1 = nullptr, *r2 = nullptr;
if (seq.re.is_union(re_expr, r1, r2)) {
char_set cs = minterm_to_char_set(r1);
cs.add(minterm_to_char_set(r2));
return cs;
}
// intersection: characters present in both sets
if (seq.re.is_intersection(re_expr, r1, r2))
return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2));
// difference: r1 minus r2 = r1 ∩ complement(r2)
if (seq.re.is_diff(re_expr, r1, r2))
return minterm_to_char_set(r1).intersect_with(
minterm_to_char_set(r2).complement(max_c));
// to_re(str.unit(c)): singleton character set
expr* str_arg = nullptr;
expr* ch_expr = nullptr;
unsigned char_val = 0;
if (seq.re.is_to_re(re_expr, str_arg) &&
seq.str.is_unit(str_arg, ch_expr) &&
seq.is_const_char(ch_expr, char_val)) {
char_set cs;
cs.add(char_val);
return cs;
}
// empty regex: no characters can appear
if (seq.re.is_empty(re_expr))
return char_set();
// Conservative fallback: return the full alphabet so that
// no unsound constraints are added for unrecognised expressions.
return char_set::full(max_c);
}
} // namespace seq

View file

@ -134,17 +134,6 @@ namespace seq {
// single, indivisible character equivalence class. Minterms are
// produced by sgraph::compute_minterms and used in
// apply_regex_var_split to constrain fresh character variables.
//
// Supported expressions:
// re.full_char → full set [0, max_char]
// re.range(lo, hi) → char_set [lo, hi] (inclusive on both ends)
// re.complement(r) → complement of minterm_to_char_set(r)
// re.inter(r1, r2) → intersection of both sets
// re.diff(r1, r2) → r1 minus r2 (= r1 ∩ complement(r2))
// re.to_re(unit(c)) → singleton {c}
// re.empty → empty set
// anything else → full set (conservative, sound over-approximation)
char_set minterm_to_char_set(expr* minterm_re);
};
} // namespace seq

View file

@ -1098,4 +1098,68 @@ namespace seq {
return result == l_true;
}
char_set seq_regex::minterm_to_char_set(expr* re_expr) {
seq_util& seq = m_sg.get_seq_util();
unsigned max_c = seq.max_char();
if (!re_expr)
return char_set::full(max_c);
// full_char: the whole alphabet [0, max_char]
if (seq.re.is_full_char(re_expr))
return char_set::full(max_c);
// range [lo, hi] (hi inclusive in Z3's regex representation)
unsigned lo = 0, hi = 0;
if (seq.re.is_range(re_expr, lo, hi)) {
// lo > hi is a degenerate range; should not arise from well-formed minterms
SASSERT(lo <= hi);
if (lo > hi) return char_set();
// char_range uses exclusive upper bound; Z3 hi is inclusive
return char_set(char_range(lo, hi + 1));
}
// complement: alphabet minus the inner set
expr* inner = nullptr;
if (seq.re.is_complement(re_expr, inner))
return minterm_to_char_set(inner).complement(max_c);
// union: characters present in either set
expr* r1 = nullptr, *r2 = nullptr;
if (seq.re.is_union(re_expr, r1, r2)) {
char_set cs = minterm_to_char_set(r1);
cs.add(minterm_to_char_set(r2));
return cs;
}
// intersection: characters present in both sets
if (seq.re.is_intersection(re_expr, r1, r2))
return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2));
// difference: r1 minus r2 = r1 ∩ complement(r2)
if (seq.re.is_diff(re_expr, r1, r2))
return minterm_to_char_set(r1).intersect_with(
minterm_to_char_set(r2).complement(max_c));
// to_re(str.unit(c)): singleton character set
expr* str_arg = nullptr;
expr* ch_expr = nullptr;
unsigned char_val = 0;
if (seq.re.is_to_re(re_expr, str_arg) &&
seq.str.is_unit(str_arg, ch_expr) &&
seq.is_const_char(ch_expr, char_val)) {
char_set cs;
cs.add(char_val);
return cs;
}
// empty regex: no characters can appear
if (seq.re.is_empty(re_expr))
return char_set();
// Conservative fallback: return the full alphabet so that
// no unsound constraints are added for unrecognised expressions.
return char_set::full(max_c);
}
}

View file

@ -72,6 +72,21 @@ namespace seq {
void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
public:
// Convert a regex minterm expression to a char_set.
// Used to transform symbolic boundaries (re.range, re.complement, etc.)
// into exact character sets (char_set).
//
// Supported expressions:
// re.full_char → full set [0, max_char]
// re.range(lo, hi) → char_set [lo, hi] (inclusive on both ends)
// re.complement(r) → complement of minterm_to_char_set(r)
// re.inter(r1, r2) → intersection of both sets
// re.diff(r1, r2) → r1 minus r2 (= r1 ∩ complement(r2))
// re.to_re(unit(c)) → singleton {c}
// re.empty → empty set
// anything else → full set (conservative, sound over-approximation)
char_set minterm_to_char_set(expr* minterm_re);
seq_regex(euf::sgraph& sg) : m_sg(sg) {}
euf::sgraph& sg() { return m_sg; }

View file

@ -29,6 +29,7 @@ Author:
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/seq/seq_parikh.h"
#include "smt/seq/seq_regex.h"
#include "ast/arith_decl_plugin.h"
#include "ast/reg_decl_plugins.h"
#include "ast/ast_pp.h"
@ -890,3 +891,4 @@ void tst_seq_parikh() {
test_minterm_singleton();
test_minterm_nullptr_is_full();
}