mirror of
https://github.com/Z3Prover/z3
synced 2026-03-20 03:53:10 +00:00
Some more bug fixes
This commit is contained in:
parent
ab53889c10
commit
b288c2e7dc
6 changed files with 96 additions and 92 deletions
|
|
@ -1872,7 +1872,7 @@ namespace seq {
|
|||
for (euf::snode* mt : minterms) {
|
||||
if (!mt || mt->is_fail()) continue;
|
||||
if (!mt->get_expr()) continue;
|
||||
char_set mt_cs = m_graph.m_parikh->minterm_to_char_set(mt->get_expr());
|
||||
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
|
||||
if (cs.is_subset(mt_cs)) {
|
||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||
if (!deriv) { found = false; break; }
|
||||
|
|
@ -3628,6 +3628,7 @@ namespace seq {
|
|||
VERIFY(!minterms.empty());
|
||||
|
||||
bool created = false;
|
||||
std::cout << "Considering regex: " << mk_pp(mem.m_regex->get_expr(), m_sg.get_manager()) << std::endl;
|
||||
|
||||
// Branch 1: x → ε (progress)
|
||||
{
|
||||
|
|
@ -3642,7 +3643,9 @@ namespace seq {
|
|||
// Branch 2+: for each minterm m_i, x → ?c · x
|
||||
// where ?c is a symbolic char constrained by the minterm
|
||||
for (euf::snode* mt : minterms) {
|
||||
// std::cout << "Processing minterm: " << mk_pp(mt->get_expr(), m_sg.get_manager()) << std::endl;
|
||||
std::cout << "Processing minterm: " << mk_pp(mt->get_expr(), m_sg.get_manager()) << std::endl;
|
||||
SASSERT(mt);
|
||||
SASSERT(mt->get_expr());
|
||||
SASSERT(!mt->is_fail());
|
||||
|
||||
// Try Brzozowski derivative with respect to this minterm
|
||||
|
|
@ -3651,19 +3654,22 @@ namespace seq {
|
|||
SASSERT(deriv);
|
||||
if (deriv->is_fail())
|
||||
continue;
|
||||
// std::cout << "Result: " << mk_pp(deriv->get_expr(), m_sg.get_manager()) << std::endl;
|
||||
std::cout << "Result: " << mk_pp(deriv->get_expr(), m_sg.get_manager()) << std::endl;
|
||||
|
||||
char_set cs;
|
||||
if (m_parikh && mt->get_expr())
|
||||
cs = m_parikh->minterm_to_char_set(mt->get_expr());
|
||||
SASSERT(m_seq_regex);
|
||||
char_set cs = m_seq_regex->minterm_to_char_set(mt->get_expr());
|
||||
std::cout << "char_set:\n";
|
||||
for (auto& r : cs.ranges()) {
|
||||
std::cout << "\t[" << r.m_lo << "; " << r.m_hi - 1 << "]" << std::endl;
|
||||
}
|
||||
|
||||
euf::snode* fresh_char = nullptr;
|
||||
if (cs.is_unit()) {
|
||||
expr_ref char_expr(m_sg.get_seq_util().str.mk_string(zstring(cs.first_char())), m_sg.get_manager());
|
||||
fresh_char = m_sg.mk(char_expr);
|
||||
} else {
|
||||
fresh_char = mk_fresh_char_var();
|
||||
}
|
||||
else
|
||||
fresh_char = mk_fresh_char_var();
|
||||
|
||||
euf::snode* replacement = m_sg.mk_concat(fresh_char, first);
|
||||
nielsen_node* child = mk_child(node);
|
||||
|
|
@ -3675,9 +3681,8 @@ namespace seq {
|
|||
// This is the key Parikh pruning step: when x → ?c · x' is
|
||||
// generated from minterm m_i, ?c must belong to the character
|
||||
// class described by m_i so that str ∈ derivative(R, m_i).
|
||||
if (!cs.is_unit() && !cs.is_empty()) {
|
||||
if (!cs.is_unit() && !cs.is_empty())
|
||||
child->add_char_range(fresh_char, cs);
|
||||
}
|
||||
created = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -318,77 +318,6 @@ namespace seq {
|
|||
// sgraph::compute_minterms(). This function converts them to a
|
||||
// concrete char_set so that fresh character variables introduced by
|
||||
// apply_regex_var_split can be constrained with add_char_range().
|
||||
//
|
||||
// The implementation is recursive and handles:
|
||||
// re.full_char → [0, max_char] (full alphabet)
|
||||
// re.range(lo, hi) → [lo, hi+1) (hi inclusive in Z3)
|
||||
// re.complement(r) → alphabet \ minterm_to_char_set(r)
|
||||
// re.inter(r1, r2) → intersection of both sides
|
||||
// re.diff(r1, r2) → r1 ∩ complement(r2)
|
||||
// re.to_re(str.unit(c)) → singleton {c}
|
||||
// re.empty → empty set
|
||||
// anything else → [0, max_char] (conservative)
|
||||
char_set seq_parikh::minterm_to_char_set(expr* re_expr) {
|
||||
unsigned max_c = seq.max_char();
|
||||
|
||||
if (!re_expr)
|
||||
return char_set::full(max_c);
|
||||
|
||||
// full_char: the whole alphabet [0, max_char]
|
||||
if (seq.re.is_full_char(re_expr))
|
||||
return char_set::full(max_c);
|
||||
|
||||
// range [lo, hi] (hi inclusive in Z3's regex representation)
|
||||
unsigned lo = 0, hi = 0;
|
||||
if (seq.re.is_range(re_expr, lo, hi)) {
|
||||
// lo > hi is a degenerate range; should not arise from well-formed minterms
|
||||
SASSERT(lo <= hi);
|
||||
if (lo > hi) return char_set();
|
||||
// char_range uses exclusive upper bound; Z3 hi is inclusive
|
||||
return char_set(char_range(lo, hi + 1));
|
||||
}
|
||||
|
||||
// complement: alphabet minus the inner set
|
||||
expr* inner = nullptr;
|
||||
if (seq.re.is_complement(re_expr, inner))
|
||||
return minterm_to_char_set(inner).complement(max_c);
|
||||
|
||||
// union: characters present in either set
|
||||
expr* r1 = nullptr, *r2 = nullptr;
|
||||
if (seq.re.is_union(re_expr, r1, r2)) {
|
||||
char_set cs = minterm_to_char_set(r1);
|
||||
cs.add(minterm_to_char_set(r2));
|
||||
return cs;
|
||||
}
|
||||
|
||||
// intersection: characters present in both sets
|
||||
if (seq.re.is_intersection(re_expr, r1, r2))
|
||||
return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2));
|
||||
|
||||
// difference: r1 minus r2 = r1 ∩ complement(r2)
|
||||
if (seq.re.is_diff(re_expr, r1, r2))
|
||||
return minterm_to_char_set(r1).intersect_with(
|
||||
minterm_to_char_set(r2).complement(max_c));
|
||||
|
||||
// to_re(str.unit(c)): singleton character set
|
||||
expr* str_arg = nullptr;
|
||||
expr* ch_expr = nullptr;
|
||||
unsigned char_val = 0;
|
||||
if (seq.re.is_to_re(re_expr, str_arg) &&
|
||||
seq.str.is_unit(str_arg, ch_expr) &&
|
||||
seq.is_const_char(ch_expr, char_val)) {
|
||||
char_set cs;
|
||||
cs.add(char_val);
|
||||
return cs;
|
||||
}
|
||||
|
||||
// empty regex: no characters can appear
|
||||
if (seq.re.is_empty(re_expr))
|
||||
return char_set();
|
||||
|
||||
// Conservative fallback: return the full alphabet so that
|
||||
// no unsound constraints are added for unrecognised expressions.
|
||||
return char_set::full(max_c);
|
||||
}
|
||||
|
||||
} // namespace seq
|
||||
|
|
|
|||
|
|
@ -134,17 +134,6 @@ namespace seq {
|
|||
// single, indivisible character equivalence class. Minterms are
|
||||
// produced by sgraph::compute_minterms and used in
|
||||
// apply_regex_var_split to constrain fresh character variables.
|
||||
//
|
||||
// Supported expressions:
|
||||
// re.full_char → full set [0, max_char]
|
||||
// re.range(lo, hi) → char_set [lo, hi] (inclusive on both ends)
|
||||
// re.complement(r) → complement of minterm_to_char_set(r)
|
||||
// re.inter(r1, r2) → intersection of both sets
|
||||
// re.diff(r1, r2) → r1 minus r2 (= r1 ∩ complement(r2))
|
||||
// re.to_re(unit(c)) → singleton {c}
|
||||
// re.empty → empty set
|
||||
// anything else → full set (conservative, sound over-approximation)
|
||||
char_set minterm_to_char_set(expr* minterm_re);
|
||||
};
|
||||
|
||||
} // namespace seq
|
||||
|
|
|
|||
|
|
@ -1098,4 +1098,68 @@ namespace seq {
|
|||
return result == l_true;
|
||||
}
|
||||
|
||||
char_set seq_regex::minterm_to_char_set(expr* re_expr) {
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
unsigned max_c = seq.max_char();
|
||||
|
||||
if (!re_expr)
|
||||
return char_set::full(max_c);
|
||||
|
||||
// full_char: the whole alphabet [0, max_char]
|
||||
if (seq.re.is_full_char(re_expr))
|
||||
return char_set::full(max_c);
|
||||
|
||||
// range [lo, hi] (hi inclusive in Z3's regex representation)
|
||||
unsigned lo = 0, hi = 0;
|
||||
if (seq.re.is_range(re_expr, lo, hi)) {
|
||||
// lo > hi is a degenerate range; should not arise from well-formed minterms
|
||||
SASSERT(lo <= hi);
|
||||
if (lo > hi) return char_set();
|
||||
// char_range uses exclusive upper bound; Z3 hi is inclusive
|
||||
return char_set(char_range(lo, hi + 1));
|
||||
}
|
||||
|
||||
// complement: alphabet minus the inner set
|
||||
expr* inner = nullptr;
|
||||
if (seq.re.is_complement(re_expr, inner))
|
||||
return minterm_to_char_set(inner).complement(max_c);
|
||||
|
||||
// union: characters present in either set
|
||||
expr* r1 = nullptr, *r2 = nullptr;
|
||||
if (seq.re.is_union(re_expr, r1, r2)) {
|
||||
char_set cs = minterm_to_char_set(r1);
|
||||
cs.add(minterm_to_char_set(r2));
|
||||
return cs;
|
||||
}
|
||||
|
||||
// intersection: characters present in both sets
|
||||
if (seq.re.is_intersection(re_expr, r1, r2))
|
||||
return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2));
|
||||
|
||||
// difference: r1 minus r2 = r1 ∩ complement(r2)
|
||||
if (seq.re.is_diff(re_expr, r1, r2))
|
||||
return minterm_to_char_set(r1).intersect_with(
|
||||
minterm_to_char_set(r2).complement(max_c));
|
||||
|
||||
// to_re(str.unit(c)): singleton character set
|
||||
expr* str_arg = nullptr;
|
||||
expr* ch_expr = nullptr;
|
||||
unsigned char_val = 0;
|
||||
if (seq.re.is_to_re(re_expr, str_arg) &&
|
||||
seq.str.is_unit(str_arg, ch_expr) &&
|
||||
seq.is_const_char(ch_expr, char_val)) {
|
||||
char_set cs;
|
||||
cs.add(char_val);
|
||||
return cs;
|
||||
}
|
||||
|
||||
// empty regex: no characters can appear
|
||||
if (seq.re.is_empty(re_expr))
|
||||
return char_set();
|
||||
|
||||
// Conservative fallback: return the full alphabet so that
|
||||
// no unsound constraints are added for unrecognised expressions.
|
||||
return char_set::full(max_c);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,6 +72,21 @@ namespace seq {
|
|||
void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
|
||||
|
||||
public:
|
||||
// Convert a regex minterm expression to a char_set.
|
||||
// Used to transform symbolic boundaries (re.range, re.complement, etc.)
|
||||
// into exact character sets (char_set).
|
||||
//
|
||||
// Supported expressions:
|
||||
// re.full_char → full set [0, max_char]
|
||||
// re.range(lo, hi) → char_set [lo, hi] (inclusive on both ends)
|
||||
// re.complement(r) → complement of minterm_to_char_set(r)
|
||||
// re.inter(r1, r2) → intersection of both sets
|
||||
// re.diff(r1, r2) → r1 minus r2 (= r1 ∩ complement(r2))
|
||||
// re.to_re(unit(c)) → singleton {c}
|
||||
// re.empty → empty set
|
||||
// anything else → full set (conservative, sound over-approximation)
|
||||
char_set minterm_to_char_set(expr* minterm_re);
|
||||
|
||||
seq_regex(euf::sgraph& sg) : m_sg(sg) {}
|
||||
|
||||
euf::sgraph& sg() { return m_sg; }
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ Author:
|
|||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "smt/seq/seq_parikh.h"
|
||||
#include "smt/seq/seq_regex.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/ast_pp.h"
|
||||
|
|
@ -890,3 +891,4 @@ void tst_seq_parikh() {
|
|||
test_minterm_singleton();
|
||||
test_minterm_nullptr_is_full();
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue