mirror of
https://github.com/Z3Prover/z3
synced 2026-06-05 16:40:52 +00:00
port RegexCharSplitModifier: implement apply_regex_unit_split in seq_nielsen
Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/5d8898c4-5b66-42b6-a05a-03de8d1f0a94 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
6d2321e6fe
commit
4b08c629c8
3 changed files with 131 additions and 1 deletions
|
|
@ -2234,7 +2234,11 @@ namespace seq {
|
||||||
if (apply_const_nielsen(node))
|
if (apply_const_nielsen(node))
|
||||||
return ++m_stats.m_mod_const_nielsen, true;
|
return ++m_stats.m_mod_const_nielsen, true;
|
||||||
|
|
||||||
// Priority 9: SignatureSplit - heuristic string equation splitting
|
// Priority 9: RegexUnitSplit - split str_mem c·s ∈ R by minterms of R
|
||||||
|
if (apply_regex_unit_split(node))
|
||||||
|
return ++m_stats.m_mod_regex_unit_split, true;
|
||||||
|
|
||||||
|
// Priority 9b: SignatureSplit - heuristic string equation splitting
|
||||||
if (m_signature_split && apply_signature_split(node))
|
if (m_signature_split && apply_signature_split(node))
|
||||||
return ++m_stats.m_mod_signature_split, true;
|
return ++m_stats.m_mod_signature_split, true;
|
||||||
|
|
||||||
|
|
@ -3112,6 +3116,66 @@ namespace seq {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Modifier: apply_regex_unit_split (RegexCharSplitModifier)
|
||||||
|
// For str_mem c·s ∈ R where c is a symbolic unit token (seq.unit(?c)),
|
||||||
|
// branch over minterms of R: for each minterm m_i with non-fail derivative,
|
||||||
|
// create a child that constrains ?c to the character class of m_i.
|
||||||
|
// Unlike apply_regex_var_split, no substitution and no epsilon branch.
|
||||||
|
// After the constraint is added, simplify_and_init will consume c
|
||||||
|
// deterministically via the uniform derivative check.
|
||||||
|
// mirrors ZIPT's RegexCharSplitModifier
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
bool nielsen_graph::apply_regex_unit_split(nielsen_node* node) {
|
||||||
|
for (str_mem const& mem : node->str_mems()) {
|
||||||
|
SASSERT(mem.m_str && mem.m_regex);
|
||||||
|
if (mem.is_primitive())
|
||||||
|
continue;
|
||||||
|
euf::snode* first = mem.m_str->first();
|
||||||
|
if (!first || !first->is_unit())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Compute minterms of the regex
|
||||||
|
euf::snode_vector minterms;
|
||||||
|
m_sg.compute_minterms(mem.m_regex, minterms);
|
||||||
|
VERIFY(!minterms.empty());
|
||||||
|
|
||||||
|
// Get the current char_range for this token, fall back to full
|
||||||
|
char_set const& existing =
|
||||||
|
node->char_ranges().contains(first->id())
|
||||||
|
? node->char_ranges()[first->id()]
|
||||||
|
: char_set::full(zstring::max_char());
|
||||||
|
|
||||||
|
bool created = false;
|
||||||
|
for (euf::snode* mt : minterms) {
|
||||||
|
SASSERT(mt && mt->get_expr());
|
||||||
|
SASSERT(!mt->is_fail());
|
||||||
|
|
||||||
|
char_set mt_cs = m_seq_regex->minterm_to_char_set(mt->get_expr());
|
||||||
|
// skip minterm if it doesn't overlap with the existing range
|
||||||
|
if (existing.intersect_with(mt_cs).is_empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// skip if the regex derivative is empty for this minterm
|
||||||
|
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt);
|
||||||
|
SASSERT(deriv);
|
||||||
|
if (deriv->is_fail())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// create a child and narrow the char_range for this token
|
||||||
|
nielsen_node* child = mk_child(node);
|
||||||
|
mk_edge(node, child, false);
|
||||||
|
child->add_char_range(first, mt_cs);
|
||||||
|
created = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (created)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
// Modifier: apply_regex_var_split
|
// Modifier: apply_regex_var_split
|
||||||
// For str_mem x·s ∈ R where x is a variable, split using minterms:
|
// For str_mem x·s ∈ R where x is a variable, split using minterms:
|
||||||
|
|
@ -4050,6 +4114,7 @@ namespace seq {
|
||||||
st.update("nseq mod star intr", m_stats.m_mod_star_intr);
|
st.update("nseq mod star intr", m_stats.m_mod_star_intr);
|
||||||
st.update("nseq mod gpower intr", m_stats.m_mod_gpower_intr);
|
st.update("nseq mod gpower intr", m_stats.m_mod_gpower_intr);
|
||||||
st.update("nseq mod const nielsen", m_stats.m_mod_const_nielsen);
|
st.update("nseq mod const nielsen", m_stats.m_mod_const_nielsen);
|
||||||
|
st.update("nseq mod regex unit", m_stats.m_mod_regex_unit_split);
|
||||||
st.update("nseq mod signature split", m_stats.m_mod_signature_split);
|
st.update("nseq mod signature split", m_stats.m_mod_signature_split);
|
||||||
st.update("nseq mod regex var", m_stats.m_mod_regex_var_split);
|
st.update("nseq mod regex var", m_stats.m_mod_regex_var_split);
|
||||||
st.update("nseq mod power split", m_stats.m_mod_power_split);
|
st.update("nseq mod power split", m_stats.m_mod_power_split);
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,7 @@ Abstract:
|
||||||
GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9),
|
GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9),
|
||||||
RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12),
|
RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12),
|
||||||
VarNumUnwindingModifier(13).
|
VarNumUnwindingModifier(13).
|
||||||
|
- Z3-specific heuristic inserted between (9) and (10): SignatureSplit.
|
||||||
- NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier.
|
- NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier.
|
||||||
- NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD
|
- NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD
|
||||||
integer polynomial infrastructure; power tokens are replaced with ε
|
integer polynomial infrastructure; power tokens are replaced with ε
|
||||||
|
|
|
||||||
|
|
@ -684,11 +684,75 @@ static void test_tricky_str_equations() {
|
||||||
std::cout << " ok\n";
|
std::cout << " ok\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Symbolic unit token in regex membership (RegexCharSplitModifier)
|
||||||
|
// Verifies apply_regex_unit_split: when the first token of a str_mem is
|
||||||
|
// a seq.unit(?c) with no initial char_range, the modifier must branch
|
||||||
|
// over regex minterms and constrain ?c.
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
static void test_regex_unit_split() {
|
||||||
|
std::cout << "test_regex_unit_split\n";
|
||||||
|
|
||||||
|
// ?c · "hhh" · X ∈ hhhbbb|bhhh — SAT (?c='b', X="")
|
||||||
|
{
|
||||||
|
nseq_fixture f;
|
||||||
|
sort* cs = f.su.mk_char_sort();
|
||||||
|
expr_ref fc(f.m.mk_fresh_const("?c", cs), f.m);
|
||||||
|
expr_ref unit_e(f.su.str.mk_unit(fc), f.m);
|
||||||
|
euf::snode* u = f.sg.mk(unit_e);
|
||||||
|
euf::snode* X = f.sb.var('X');
|
||||||
|
euf::snode* str = f.sg.mk_concat(u,
|
||||||
|
f.sg.mk_concat(f.sg.mk_char('h'),
|
||||||
|
f.sg.mk_concat(f.sg.mk_char('h'),
|
||||||
|
f.sg.mk_concat(f.sg.mk_char('h'), X))));
|
||||||
|
euf::snode* re = f.rb.parse("hhhbbb|bhhh");
|
||||||
|
f.ng.add_str_mem(str, re);
|
||||||
|
VERIFY(f.ng.solve() == seq::nielsen_graph::search_result::sat);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ?c · "hhh" · X ∉ h(aa)* — UNSAT
|
||||||
|
// 'h'-branch: "hhh" · X ∈ (aa)*, but 'h' ≠ 'a'
|
||||||
|
// all other branches: D(h(aa)*, non-h) = fail
|
||||||
|
{
|
||||||
|
nseq_fixture f;
|
||||||
|
sort* cs = f.su.mk_char_sort();
|
||||||
|
expr_ref fc(f.m.mk_fresh_const("?c", cs), f.m);
|
||||||
|
expr_ref unit_e(f.su.str.mk_unit(fc), f.m);
|
||||||
|
euf::snode* u = f.sg.mk(unit_e);
|
||||||
|
euf::snode* X = f.sb.var('X');
|
||||||
|
euf::snode* str = f.sg.mk_concat(u,
|
||||||
|
f.sg.mk_concat(f.sg.mk_char('h'),
|
||||||
|
f.sg.mk_concat(f.sg.mk_char('h'),
|
||||||
|
f.sg.mk_concat(f.sg.mk_char('h'), X))));
|
||||||
|
euf::snode* re = f.rb.parse("h(aa)*");
|
||||||
|
f.ng.add_str_mem(str, re);
|
||||||
|
VERIFY(f.ng.solve() == seq::nielsen_graph::search_result::unsat);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ?c · X ∈ (a|b)* — SAT (unit split branches on {a,b,others};
|
||||||
|
// 'a'- and 'b'-branches give X ∈ (a|b)*, trivially SAT)
|
||||||
|
{
|
||||||
|
nseq_fixture f;
|
||||||
|
sort* cs = f.su.mk_char_sort();
|
||||||
|
expr_ref fc(f.m.mk_fresh_const("?c", cs), f.m);
|
||||||
|
expr_ref unit_e(f.su.str.mk_unit(fc), f.m);
|
||||||
|
euf::snode* u = f.sg.mk(unit_e);
|
||||||
|
euf::snode* X = f.sb.var('X');
|
||||||
|
euf::snode* str = f.sg.mk_concat(u, X);
|
||||||
|
euf::snode* re = f.rb.parse("(a|b)*");
|
||||||
|
f.ng.add_str_mem(str, re);
|
||||||
|
VERIFY(f.ng.solve() == seq::nielsen_graph::search_result::sat);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << " ok\n";
|
||||||
|
}
|
||||||
|
|
||||||
void tst_nseq_zipt() {
|
void tst_nseq_zipt() {
|
||||||
test_zipt_str_equations();
|
test_zipt_str_equations();
|
||||||
test_tricky_str_equations();
|
test_tricky_str_equations();
|
||||||
test_zipt_regex_ground();
|
test_zipt_regex_ground();
|
||||||
test_zipt_str_membership();
|
test_zipt_str_membership();
|
||||||
test_zipt_parikh();
|
test_zipt_parikh();
|
||||||
|
test_regex_unit_split();
|
||||||
std::cout << "nseq_zipt: all tests passed\n";
|
std::cout << "nseq_zipt: all tests passed\n";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue