3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-08-02 17:30:23 +00:00

Integrate new regex solver (#4602)

* std::cout debugging statements

* comment out std::cout debugging as this is now a shared fork

* convert std::cout to TRACE statements for seq_rewriter and seq_regex

* add cases to min_length and max_length for regexes

* bug fix

* update min_length and max_length functions for REs

* initial pass on simplifying derivative normal forms by eliminating redundant predicates locally

* add seq_regex_brief trace statements

* working on debugging ref count issue

* fix ref count bug and convert trace statements to seq_regex_brief

* add compact tracing for cache hits/misses

* seq_regex fix cache hit/miss tracing and wrapper around is_nullable

* minor

* label and disable more experimental changes for testing

* minor documentation / tracing

* a few more @EXP annotations

* dead state elimination skeleton code

* progress on dead state elimination

* more progress on dead state elimination

* refactor dead state class to separate self-contained state_graph class

* finish factoring state_graph to only work with unsigned values, and implement separate functionality for expr* logic

* implement get_all_derivatives, add debug tracing

* trace statements for debugging is_nullable loop bug

* fix is_nullable loop bug

* comment out local nullable change and mark experimental

* pretty printing for state_graph

* rewrite state graph to remove the fragile assumption that all edges from a state are added at a time

* start of general cycle detection check + fix some comments

* implement full cycle detection procedure

* normalize derivative conditions to form 'ele <= a'

* order derivative conditions by character code

* fix confusing names m_to and m_from

* assign increasing state IDs from 1 instead of using get_id on AST node

* remove elim_condition call in get_all_derivatives

* use u_map instead of uint_map to avoid memory leak

* remove unnecessary call to is_ground

* debugging

* small improvements to seq_regex_brief tracing

* fix bug on evil2 example

* save work

* new propagate code

* work in progress on using same seq sort for deriv calls

* avoid re-computing derivatives: use same head var for every derivative call

* use min_length on regexes to prune search

* simple implementation of can_be_in_cycle using rank function idea

* add a disabled experimental change

* minor cleanup comments, etc.

* seq_rewriter cleanup for PR

* typo noticed by Nikolaj

* move state graph to util/state_graph

* re-add accidentally removed line

* clean up seq_regex code removing obsolete functions and comments

* a few more cleanup items

* remove experimental functionality for integration

* fix compilation

* remove some tracing and TODOs

* remove old comment

* update copyright dates to 2020

* feedback from Nikolaj

* use [] for map access

* make state_graph methods constant

* avoid recursion in mark_dead_recursive and mark_live_recursive

* a possible bug fix in propagate_nonempty

* write down list of invariants in state_graph

* implement partial invariant check and insert CASSERT statements

* expand on invariant check and tracing

* finish state graph invariant check

* minor tweaks

* regex propagation: convert first two axioms to propagations

* remove obsolete regex solver functionality

Co-authored-by: calebstanford-msr <t-casta@microsoft.com>
This commit is contained in:
Caleb Stanford 2020-07-30 16:54:49 -04:00 committed by GitHub
parent 293b0b8cc2
commit 976e4c91b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 922 additions and 257 deletions

View file

@ -1,5 +1,5 @@
/*++
Copyright (c) 2011 Microsoft Corporation
Copyright (c) 2020 Microsoft Corporation
Module Name:
@ -24,7 +24,8 @@ namespace smt {
seq_regex::seq_regex(theory_seq& th):
th(th),
ctx(th.get_context()),
m(th.get_manager())
m(th.get_manager()),
m_state_to_expr(m)
{}
// Accessor for the sequence utility object held by the owning theory (th.m_util).
seq_util& seq_regex::u() { return th.m_util; }
@ -35,34 +36,6 @@ namespace smt {
// Accessor for the arithmetic utility object held by the owning theory (th.m_autil).
arith_util& seq_regex::a() { return th.m_autil; }
// Apply the owning theory's rewriter (th.m_rewrite) to e. The expression is
// passed by non-const reference, so the result is presumably written back into e.
void seq_regex::rewrite(expr_ref& e) { th.m_rewrite(e); }
bool seq_regex::can_propagate() const {
for (auto const& p : m_to_propagate) {
literal trigger = p.m_trigger;
if (trigger == null_literal || ctx.get_assignment(trigger) != l_undef)
return true;
}
return false;
}
// Process the queue of pending propagations (m_to_propagate).
//
// Each entry pairs a literal with a trigger literal. An entry is attempted
// only when it has no trigger or its trigger is already assigned. Processing
// stops early once the context becomes inconsistent.
//
// Returns true iff at least one queued entry was discharged and removed.
bool seq_regex::propagate() {
bool change = false;
for (unsigned i = 0; !ctx.inconsistent() && i < m_to_propagate.size(); ++i) {
propagation_lit const& pl = m_to_propagate[i];
// Local copy: propagate(lit, trigger) takes the trigger by reference and
// may overwrite it with a new literal to wait on.
literal trigger = pl.m_trigger;
// Still blocked on an unassigned trigger: leave the entry queued.
if (trigger != null_literal && ctx.get_assignment(trigger) == l_undef)
continue;
if (propagate(pl.m_lit, trigger)) {
// Entry is done. The post-decrement makes the loop revisit slot i, which
// presumably now holds the element swapped in by erase_and_swap.
// NOTE(review): pl must not be used after this point.
m_to_propagate.erase_and_swap(i--);
change = true;
}
else if (trigger != pl.m_trigger) {
// Not finished, but the trigger changed: re-queue under the new trigger.
m_to_propagate.set(i, propagation_lit(pl.m_lit, trigger));
}
}
return change;
}
/**
* is_string_equality holds of str.in_re s R,
*
@ -103,14 +76,14 @@ namespace smt {
}
/**
* Propagate the atom (str.in.re s r)
* Propagate the atom (str.in_re s r)
*
* Propagation implements the following inference rules
*
* (not (str.in.re s r)) => (str.in.re s (complement r))
* (str.in.re s r) => r != {}
* (not (str.in_re s r)) => (str.in_re s (complement r))
* (str.in_re s r) => r != {}
*
* (str.in.re s r) => (accept s 0 r)
* (str.in_re s r) => (accept s 0 r)
*/
void seq_regex::propagate_in_re(literal lit) {
@ -118,7 +91,9 @@ namespace smt {
expr* e = ctx.bool_var2expr(lit.var());
VERIFY(str().is_in_re(e, s, r));
TRACE("seq", tout << "propagate " << lit.sign() << " " << mk_pp(e, m) << "\n";);
TRACE("seq_regex", tout << "propagate in RE: " << lit.sign() << " " << mk_pp(e, m) << std::endl;);
STRACE("seq_regex_brief", tout << "PIR(" << mk_pp(s, m) << ","
<< state_str(r) << ") ";);
// convert negative membership literals to positive
// ~(s in R) => s in C(R)
@ -140,21 +115,6 @@ namespace smt {
if (is_string_equality(lit))
return;
//
// TBD s in R => R != {}
// non-emptiness enforcement could instead of here,
// be added to propagate_accept after some threshold is met.
//
if (false) {
expr_ref is_empty(m.mk_eq(r, re().mk_empty(m.get_sort(s))), m);
rewrite(is_empty);
literal is_emptyl = th.mk_literal(is_empty);
if (ctx.get_assignment(is_emptyl) != l_false) {
th.propagate_lit(nullptr, 1, &lit, ~is_emptyl);
return;
}
}
expr_ref zero(a().mk_int(0), m);
expr_ref acc = sk().mk_accept(s, zero, r);
literal acc_lit = th.mk_literal(acc);
@ -164,27 +124,30 @@ namespace smt {
th.propagate_lit(nullptr, 1, &lit, acc_lit);
}
// Attempt to propagate the accept literal immediately. When
// propagate(lit, t) returns false, the literal is queued in m_to_propagate
// together with the trigger t that the call left behind (t starts out as
// null_literal and is passed by reference).
void seq_regex::propagate_accept(literal lit) {
// std::cout << "PA ";
literal t = null_literal;
if (!propagate(lit, t))
m_to_propagate.push_back(propagation_lit(lit, t));
}
/**
* Propagate the atom (accept s i r)
*
* Propagation implements the following inference rules
*
* (accept s i r[if(c,r1,r2)]) & c => (accept s i r[r1])
* (accept s i r[if(c,r1,r2)]) & ~c => (accept s i r[r2])
* (accept s i r) & nullable(r) => len(s) >= i
* (accept s i r) & ~nullable(r) => len(s) >= i + 1
* (accept s i r) & len(s) <= i => nullable(r)
* (accept s i r) & len(s) > i => (accept s (+ i 1) D(nth(s,i), r))
* Propagation triggers updating the state graph for dead state detection:
* (accept s i r) => update_state_graph(r)
* (accept s i r) & dead(r) => false
*
* Propagation is also blocked under certain conditions to throttle
* state space exploration past a certain point: see block_unfolding
*
* Otherwise, propagation implements the following inference rules:
*
* Rule 1. (accept s i r) => len(s) >= i + min_len(r)
* Rule 2. (accept s i r) & len(s) <= i => nullable(r)
* Rule 3. (accept s i r) & len(s) > i =>
* (accept s (i + 1) (derivative s[i] r))
*
* Acceptance of a derivative is unfolded into a disjunction over
* all derivatives. Effectively, this implements the following rule,
* but all in one step:
* (accept s i (ite c r1 r2)) =>
* c & (accept s i r1) \/ ~c & (accept s i r2)
*/
bool seq_regex::propagate(literal lit, literal& trigger) {
void seq_regex::propagate_accept(literal lit) {
SASSERT(!lit.sign());
expr* s = nullptr, *i = nullptr, *r = nullptr;
@ -192,146 +155,78 @@ namespace smt {
unsigned idx = 0;
VERIFY(sk().is_accept(e, s, i, idx, r));
// std::cout << "\nP " << idx << " " << r->get_id() << " ";
TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
TRACE("seq_regex", tout << "propagate accept: "
<< mk_pp(e, m) << std::endl;);
STRACE("seq_regex_brief", tout << std::endl
<< "PA(" << mk_pp(s, m) << "@" << idx
<< "," << state_str(r) << ") ";);
if (re().is_empty(r)) {
STRACE("seq_regex_brief", tout << "(empty) ";);
th.add_axiom(~lit);
return true;
return;
}
if (block_unfolding(lit, idx))
return true;
update_state_graph(r);
propagate_nullable(lit, s, idx, r);
return propagate_derivative(lit, e, s, i, idx, r, trigger);
}
/**
Implement the two axioms as propagations:
(accept s i r) => len(s) >= i
(accept s i r) & ~nullable(r) => len(s) >= i + 1
evaluate nullable(r):
nullable(r) := true -> propagate: (accept s i r) => len(s) >= i
nullable(r) := false -> propagate: (accept s i r) => len(s) >= i + 1
Otherwise:
propagate: (accept s i r) => len(s) >= i
evaluate len(s) <= i:
len(s) <= i := undef -> axiom: (accept s i r) & len(s) <= i => nullable(r)
len(s) <= i := true -> propagate: (accept s i r) & len(s) <= i => nullable(r)
len(s) <= i := false -> noop.
*/
void seq_regex::propagate_nullable(literal lit, expr* s, unsigned idx, expr* r) {
// std::cout << "PN ";
expr_ref is_nullable = seq_rw().is_nullable(r);
rewrite(is_nullable);
literal len_s_ge_i = th.m_ax.mk_ge(th.mk_len(s), idx);
if (m.is_true(is_nullable)) {
th.propagate_lit(nullptr, 1,&lit, len_s_ge_i);
if (m_state_graph.is_dead(get_state_id(r))) {
STRACE("seq_regex_brief", tout << "(dead) ";);
th.add_axiom(~lit);
return;
}
else if (m.is_false(is_nullable)) {
th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + 1));
//unsigned len = std::max(1u, re().min_length(r));
//th.propagate_lit(nullptr, 1, &lit, th.m_ax.mk_ge(th.mk_len(s), idx + re().min_length(r)));
if (block_unfolding(lit, idx)) {
STRACE("seq_regex_brief", tout << "(blocked) ";);
return;
}
else {
literal is_nullable_lit = th.mk_literal(is_nullable);
STRACE("seq_regex_brief", tout << "(unfold) ";);
// Rule 1: use min_length to prune search
expr_ref s_to_re(re().mk_to_re(s), m);
expr_ref s_plus_r(re().mk_concat(s_to_re, r), m);
unsigned min_len = re().min_length(s_plus_r);
literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len);
th.propagate_lit(nullptr, 1, &lit, len_s_ge_min);
// Axiom equivalent to the above: th.add_axiom(~lit, len_s_ge_min);
// Rule 2: nullable check
literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
expr_ref is_nullable = is_nullable_wrapper(r);
if (m.is_false(is_nullable)) {
th.propagate_lit(nullptr, 1, &lit, ~len_s_le_i);
}
else if (!m.is_true(is_nullable)) {
// is_nullable did not simplify
literal is_nullable_lit = th.mk_literal(is_nullable_wrapper(r));
ctx.mark_as_relevant(is_nullable_lit);
literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
switch (ctx.get_assignment(len_s_le_i)) {
case l_undef:
th.add_axiom(~lit, ~len_s_le_i, is_nullable_lit);
break;
case l_true: {
literal lits[2] = { lit, len_s_le_i };
th.propagate_lit(nullptr, 2, lits, is_nullable_lit);
break;
}
case l_false:
break;
}
th.propagate_lit(nullptr, 1, &lit, len_s_ge_i);
th.add_axiom(~lit, ~len_s_le_i, is_nullable_lit);
}
}
bool seq_regex::propagate_derivative(literal lit, expr* e, expr* s, expr* i, unsigned idx, expr* r, literal& trigger) {
// (accept s i R) & len(s) > i => (accept s (+ i 1) D(nth(s, i), R)) or conds
// std::cout << "PD ";
expr_ref d(m);
expr_ref head = th.mk_nth(s, i);
d = derivative_wrapper(m.mk_var(0, m.get_sort(head)), r);
// timer tm;
// std::cout << d->get_id() << " " << tm.get_seconds() << "\n";
//if (tm.get_seconds() > 0.3)
// std::cout << d << "\n";
// std::cout.flush();
literal_vector conds;
conds.push_back(~lit);
conds.push_back(th.m_ax.mk_le(th.mk_len(s), idx));
expr* cond = nullptr, *tt = nullptr, *el = nullptr;
var_subst subst(m);
expr_ref_vector sub(m);
sub.push_back(head);
// s in R[if(p,R1,R2)] & p => s in R[R1]
// s in R[if(p,R1,R2)] & ~p => s in R[R2]
while (m.is_ite(d, cond, tt, el)) {
literal lcond = th.mk_literal(subst(cond, sub));
switch (ctx.get_assignment(lcond)) {
case l_true:
conds.push_back(~lcond);
d = tt;
break;
case l_false:
conds.push_back(lcond);
d = el;
break;
case l_undef:
#if 1
ctx.mark_as_relevant(lcond);
trigger = lcond;
return false;
#else
if (re().is_empty(tt)) {
literal_vector ensure_false(conds);
ensure_false.push_back(~lcond);
th.add_axiom(ensure_false);
conds.push_back(lcond);
d = el;
}
else if (re().is_empty(el)) {
literal_vector ensure_true(conds);
ensure_true.push_back(lcond);
th.add_axiom(ensure_true);
conds.push_back(~lcond);
d = tt;
}
else {
ctx.mark_as_relevant(lcond);
trigger = lcond;
return false;
}
break;
#endif
}
// Rule 3: derivative unfolding
literal_vector accept_next;
expr_ref hd = th.mk_nth(s, i);
expr_ref deriv(m);
deriv = derivative_wrapper(hd, r);
accept_next.push_back(~lit);
accept_next.push_back(len_s_le_i);
expr_ref_pair_vector cofactors(m);
get_cofactors(deriv, cofactors);
for (auto const& p : cofactors) {
if (m.is_false(p.first) || re().is_empty(p.second)) continue;
expr_ref cond(p.first, m);
expr_ref deriv_leaf(p.second, m);
expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
expr_ref choice(m.mk_and(cond, acc), m);
literal choice_lit = th.mk_literal(choice);
accept_next.push_back(choice_lit);
// TBD: try prioritizing unvisited states here over visited
// ones (in the state graph), to improve performance
STRACE("seq_regex_verbose", tout << "added choice: "
<< mk_pp(choice, m) << std::endl;);
}
if (!is_ground(d)) {
d = subst(d, sub);
}
// at this point there should be no free variables as the ites are at top-level.
if (!re().is_empty(d))
conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d)));
th.add_axiom(conds);
TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";);
// std::cout << "D ";
return true;
th.add_axiom(accept_next);
}
/**
@ -352,7 +247,7 @@ namespace smt {
* within the same Regex.
*/
bool seq_regex::coallesce_in_re(literal lit) {
return false;
return false; // disabled
expr* s = nullptr, *r = nullptr;
expr* e = ctx.bool_var2expr(lit.var());
VERIFY(str().is_in_re(e, s, r));
@ -372,7 +267,7 @@ namespace smt {
th.m_trail_stack.push(vector_value_trail<theory_seq, s_in_re, true>(m_s_in_re, i));
m_s_in_re[i].m_active = false;
IF_VERBOSE(11, verbose_stream() << "Intersect " << regex << " " <<
mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";);
mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << std::endl;);
regex = re().mk_inter(entry.m_re, regex);
rewrite(regex);
lits.push_back(~entry.m_lit);
@ -402,17 +297,71 @@ namespace smt {
}
/*
Wrapper around the regex symbolic derivative from the rewriter.
Wrapper around calls to is_nullable from the seq rewriter.
Note: the nullable wrapper and derivative wrapper actually use
different sequence rewriters; these are at:
m_seq_rewrite
(returned by seq_rw())
th.m_rewrite.m_imp->m_cfg.m_seq_rw
(private, can't be accessed directly)
As a result operations are cached separately for the nullable
and derivative calls. TBD if caching them using the same rewriter
makes any difference.
*/
expr_ref seq_regex::is_nullable_wrapper(expr* r) {
    STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;);
    // Obtain the nullability condition of r from the sequence rewriter and
    // then simplify it through rewrite().
    expr_ref nullable_cond = seq_rw().is_nullable(r);
    rewrite(nullable_cond);
    STRACE("seq_regex", tout << "nullable result: " << mk_pp(nullable_cond, m) << std::endl;);
    STRACE("seq_regex_brief", tout << "n(" << state_str(r) << ")="
                                   << mk_pp(nullable_cond, m) << " ";);
    return nullable_cond;
}
/*
Wrapper around the regex symbolic derivative from the seq rewriter.
Ensures that the derivative is written in a normalized BDD form
with optimizations for if-then-else expressions involving the head.
Note: the nullable wrapper and derivative wrapper actually use
different sequence rewriters; these are at:
m_seq_rewrite
(returned by seq_rw())
th.m_rewrite.m_imp->m_cfg.m_seq_rw
(private, can't be accessed directly)
As a result operations are cached separately for the nullable
and derivative calls. TBD if caching them using the same rewriter
makes any difference.
*/
expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
expr_ref result = expr_ref(re().mk_derivative(hd, r), m);
STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;);
// Use canonical variable for head
expr_ref hd_canon(m.mk_var(0, m.get_sort(hd)), m);
expr_ref result(re().mk_derivative(hd_canon, r), m);
rewrite(result);
// Substitute with real head
var_subst subst(m);
expr_ref_vector sub(m);
sub.push_back(hd);
result = subst(result, sub);
STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")="
<< state_str(result) << " ";);
return result;
}
void seq_regex::propagate_eq(expr* r1, expr* r2) {
TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
STRACE("seq_regex_brief", tout << "PEQ ";);
sort* seq_sort = nullptr;
VERIFY(u().is_re(r1, seq_sort));
expr_ref r = symmetric_diff(r1, r2);
@ -423,6 +372,9 @@ namespace smt {
}
void seq_regex::propagate_ne(expr* r1, expr* r2) {
TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
STRACE("seq_regex_brief", tout << "PNEQ ";);
sort* seq_sort = nullptr;
VERIFY(u().is_re(r1, seq_sort));
expr_ref r = symmetric_diff(r1, r2);
@ -452,18 +404,25 @@ namespace smt {
void seq_regex::propagate_is_non_empty(literal lit) {
expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr, *n = nullptr;
VERIFY(sk().is_is_non_empty(e, r, u, n));
expr_ref is_nullable = seq_rw().is_nullable(r);
rewrite(is_nullable);
TRACE("seq_regex", tout << "propagate nonempty: " << mk_pp(e, m) << std::endl;);
STRACE("seq_regex_brief", tout
<< std::endl << "PNE(" << expr_id_str(e) << "," << state_str(r)
<< "," << expr_id_str(u) << "," << expr_id_str(n) << ") ";);
expr_ref is_nullable = is_nullable_wrapper(r);
if (m.is_true(is_nullable))
return;
literal null_lit = th.mk_literal(is_nullable);
expr_ref hd = mk_first(r, n);
expr_ref d(m);
d = derivative_wrapper(hd, r);
literal_vector lits;
lits.push_back(~lit);
if (null_lit != false_literal)
lits.push_back(null_lit);
expr_ref_pair_vector cofactors(m);
get_cofactors(d, cofactors);
for (auto const& p : cofactors) {
@ -474,11 +433,12 @@ namespace smt {
rewrite(cond);
if (m.is_false(cond))
continue;
expr_ref next_non_empty = sk().mk_is_non_empty(p.second, re().mk_union(u, p.second), n);
expr_ref next_non_empty = sk().mk_is_non_empty(p.second, re().mk_union(u, r), n);
if (!m.is_true(cond))
next_non_empty = m.mk_and(cond, next_non_empty);
lits.push_back(th.mk_literal(next_non_empty));
}
th.add_axiom(lits);
}
@ -498,6 +458,25 @@ namespace smt {
}
}
void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) {
    // Build a symbolic head element: a fresh constant over the sort reported
    // by is_re, wrapped by mk_first, and take the derivative of r w.r.t. it.
    sort* elem_seq_sort = nullptr;
    VERIFY(u().is_re(r, elem_seq_sort));
    expr_ref fresh(m.mk_fresh_const("re.char", elem_seq_sort), m);
    expr_ref head = mk_first(r, fresh);
    expr_ref deriv = derivative_wrapper(head, r);

    // Split the derivative into (condition, regex) cofactors.
    expr_ref_pair_vector leaves(m);
    get_cofactors(deriv, leaves);
    STRACE("seq_regex_verbose", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;);

    // Keep only leaves whose condition is not syntactically false and whose
    // regex is not the empty regex; append them to the caller's vector.
    for (auto const& leaf : leaves) {
        if (!m.is_false(leaf.first) && !re().is_empty(leaf.second)) {
            STRACE("seq_regex_verbose", tout << "adding derivative: " << mk_pp(leaf.second, m) << std::endl;);
            results.push_back(leaf.second);
        }
    }
}
/*
is_empty(r, u) => ~is_nullable(r)
is_empty(r, u) => (forall x . ~cond(x)) or is_empty(r1, u union r) for (cond, r1) in min-terms(D(x,r))
@ -507,8 +486,13 @@ namespace smt {
void seq_regex::propagate_is_empty(literal lit) {
expr* e = ctx.bool_var2expr(lit.var()), *r = nullptr, *u = nullptr, *n = nullptr;
VERIFY(sk().is_is_empty(e, r, u, n));
expr_ref is_nullable = seq_rw().is_nullable(r);
rewrite(is_nullable);
expr_ref is_nullable = is_nullable_wrapper(r);
TRACE("seq_regex", tout << "propagate empty: " << mk_pp(e, m) << std::endl;);
STRACE("seq_regex_brief", tout
<< std::endl << "PE(" << expr_id_str(e) << "," << state_str(r)
<< "," << expr_id_str(u) << "," << expr_id_str(n) << ") ";);
if (m.is_true(is_nullable)) {
th.add_axiom(~lit);
return;
@ -546,4 +530,89 @@ namespace smt {
VERIFY(u().is_seq(seq_sort, elem_sort));
return sk().mk("re.first", n, a().mk_int(r->get_id()), elem_sort);
}
/**
* Dead state elimination using the state_graph class
*/
unsigned seq_regex::get_state_id(expr* e) {
    // Already registered: hand back the ID recorded earlier.
    if (m_expr_to_state.contains(e))
        return m_expr_to_state.find(e);

    // First sighting: IDs are 1-based and increasing, because the ID is the
    // size of m_state_to_expr right after e has been appended.
    m_state_to_expr.push_back(e);
    unsigned fresh_id = m_state_to_expr.size();
    m_expr_to_state.insert(e, fresh_id);
    STRACE("seq_regex_brief", tout << "new(" << expr_id_str(e)
                                   << ")=" << state_str(e) << " ";);
    return fresh_id;
}
/*
    Map a state ID back to the expression it was assigned to.

    get_state_id hands out IDs starting from 1: the ID is the size of
    m_state_to_expr right after the expression was pushed. The vector itself
    is indexed from 0, so the expression for state `id` lives at index id - 1.
    Using `id` directly would return the wrong expression and read past the
    end for the most recently added state (id == size()).
*/
expr* seq_regex::get_expr_from_id(unsigned id) {
    SASSERT(id >= 1);
    SASSERT(id <= m_state_to_expr.size());
    return m_state_to_expr.get(id - 1);
}
// Hint for the state graph: may the edge r1 -> r2 be part of a cycle?
// Currently a placeholder that ignores both arguments and always answers
// true. update_state_graph forwards the result to m_state_graph.add_edge.
bool seq_regex::can_be_in_cycle(expr *r1, expr *r2) {
// TBD: This can be used to optimize the state graph:
// return false here if it is known that r1 -> r2 can never be
// in a cycle. There are various easy syntactic checks on r1 and r2
// that can be used to infer this (e.g. star height, or length if
// both are star-free).
// This check need not be sound, but if it is not, some dead states
// will be missed.
return true;
}
/*
    Register regex r in the state graph and expand it one step.

    Returns false without expanding when r's state is already marked done,
    or when the graph has reached m_max_state_graph_size. Otherwise:
      - a regex whose nullability simplifies to true is marked live;
      - any other regex gets an edge to each derivative reported by
        get_all_derivatives and is then marked done.
    Returns true when the graph was updated.
*/
bool seq_regex::update_state_graph(expr* r) {
    unsigned src_id = get_state_id(r);
    if (m_state_graph.is_done(src_id))
        return false;
    if (m_state_graph.get_size() >= m_max_state_graph_size) {
        STRACE("seq_regex", tout << "Warning: ignored state graph update -- max size of seen states reached!" << std::endl;);
        STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
        return false;
    }
    STRACE("seq_regex", tout << "Updating state graph for regex "
                             << mk_pp(r, m) << ") ";);

    // Make sure the source state exists before classifying it.
    m_state_graph.add_state(src_id);
    STRACE("seq_regex_brief", tout << std::endl << "USG("
                                   << state_str(r) << ") ";);

    expr_ref r_nullable = is_nullable_wrapper(r);
    if (!m.is_true(r_nullable)) {
        // Not known to be nullable: connect r to every derivative.
        expr_ref_vector successors(m);
        STRACE("seq_regex_verbose", tout
            << std::endl << "  getting all derivs: " << src_id << " ";);
        get_all_derivatives(r, successors);
        for (unsigned k = 0; k < successors.size(); ++k) {
            expr* succ = successors.get(k);
            unsigned dst_id = get_state_id(succ);
            STRACE("seq_regex_verbose", tout
                << std::endl << "  traversing deriv: " << dst_id << " ";);
            m_state_graph.add_state(dst_id);
            m_state_graph.add_edge(src_id, dst_id, can_be_in_cycle(r, succ));
        }
        m_state_graph.mark_done(src_id);
    }
    else {
        // Nullability simplified to true: the state is live.
        m_state_graph.mark_live(src_id);
    }

    STRACE("seq_regex_brief", tout << std::endl;);
    STRACE("seq_regex_brief", m_state_graph.display(tout););
    return true;
}
std::string seq_regex::state_str(expr* e) {
    // Print the state ID when e is already registered as a state; otherwise
    // fall back to the "id<N>" form so that tracing never registers a state.
    return m_expr_to_state.contains(e)
        ? std::to_string(get_state_id(e))
        : expr_id_str(e);
}
std::string seq_regex::expr_id_str(expr* e) {
    // Label of the form "id<N>", where N is the value returned by e->get_id().
    std::string label("id");
    label += std::to_string(e->get_id());
    return label;
}
}