3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-29 20:05:51 +00:00

new regex automata start; add complexity estimation

This commit is contained in:
Murphy Berzish 2017-12-04 18:05:00 -05:00
parent b581ab70ed
commit fbe8d1577e
7 changed files with 127 additions and 3 deletions

View file

@ -200,6 +200,9 @@ void re2automaton::set_solver(expr_solver* solver) {
m_sa = alloc(symbolic_automata_t, sm, *m_ba.get());
}
eautomaton* re2automaton::mk_product(eautomaton* a1, eautomaton* a2) {
return m_sa->mk_product(*a1, *a2);
}
eautomaton* re2automaton::operator()(expr* e) {
eautomaton* r = re2aut(e);

View file

@ -53,7 +53,9 @@ public:
bool is_range() const { return m_ty == t_range; }
sort* get_sort() const { return m_sort; }
expr* get_char() const { SASSERT(is_char()); return m_t; }
expr* get_pred() const { SASSERT(is_pred()); return m_t; }
expr* get_lo() const { SASSERT(is_range()); return m_t; }
expr* get_hi() const { SASSERT(is_range()); return m_s; }
};
class sym_expr_manager {
@ -87,6 +89,7 @@ public:
~re2automaton();
eautomaton* operator()(expr* e);
void set_solver(expr_solver* solver);
eautomaton* mk_product(eautomaton *a1, eautomaton *a2);
};
/**

View file

@ -79,6 +79,7 @@ def_module_params(module_name='smt',
('theory_aware_branching', BOOL, False, 'Allow the context to use extra information from theory solvers regarding literal branching prioritization.'),
('str.finite_overlap_models', BOOL, False, 'attempt a finite model search for overlapping variables instead of completely giving up on the arrangement'),
('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),

View file

@ -31,4 +31,5 @@ void theory_str_params::updt_params(params_ref const & _p) {
m_UseBinarySearch = p.str_use_binary_search();
m_BinarySearchInitialUpperBound = p.str_binary_search_start();
m_OverlapTheoryAwarePriority = p.str_overlap_priority();
m_RegexAutomata = p.str_regex_automata();
}

View file

@ -80,6 +80,13 @@ struct theory_str_params {
double m_OverlapTheoryAwarePriority;
/*
* If RegexAutomata is set to true,
* Z3str3 will use automata-based methods to reason about
* regular expression constraints.
*/
bool m_RegexAutomata;
theory_str_params(params_ref const & p = params_ref()):
m_StrongArrangements(true),
m_AggressiveLengthTesting(false),
@ -91,7 +98,8 @@ struct theory_str_params {
m_FiniteOverlapModels(false),
m_UseBinarySearch(false),
m_BinarySearchInitialUpperBound(64),
m_OverlapTheoryAwarePriority(-0.1)
m_OverlapTheoryAwarePriority(-0.1),
m_RegexAutomata(true)
{
updt_params(p);
}

View file

@ -6446,6 +6446,111 @@ namespace smt {
}
}
// saturating unsigned addition
unsigned inline _qadd(unsigned a, unsigned b) {
if (a == UINT_MAX || b == UINT_MAX) {
return UINT_MAX;
}
unsigned result = a + b;
if (result < a || result < b) {
return UINT_MAX;
}
return result;
}
// saturating unsigned multiply
unsigned inline _qmul(unsigned a, unsigned b) {
if (a == UINT_MAX || b == UINT_MAX) {
return UINT_MAX;
}
unsigned result = a * b;
if (result < a || result < b) {
return UINT_MAX;
}
return result;
}
unsigned theory_str::estimate_regex_complexity(expr * re) {
ENSURE(u.is_re(re));
expr * sub1;
expr * sub2;
if (u.re.is_to_re(re, sub1)) {
SASSERT(u.str.is_string(sub1));
zstring str;
u.str.is_string(sub1, str);
return str.length();
} else if (u.re.is_complement(re, sub1)) {
return estimate_regex_complexity_under_complement(sub1);
} else if (u.re.is_concat(re, sub1, sub2)) {
unsigned cx1 = estimate_regex_complexity(sub1);
unsigned cx2 = estimate_regex_complexity(sub2);
return _qadd(cx1, cx2);
} else if (u.re.is_union(re, sub1, sub2)) {
unsigned cx1 = estimate_regex_complexity(sub1);
unsigned cx2 = estimate_regex_complexity(sub2);
return _qadd(cx1, cx2);
} else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) {
unsigned cx = estimate_regex_complexity(sub1);
return _qmul(2, cx);
} else if (u.re.is_range(re, sub1, sub2)) {
SASSERT(u.str.is_string(sub1));
SASSERT(u.str.is_string(sub2));
zstring str1, str2;
u.str.is_string(sub1, str1);
u.str.is_string(sub2, str2);
SASSERT(str1.length() == 1);
SASSERT(str2.length() == 1);
return 1 + str2[0] - str1[0];
} else if (u.re.is_full(re)) {
return 1;
} else {
TRACE("str", tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;);
return 1;
}
}
unsigned theory_str::estimate_regex_complexity_under_complement(expr * re) {
ENSURE(u.is_re(re));
expr * sub1;
expr * sub2;
if (u.re.is_to_re(re, sub1)) {
SASSERT(u.str.is_string(sub1));
zstring str;
u.str.is_string(sub1, str);
return str.length();
} else if (u.re.is_complement(re, sub1)) {
// Why don't we return the regular complexity here?
// We could, but this might be called from under another complemented subexpression.
// It's better to give a worst-case complexity.
return estimate_regex_complexity_under_complement(sub1);
} else if (u.re.is_concat(re, sub1, sub2)) {
unsigned cx1 = estimate_regex_complexity_under_complement(sub1);
unsigned cx2 = estimate_regex_complexity_under_complement(sub2);
return _qadd(_qmul(2, cx1), cx2);
} else if (u.re.is_union(re, sub1, sub2)) {
unsigned cx1 = estimate_regex_complexity_under_complement(sub1);
unsigned cx2 = estimate_regex_complexity_under_complement(sub2);
return _qmul(cx1, cx2);
} else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) {
unsigned cx = estimate_regex_complexity_under_complement(sub1);
return _qmul(2, cx);
} else if (u.re.is_range(re, sub1, sub2)) {
SASSERT(u.str.is_string(sub1));
SASSERT(u.str.is_string(sub2));
zstring str1, str2;
u.str.is_string(sub1, str1);
u.str.is_string(sub2, str2);
SASSERT(str1.length() == 1);
SASSERT(str2.length() == 1);
return 1 + str2[0] - str1[0];
} else if (u.re.is_full(re)) {
return 1;
} else {
TRACE("str", tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;);
return 1;
}
}
/*
* strArgmt::solve_concat_eq_str()
* Solve concatenations of the form:

View file

@ -457,11 +457,14 @@ protected:
expr * mk_RegexIn(expr * str, expr * regexp);
void instantiate_axiom_RegexIn(enode * e);
app * mk_unroll(expr * n, expr * bound);
void process_unroll_eq_const_str(expr * unrollFunc, expr * constStr);
void unroll_str2reg_constStr(expr * unrollFunc, expr * eqConstStr);
void process_concat_eq_unroll(expr * concat, expr * unroll);
// regex automata
unsigned estimate_regex_complexity(expr * re);
unsigned estimate_regex_complexity_under_complement(expr * re);
void set_up_axioms(expr * ex);
void handle_equality(expr * lhs, expr * rhs);