From fbe8d1577ed54671a6d9fab0a59520225b701127 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Mon, 4 Dec 2017 18:05:00 -0500 Subject: [PATCH] new regex automata start; add complexity estimation --- src/ast/rewriter/seq_rewriter.cpp | 3 + src/ast/rewriter/seq_rewriter.h | 5 +- src/smt/params/smt_params_helper.pyg | 1 + src/smt/params/theory_str_params.cpp | 1 + src/smt/params/theory_str_params.h | 10 ++- src/smt/theory_str.cpp | 105 +++++++++++++++++++++++++++ src/smt/theory_str.h | 5 +- 7 files changed, 127 insertions(+), 3 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index fa95278b4..6f501c6e0 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -200,6 +200,9 @@ void re2automaton::set_solver(expr_solver* solver) { m_sa = alloc(symbolic_automata_t, sm, *m_ba.get()); } +eautomaton* re2automaton::mk_product(eautomaton* a1, eautomaton* a2) { + return m_sa->mk_product(*a1, *a2); +} eautomaton* re2automaton::operator()(expr* e) { eautomaton* r = re2aut(e); diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 69f319168..95b043bd7 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -53,7 +53,9 @@ public: bool is_range() const { return m_ty == t_range; } sort* get_sort() const { return m_sort; } expr* get_char() const { SASSERT(is_char()); return m_t; } - + expr* get_pred() const { SASSERT(is_pred()); return m_t; } + expr* get_lo() const { SASSERT(is_range()); return m_t; } + expr* get_hi() const { SASSERT(is_range()); return m_s; } }; class sym_expr_manager { @@ -87,6 +89,7 @@ public: ~re2automaton(); eautomaton* operator()(expr* e); void set_solver(expr_solver* solver); + eautomaton* mk_product(eautomaton *a1, eautomaton *a2); }; /** diff --git a/src/smt/params/smt_params_helper.pyg b/src/smt/params/smt_params_helper.pyg index 937aa6a2b..7f35feb64 100644 --- a/src/smt/params/smt_params_helper.pyg +++ b/src/smt/params/smt_params_helper.pyg @@ -79,6 +79,7 @@ def_module_params(module_name='smt', ('theory_aware_branching', BOOL, False, 'Allow the context to use extra information from theory solvers regarding literal branching prioritization.'), ('str.finite_overlap_models', BOOL, False, 'attempt a finite model search for overlapping variables instead of completely giving up on the arrangement'), ('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'), + ('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'), ('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'), ('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'), ('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'), diff --git a/src/smt/params/theory_str_params.cpp b/src/smt/params/theory_str_params.cpp index afbfc33fc..b5451b9dc 100644 --- a/src/smt/params/theory_str_params.cpp +++ b/src/smt/params/theory_str_params.cpp @@ -31,4 +31,5 @@ void theory_str_params::updt_params(params_ref const & _p) { m_UseBinarySearch = p.str_use_binary_search(); m_BinarySearchInitialUpperBound = p.str_binary_search_start(); m_OverlapTheoryAwarePriority = p.str_overlap_priority(); + m_RegexAutomata = p.str_regex_automata(); } diff --git a/src/smt/params/theory_str_params.h b/src/smt/params/theory_str_params.h index c841609db..4135cf0d6 100644 --- a/src/smt/params/theory_str_params.h +++ b/src/smt/params/theory_str_params.h @@ -80,6 +80,13 @@ struct theory_str_params { double m_OverlapTheoryAwarePriority; + /* + * If RegexAutomata is set to true, + * Z3str3 will use automata-based methods to reason about + * regular expression constraints. + */ + bool m_RegexAutomata; + theory_str_params(params_ref const & p = params_ref()): m_StrongArrangements(true), m_AggressiveLengthTesting(false), @@ -91,7 +98,8 @@ struct theory_str_params { m_FiniteOverlapModels(false), m_UseBinarySearch(false), m_BinarySearchInitialUpperBound(64), - m_OverlapTheoryAwarePriority(-0.1) + m_OverlapTheoryAwarePriority(-0.1), + m_RegexAutomata(true) { updt_params(p); } diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 7936b581c..a859dae9d 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -6446,6 +6446,111 @@ namespace smt { } } + // saturating unsigned addition + unsigned inline _qadd(unsigned a, unsigned b) { + if (a == UINT_MAX || b == UINT_MAX) { + return UINT_MAX; + } + unsigned result = a + b; + if (result < a || result < b) { + return UINT_MAX; + } + return result; + } + + // saturating unsigned multiply + unsigned inline _qmul(unsigned a, unsigned b) { + if (a == UINT_MAX || b == UINT_MAX) { + return UINT_MAX; + } + unsigned result = a * b; + if (result < a || result < b) { + return UINT_MAX; + } + return result; + } + + unsigned theory_str::estimate_regex_complexity(expr * re) { + ENSURE(u.is_re(re)); + expr * sub1; + expr * sub2; + if (u.re.is_to_re(re, sub1)) { + SASSERT(u.str.is_string(sub1)); + zstring str; + u.str.is_string(sub1, str); + return str.length(); + } else if (u.re.is_complement(re, sub1)) { + return estimate_regex_complexity_under_complement(sub1); + } else if (u.re.is_concat(re, sub1, sub2)) { + unsigned cx1 = estimate_regex_complexity(sub1); + unsigned cx2 = estimate_regex_complexity(sub2); + return _qadd(cx1, cx2); + } else if (u.re.is_union(re, sub1, sub2)) { + unsigned cx1 = estimate_regex_complexity(sub1); + unsigned cx2 = estimate_regex_complexity(sub2); + return _qadd(cx1, cx2); + } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) { + unsigned cx = estimate_regex_complexity(sub1); + return _qmul(2, cx); + } else if (u.re.is_range(re, sub1, sub2)) { + SASSERT(u.str.is_string(sub1)); + SASSERT(u.str.is_string(sub2)); + zstring str1, str2; + u.str.is_string(sub1, str1); + u.str.is_string(sub2, str2); + SASSERT(str1.length() == 1); + SASSERT(str2.length() == 1); + return 1 + str2[0] - str1[0]; + } else if (u.re.is_full(re)) { + return 1; + } else { + TRACE("str", tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;); + return 1; + } + } + + unsigned theory_str::estimate_regex_complexity_under_complement(expr * re) { + ENSURE(u.is_re(re)); + expr * sub1; + expr * sub2; + if (u.re.is_to_re(re, sub1)) { + SASSERT(u.str.is_string(sub1)); + zstring str; + u.str.is_string(sub1, str); + return str.length(); + } else if (u.re.is_complement(re, sub1)) { + // Why don't we return the regular complexity here? + // We could, but this might be called from under another complemented subexpression. + // It's better to give a worst-case complexity. + return estimate_regex_complexity_under_complement(sub1); + } else if (u.re.is_concat(re, sub1, sub2)) { + unsigned cx1 = estimate_regex_complexity_under_complement(sub1); + unsigned cx2 = estimate_regex_complexity_under_complement(sub2); + return _qadd(_qmul(2, cx1), cx2); + } else if (u.re.is_union(re, sub1, sub2)) { + unsigned cx1 = estimate_regex_complexity_under_complement(sub1); + unsigned cx2 = estimate_regex_complexity_under_complement(sub2); + return _qmul(cx1, cx2); + } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) { + unsigned cx = estimate_regex_complexity_under_complement(sub1); + return _qmul(2, cx); + } else if (u.re.is_range(re, sub1, sub2)) { + SASSERT(u.str.is_string(sub1)); + SASSERT(u.str.is_string(sub2)); + zstring str1, str2; + u.str.is_string(sub1, str1); + u.str.is_string(sub2, str2); + SASSERT(str1.length() == 1); + SASSERT(str2.length() == 1); + return 1 + str2[0] - str1[0]; + } else if (u.re.is_full(re)) { + return 1; + } else { + TRACE("str", tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;); + return 1; + } + } + /* * strArgmt::solve_concat_eq_str() * Solve concatenations of the form: diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index 03fd31162..72643c4be 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -457,11 +457,14 @@ protected: expr * mk_RegexIn(expr * str, expr * regexp); void instantiate_axiom_RegexIn(enode * e); app * mk_unroll(expr * n, expr * bound); - void process_unroll_eq_const_str(expr * unrollFunc, expr * constStr); void unroll_str2reg_constStr(expr * unrollFunc, expr * eqConstStr); void process_concat_eq_unroll(expr * concat, expr * unroll); + // regex automata + unsigned estimate_regex_complexity(expr * re); + unsigned estimate_regex_complexity_under_complement(expr * re); + void set_up_axioms(expr * ex); void handle_equality(expr * lhs, expr * rhs);