mirror of
https://github.com/Z3Prover/z3
synced 2025-04-08 10:25:18 +00:00
Merge pull request #1562 from mtrberzi/regex-develop
automata-based regex engine for Z3str3
This commit is contained in:
commit
f1c51982f8
|
@ -200,6 +200,9 @@ void re2automaton::set_solver(expr_solver* solver) {
|
|||
m_sa = alloc(symbolic_automata_t, sm, *m_ba.get());
|
||||
}
|
||||
|
||||
eautomaton* re2automaton::mk_product(eautomaton* a1, eautomaton* a2) {
|
||||
return m_sa->mk_product(*a1, *a2);
|
||||
}
|
||||
|
||||
eautomaton* re2automaton::operator()(expr* e) {
|
||||
eautomaton* r = re2aut(e);
|
||||
|
|
|
@ -53,7 +53,9 @@ public:
|
|||
bool is_range() const { return m_ty == t_range; }
|
||||
sort* get_sort() const { return m_sort; }
|
||||
expr* get_char() const { SASSERT(is_char()); return m_t; }
|
||||
|
||||
expr* get_pred() const { SASSERT(is_pred()); return m_t; }
|
||||
expr* get_lo() const { SASSERT(is_range()); return m_t; }
|
||||
expr* get_hi() const { SASSERT(is_range()); return m_s; }
|
||||
};
|
||||
|
||||
class sym_expr_manager {
|
||||
|
@ -87,6 +89,7 @@ public:
|
|||
~re2automaton();
|
||||
eautomaton* operator()(expr* e);
|
||||
void set_solver(expr_solver* solver);
|
||||
eautomaton* mk_product(eautomaton *a1, eautomaton *a2);
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -79,6 +79,12 @@ def_module_params(module_name='smt',
|
|||
('theory_aware_branching', BOOL, False, 'Allow the context to use extra information from theory solvers regarding literal branching prioritization.'),
|
||||
('str.finite_overlap_models', BOOL, False, 'attempt a finite model search for overlapping variables instead of completely giving up on the arrangement'),
|
||||
('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
|
||||
('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
|
||||
('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
|
||||
('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'),
|
||||
('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'),
|
||||
('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'),
|
||||
('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'),
|
||||
('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
|
||||
('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
|
||||
('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
|
||||
|
|
|
@ -31,4 +31,10 @@ void theory_str_params::updt_params(params_ref const & _p) {
|
|||
m_UseBinarySearch = p.str_use_binary_search();
|
||||
m_BinarySearchInitialUpperBound = p.str_binary_search_start();
|
||||
m_OverlapTheoryAwarePriority = p.str_overlap_priority();
|
||||
m_RegexAutomata = p.str_regex_automata();
|
||||
m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
|
||||
m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold();
|
||||
m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold();
|
||||
m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold();
|
||||
m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold();
|
||||
}
|
||||
|
|
|
@ -80,6 +80,43 @@ struct theory_str_params {
|
|||
|
||||
double m_OverlapTheoryAwarePriority;
|
||||
|
||||
/*
|
||||
* If RegexAutomata is set to true,
|
||||
* Z3str3 will use automata-based methods to reason about
|
||||
* regular expression constraints.
|
||||
*/
|
||||
bool m_RegexAutomata;
|
||||
|
||||
/*
|
||||
* RegexAutomata_DifficultyThreshold is the lowest difficulty above which Z3str3
|
||||
* will not eagerly construct an automaton for a regular expression term.
|
||||
*/
|
||||
unsigned m_RegexAutomata_DifficultyThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3
|
||||
* will not eagerly intersect automata to check unsatisfiability.
|
||||
*/
|
||||
unsigned m_RegexAutomata_IntersectionDifficultyThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton
|
||||
* after which a full automaton (i.e. with no length information) will be built regardless of difficulty.
|
||||
*/
|
||||
unsigned m_RegexAutomata_FailedAutomatonThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton
|
||||
* intersection after which intersection will always be performed regardless of difficulty.
|
||||
*/
|
||||
unsigned m_RegexAutomata_FailedIntersectionThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints
|
||||
* before which we begin checking unsatisfiability of a regex term.
|
||||
*/
|
||||
unsigned m_RegexAutomata_LengthAttemptThreshold;
|
||||
|
||||
theory_str_params(params_ref const & p = params_ref()):
|
||||
m_StrongArrangements(true),
|
||||
m_AggressiveLengthTesting(false),
|
||||
|
@ -91,7 +128,13 @@ struct theory_str_params {
|
|||
m_FiniteOverlapModels(false),
|
||||
m_UseBinarySearch(false),
|
||||
m_BinarySearchInitialUpperBound(64),
|
||||
m_OverlapTheoryAwarePriority(-0.1)
|
||||
m_OverlapTheoryAwarePriority(-0.1),
|
||||
m_RegexAutomata(true),
|
||||
m_RegexAutomata_DifficultyThreshold(1000),
|
||||
m_RegexAutomata_IntersectionDifficultyThreshold(1000),
|
||||
m_RegexAutomata_FailedAutomatonThreshold(10),
|
||||
m_RegexAutomata_FailedIntersectionThreshold(10),
|
||||
m_RegexAutomata_LengthAttemptThreshold(10)
|
||||
{
|
||||
updt_params(p);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -20,9 +20,11 @@
|
|||
#include "util/trail.h"
|
||||
#include "util/union_find.h"
|
||||
#include "util/scoped_ptr_vector.h"
|
||||
#include "util/hashtable.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/rewriter/th_rewriter.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "smt/smt_theory.h"
|
||||
#include "smt/params/theory_str_params.h"
|
||||
|
@ -36,6 +38,7 @@
|
|||
namespace smt {
|
||||
|
||||
typedef hashtable<symbol, symbol_hash_proc, symbol_eq_proc> symbol_set;
|
||||
typedef int_hashtable<int_hash, default_eq<int> > integer_set;
|
||||
|
||||
class str_value_factory : public value_factory {
|
||||
seq_util u;
|
||||
|
@ -148,6 +151,70 @@ public:
|
|||
bool matches(zstring input);
|
||||
};
|
||||
|
||||
class regex_automaton_under_assumptions {
|
||||
protected:
|
||||
expr * re_term;
|
||||
eautomaton * aut;
|
||||
bool polarity;
|
||||
|
||||
bool assume_lower_bound;
|
||||
rational lower_bound;
|
||||
|
||||
bool assume_upper_bound;
|
||||
rational upper_bound;
|
||||
public:
|
||||
regex_automaton_under_assumptions() :
|
||||
re_term(NULL), aut(NULL), polarity(false),
|
||||
assume_lower_bound(false), assume_upper_bound(false) {}
|
||||
|
||||
regex_automaton_under_assumptions(expr * re_term, eautomaton * aut, bool polarity) :
|
||||
re_term(re_term), aut(aut), polarity(polarity),
|
||||
assume_lower_bound(false), assume_upper_bound(false) {}
|
||||
|
||||
void set_lower_bound(rational & lb) {
|
||||
lower_bound = lb;
|
||||
assume_lower_bound = true;
|
||||
}
|
||||
void unset_lower_bound() {
|
||||
assume_lower_bound = false;
|
||||
}
|
||||
|
||||
void set_upper_bound(rational & ub) {
|
||||
upper_bound = ub;
|
||||
assume_upper_bound = true;
|
||||
}
|
||||
void unset_upper_bound() {
|
||||
assume_upper_bound = false;
|
||||
}
|
||||
|
||||
bool get_lower_bound(rational & lb) const {
|
||||
if (assume_lower_bound) {
|
||||
lb = lower_bound;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool get_upper_bound(rational & ub) const {
|
||||
if (assume_upper_bound) {
|
||||
ub = upper_bound;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
eautomaton * get_automaton() const { return aut; }
|
||||
expr * get_regex_term() const { return re_term; }
|
||||
bool get_polarity() const { return polarity; }
|
||||
|
||||
virtual ~regex_automaton_under_assumptions() {
|
||||
// don't free str_in_re or aut;
|
||||
// they are managed separately
|
||||
}
|
||||
};
|
||||
|
||||
class theory_str : public theory {
|
||||
struct T_cut
|
||||
{
|
||||
|
@ -250,6 +317,8 @@ protected:
|
|||
|
||||
str_value_factory * m_factory;
|
||||
|
||||
re2automaton m_mk_aut;
|
||||
|
||||
// Unique identifier appended to unused variables to ensure that model construction
|
||||
// does not introduce equalities when they weren't enforced.
|
||||
unsigned m_unused_id;
|
||||
|
@ -267,6 +336,10 @@ protected:
|
|||
// enode lists for library-aware/high-level string terms (e.g. substr, contains)
|
||||
ptr_vector<enode> m_library_aware_axiom_todo;
|
||||
|
||||
// list of axioms that are re-asserted every time the scope is popped
|
||||
expr_ref_vector m_persisted_axioms;
|
||||
expr_ref_vector m_persisted_axiom_todo;
|
||||
|
||||
// hashtable of all exprs for which we've already set up term-specific axioms --
|
||||
// this prevents infinite recursive descent with respect to axioms that
|
||||
// include an occurrence of the term for which axioms are being generated
|
||||
|
@ -320,7 +393,31 @@ protected:
|
|||
// TBD: do a curried map for determinism.
|
||||
std::map<std::pair<expr*, zstring>, expr*> regex_in_bool_map;
|
||||
obj_map<expr, std::set<zstring> > regex_in_var_reg_str_map;
|
||||
|
||||
// regex automata
|
||||
scoped_ptr_vector<eautomaton> m_automata;
|
||||
ptr_vector<eautomaton> regex_automata;
|
||||
obj_hashtable<expr> regex_terms;
|
||||
obj_map<expr, ptr_vector<expr> > regex_terms_by_string; // S --> [ (str.in.re S *) ]
|
||||
obj_map<expr, svector<regex_automaton_under_assumptions> > regex_automaton_assumptions; // RegEx --> [ aut+assumptions ]
|
||||
obj_map<expr, nfa> regex_nfa_cache; // Regex term --> NFA
|
||||
obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope
|
||||
obj_hashtable<expr> regex_terms_with_length_constraints; // set of regex terms which had had length constraints asserted in the current scope
|
||||
obj_map<expr, expr*> regex_term_to_length_constraint; // (str.in.re S R) -> (length constraint over S wrt. R)
|
||||
obj_map<expr, ptr_vector<expr> > regex_term_to_extra_length_vars; // extra length vars used in regex_term_to_length_constraint entries
|
||||
|
||||
// keep track of the last lower/upper bound we saw for each string term
|
||||
// so we don't perform duplicate work
|
||||
obj_map<expr, rational> regex_last_lower_bound;
|
||||
obj_map<expr, rational> regex_last_upper_bound;
|
||||
|
||||
// each counter maps a (str.in.re) expression to an integer.
|
||||
// use helper functions regex_inc_counter() and regex_get_counter() to access
|
||||
obj_map<expr, unsigned> regex_length_attempt_count;
|
||||
obj_map<expr, unsigned> regex_fail_count;
|
||||
obj_map<expr, unsigned> regex_intersection_fail_count;
|
||||
|
||||
obj_map<expr, ptr_vector<expr> > string_chars; // S --> [S_0, S_1, ...] for character terms S_i
|
||||
|
||||
svector<char> char_set;
|
||||
std::map<char, int> charSetLookupTable;
|
||||
|
@ -439,14 +536,32 @@ protected:
|
|||
void instantiate_axiom_str_to_int(enode * e);
|
||||
void instantiate_axiom_int_to_str(enode * e);
|
||||
|
||||
void add_persisted_axiom(expr * a);
|
||||
|
||||
expr * mk_RegexIn(expr * str, expr * regexp);
|
||||
void instantiate_axiom_RegexIn(enode * e);
|
||||
app * mk_unroll(expr * n, expr * bound);
|
||||
|
||||
void process_unroll_eq_const_str(expr * unrollFunc, expr * constStr);
|
||||
void unroll_str2reg_constStr(expr * unrollFunc, expr * eqConstStr);
|
||||
void process_concat_eq_unroll(expr * concat, expr * unroll);
|
||||
|
||||
// regex automata and length-aware regex
|
||||
unsigned estimate_regex_complexity(expr * re);
|
||||
unsigned estimate_regex_complexity_under_complement(expr * re);
|
||||
unsigned estimate_automata_intersection_difficulty(eautomaton * aut1, eautomaton * aut2);
|
||||
bool check_regex_length_linearity(expr * re);
|
||||
bool check_regex_length_linearity_helper(expr * re, bool already_star);
|
||||
expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
|
||||
void check_subterm_lengths(expr * re, integer_set & lens);
|
||||
void find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut);
|
||||
bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound);
|
||||
bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
|
||||
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
|
||||
void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
|
||||
expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
|
||||
void regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key);
|
||||
unsigned regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key);
|
||||
|
||||
void set_up_axioms(expr * ex);
|
||||
void handle_equality(expr * lhs, expr * rhs);
|
||||
|
||||
|
@ -535,6 +650,7 @@ protected:
|
|||
std::map<expr*, std::map<expr*, int> > & concat_eq_concat_map,
|
||||
std::map<expr*, std::set<expr*> > & unrollGroupMap);
|
||||
|
||||
bool term_appears_as_subterm(expr * needle, expr * haystack);
|
||||
void classify_ast_by_type(expr * node, std::map<expr*, int> & varMap,
|
||||
std::map<expr*, int> & concatMap, std::map<expr*, int> & unrollMap);
|
||||
void classify_ast_by_type_in_positive_context(std::map<expr*, int> & varMap,
|
||||
|
@ -623,6 +739,7 @@ protected:
|
|||
void new_diseq_eh(theory_var, theory_var) override;
|
||||
|
||||
theory* mk_fresh(context*) override { return alloc(theory_str, get_manager(), m_params); }
|
||||
void init(context * ctx) override;
|
||||
void init_search_eh() override;
|
||||
void add_theory_assumptions(expr_ref_vector & assumptions) override;
|
||||
lbool validate_unsat_core(expr_ref_vector & unsat_core) override;
|
||||
|
|
Loading…
Reference in a new issue