3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-22 16:45:31 +00:00

starting regex support

This commit is contained in:
Murphy Berzish 2016-06-23 15:24:35 -04:00
parent 4c34629806
commit 04803d7a3b
4 changed files with 157 additions and 5 deletions

View file

@ -38,6 +38,9 @@ str_decl_plugin::str_decl_plugin():
m_replace_decl(0),
m_re_str2regex_decl(0),
m_re_regexin_decl(0),
m_re_regexconcat_decl(0),
m_re_regexstar_decl(0),
m_re_regexunion_decl(0),
m_arith_plugin(0),
m_arith_fid(0),
m_int_sort(0){
@ -63,6 +66,9 @@ void str_decl_plugin::finalize(void) {
DEC_REF(m_replace_decl);
DEC_REF(m_re_str2regex_decl);
DEC_REF(m_re_regexin_decl);
DEC_REF(m_re_regexconcat_decl);
DEC_REF(m_re_regexstar_decl);
DEC_REF(m_re_regexunion_decl);
DEC_REF(m_int_sort);
}
@ -139,6 +145,15 @@ void str_decl_plugin::set_manager(ast_manager * m, family_id id) {
m_re_regexin_decl = m->mk_func_decl(symbol("RegexIn"), s, re, boolT, func_decl_info(id, OP_RE_REGEXIN));
m_manager->inc_ref(m_re_regexin_decl);
m_re_regexconcat_decl = m->mk_func_decl(symbol("RegexConcat"), re, re, re, func_decl_info(id, OP_RE_REGEXCONCAT));
m_manager->inc_ref(m_re_regexconcat_decl);
m_re_regexstar_decl = m->mk_func_decl(symbol("RegexStar"), re, re, func_decl_info(id, OP_RE_REGEXSTAR));
m_manager->inc_ref(m_re_regexstar_decl);
m_re_regexunion_decl = m->mk_func_decl(symbol("RegexUnion"), re, re, re, func_decl_info(id, OP_RE_REGEXUNION));
m_manager->inc_ref(m_re_regexunion_decl);
}
decl_plugin * str_decl_plugin::mk_fresh() {
@ -168,6 +183,9 @@ func_decl * str_decl_plugin::mk_func_decl(decl_kind k) {
case OP_STR_REPLACE: return m_replace_decl;
case OP_RE_STR2REGEX: return m_re_str2regex_decl;
case OP_RE_REGEXIN: return m_re_regexin_decl;
case OP_RE_REGEXCONCAT: return m_re_regexconcat_decl;
case OP_RE_REGEXSTAR: return m_re_regexstar_decl;
case OP_RE_REGEXUNION: return m_re_regexunion_decl;
default: return 0;
}
}
@ -235,6 +253,9 @@ void str_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol cons
op_names.push_back(builtin_name("Replace", OP_STR_REPLACE));
op_names.push_back(builtin_name("Str2Reg", OP_RE_STR2REGEX));
op_names.push_back(builtin_name("RegexIn", OP_RE_REGEXIN));
op_names.push_back(builtin_name("RegexConcat", OP_RE_REGEXCONCAT));
op_names.push_back(builtin_name("RegexStar", OP_RE_REGEXSTAR));
op_names.push_back(builtin_name("RegexUnion", OP_RE_REGEXUNION));
}
void str_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) {

View file

@ -44,6 +44,9 @@ enum str_op_kind {
// regular expression operators
OP_RE_STR2REGEX,
OP_RE_REGEXIN,
OP_RE_REGEXCONCAT,
OP_RE_REGEXSTAR,
OP_RE_REGEXUNION,
// end
LAST_STR_OP
};
@ -69,6 +72,9 @@ protected:
func_decl * m_re_str2regex_decl;
func_decl * m_re_regexin_decl;
func_decl * m_re_regexconcat_decl;
func_decl * m_re_regexstar_decl;
func_decl * m_re_regexunion_decl;
arith_decl_plugin * m_arith_plugin;
family_id m_arith_fid;

View file

@ -479,6 +479,29 @@ app * theory_str::mk_str_var(std::string name) {
return a;
}
app * theory_str::mk_regex_rep_var() {
context & ctx = get_context();
ast_manager & m = get_manager();
sort * string_sort = m.mk_sort(get_family_id(), STRING_SORT);
app * a = m.mk_fresh_const("regex", string_sort);
ctx.internalize(a, false);
SASSERT(ctx.get_enode(a) != NULL);
SASSERT(ctx.e_internalized(a));
mk_var(ctx.get_enode(a));
m_basicstr_axiom_todo.push_back(ctx.get_enode(a));
m_trail.push_back(a);
// TODO cross-check which variable sets we need
variable_set.insert(a);
//internal_variable_set.insert(a);
regex_variable_set.insert(a);
track_variable_scope(a);
return a;
}
app * theory_str::mk_nonempty_str_var() {
context & ctx = get_context();
ast_manager & m = get_manager();
@ -613,6 +636,7 @@ bool theory_str::can_propagate() {
|| !m_axiom_CharAt_todo.empty() || !m_axiom_StartsWith_todo.empty() || !m_axiom_EndsWith_todo.empty()
|| !m_axiom_Contains_todo.empty() || !m_axiom_Indexof_todo.empty() || !m_axiom_Indexof2_todo.empty() || !m_axiom_LastIndexof_todo.empty()
|| !m_axiom_Substr_todo.empty() || !m_axiom_Replace_todo.empty()
|| !m_axiom_RegexIn_todo.empty()
;
}
@ -681,6 +705,11 @@ void theory_str::propagate() {
instantiate_axiom_Replace(m_axiom_Replace_todo[i]);
}
m_axiom_Replace_todo.reset();
for (unsigned i = 0; i < m_axiom_RegexIn_todo.size(); ++i) {
instantiate_axiom_RegexIn(m_axiom_RegexIn_todo[i]);
}
m_axiom_RegexIn_todo.reset();
}
}
@ -1247,6 +1276,84 @@ void theory_str::instantiate_axiom_Replace(enode * e) {
assert_axiom(finalAxiom);
}
expr * theory_str::mk_RegexIn(expr * str, expr * regexp) {
expr * args[2] = {str, regexp};
app * regexIn = get_manager().mk_app(get_id(), OP_RE_REGEXIN, 0, 0, 2, args);
// immediately force internalization so that axiom setup does not fail
get_context().internalize(regexIn, false);
set_up_axioms(regexIn);
return regexIn;
}
void theory_str::instantiate_axiom_RegexIn(enode * e) {
context & ctx = get_context();
ast_manager & m = get_manager();
app * expr = e->get_owner();
if (axiomatized_terms.contains(expr)) {
TRACE("t_str_detail", tout << "already set up RegexIn axiom for " << mk_pp(expr, m) << std::endl;);
return;
}
axiomatized_terms.insert(expr);
TRACE("t_str_detail", tout << "instantiate RegexIn axiom for " << mk_pp(expr, m) << std::endl;);
// I don't think we need to port regexInBoolMap and regexInVarStrMap,
// but they would go here from reduce_regexIn
expr_ref str(expr->get_arg(0), m);
app * regex = to_app(expr->get_arg(1));
if (is_Str2Reg(regex)) {
expr_ref rxStr(regex->get_arg(0), m);
// want to assert 'expr IFF (str == rxStr)'
expr_ref rhs(ctx.mk_eq_atom(str, rxStr), m);
expr_ref finalAxiom(m.mk_iff(expr, rhs), m);
SASSERT(finalAxiom);
assert_axiom(finalAxiom);
} else if (is_RegexConcat(regex)) {
expr_ref var1(mk_regex_rep_var(), m);
expr_ref var2(mk_regex_rep_var(), m);
expr_ref rhs(mk_concat(var1, var2), m);
expr_ref rx1(regex->get_arg(0), m);
expr_ref rx2(regex->get_arg(1), m);
expr_ref var1InRegex1(mk_RegexIn(var1, rx1), m);
expr_ref var2InRegex2(mk_RegexIn(var2, rx2), m);
expr_ref_vector items(m);
items.push_back(var1InRegex1);
items.push_back(var2InRegex2);
items.push_back(ctx.mk_eq_atom(expr, ctx.mk_eq_atom(str, rhs)));
expr_ref finalAxiom(mk_and(items), m);
SASSERT(finalAxiom);
assert_axiom(finalAxiom);
/*
Z3_ast var1 = mk_regexRepVar(t);
Z3_ast var2 = mk_regexRepVar(t);
rhs = mk_concat(t, var1, var2);
Z3_ast regex1 = Z3_get_app_arg(ctx, arg1_func_app, 0);
Z3_ast regex2 = Z3_get_app_arg(ctx, arg1_func_app, 1);
Z3_ast var1InRegex1 = mk_2_arg_app(ctx, td->RegexIn, var1, regex1);
Z3_ast var2InRegex2 = mk_2_arg_app(ctx, td->RegexIn, var2, regex2);
std::vector<Z3_ast> items;
items.push_back(var1InRegex1);
items.push_back(var2InRegex2);
items.push_back(Z3_mk_eq(ctx, resBoolVar, Z3_mk_eq(ctx, args[0], rhs)));
extraAssert = mk_and_fromVector(t, items);
return resBoolVar;
*/
} else if (is_RegexUnion(regex)) {
} else if (is_RegexStar(regex)) {
} else {
TRACE("t_str_detail", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;);
NOT_IMPLEMENTED_YET();
}
}
void theory_str::attach_new_th_var(enode * n) {
context & ctx = get_context();
theory_var v = mk_var(n);
@ -4165,6 +4272,8 @@ void theory_str::set_up_axioms(expr * ex) {
m_axiom_EndsWith_todo.push_back(n);
} else if (is_Contains(ap)) {
m_axiom_Contains_todo.push_back(n);
} else if (is_RegexIn(ap)) {
m_axiom_RegexIn_todo.push_back(n);
}
}
} else {
@ -4319,6 +4428,7 @@ void theory_str::pop_scope_eh(unsigned num_scopes) {
for (std::set<expr*>::iterator var_it = vars.begin(); var_it != vars.end(); ++var_it) {
variable_set.erase(*var_it);
internal_variable_set.erase(*var_it);
regex_variable_set.erase(*var_it);
count += 1;
}
TRACE("t_str_detail", tout << "cleaned up " << count << " variables" << std::endl;);
@ -5994,13 +6104,10 @@ void theory_str::process_free_var(std::map<expr*, int> & freeVar_map) {
for (std::map<expr*, int>::iterator fvIt = freeVar_map.begin(); fvIt != freeVar_map.end(); fvIt++) {
expr * freeVar = fvIt->first;
/*
std::string vName = std::string(Z3_ast_to_string(ctx, freeVar));
if (vName.length() >= 9 && vName.substr(0, 9) == "$$_regVar") {
// skip all regular expression vars
if (regex_variable_set.find(freeVar) != regex_variable_set.end()) {
continue;
}
*/
// TODO skip all regular expression vars
// Iterate the EQC of freeVar, its eqc variable should not be in the eqcRepSet.
// If found, have to filter it out

View file

@ -117,6 +117,7 @@ namespace smt {
ptr_vector<enode> m_axiom_LastIndexof_todo;
ptr_vector<enode> m_axiom_Substr_todo;
ptr_vector<enode> m_axiom_Replace_todo;
ptr_vector<enode> m_axiom_RegexIn_todo;
// hashtable of all exprs for which we've already set up term-specific axioms --
// this prevents infinite recursive descent with respect to axioms that
@ -135,6 +136,7 @@ namespace smt {
std::set<expr*> variable_set;
std::set<expr*> internal_variable_set;
std::set<expr*> regex_variable_set;
std::map<int, std::set<expr*> > internal_variable_scope_levels;
obj_hashtable<expr> internal_lenTest_vars;
@ -180,6 +182,7 @@ namespace smt {
app * mk_nonempty_str_var();
app * mk_internal_xor_var();
expr * mk_internal_valTest_var(expr * node, int len, int vTries);
app * mk_regex_rep_var();
bool is_concat(app const * a) const { return a->is_app_of(get_id(), OP_STRCAT); }
bool is_concat(enode const * n) const { return is_concat(n->get_owner()); }
@ -206,6 +209,18 @@ namespace smt {
bool is_Replace(app const * a) const { return a->is_app_of(get_id(), OP_STR_REPLACE); }
bool is_Replace(enode const * n) const { return is_Replace(n->get_owner()); }
bool is_RegexIn(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXIN); }
bool is_RegexIn(enode const * n) const { return is_RegexIn(n->get_owner()); }
bool is_RegexConcat(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXCONCAT); }
bool is_RegexConcat(enode const * n) const { return is_RegexConcat(n->get_owner()); }
bool is_RegexStar(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXSTAR); }
bool is_RegexStar(enode const * n) const { return is_RegexStar(n->get_owner()); }
bool is_RegexUnion(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXUNION); }
bool is_RegexUnion(enode const * n) const { return is_RegexUnion(n->get_owner()); }
bool is_Str2Reg(app const * a) const { return a->is_app_of(get_id(), OP_RE_STR2REGEX); }
bool is_Str2Reg(enode const * n) const { return is_Str2Reg(n->get_owner()); }
void instantiate_concat_axiom(enode * cat);
void instantiate_basic_string_axioms(enode * str);
void instantiate_str_eq_length_axiom(enode * lhs, enode * rhs);
@ -220,6 +235,9 @@ namespace smt {
void instantiate_axiom_Substr(enode * e);
void instantiate_axiom_Replace(enode * e);
expr * mk_RegexIn(expr * str, expr * regexp);
void instantiate_axiom_RegexIn(enode * e);
void set_up_axioms(expr * ex);
void handle_equality(expr * lhs, expr * rhs);