diff --git a/src/ast/str_decl_plugin.cpp b/src/ast/str_decl_plugin.cpp index 7cd03fa16..b140e11c3 100644 --- a/src/ast/str_decl_plugin.cpp +++ b/src/ast/str_decl_plugin.cpp @@ -38,6 +38,9 @@ str_decl_plugin::str_decl_plugin(): m_replace_decl(0), m_re_str2regex_decl(0), m_re_regexin_decl(0), + m_re_regexconcat_decl(0), + m_re_regexstar_decl(0), + m_re_regexunion_decl(0), m_arith_plugin(0), m_arith_fid(0), m_int_sort(0){ @@ -63,6 +66,9 @@ void str_decl_plugin::finalize(void) { DEC_REF(m_replace_decl); DEC_REF(m_re_str2regex_decl); DEC_REF(m_re_regexin_decl); + DEC_REF(m_re_regexconcat_decl); + DEC_REF(m_re_regexstar_decl); + DEC_REF(m_re_regexunion_decl); DEC_REF(m_int_sort); } @@ -139,6 +145,15 @@ void str_decl_plugin::set_manager(ast_manager * m, family_id id) { m_re_regexin_decl = m->mk_func_decl(symbol("RegexIn"), s, re, boolT, func_decl_info(id, OP_RE_REGEXIN)); m_manager->inc_ref(m_re_regexin_decl); + m_re_regexconcat_decl = m->mk_func_decl(symbol("RegexConcat"), re, re, re, func_decl_info(id, OP_RE_REGEXCONCAT)); + m_manager->inc_ref(m_re_regexconcat_decl); + + m_re_regexstar_decl = m->mk_func_decl(symbol("RegexStar"), re, re, func_decl_info(id, OP_RE_REGEXSTAR)); + m_manager->inc_ref(m_re_regexstar_decl); + + m_re_regexunion_decl = m->mk_func_decl(symbol("RegexUnion"), re, re, re, func_decl_info(id, OP_RE_REGEXUNION)); + m_manager->inc_ref(m_re_regexunion_decl); + } decl_plugin * str_decl_plugin::mk_fresh() { @@ -168,6 +183,9 @@ func_decl * str_decl_plugin::mk_func_decl(decl_kind k) { case OP_STR_REPLACE: return m_replace_decl; case OP_RE_STR2REGEX: return m_re_str2regex_decl; case OP_RE_REGEXIN: return m_re_regexin_decl; + case OP_RE_REGEXCONCAT: return m_re_regexconcat_decl; + case OP_RE_REGEXSTAR: return m_re_regexstar_decl; + case OP_RE_REGEXUNION: return m_re_regexunion_decl; default: return 0; } } @@ -235,6 +253,9 @@ void str_decl_plugin::get_op_names(svector & op_names, symbol cons op_names.push_back(builtin_name("Replace", OP_STR_REPLACE)); op_names.push_back(builtin_name("Str2Reg", OP_RE_STR2REGEX)); op_names.push_back(builtin_name("RegexIn", OP_RE_REGEXIN)); + op_names.push_back(builtin_name("RegexConcat", OP_RE_REGEXCONCAT)); + op_names.push_back(builtin_name("RegexStar", OP_RE_REGEXSTAR)); + op_names.push_back(builtin_name("RegexUnion", OP_RE_REGEXUNION)); } void str_decl_plugin::get_sort_names(svector & sort_names, symbol const & logic) { diff --git a/src/ast/str_decl_plugin.h b/src/ast/str_decl_plugin.h index 496468e5a..ccd2915af 100644 --- a/src/ast/str_decl_plugin.h +++ b/src/ast/str_decl_plugin.h @@ -44,6 +44,9 @@ enum str_op_kind { // regular expression operators OP_RE_STR2REGEX, OP_RE_REGEXIN, + OP_RE_REGEXCONCAT, + OP_RE_REGEXSTAR, + OP_RE_REGEXUNION, // end LAST_STR_OP }; @@ -69,6 +72,9 @@ protected: func_decl * m_re_str2regex_decl; func_decl * m_re_regexin_decl; + func_decl * m_re_regexconcat_decl; + func_decl * m_re_regexstar_decl; + func_decl * m_re_regexunion_decl; arith_decl_plugin * m_arith_plugin; family_id m_arith_fid; diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index e8df17c58..ab7bb13ef 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -479,6 +479,29 @@ app * theory_str::mk_str_var(std::string name) { return a; } +app * theory_str::mk_regex_rep_var() { + context & ctx = get_context(); + ast_manager & m = get_manager(); + + sort * string_sort = m.mk_sort(get_family_id(), STRING_SORT); + app * a = m.mk_fresh_const("regex", string_sort); + + ctx.internalize(a, false); + SASSERT(ctx.get_enode(a) != NULL); + SASSERT(ctx.e_internalized(a)); + mk_var(ctx.get_enode(a)); + m_basicstr_axiom_todo.push_back(ctx.get_enode(a)); + + m_trail.push_back(a); + // TODO cross-check which variable sets we need + variable_set.insert(a); + //internal_variable_set.insert(a); + regex_variable_set.insert(a); + track_variable_scope(a); + + return a; +} + app * theory_str::mk_nonempty_str_var() { context & ctx = get_context(); ast_manager & m = get_manager(); @@ -613,6 +636,7 @@ bool theory_str::can_propagate() { || !m_axiom_CharAt_todo.empty() || !m_axiom_StartsWith_todo.empty() || !m_axiom_EndsWith_todo.empty() || !m_axiom_Contains_todo.empty() || !m_axiom_Indexof_todo.empty() || !m_axiom_Indexof2_todo.empty() || !m_axiom_LastIndexof_todo.empty() || !m_axiom_Substr_todo.empty() || !m_axiom_Replace_todo.empty() + || !m_axiom_RegexIn_todo.empty() ; } @@ -681,6 +705,11 @@ void theory_str::propagate() { instantiate_axiom_Replace(m_axiom_Replace_todo[i]); } m_axiom_Replace_todo.reset(); + + for (unsigned i = 0; i < m_axiom_RegexIn_todo.size(); ++i) { + instantiate_axiom_RegexIn(m_axiom_RegexIn_todo[i]); + } + m_axiom_RegexIn_todo.reset(); } } @@ -1247,6 +1276,84 @@ void theory_str::instantiate_axiom_Replace(enode * e) { assert_axiom(finalAxiom); } +expr * theory_str::mk_RegexIn(expr * str, expr * regexp) { + expr * args[2] = {str, regexp}; + app * regexIn = get_manager().mk_app(get_id(), OP_RE_REGEXIN, 0, 0, 2, args); + // immediately force internalization so that axiom setup does not fail + get_context().internalize(regexIn, false); + set_up_axioms(regexIn); + return regexIn; +} + +void theory_str::instantiate_axiom_RegexIn(enode * e) { + context & ctx = get_context(); + ast_manager & m = get_manager(); + + app * expr = e->get_owner(); + if (axiomatized_terms.contains(expr)) { + TRACE("t_str_detail", tout << "already set up RegexIn axiom for " << mk_pp(expr, m) << std::endl;); + return; + } + axiomatized_terms.insert(expr); + + TRACE("t_str_detail", tout << "instantiate RegexIn axiom for " << mk_pp(expr, m) << std::endl;); + + // I don't think we need to port regexInBoolMap and regexInVarStrMap, + // but they would go here from reduce_regexIn + + expr_ref str(expr->get_arg(0), m); + app * regex = to_app(expr->get_arg(1)); + + if (is_Str2Reg(regex)) { + expr_ref rxStr(regex->get_arg(0), m); + // want to assert 'expr IFF (str == rxStr)' + expr_ref rhs(ctx.mk_eq_atom(str, rxStr), m); + expr_ref finalAxiom(m.mk_iff(expr, rhs), m); + SASSERT(finalAxiom); + assert_axiom(finalAxiom); + } else if (is_RegexConcat(regex)) { + expr_ref var1(mk_regex_rep_var(), m); + expr_ref var2(mk_regex_rep_var(), m); + expr_ref rhs(mk_concat(var1, var2), m); + expr_ref rx1(regex->get_arg(0), m); + expr_ref rx2(regex->get_arg(1), m); + expr_ref var1InRegex1(mk_RegexIn(var1, rx1), m); + expr_ref var2InRegex2(mk_RegexIn(var2, rx2), m); + + expr_ref_vector items(m); + items.push_back(var1InRegex1); + items.push_back(var2InRegex2); + items.push_back(ctx.mk_eq_atom(expr, ctx.mk_eq_atom(str, rhs))); + + expr_ref finalAxiom(mk_and(items), m); + SASSERT(finalAxiom); + assert_axiom(finalAxiom); + /* + Z3_ast var1 = mk_regexRepVar(t); + Z3_ast var2 = mk_regexRepVar(t); + rhs = mk_concat(t, var1, var2); + + Z3_ast regex1 = Z3_get_app_arg(ctx, arg1_func_app, 0); + Z3_ast regex2 = Z3_get_app_arg(ctx, arg1_func_app, 1); + Z3_ast var1InRegex1 = mk_2_arg_app(ctx, td->RegexIn, var1, regex1); + Z3_ast var2InRegex2 = mk_2_arg_app(ctx, td->RegexIn, var2, regex2); + std::vector items; + items.push_back(var1InRegex1); + items.push_back(var2InRegex2); + items.push_back(Z3_mk_eq(ctx, resBoolVar, Z3_mk_eq(ctx, args[0], rhs))); + extraAssert = mk_and_fromVector(t, items); + return resBoolVar; + */ + } else if (is_RegexUnion(regex)) { + + } else if (is_RegexStar(regex)) { + + } else { + TRACE("t_str_detail", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); + NOT_IMPLEMENTED_YET(); + } +} + void theory_str::attach_new_th_var(enode * n) { context & ctx = get_context(); theory_var v = mk_var(n); @@ -4165,6 +4272,8 @@ void theory_str::set_up_axioms(expr * ex) { m_axiom_EndsWith_todo.push_back(n); } else if (is_Contains(ap)) { m_axiom_Contains_todo.push_back(n); + } else if (is_RegexIn(ap)) { + m_axiom_RegexIn_todo.push_back(n); } } } else { @@ -4319,6 +4428,7 @@ void theory_str::pop_scope_eh(unsigned num_scopes) { for (std::set::iterator var_it = vars.begin(); var_it != vars.end(); ++var_it) { variable_set.erase(*var_it); internal_variable_set.erase(*var_it); + regex_variable_set.erase(*var_it); count += 1; } TRACE("t_str_detail", tout << "cleaned up " << count << " variables" << std::endl;); @@ -5994,13 +6104,10 @@ void theory_str::process_free_var(std::map & freeVar_map) { for (std::map::iterator fvIt = freeVar_map.begin(); fvIt != freeVar_map.end(); fvIt++) { expr * freeVar = fvIt->first; - /* - std::string vName = std::string(Z3_ast_to_string(ctx, freeVar)); - if (vName.length() >= 9 && vName.substr(0, 9) == "$$_regVar") { + // skip all regular expression vars + if (regex_variable_set.find(freeVar) != regex_variable_set.end()) { continue; } - */ - // TODO skip all regular expression vars // Iterate the EQC of freeVar, its eqc variable should not be in the eqcRepSet. // If found, have to filter it out diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index 151dbc53f..9aead1105 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -117,6 +117,7 @@ namespace smt { ptr_vector m_axiom_LastIndexof_todo; ptr_vector m_axiom_Substr_todo; ptr_vector m_axiom_Replace_todo; + ptr_vector m_axiom_RegexIn_todo; // hashtable of all exprs for which we've already set up term-specific axioms -- // this prevents infinite recursive descent with respect to axioms that @@ -135,6 +136,7 @@ namespace smt { std::set variable_set; std::set internal_variable_set; + std::set regex_variable_set; std::map > internal_variable_scope_levels; obj_hashtable internal_lenTest_vars; @@ -180,6 +182,7 @@ namespace smt { app * mk_nonempty_str_var(); app * mk_internal_xor_var(); expr * mk_internal_valTest_var(expr * node, int len, int vTries); + app * mk_regex_rep_var(); bool is_concat(app const * a) const { return a->is_app_of(get_id(), OP_STRCAT); } bool is_concat(enode const * n) const { return is_concat(n->get_owner()); } @@ -206,6 +209,18 @@ namespace smt { bool is_Replace(app const * a) const { return a->is_app_of(get_id(), OP_STR_REPLACE); } bool is_Replace(enode const * n) const { return is_Replace(n->get_owner()); } + bool is_RegexIn(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXIN); } + bool is_RegexIn(enode const * n) const { return is_RegexIn(n->get_owner()); } + bool is_RegexConcat(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXCONCAT); } + bool is_RegexConcat(enode const * n) const { return is_RegexConcat(n->get_owner()); } + bool is_RegexStar(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXSTAR); } + bool is_RegexStar(enode const * n) const { return is_RegexStar(n->get_owner()); } + bool is_RegexUnion(app const * a) const { return a->is_app_of(get_id(), OP_RE_REGEXUNION); } + bool is_RegexUnion(enode const * n) const { return is_RegexUnion(n->get_owner()); } + bool is_Str2Reg(app const * a) const { return a->is_app_of(get_id(), OP_RE_STR2REGEX); } + bool is_Str2Reg(enode const * n) const { return is_Str2Reg(n->get_owner()); } + + void instantiate_concat_axiom(enode * cat); void instantiate_basic_string_axioms(enode * str); void instantiate_str_eq_length_axiom(enode * lhs, enode * rhs); @@ -220,6 +235,9 @@ namespace smt { void instantiate_axiom_Substr(enode * e); void instantiate_axiom_Replace(enode * e); + expr * mk_RegexIn(expr * str, expr * regexp); + void instantiate_axiom_RegexIn(enode * e); + void set_up_axioms(expr * ex); void handle_equality(expr * lhs, expr * rhs);