diff --git a/src/ast/ast.h b/src/ast/ast.h index 8547675a8..e4e1b4d58 100644 --- a/src/ast/ast.h +++ b/src/ast/ast.h @@ -2013,6 +2013,7 @@ public: app * mk_distinct_expanded(unsigned num_args, expr * const * args); app * mk_true() { return m_true; } app * mk_false() { return m_false; } + app * mk_bool_val(bool b) { return b?m_true:m_false; } app * mk_interp(expr * arg) { return mk_app(m_basic_family_id, OP_INTERP, arg); } diff --git a/src/ast/ast_smt2_pp.cpp b/src/ast/ast_smt2_pp.cpp index c74b4f185..9ffbbd127 100644 --- a/src/ast/ast_smt2_pp.cpp +++ b/src/ast/ast_smt2_pp.cpp @@ -363,6 +363,23 @@ format * smt2_pp_environment::pp_arith_literal(app * t, bool decimal, unsigned d } } +format * smt2_pp_environment::pp_string_literal(app * t) { + std::string s; + VERIFY (get_sutil().str.is_const(t, s)); + std::ostringstream buffer; + buffer << "\""; + for (unsigned i = 0; i < s.length(); ++i) { + if (s[i] == '\"') { + buffer << "\"\""; + } + else { + buffer << s[i]; + } + } + buffer << "\""; + return mk_string(get_manager(), buffer.str().c_str()); +} + format * smt2_pp_environment::pp_datalog_literal(app * t) { uint64 v; VERIFY (get_dlutil().is_numeral(t, v)); @@ -578,6 +595,9 @@ class smt2_printer { if (m_env.get_autil().is_numeral(c) || m_env.get_autil().is_irrational_algebraic_numeral(c)) { f = m_env.pp_arith_literal(c, m_pp_decimal, m_pp_decimal_precision); } + else if (m_env.get_sutil().str.is_const(c)) { + f = m_env.pp_string_literal(c); + } else if (m_env.get_bvutil().is_numeral(c)) { f = m_env.pp_bv_literal(c, m_pp_bv_lits, m_pp_bv_neg); } diff --git a/src/ast/ast_smt2_pp.h b/src/ast/ast_smt2_pp.h index 04c3b35d4..45d18ceff 100644 --- a/src/ast/ast_smt2_pp.h +++ b/src/ast/ast_smt2_pp.h @@ -29,6 +29,7 @@ Revision History: #include"array_decl_plugin.h" #include"fpa_decl_plugin.h" #include"dl_decl_plugin.h" +#include"seq_decl_plugin.h" #include"smt2_util.h" class smt2_pp_environment { @@ -47,6 +48,7 @@ public: virtual bv_util & get_bvutil() = 0; virtual array_util & get_arutil() = 0; virtual fpa_util & get_futil() = 0; + virtual seq_util & get_sutil() = 0; virtual datalog::dl_decl_util& get_dlutil() = 0; virtual bool uses(symbol const & s) const = 0; virtual format_ns::format * pp_fdecl(func_decl * f, unsigned & len); @@ -54,6 +56,7 @@ public: virtual format_ns::format * pp_arith_literal(app * t, bool decimal, unsigned prec); virtual format_ns::format * pp_float_literal(app * t, bool use_bv_lits, bool use_float_real_lits); virtual format_ns::format * pp_datalog_literal(app * t); + virtual format_ns::format * pp_string_literal(app * t); virtual format_ns::format * pp_sort(sort * s); virtual format_ns::format * pp_fdecl_ref(func_decl * f); format_ns::format * pp_fdecl_name(symbol const & fname, unsigned & len) const; @@ -70,12 +73,14 @@ class smt2_pp_environment_dbg : public smt2_pp_environment { bv_util m_bvutil; array_util m_arutil; fpa_util m_futil; + seq_util m_sutil; datalog::dl_decl_util m_dlutil; public: - smt2_pp_environment_dbg(ast_manager & m):m_manager(m), m_autil(m), m_bvutil(m), m_arutil(m), m_futil(m), m_dlutil(m) {} + smt2_pp_environment_dbg(ast_manager & m):m_manager(m), m_autil(m), m_bvutil(m), m_arutil(m), m_futil(m), m_sutil(m), m_dlutil(m) {} virtual ast_manager & get_manager() const { return m_manager; } virtual arith_util & get_autil() { return m_autil; } virtual bv_util & get_bvutil() { return m_bvutil; } + virtual seq_util & get_sutil() { return m_sutil; } virtual array_util & get_arutil() { return m_arutil; } virtual fpa_util & get_futil() { return m_futil; } virtual datalog::dl_decl_util& get_dlutil() { return m_dlutil; } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp new file mode 100644 index 000000000..35f22d746 --- /dev/null +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -0,0 +1,214 @@ +/*++ +Copyright (c) 2015 Microsoft Corporation + +Module Name: + + seq_rewriter.cpp + +Abstract: + + Basic rewriting rules for sequences constraints. + +Author: + + Nikolaj Bjorner (nbjorner) 2015-12-5 + +Notes: + +--*/ + +#include"seq_rewriter.h" +#include"arith_decl_plugin.h" + + +br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { + SASSERT(f->get_family_id() == get_fid()); + + switch(f->get_decl_kind()) { + + case OP_SEQ_UNIT: + case OP_SEQ_EMPTY: + case OP_SEQ_CONCAT: + case OP_SEQ_CONS: + case OP_SEQ_REV_CONS: + case OP_SEQ_HEAD: + case OP_SEQ_TAIL: + case OP_SEQ_LAST: + case OP_SEQ_FIRST: + case OP_SEQ_PREFIX_OF: + case OP_SEQ_SUFFIX_OF: + case OP_SEQ_SUBSEQ_OF: + case OP_SEQ_EXTRACT: + case OP_SEQ_NTH: + case OP_SEQ_LENGTH: + + case OP_RE_PLUS: + case OP_RE_STAR: + case OP_RE_OPTION: + case OP_RE_RANGE: + case OP_RE_CONCAT: + case OP_RE_UNION: + case OP_RE_INTERSECT: + case OP_RE_COMPLEMENT: + case OP_RE_DIFFERENCE: + case OP_RE_LOOP: + case OP_RE_EMPTY_SET: + case OP_RE_FULL_SET: + case OP_RE_EMPTY_SEQ: + case OP_RE_OF_SEQ: + case OP_RE_OF_PRED: + case OP_RE_MEMBER: + return BR_FAILED; + + // string specific operators. + case OP_STRING_CONST: + return BR_FAILED; + case OP_STRING_CONCAT: + SASSERT(num_args == 2); + return mk_str_concat(args[0], args[1], result); + case OP_STRING_LENGTH: + SASSERT(num_args == 1); + return mk_str_length(args[0], result); + case OP_STRING_SUBSTR: + SASSERT(num_args == 3); + return mk_str_substr(args[0], args[1], args[2], result); + case OP_STRING_STRCTN: + SASSERT(num_args == 2); + return mk_str_strctn(args[0], args[1], result); + case OP_STRING_CHARAT: + SASSERT(num_args == 2); + return mk_str_charat(args[0], args[1], result); + case OP_STRING_STRIDOF: + SASSERT(num_args == 2); + return mk_str_stridof(args[0], args[1], result); + case OP_STRING_STRREPL: + SASSERT(num_args == 3); + return mk_str_strrepl(args[0], args[1], args[2], result); + case OP_STRING_PREFIX: + SASSERT(num_args == 2); + return mk_str_prefix(args[0], args[1], result); + case OP_STRING_SUFFIX: + SASSERT(num_args == 2); + return mk_str_suffix(args[0], args[1], result); + case OP_STRING_ITOS: + SASSERT(num_args == 1); + return mk_str_itos(args[0], result); + case OP_STRING_STOI: + SASSERT(num_args == 1); + return mk_str_stoi(args[0], result); + //case OP_STRING_U16TOS: + //case OP_STRING_STOU16: + //case OP_STRING_U32TOS: + //case OP_STRING_STOU32: + case OP_STRING_IN_REGEXP: + case OP_STRING_TO_REGEXP: + case OP_REGEXP_CONCAT: + case OP_REGEXP_UNION: + case OP_REGEXP_INTER: + case OP_REGEXP_STAR: + case OP_REGEXP_PLUS: + case OP_REGEXP_OPT: + case OP_REGEXP_RANGE: + case OP_REGEXP_LOOP: + return BR_FAILED; + } + return BR_FAILED; +} + +br_status seq_rewriter::mk_str_concat(expr* a, expr* b, expr_ref& result) { + std::string c, d; + if (m_util.str.is_const(a, c) && m_util.str.is_const(b, d)) { + result = m_util.str.mk_string(c + d); + return BR_DONE; + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_length(expr* a, expr_ref& result) { + std::string b; + if (m_util.str.is_const(a, b)) { + result = arith_util(m()).mk_numeral(rational(b.length()), true); + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_substr(expr* a, expr* b, expr* c, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_str_strctn(expr* a, expr* b, expr_ref& result) { + std::string c, d; + if (m_util.str.is_const(a, c) && m_util.str.is_const(b, d)) { + result = m().mk_bool_val(0 != strstr(d.c_str(), c.c_str())); + return BR_DONE; + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_charat(expr* a, expr* b, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_str_stridof(expr* a, expr* b, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_str_strrepl(expr* a, expr* b, expr* c, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_str_prefix(expr* a, expr* b, expr_ref& result) { + std::string s1, s2; + if (m_util.str.is_const(a, s1) && m_util.str.is_const(b, s2)) { + bool prefix = s1.length() <= s2.length(); + for (unsigned i = 0; i < s1.length() && prefix; ++i) { + prefix = s1[i] == s2[i]; + } + result = m().mk_bool_val(prefix); + return BR_DONE; + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_suffix(expr* a, expr* b, expr_ref& result) { + std::string s1, s2; + if (m_util.str.is_const(a, s1) && m_util.str.is_const(b, s2)) { + bool suffix = s1.length() <= s2.length(); + for (unsigned i = 0; i < s1.length() && suffix; ++i) { + suffix = s1[s1.length() - i - 1] == s2[s2.length() - i - 1]; + } + result = m().mk_bool_val(suffix); + return BR_DONE; + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_itos(expr* a, expr_ref& result) { + arith_util autil(m()); + rational r; + if (autil.is_numeral(a, r)) { + result = m_util.str.mk_string(r.to_string()); + return BR_DONE; + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { + arith_util autil(m()); + std::string s; + if (m_util.str.is_const(a, s)) { + + } + return BR_FAILED; +} +br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) { + return BR_FAILED; +} +br_status seq_rewriter::mk_re_opt(expr* a, expr_ref& result) { + return BR_FAILED; +} diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h new file mode 100644 index 000000000..62ef249a6 --- /dev/null +++ b/src/ast/rewriter/seq_rewriter.h @@ -0,0 +1,67 @@ +/*++ +Copyright (c) 2015 Microsoft Corporation + +Module Name: + + seq_rewriter.h + +Abstract: + + Basic rewriting rules for sequences constraints. + +Author: + + Nikolaj Bjorner (nbjorner) 2015-12-5 + +Notes: + +--*/ +#ifndef SEQ_REWRITER_H_ +#define SEQ_REWRITER_H_ + +#include"seq_decl_plugin.h" +#include"rewriter_types.h" +#include"params.h" +#include"lbool.h" + + +/** + \brief Cheap rewrite rules for seq constraints +*/ +class seq_rewriter { + seq_util m_util; + + br_status mk_str_concat(expr* a, expr* b, expr_ref& result); + br_status mk_str_length(expr* a, expr_ref& result); + br_status mk_str_substr(expr* a, expr* b, expr* c, expr_ref& result); + br_status mk_str_strctn(expr* a, expr* b, expr_ref& result); + br_status mk_str_charat(expr* a, expr* b, expr_ref& result); + br_status mk_str_stridof(expr* a, expr* b, expr_ref& result); + br_status mk_str_strrepl(expr* a, expr* b, expr* c, expr_ref& result); + br_status mk_str_prefix(expr* a, expr* b, expr_ref& result); + br_status mk_str_suffix(expr* a, expr* b, expr_ref& result); + br_status mk_str_itos(expr* a, expr_ref& result); + br_status mk_str_stoi(expr* a, expr_ref& result); + br_status mk_str_in_regexp(expr* a, expr* b, expr_ref& result); + br_status mk_str_to_regexp(expr* a, expr_ref& result); + br_status mk_re_concat(expr* a, expr* b, expr_ref& result); + br_status mk_re_union(expr* a, expr* b, expr_ref& result); + br_status mk_re_star(expr* a, expr_ref& result); + br_status mk_re_plus(expr* a, expr_ref& result); + br_status mk_re_opt(expr* a, expr_ref& result); + +public: + seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): + m_util(m) { + } + ast_manager & m() const { return m_util.get_manager(); } + family_id get_fid() const { return m_util.get_family_id(); } + + void updt_params(params_ref const & p) {} + static void get_param_descrs(param_descrs & r) {} + + br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result); + +}; + +#endif diff --git a/src/ast/rewriter/th_rewriter.cpp b/src/ast/rewriter/th_rewriter.cpp index b7197d239..a8aea47b6 100644 --- a/src/ast/rewriter/th_rewriter.cpp +++ b/src/ast/rewriter/th_rewriter.cpp @@ -26,6 +26,7 @@ Notes: #include"fpa_rewriter.h" #include"dl_rewriter.h" #include"pb_rewriter.h" +#include"seq_rewriter.h" #include"rewriter_def.h" #include"expr_substitution.h" #include"ast_smt2_pp.h" @@ -43,6 +44,7 @@ struct th_rewriter_cfg : public default_rewriter_cfg { fpa_rewriter m_f_rw; dl_rewriter m_dl_rw; pb_rewriter m_pb_rw; + seq_rewriter m_seq_rw; arith_util m_a_util; bv_util m_bv_util; unsigned long long m_max_memory; // in bytes @@ -76,6 +78,7 @@ struct th_rewriter_cfg : public default_rewriter_cfg { m_bv_rw.updt_params(p); m_ar_rw.updt_params(p); m_f_rw.updt_params(p); + m_seq_rw.updt_params(p); updt_local_params(p); } @@ -200,6 +203,8 @@ struct th_rewriter_cfg : public default_rewriter_cfg { return m_dl_rw.mk_app_core(f, num, args, result); if (fid == m_pb_rw.get_fid()) return m_pb_rw.mk_app_core(f, num, args, result); + if (fid == m_seq_rw.get_fid()) + return m_seq_rw.mk_app_core(f, num, args, result); return BR_FAILED; } @@ -650,6 +655,7 @@ struct th_rewriter_cfg : public default_rewriter_cfg { m_f_rw(m, p), m_dl_rw(m), m_pb_rw(m), + m_seq_rw(m), m_a_util(m), m_bv_util(m), m_used_dependencies(m), diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 0cdfbd5a8..b81f87ec0 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -134,7 +134,7 @@ public: virtual ~seq_decl_plugin() {} virtual void finalize(); - + virtual decl_plugin * mk_fresh() { return alloc(seq_decl_plugin); } virtual sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters); @@ -159,6 +159,8 @@ class seq_util { family_id m_fid; public: + ast_manager& get_manager() const { return m; } + class str { seq_util& u; ast_manager& m; @@ -168,6 +170,7 @@ public: app* mk_string(symbol const& s); app* mk_string(char const* s) { return mk_string(symbol(s)); } + app* mk_string(std::string const& s) { return mk_string(symbol(s.c_str())); } app* mk_concat(expr* a, expr* b) { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_STRING_CONCAT, 2, es); } app* mk_length(expr* a) { return m.mk_app(m_fid, OP_STRING_LENGTH, 1, &a); } app* mk_substr(expr* a, expr* b, expr* c) { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_STRING_SUBSTR, 3, es); } diff --git a/src/cmd_context/cmd_context.cpp b/src/cmd_context/cmd_context.cpp index 02b2b6525..1b5d9d470 100644 --- a/src/cmd_context/cmd_context.cpp +++ b/src/cmd_context/cmd_context.cpp @@ -245,6 +245,7 @@ protected: bv_util m_bvutil; array_util m_arutil; fpa_util m_futil; + seq_util m_sutil; datalog::dl_decl_util m_dlutil; format_ns::format * pp_fdecl_name(symbol const & s, func_decls const & fs, func_decl * f, unsigned & len) { @@ -265,13 +266,14 @@ protected: } public: - pp_env(cmd_context & o):m_owner(o), m_autil(o.m()), m_bvutil(o.m()), m_arutil(o.m()), m_futil(o.m()), m_dlutil(o.m()) {} + pp_env(cmd_context & o):m_owner(o), m_autil(o.m()), m_bvutil(o.m()), m_arutil(o.m()), m_futil(o.m()), m_sutil(o.m()), m_dlutil(o.m()) {} virtual ~pp_env() {} virtual ast_manager & get_manager() const { return m_owner.m(); } virtual arith_util & get_autil() { return m_autil; } virtual bv_util & get_bvutil() { return m_bvutil; } virtual array_util & get_arutil() { return m_arutil; } virtual fpa_util & get_futil() { return m_futil; } + virtual seq_util & get_sutil() { return m_sutil; } virtual datalog::dl_decl_util& get_dlutil() { return m_dlutil; } virtual bool uses(symbol const & s) const { return diff --git a/src/parsers/smt2/smt2parser.cpp b/src/parsers/smt2/smt2parser.cpp index 2f16f01d7..404c04d3a 100644 --- a/src/parsers/smt2/smt2parser.cpp +++ b/src/parsers/smt2/smt2parser.cpp @@ -1069,7 +1069,7 @@ namespace smt2 { void parse_string_const() { SASSERT(curr() == scanner::STRING_TOKEN); - expr_stack().push_back(sutil().str.mk_string(curr_id())); + expr_stack().push_back(sutil().str.mk_string(m_scanner.get_string())); TRACE("smt2parser", tout << "new string: " << mk_pp(expr_stack().back(), m()) << "\n";); next(); } diff --git a/src/parsers/smt2/smt2scanner.cpp b/src/parsers/smt2/smt2scanner.cpp index bc62c2646..ed16003c2 100644 --- a/src/parsers/smt2/smt2scanner.cpp +++ b/src/parsers/smt2/smt2scanner.cpp @@ -171,10 +171,7 @@ namespace smt2 { throw scanner_exception("unexpected end of string", m_line, m_spos); if (c == '\"') { next(); - if (curr() == '\"') { - m_string.push_back(c); - } - else { + if (curr() != '\"') { m_string.push_back(0); return STRING_TOKEN; }