From 75c935a4cb22ca6390a2a06e5612be73f87f4d7c Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 4 Dec 2015 12:09:15 -0800 Subject: [PATCH] add tokens to parse strings Signed-off-by: Nikolaj Bjorner --- src/ast/seq_decl_plugin.cpp | 118 ++++++++++++++++++++++++++++---- src/ast/seq_decl_plugin.h | 53 +++++++++++++- src/parsers/smt2/smt2parser.cpp | 19 +++++ 3 files changed, 174 insertions(+), 16 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 330227193..612ffc9db 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -21,11 +21,14 @@ Revision History: #include "array_decl_plugin.h" #include -seq_decl_plugin::seq_decl_plugin(): m_init(false) {} +seq_decl_plugin::seq_decl_plugin(): m_init(false), + m_stringc_sym("String"), + m_string(0) {} void seq_decl_plugin::finalize() { for (unsigned i = 0; i < m_sigs.size(); ++i) dealloc(m_sigs[i]); + m_manager->dec_ref(m_string); } bool seq_decl_plugin::is_sort_param(sort* s, unsigned& idx) { @@ -119,9 +122,17 @@ void seq_decl_plugin::init() { m_init = true; sort* A = m.mk_uninterpreted_sort(symbol((unsigned)0)); sort* B = m.mk_uninterpreted_sort(symbol((unsigned)1)); + sort* strT = m_string; parameter paramA(A); + parameter paramS(strT); sort* seqA = m.mk_sort(m_family_id, SEQ_SORT, 1, ¶mA); sort* reA = m.mk_sort(m_family_id, RE_SORT, 1, ¶mA); + sort* reT = m.mk_sort(m_family_id, RE_SORT, 1, ¶mS); + sort* boolT = m.mk_bool_sort(); + sort* intT = arith_util(m).mk_int(); + sort* predA = array_util(m).mk_array_sort(A, boolT); + sort* u16T = 0; + sort* u32T = 0; sort* seqAseqA[2] = { seqA, seqA }; sort* seqAA[2] = { seqA, A }; sort* seqAB[2] = { seqA, B }; @@ -130,9 +141,11 @@ void seq_decl_plugin::init() { sort* reAreA[2] = { reA, reA }; sort* AA[2] = { A, A }; sort* seqABB[3] = { seqA, B, B }; - sort* boolT = m.mk_bool_sort(); - sort* intT = arith_util(m).mk_int(); - sort* predA = array_util(m).mk_array_sort(A, boolT); + sort* str2T[2] = { strT, strT }; + sort* str3T[3] = { strT, strT, strT }; + sort* strTint2T[3] { strT, intT, intT }; + sort* re2T[2] = { reT, reT }; + sort* strTreT[2] = { strT, reT }; m_sigs.resize(LAST_SEQ_OP); // TBD: have (par ..) construct and load parameterized signature from premable. m_sigs[OP_SEQ_UNIT] = alloc(psig, m, "seq-unit", 1, 1, &A, seqA); @@ -158,14 +171,46 @@ void seq_decl_plugin::init() { m_sigs[OP_RE_UNION] = alloc(psig, m, "re-union", 1, 2, reAreA, reA); m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re-intersect", 1, 2, reAreA, reA); m_sigs[OP_RE_DIFFERENCE] = alloc(psig, m, "re-difference", 1, 2, reAreA, reA); - m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re-complement", 1, 1, &reA, reA); - m_sigs[OP_RE_LOOP] = alloc(psig, m, "re-loop", 1, 1, &reA, reA); + m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re-complement", 1, 1, &reA, reA); + m_sigs[OP_RE_LOOP] = alloc(psig, m, "re-loop", 1, 1, &reA, reA); m_sigs[OP_RE_EMPTY_SEQ] = alloc(psig, m, "re-empty-seq", 1, 0, 0, reA); - m_sigs[OP_RE_EMPTY_SET] = alloc(psig, m, "re-empty-set", 1, 0, 0, reA); - m_sigs[OP_RE_FULL_SET] = alloc(psig, m, "re-full-set", 1, 0, 0, reA); - m_sigs[OP_RE_OF_SEQ] = alloc(psig, m, "re-of-seq", 1, 1, &seqA, reA); - m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re-of-pred", 1, 1, &predA, reA); - m_sigs[OP_RE_MEMBER] = alloc(psig, m, "re-member", 1, 2, seqAreA, boolT); + m_sigs[OP_RE_EMPTY_SET] = alloc(psig, m, "re-empty-set", 1, 0, 0, reA); + m_sigs[OP_RE_FULL_SET] = alloc(psig, m, "re-full-set", 1, 0, 0, reA); + m_sigs[OP_RE_OF_SEQ] = alloc(psig, m, "re-of-seq", 1, 1, &seqA, reA); + m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re-of-pred", 1, 1, &predA, reA); + m_sigs[OP_RE_MEMBER] = alloc(psig, m, "re-member", 1, 2, seqAreA, boolT); + m_sigs[OP_STRING_CONST] = 0; + m_sigs[OP_STRING_CONCAT] = alloc(psig, m, "str.++", 0, 2, str2T, strT); + m_sigs[OP_STRING_LENGTH] = alloc(psig, m, "str.len", 0, 1, &strT, intT); + m_sigs[OP_STRING_SUBSTR] = alloc(psig, m, "str.substr", 0, 3, strTint2T, boolT); + m_sigs[OP_STRING_STRCTN] = alloc(psig, m, "str.contains", 0, 2, str2T, boolT); + m_sigs[OP_STRING_CHARAT] = alloc(psig, m, "str.at", 0, 2, strTint2T, strT); + m_sigs[OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 2, str2T, intT); + m_sigs[OP_STRING_STRREPL] = alloc(psig, m, "str.replace", 0, 3, str3T, strT); + m_sigs[OP_STRING_PREFIX] = alloc(psig, m, "str.prefixof", 0, 2, str2T, boolT); + m_sigs[OP_STRING_SUFFIX] = alloc(psig, m, "str.suffixof", 0, 2, str2T, boolT); + m_sigs[OP_STRING_ITOS] = alloc(psig, m, "int.to.str", 0, 1, &intT, strT); + m_sigs[OP_STRING_STOI] = alloc(psig, m, "str.to.int", 0, 1, &strT, intT); + //m_sigs[OP_STRING_U16TOS] = alloc(psig, m, "u16.to.str", 0, 1, &intT, strT); + //m_sigs[OP_STRING_STOU16] = alloc(psig, m, "str.to.u16", 0, 1, &strT, intT); + //m_sigs[OP_STRING_U32TOS] = alloc(psig, m, "u32.to.str", 0, 1, &intT, strT); + //m_sigs[OP_STRING_STOU32] = alloc(psig, m, "str.to.u32", 0, 1, &strT, intT); + m_sigs[OP_STRING_IN_REGEXP] = alloc(psig, m, "str.in.re", 0, 2, strTreT, boolT); + m_sigs[OP_STRING_TO_REGEXP] = alloc(psig, m, "str.to.re", 0, 1, &strT, reT); + m_sigs[OP_REGEXP_CONCAT] = alloc(psig, m, "re.++", 0, 2, re2T, reT); + m_sigs[OP_REGEXP_UNION] = alloc(psig, m, "re.union", 0, 2, re2T, reT); + m_sigs[OP_REGEXP_INTER] = alloc(psig, m, "re.inter", 0, 2, re2T, reT); + m_sigs[OP_REGEXP_STAR] = alloc(psig, m, "re.*", 0, 1, &reT, reT); + m_sigs[OP_REGEXP_PLUS] = alloc(psig, m, "re.+", 0, 1, &reT, reT); + m_sigs[OP_REGEXP_OPT] = alloc(psig, m, "re.opt", 0, 1, &reT, reT); + m_sigs[OP_REGEXP_RANGE] = alloc(psig, m, "re.range", 0, 2, str2T, reT); + m_sigs[OP_REGEXP_LOOP] = alloc(psig, m, "re.loop", 0, 2, strTint2T, reT); // maybe 3 arguments. +} + +void seq_decl_plugin::set_manager(ast_manager* m, family_id id) { + decl_plugin::set_manager(m, id); + m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, STRING_SORT, 0, (parameter const*)0)); + m->inc_ref(m_string); } sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) { @@ -188,6 +233,8 @@ sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter m.raise_exception("invalid regex sort, parameter is not a sort"); } return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters)); + case STRING_SORT: + return m_string; default: UNREACHABLE(); return 0; @@ -239,7 +286,38 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, if (num_parameters != 2 || !parameters[0].is_int() || !parameters[1].is_int()) { m.raise_exception("Expecting two numeral parameters to function re-loop"); } - return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters)); + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters)); + case OP_STRING_CONST: + if (!(num_parameters == 1 && arity == 0 && parameters[0].is_symbol())) { + m.raise_exception("invalid string declaration"); + } + return m.mk_const_decl(m_stringc_sym, m_string, + func_decl_info(m_family_id, OP_STRING_CONST, num_parameters, parameters)); + + case OP_STRING_CONCAT: + case OP_STRING_LENGTH: + case OP_STRING_SUBSTR: + case OP_STRING_STRCTN: + case OP_STRING_CHARAT: + case OP_STRING_STRIDOF: + case OP_STRING_STRREPL: + case OP_STRING_PREFIX: + case OP_STRING_SUFFIX: + case OP_STRING_ITOS: + case OP_STRING_STOI: + case OP_STRING_IN_REGEXP: + case OP_STRING_TO_REGEXP: + case OP_REGEXP_CONCAT: + case OP_REGEXP_UNION: + case OP_REGEXP_INTER: + case OP_REGEXP_STAR: + case OP_REGEXP_PLUS: + case OP_REGEXP_OPT: + case OP_REGEXP_RANGE: + case OP_REGEXP_LOOP: + match(*m_sigs[k], arity, domain, range, rng); + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k)); + default: UNREACHABLE(); return 0; @@ -249,7 +327,9 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, void seq_decl_plugin::get_op_names(svector & op_names, symbol const & logic) { init(); for (unsigned i = 0; i < m_sigs.size(); ++i) { - op_names.push_back(builtin_name(m_sigs[i]->m_name.str().c_str(), i)); + if (m_sigs[i]) { + op_names.push_back(builtin_name(m_sigs[i]->m_name.str().c_str(), i)); + } } } @@ -257,9 +337,21 @@ void seq_decl_plugin::get_sort_names(svector & sort_names, symbol init(); sort_names.push_back(builtin_name("Seq", SEQ_SORT)); sort_names.push_back(builtin_name("RegEx", RE_SORT)); + sort_names.push_back(builtin_name("String", STRING_SORT)); +} + +app* seq_decl_plugin::mk_string(symbol const& s) { + parameter param(s); + func_decl* f = m_manager->mk_const_decl(m_stringc_sym, m_string, + func_decl_info(m_family_id, OP_STRING_CONST, 1, ¶m)); + return m_manager->mk_const(f); } bool seq_decl_plugin::is_value(app* e) const { // TBD: empty sequence is a value. return false; } + +app* seq_util::mk_string(symbol const& s) { + return seq.mk_string(s); +} diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index c71c7317e..3d1faba42 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -24,7 +24,8 @@ Revision History: enum seq_sort_kind { SEQ_SORT, - RE_SORT + RE_SORT, + STRING_SORT }; enum seq_op_kind { @@ -60,6 +61,35 @@ enum seq_op_kind { OP_RE_OF_SEQ, OP_RE_OF_PRED, OP_RE_MEMBER, + + + // string specific operators. + OP_STRING_CONST, + OP_STRING_CONCAT, + OP_STRING_LENGTH, + OP_STRING_SUBSTR, + OP_STRING_STRCTN, + OP_STRING_CHARAT, + OP_STRING_STRIDOF, + OP_STRING_STRREPL, + OP_STRING_PREFIX, + OP_STRING_SUFFIX, + OP_STRING_ITOS, + OP_STRING_STOI, + //OP_STRING_U16TOS, + //OP_STRING_STOU16, + //OP_STRING_U32TOS, + //OP_STRING_STOU32, + OP_STRING_IN_REGEXP, + OP_STRING_TO_REGEXP, + OP_REGEXP_CONCAT, + OP_REGEXP_UNION, + OP_REGEXP_INTER, + OP_REGEXP_STAR, + OP_REGEXP_PLUS, + OP_REGEXP_OPT, + OP_REGEXP_RANGE, + OP_REGEXP_LOOP, LAST_SEQ_OP }; @@ -83,7 +113,9 @@ class seq_decl_plugin : public decl_plugin { }; ptr_vector m_sigs; - bool m_init; + bool m_init; + symbol m_stringc_sym; + sort* m_string; void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); @@ -95,6 +127,8 @@ class seq_decl_plugin : public decl_plugin { void init(); + virtual void set_manager(ast_manager * m, family_id id); + public: seq_decl_plugin(); @@ -116,7 +150,20 @@ public: virtual bool is_unique_value(app * e) const { return is_value(e); } - + app* mk_string(symbol const& s); +}; + +class seq_util { + ast_manager& m; + seq_decl_plugin& seq; +public: + seq_util(ast_manager& m): + m(m), + seq(*static_cast(m.get_plugin(m.mk_family_id("seq")))) { + } + ~seq_util() {} + + app* mk_string(symbol const& s); }; #endif /* SEQ_DECL_PLUGIN_H_ */ diff --git a/src/parsers/smt2/smt2parser.cpp b/src/parsers/smt2/smt2parser.cpp index b95be8ee5..62751c025 100644 --- a/src/parsers/smt2/smt2parser.cpp +++ b/src/parsers/smt2/smt2parser.cpp @@ -22,6 +22,7 @@ Revision History: #include"datatype_decl_plugin.h" #include"bv_decl_plugin.h" #include"arith_decl_plugin.h" +#include"seq_decl_plugin.h" #include"ast_pp.h" #include"well_sorted.h" #include"pattern_validation.h" @@ -65,6 +66,7 @@ namespace smt2 { scoped_ptr m_bv_util; scoped_ptr m_arith_util; + scoped_ptr m_seq_util; scoped_ptr m_pattern_validator; scoped_ptr m_var_shifter; @@ -270,6 +272,12 @@ namespace smt2 { return *(m_arith_util.get()); } + seq_util & sutil() { + if (m_seq_util.get() == 0) + m_seq_util = alloc(seq_util, m()); + return *(m_seq_util.get()); + } + bv_util & butil() { if (m_bv_util.get() == 0) m_bv_util = alloc(bv_util, m()); @@ -1059,6 +1067,13 @@ namespace smt2 { next(); } + void parse_string_const() { + SASSERT(curr() == scanner::STRING_TOKEN); + expr_stack().push_back(sutil().mk_string(curr_id())); + TRACE("smt2parser", tout << "new string: " << mk_pp(expr_stack().back(), m()) << "\n";); + next(); + } + void push_pattern_frame() { // TODO: It seems the only reliable way to parse patterns is: // Parse as an S-Expr, then try to convert it to an useful pattern. @@ -1723,6 +1738,9 @@ namespace smt2 { break; case scanner::KEYWORD_TOKEN: throw parser_exception("invalid expression, unexpected keyword"); + case scanner::STRING_TOKEN: + parse_string_const(); + break; default: throw parser_exception("invalid expression, unexpected input"); } @@ -2609,6 +2627,7 @@ namespace smt2 { m_bv_util = 0; m_arith_util = 0; + m_seq_util = 0; m_pattern_validator = 0; m_var_shifter = 0; }