/*++ Copyright (c) 2017 Microsoft Corporation and Arie Gurfinkel Module Name: spacer_quant_generalizer.cpp Abstract: Quantified lemma generalizer. Author: Yakir Vizel Arie Gurfinkel Revision History: --*/ #include "muz/spacer/spacer_context.h" #include "muz/spacer/spacer_generalizers.h" #include "muz/spacer/spacer_manager.h" #include "ast/expr_abstract.h" #include "ast/rewriter/var_subst.h" #include "ast/for_each_expr.h" #include "ast/rewriter/factor_equivs.h" #include "ast/rewriter/expr_safe_replace.h" #include "ast/substitution/matcher.h" #include "ast/expr_functors.h" #include "muz/spacer/spacer_sem_matcher.h" using namespace spacer; namespace { struct index_lt_proc : public std::binary_function { arith_util m_arith; index_lt_proc(ast_manager &m) : m_arith(m) {} bool operator() (app *a, app *b) { // XXX This order is a bit strange. // XXX It does the job in our current application, but only because // XXX we assume that we only compare expressions of the form (B + k), // XXX where B is fixed and k is a number. // XXX Might be better to specialize just for that specific use case. rational val1, val2; bool is_num1 = m_arith.is_numeral(a, val1); bool is_num2 = m_arith.is_numeral(b, val2); if (is_num1 && is_num2) { return val1 < val2; } else if (is_num1 != is_num2) { return is_num1; } is_num1 = false; is_num2 = false; // compare the first numeric argument of a to first numeric argument of b // if available for (unsigned i = 0, sz = a->get_num_args(); !is_num1 && i < sz; ++i) { is_num1 = m_arith.is_numeral (a->get_arg(i), val1); } for (unsigned i = 0, sz = b->get_num_args(); !is_num2 && i < sz; ++i) { is_num2 = m_arith.is_numeral(b->get_arg(i), val2); } if (is_num1 && is_num2) { return val1 < val2; } else if (is_num1 != is_num2) { return is_num1; } else { return a->get_id() < b->get_id(); } } }; } namespace spacer { lemma_quantifier_generalizer::lemma_quantifier_generalizer(context &ctx, bool normalize_cube) : lemma_generalizer(ctx), m(ctx.get_ast_manager()), m_arith(m), m_cube(m), m_normalize_cube(normalize_cube), m_offset(0) {} void lemma_quantifier_generalizer::collect_statistics(statistics &st) const { st.update("time.spacer.solve.reach.gen.quant", m_st.watch.get_seconds()); st.update("quantifier gen", m_st.count); st.update("quantifier gen failures", m_st.num_failures); } /** Finds candidates terms to be existentially abstracted. A term t is a candidate if (a) t is ground (b) t appears in an expression of the form (select A t) for some array A (c) t appears in an expression of the form (select A (+ t v)) where v is ground The goal is to pick candidates that might result in a lemma in the essentially uninterpreted fragment of FOL or its extensions. */ void lemma_quantifier_generalizer::find_candidates(expr *e, app_ref_vector &candidates) { if (!contains_selects(e, m)) return; app_ref_vector indices(m); get_select_indices(e, indices, m); app_ref_vector extra(m); expr_sparse_mark marked_args; // Make sure not to try and quantify already-quantified indices for (unsigned idx=0, sz = indices.size(); idx < sz; idx++) { // skip expressions that already contain a quantified variable if (has_zk_const(indices.get(idx))) { continue; } app *index = indices.get(idx); TRACE ("spacer_qgen", tout << "Candidate: "<< mk_pp(index, m) << " in " << mk_pp(e, m) << "\n";); extra.push_back(index); if (m_arith.is_add(index)) { for (expr * arg : *index) { if (!is_app(arg) || marked_args.is_marked(arg)) {continue;} marked_args.mark(arg); candidates.push_back (to_app(arg)); } } } std::sort(candidates.c_ptr(), candidates.c_ptr() + candidates.size(), index_lt_proc(m)); // keep actual select indecies in the order found at the back of // candidate list. There is no particular reason for this order candidates.append(extra); } /// returns true if expression e contains a sub-expression of the form (select A idx) where /// idx contains exactly one skolem from zks. Returns idx and the skolem bool lemma_quantifier_generalizer::match_sk_idx(expr *e, app_ref_vector const &zks, expr *&idx, app *&sk) { if (zks.size() != 1) return false; contains_app has_zk(m, zks.get(0)); if (!contains_selects(e, m)) return false; app_ref_vector indicies(m); get_select_indices(e, indicies, m); if (indicies.size() > 2) return false; unsigned i=0; if (indicies.size() == 1) { if (!has_zk(indicies.get(0))) return false; } else { if (has_zk(indicies.get(0)) && !has_zk(indicies.get(1))) i = 0; else if (!has_zk(indicies.get(0)) && has_zk(indicies.get(1))) i = 1; else if (!has_zk(indicies.get(0)) && !has_zk(indicies.get(1))) return false; } idx = indicies.get(i); sk = zks.get(0); return true; } namespace { expr* times_minus_one(expr *e, arith_util &arith) { expr *r; if (arith.is_times_minus_one (e, r)) { return r; } return arith.mk_mul(arith.mk_numeral(rational(-1), arith.is_int(get_sort(e))), e); } } /** Attempts to rewrite a cube so that quantified variable appears as a top level argument of select-term Find sub-expression of the form (select A (+ sk!0 t)) and replaces (+ sk!0 t) --> sk!0 and sk!0 --> (+ sk!0 (* (- 1) t)) Current implementation is an ugly hack for one special case */ void lemma_quantifier_generalizer::cleanup(expr_ref_vector &cube, app_ref_vector const &zks, expr_ref &bind) { if (zks.size() != 1) return; arith_util arith(m); expr *idx = nullptr; app *sk = nullptr; expr_ref rep(m); for (expr *e : cube) { if (match_sk_idx(e, zks, idx, sk)) { CTRACE("spacer_qgen", idx != sk, tout << "Possible cleanup of " << mk_pp(idx, m) << " in " << mk_pp(e, m) << " on " << mk_pp(sk, m) << "\n";); if (!arith.is_add(idx)) continue; app *a = to_app(idx); bool found = false; expr_ref_vector kids(m); expr_ref_vector kids_bind(m); for (expr* arg : *a) { if (arg == sk) { found = true; kids.push_back(arg); kids_bind.push_back(bind); } else { kids.push_back (times_minus_one(arg, arith)); kids_bind.push_back (times_minus_one(arg, arith)); } } if (!found) continue; rep = arith.mk_add(kids.size(), kids.c_ptr()); bind = arith.mk_add(kids_bind.size(), kids_bind.c_ptr()); TRACE("spacer_qgen", tout << "replace " << mk_pp(idx, m) << " with " << mk_pp(rep, m) << "\n";); break; } } if (rep) { expr_safe_replace rw(m); rw.insert(sk, rep); rw.insert(idx, sk); rw(cube); TRACE("spacer_qgen", tout << "Cleaned cube to: " << mk_and(cube) << "\n";); } } /** Create an abstract cube by abstracting a given term with a given variable. On return, gnd_cube contains all ground literals from m_cube abs_cube contains all newly quantified literals from m_cube lb contains an expression determining the lower bound on the variable ub contains an expression determining the upper bound on the variable Conjunction of gnd_cube and abs_cube is the new quantified cube lb and ub are null if no bound was found */ void lemma_quantifier_generalizer::mk_abs_cube(lemma_ref &lemma, app *term, var *var, expr_ref_vector &gnd_cube, expr_ref_vector &abs_cube, expr *&lb, expr *&ub, unsigned &stride) { // create an abstraction function that maps candidate term to variables expr_safe_replace sub(m); // term -> var sub.insert(term , var); rational val; if (m_arith.is_numeral(term, val)) { bool is_int = val.is_int(); expr_ref minus_one(m); minus_one = m_arith.mk_numeral(rational(-1), is_int); // term+1 -> var+1 if term is a number sub.insert( m_arith.mk_numeral(val + 1, is_int), m_arith.mk_add(var, m_arith.mk_numeral(rational(1), is_int))); // -term-1 -> -1*var + -1 if term is a number sub.insert( m_arith.mk_numeral(-1*val + -1, is_int), m_arith.mk_add (m_arith.mk_mul (minus_one, var), minus_one)); } lb = nullptr; ub = nullptr; for (expr *lit : m_cube) { expr_ref abs_lit(m); sub (lit, abs_lit); if (lit == abs_lit) { gnd_cube.push_back(lit); } else { expr *e1, *e2; // generalize v=num into v>=num if (m.is_eq(abs_lit, e1, e2) && (e1 == var || e2 == var)) { if (m_arith.is_numeral(e1)) { abs_lit = m_arith.mk_ge (var, e1); } else if (m_arith.is_numeral(e2)) { abs_lit = m_arith.mk_ge(var, e2); } } abs_cube.push_back(abs_lit); if (contains_selects(abs_lit, m)) { expr_ref_vector pob_cube(m); flatten_and(lemma->get_pob()->post(), pob_cube); find_stride(pob_cube, abs_lit, stride); } if (!lb && is_lb(var, abs_lit)) { lb = abs_lit; } else if (!ub && is_ub(var, abs_lit)) { ub = abs_lit; } } } } // -- returns true if e is an upper bound for var bool lemma_quantifier_generalizer::is_ub(var *var, expr *e) { expr *e1, *e2; // var <= e2 if ((m_arith.is_le (e, e1, e2) || m_arith.is_lt(e, e1, e2)) && var == e1) { return true; } // e1 >= var if ((m_arith.is_ge(e, e1, e2) || m_arith.is_gt(e, e1, e2)) && var == e2) { return true; } // t <= -1*var if ((m_arith.is_le (e, e1, e2) || m_arith.is_lt(e, e1, e2)) && m_arith.is_times_minus_one(e2, e2) && e2 == var) { return true; } // -1*var >= t if ((m_arith.is_ge(e, e1, e2) || m_arith.is_gt(e, e1, e2)) && m_arith.is_times_minus_one(e1, e1) && e1 == var) { return true; } // ! (var >= e2) if (m.is_not (e, e1) && is_lb(var, e1)) { return true; } // var + t1 <= t2 if ((m_arith.is_le(e, e1, e2) || m_arith.is_lt(e, e1, e2)) && m_arith.is_add(e1)) { app *a = to_app(e1); for (expr* arg : *a) { if (arg == var) return true; } } // t1 <= t2 + -1*var if ((m_arith.is_le(e, e1, e2) || m_arith.is_lt(e, e1, e2)) && m_arith.is_add(e2)) { app *a = to_app(e2); for (expr* arg : *a) { if (m_arith.is_times_minus_one(arg, arg) && arg == var) return true; } } // t1 >= t2 + var if ((m_arith.is_ge(e, e1, e2) || m_arith.is_gt(e, e1, e2)) && m_arith.is_add(e2)) { app *a = to_app(e2); for (expr * arg : *a) { if (arg == var) return true; } } // -1*var + t1 >= t2 if ((m_arith.is_ge(e, e1, e2) || m_arith.is_gt(e, e1, e2)) && m_arith.is_add(e1)) { app *a = to_app(e1); for (expr * arg : *a) { if (m_arith.is_times_minus_one(arg, arg) && arg == var) return true; } } return false; } // -- returns true if e is a lower bound for var bool lemma_quantifier_generalizer::is_lb(var *var, expr *e) { expr *e1, *e2; // var >= e2 if ((m_arith.is_ge (e, e1, e2) || m_arith.is_gt(e, e1, e2)) && var == e1) { return true; } // e1 <= var if ((m_arith.is_le(e, e1, e2) || m_arith.is_lt(e, e1, e2)) && var == e2) { return true; } // t >= -1*var if ((m_arith.is_ge (e, e1, e2) || m_arith.is_gt(e, e1, e2)) && m_arith.is_times_minus_one(e2, e2) && e2 == var) { return true; } // -1*var <= t if ((m_arith.is_le(e, e1, e2) || m_arith.is_lt(e, e1, e2)) && m_arith.is_times_minus_one(e1, e1) && e1 == var) { return true; } // ! (var <= e2) if (m.is_not (e, e1) && is_ub(var, e1)) { return true; } // var + t1 >= t2 if ((m_arith.is_ge(e, e1, e2) || m_arith.is_gt(e, e1, e2)) && m_arith.is_add(e1)) { app *a = to_app(e1); for (expr * arg : *a) { if (arg == var) return true; } } // t1 >= t2 + -1*var if ((m_arith.is_ge(e, e1, e2) || m_arith.is_gt(e, e1, e2)) && m_arith.is_add(e2)) { app *a = to_app(e2); for (expr * arg : *a) { if (m_arith.is_times_minus_one(arg, arg) && arg == var) return true; } } // t1 <= t2 + var if ((m_arith.is_le(e, e1, e2) || m_arith.is_lt(e, e1, e2)) && m_arith.is_add(e2)) { app *a = to_app(e2); for (expr * arg : *a) { if (arg == var) return true; } } // -1*var + t1 <= t2 if ((m_arith.is_le(e, e1, e2) || m_arith.is_lt(e, e1, e2)) && m_arith.is_add(e1)) { app *a = to_app(e1); for (expr * arg : *a) { if (m_arith.is_times_minus_one(arg, arg) && arg == var) return true; } } return false; } bool lemma_quantifier_generalizer::generalize (lemma_ref &lemma, app *term) { expr *lb = nullptr, *ub = nullptr; unsigned stride = 1; expr_ref_vector gnd_cube(m); expr_ref_vector abs_cube(m); var_ref var(m); var = m.mk_var (m_offset, get_sort(term)); mk_abs_cube(lemma, term, var, gnd_cube, abs_cube, lb, ub, stride); if (abs_cube.empty()) {return false;} TRACE("spacer_qgen", tout << "abs_cube is: " << mk_and(abs_cube) << "\n"; tout << "lb = "; if (lb) tout << mk_pp(lb, m); else tout << "none"; tout << "\n"; tout << "ub = "; if (ub) tout << mk_pp(ub, m); else tout << "none"; tout << "\n";); if (!lb && !ub) return false; // -- guess lower or upper bound if missing if (!lb) { abs_cube.push_back (m_arith.mk_ge (var, term)); lb = abs_cube.back(); } if (!ub) { abs_cube.push_back (m_arith.mk_lt(var, term)); ub = abs_cube.back(); } rational init; expr_ref constant(m); if (is_var(to_app(lb)->get_arg(0))) constant = to_app(lb)->get_arg(1); else constant = to_app(lb)->get_arg(0); if (stride > 1 && m_arith.is_numeral(constant, init)) { unsigned mod = init.get_unsigned() % stride; TRACE("spacer_qgen", tout << "mod=" << mod << " init=" << init << " stride=" << stride << "\n"; tout.flush();); abs_cube.push_back(m.mk_eq( m_arith.mk_mod(var, m_arith.mk_numeral(rational(stride), true)), m_arith.mk_numeral(rational(mod), true))); } // skolemize expr_ref gnd(m); app_ref_vector zks(m); ground_expr(mk_and(abs_cube), gnd, zks); flatten_and(gnd, gnd_cube); TRACE("spacer_qgen", tout << "New CUBE is: " << gnd_cube << "\n";); // check if the result is a true lemma unsigned uses_level = 0; pred_transformer &pt = lemma->get_pob()->pt(); if (pt.check_inductive(lemma->level(), gnd_cube, uses_level, lemma->weakness())) { TRACE("spacer_qgen", tout << "Quantifier Generalization Succeeded!\n" << "New CUBE is: " << gnd_cube << "\n";); SASSERT(zks.size() >= static_cast(m_offset)); // lift quantified variables to top of select expr_ref ext_bind(m); ext_bind = term; cleanup(gnd_cube, zks, ext_bind); // XXX better do that check before changing bind in cleanup() // XXX Or not because substitution might introduce _n variable into bind if (m_ctx.get_manager().is_n_formula(ext_bind)) { // XXX this creates an instance, but not necessarily the needed one // XXX This is sound because any instance of // XXX universal quantifier is sound // XXX needs better long term solution. leave // comment here for the future m_ctx.get_manager().formula_n2o(ext_bind, ext_bind, 0); } lemma->update_cube(lemma->get_pob(), gnd_cube); lemma->set_level(uses_level); SASSERT(var->get_idx() < zks.size()); SASSERT(is_app(ext_bind)); lemma->add_skolem(zks.get(var->get_idx()), to_app(ext_bind)); return true; } return false; } bool lemma_quantifier_generalizer::find_stride(expr_ref_vector &c, expr_ref &pattern, unsigned &stride) { expr_ref tmp(m); tmp = mk_and(c); normalize(tmp, tmp, false, true); c.reset(); flatten_and(tmp, c); app_ref_vector indices(m); get_select_indices(pattern, indices, m); // TODO if (indices.size() > 1) return false; app *p_index = indices.get(0); if (is_var(p_index)) return false; std::vector instances; for (expr* lit : c) { if (!contains_selects(lit, m)) continue; indices.reset(); get_select_indices(lit, indices, m); // TODO: if (indices.size() > 1) continue; app *candidate = indices.get(0); unsigned size = p_index->get_num_args(); unsigned matched = 0; for (unsigned p=0; p < size; p++) { expr *arg = p_index->get_arg(p); if (is_var(arg)) { rational val; if (p < candidate->get_num_args() && m_arith.is_numeral(candidate->get_arg(p), val) && val.is_unsigned()) { instances.push_back(val.get_unsigned()); } } else { for (expr* cand : *candidate) { if (cand == arg) { matched++; break; } } } } if (matched < size - 1) continue; if (candidate->get_num_args() == matched) instances.push_back(0); TRACE("spacer_qgen", tout << "Match succeeded!\n";); } if (instances.size() <= 1) return false; std::sort(instances.begin(), instances.end()); stride = instances[1]-instances[0]; TRACE("spacer_qgen", tout << "Index Stride is: " << stride << "\n";); return true; } void lemma_quantifier_generalizer::operator()(lemma_ref &lemma) { if (lemma->get_cube().empty()) return; if (!lemma->has_pob()) return; m_st.count++; scoped_watch _w_(m_st.watch); TRACE("spacer_qgen", tout << "initial cube: " << mk_and(lemma->get_cube()) << "\n";); // setup the cube m_cube.reset(); m_cube.append(lemma->get_cube()); if (m_normalize_cube) { // -- re-normalize the cube expr_ref c(m); c = mk_and(m_cube); normalize(c, c, false, true); m_cube.reset(); flatten_and(c, m_cube); TRACE("spacer_qgen", tout << "normalized cube:\n" << mk_and(m_cube) << "\n";); } // first unused free variable m_offset = lemma->get_pob()->get_free_vars_size(); // for every literal, find a candidate term to abstract for (unsigned i=0; i < m_cube.size(); i++) { expr *r = m_cube.get(i); // generate candidates for abstraction app_ref_vector candidates(m); find_candidates(r, candidates); if (candidates.empty()) continue; // for every candidate for (unsigned arg=0, sz = candidates.size(); arg < sz; arg++) { if (generalize (lemma, candidates.get(arg))) { return; } else { ++m_st.num_failures; } } } } }