diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 5125b4284..f39988925 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -831,6 +831,12 @@ namespace seq { return out; } + std::string nielsen_graph::to_dot() const { + std::stringstream ss; + to_dot(ss); + return ss.str(); + } + // ----------------------------------------------------------------------- // nielsen_node: simplify_and_init // ----------------------------------------------------------------------- @@ -1037,12 +1043,17 @@ namespace seq { m_sat_node = nullptr; m_sat_path.reset(); - // Iterative deepening: start at depth 3, increment by 1 on each failure. + // Iterative deepening: increment by 1 on each failure. // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. m_depth_bound = 3; while (true) { - if (m_cancel_fn && m_cancel_fn()) + if (m_cancel_fn && m_cancel_fn()) { +#ifdef Z3DEBUG + // Examining the Nielsen graph is probably the best way of debugging + std::string dot = to_dot(); +#endif break; + } if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) break; inc_run_idx(); @@ -1758,7 +1769,7 @@ namespace seq { if (apply_star_intr(node)) return ++m_stats.m_mod_star_intr, true; - // Priority 7: GPowerIntr - generalized power introduction + // Priority 7: GPowerIntr - ground power introduction if (apply_gpower_intr(node)) return ++m_stats.m_mod_gpower_intr, true; @@ -2078,10 +2089,12 @@ namespace seq { // ----------------------------------------------------------------------- // Modifier: apply_gpower_intr - // Ground power introduction: for a variable x matched against a - // ground repeated pattern, introduce x = base^n · prefix with fresh n. - // Generates integer side constraint n >= 0 for the fresh exponent. - // mirrors ZIPT's GPowerIntrModifier + // Generalized power introduction: for an equation where one side's head + // is a variable v and the other side has a ground prefix followed by a + // variable x that forms a dependency cycle back to v, introduce + // v = base^n · suffix where base is the ground prefix. + // Generates side constraints n >= 0 and 0 <= len(suffix) < len(base). + // mirrors ZIPT's GPowerIntrModifier (SplitGroundPower + TryGetPowerSplitBase) // ----------------------------------------------------------------------- bool nielsen_graph::apply_gpower_intr(nielsen_node* node) { @@ -2093,84 +2106,105 @@ namespace seq { if (eq.is_trivial()) continue; if (!eq.m_lhs || !eq.m_rhs) continue; - euf::snode* var_side = nullptr; - euf::snode* ground_side = nullptr; - - // One side must be a single variable, other side must be ground euf::snode* lhead = eq.m_lhs->first(); euf::snode* rhead = eq.m_rhs->first(); + if (!lhead || !rhead) continue; - if (lhead && lhead->is_var() && eq.m_lhs->length() == 1 && eq.m_rhs->is_ground()) { - var_side = lhead; - ground_side = eq.m_rhs; + // Try both orientations (mirrors ZIPT calling SplitGroundPower + // with (t2, LHS) and (t1, RHS) from ExtendDir) + // Orientation 1: RHS head is var, scan LHS for ground prefix + cycle var + if (rhead->is_var() && !lhead->is_var()) { + euf::snode_vector toks; + eq.m_lhs->collect_tokens(toks); + euf::snode_vector ground_prefix; + euf::snode* target_var = nullptr; + for (unsigned i = 0; i < toks.size(); ++i) { + if (toks[i]->is_var()) { target_var = toks[i]; break; } + ground_prefix.push_back(toks[i]); + } + if (target_var && !ground_prefix.empty() && target_var->id() == rhead->id()) { + if (fire_gpower_intro(node, eq, rhead, ground_prefix)) + return true; + } } - else if (rhead && rhead->is_var() && eq.m_rhs->length() == 1 && eq.m_lhs->is_ground()) { - var_side = rhead; - ground_side = eq.m_lhs; + // Orientation 2: LHS head is var, scan RHS for ground prefix + cycle var + if (lhead->is_var() && !rhead->is_var()) { + euf::snode_vector toks; + eq.m_rhs->collect_tokens(toks); + euf::snode_vector ground_prefix; + euf::snode* target_var = nullptr; + for (unsigned i = 0; i < toks.size(); ++i) { + if (toks[i]->is_var()) { target_var = toks[i]; break; } + ground_prefix.push_back(toks[i]); + } + if (target_var && !ground_prefix.empty() && target_var->id() == lhead->id()) { + if (fire_gpower_intro(node, eq, lhead, ground_prefix)) + return true; + } } - else continue; - - if (!ground_side || ground_side->is_empty()) continue; - if (ground_side->length() < 2) continue; // need a repeated pattern - - // Extract the first token as the "base" for power introduction - euf::snode* base_char = ground_side->first(); - if (!base_char || !base_char->is_char()) continue; - - // Check if the ground side has a repeated leading character - euf::snode_vector toks; - ground_side->collect_tokens(toks); - unsigned repeat_len = 0; - for (unsigned i = 0; i < toks.size(); ++i) { - if (toks[i]->id() == base_char->id()) - ++repeat_len; - else break; - } - if (repeat_len < 2) continue; // need at least 2 repetitions - - // Introduce: x = base^n · fresh_suffix - // Create fresh integer variable n for the exponent - expr_ref fresh_n = mk_fresh_int_var(); - expr* base_expr = base_char->get_expr(); - - // Create the power snode: base^n - euf::snode* fresh_suffix = mk_fresh_var(); - euf::snode* fresh_power = nullptr; - if (base_expr) { - // Build the seq.power(base_str, n) expression and register it - expr_ref base_str(seq.str.mk_unit(base_expr), m); - expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m); - fresh_power = m_sg.mk(power_expr); - } - if (!fresh_power) - fresh_power = mk_fresh_var(); // fallback - - euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix); - - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(var_side, replacement, eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - - // Side constraint: n >= 0 - expr* zero = arith.mk_int(0); - e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep)); - - // Side constraint: len(fresh_suffix) < len(base) = 1 - // This ensures the remainder is shorter than the base character - if (fresh_suffix->get_expr()) { - expr_ref len_suffix(seq.str.mk_length(fresh_suffix->get_expr()), m); - expr* one = arith.mk_int(1); - e->add_side_int(mk_int_constraint(len_suffix, one, int_constraint_kind::le, eq.m_dep)); - e->add_side_int(mk_int_constraint(len_suffix, zero, int_constraint_kind::ge, eq.m_dep)); - } - - return true; + // TODO: Extend to transitive cycles across multiple equations + // (ZIPT's varDep + HasDepCycle). Currently only self-cycles are detected. } return false; } + bool nielsen_graph::fire_gpower_intro( + nielsen_node* node, str_eq const& eq, + euf::snode* var, euf::snode_vector const& ground_prefix) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + seq_util& seq = m_sg.get_seq_util(); + unsigned base_len = ground_prefix.size(); + + // Build base string expression from ground prefix tokens. + // Each s_char snode's get_expr() is already seq.unit(ch) (a string). + expr_ref base_str(m); + for (unsigned i = 0; i < base_len; ++i) { + expr* tok_expr = ground_prefix[i]->get_expr(); + if (!tok_expr) return false; + if (i == 0) + base_str = tok_expr; + else + base_str = seq.str.mk_concat(base_str, tok_expr); + } + + // Create fresh exponent variable and power expression: base^n + expr_ref fresh_n = mk_fresh_int_var(); + expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m); + euf::snode* power_snode = m_sg.mk(power_expr); + if (!power_snode) return false; + + // Create fresh suffix variable + euf::snode* fresh_suffix = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(power_snode, fresh_suffix); + + // Create child node with substitution var → base^n · suffix + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(var, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + + // Side constraint: n >= 0 + expr* zero = arith.mk_int(0); + e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep)); + + // Side constraint: 0 <= len(suffix) < len(base) + if (fresh_suffix->get_expr()) { + expr_ref len_suffix(seq.str.mk_length(fresh_suffix->get_expr()), m); + if (base_len <= 1) { + // len(suffix) < 1 means len(suffix) = 0 + e->add_side_int(mk_int_constraint(len_suffix, zero, int_constraint_kind::eq, eq.m_dep)); + } else { + expr* max_val = arith.mk_int(base_len - 1); + e->add_side_int(mk_int_constraint(len_suffix, max_val, int_constraint_kind::le, eq.m_dep)); + e->add_side_int(mk_int_constraint(len_suffix, zero, int_constraint_kind::ge, eq.m_dep)); + } + } + + return true; + } + // ----------------------------------------------------------------------- // Modifier: apply_regex_var_split // For str_mem x·s ∈ R where x is a variable, split using minterms: diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index a812f90d6..5b7ac2b21 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -793,6 +793,8 @@ namespace seq { // mirrors ZIPT's NielsenGraph.ToDot() std::ostream& to_dot(std::ostream& out) const; + std::string to_dot() const; + // reset all nodes and state void reset(); @@ -894,12 +896,16 @@ namespace seq { // mirrors ZIPT's StarIntrModifier bool apply_star_intr(nielsen_node* node); - // generalized power introduction: for a variable x matched against - // a ground repeated pattern, introduce x = base^n · prefix(base) - // with fresh power variable n and side constraint n >= 0. + // generalized power introduction: for an equation where one head is + // a variable v and the other side has ground prefix + a variable x + // forming a cycle back to v, introduce v = base^n · suffix. // mirrors ZIPT's GPowerIntrModifier bool apply_gpower_intr(nielsen_node* node); + // helper for apply_gpower_intr: fires the substitution + bool fire_gpower_intro(nielsen_node* node, str_eq const& eq, + euf::snode* var, euf::snode_vector const& ground_prefix); + // regex variable split: for str_mem x·s ∈ R where x is a variable, // split using minterms: x → ε, or x → c·x' for each minterm c. // More general than regex_char_split, uses minterm partitioning. diff --git a/src/test/nseq_zipt.cpp b/src/test/nseq_zipt.cpp index 50ad6d9de..ba2c1d2dd 100644 --- a/src/test/nseq_zipt.cpp +++ b/src/test/nseq_zipt.cpp @@ -175,7 +175,7 @@ struct nseq_fixture { euf::snode* R(const char* s) { return rb.parse(s); } }; -static constexpr int TEST_TIMEOUT_SEC = 10; +static constexpr int TEST_TIMEOUT_SEC = 2; static void set_timeout(nseq_fixture& f) { auto start = std::chrono::steady_clock::now(); diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index 69ffb4a0f..1a6321ab2 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -2459,9 +2459,9 @@ static void test_star_intr_with_backedge() { } } -// test_gpower_intr_repeated_chars: x = AAB → GPowerIntr fires (2+ repeated 'A') -static void test_gpower_intr_repeated_chars() { - std::cout << "test_gpower_intr_repeated_chars\n"; +// test_gpower_intr_self_cycle: aX = Xa → self-cycle, GPowerIntr fires +static void test_gpower_intr_self_cycle() { + std::cout << "test_gpower_intr_self_cycle\n"; ast_manager m; reg_decl_plugins(m); euf::egraph eg(m); @@ -2472,23 +2472,24 @@ static void test_gpower_intr_repeated_chars() { euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a1 = sg.mk_char('A'); euf::snode* a2 = sg.mk_char('A'); - euf::snode* b = sg.mk_char('B'); - euf::snode* aab = sg.mk_concat(a1, sg.mk_concat(a2, b)); + euf::snode* lhs = sg.mk_concat(a1, x); // Ax + euf::snode* rhs = sg.mk_concat(x, a2); // xA - // x = AAB → single var vs ground with 2 repeated 'A' at front - ng.add_str_eq(x, aab); + // Ax = xA → variable x appears on both sides with ground prefix 'A' + // GPowerIntr detects self-cycle and introduces x = A^n · suffix + ng.add_str_eq(lhs, rhs); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - // gpower_intr should fire (priority 7), producing 1 child: x = fresh_power · fresh_suffix + SASSERT(ng.stats().m_mod_gpower_intr == 1); SASSERT(root->outgoing().size() == 1); std::cout << " gpower_intr generated " << root->outgoing().size() << " children\n"; } -// test_gpower_intr_no_repeat: x = AB → no repeated pattern → GPowerIntr doesn't fire -static void test_gpower_intr_no_repeat() { - std::cout << "test_gpower_intr_no_repeat\n"; +// test_gpower_intr_no_cycle: aX = Yb → no cycle (X ≠ Y), GPowerIntr doesn't fire +static void test_gpower_intr_no_cycle() { + std::cout << "test_gpower_intr_no_cycle\n"; ast_manager m; reg_decl_plugins(m); euf::egraph eg(m); @@ -2497,19 +2498,21 @@ static void test_gpower_intr_no_repeat() { seq::nielsen_graph ng(sg); euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); - euf::snode* ab = sg.mk_concat(a, b); + euf::snode* lhs = sg.mk_concat(a, x); // Ax + euf::snode* rhs = sg.mk_concat(y, b); // Yb - // x = AB → det fires (x is single var, AB doesn't contain x → x → AB) - ng.add_str_eq(x, ab); + // Ax = Yb → Y is head of RHS, scan LHS: prefix=[A], target=x, but x ≠ y → no cycle + // GPowerIntr does NOT fire; ConstNielsen (priority 8) fires instead + ng.add_str_eq(lhs, rhs); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - // gpower_intr should NOT fire (< 2 repeats) - // det (priority 1) fires: x → AB, 1 child - SASSERT(root->outgoing().size() == 1); + SASSERT(ng.stats().m_mod_gpower_intr == 0); + std::cout << " gpower_intr did not fire (no cycle)\n"; } // test_regex_var_split_basic: x ∈ re → uses minterms for splitting @@ -3167,8 +3170,8 @@ void tst_seq_nielsen() { test_num_cmp_no_power(); test_star_intr_no_backedge(); test_star_intr_with_backedge(); - test_gpower_intr_repeated_chars(); - test_gpower_intr_no_repeat(); + test_gpower_intr_self_cycle(); + test_gpower_intr_no_cycle(); test_regex_var_split_basic(); test_power_split_no_power(); test_var_num_unwinding_no_power();