Added right-to-left rules

2026-06-16 05:45:42 +00:00 · 2026-03-13 17:25:14 +01:00 · 2026-03-13 17:25:14 +01:00 · e384e8f3d4
commit e384e8f3d4
parent ff6534b53e
8 changed files with 817 additions and 578 deletions
--- a/scripts/compare_seq_solvers.py
+++ b/scripts/compare_seq_solvers.py
@ -22,8 +22,9 @@ DEFAULT_TIMEOUT = 5  # seconds
 COMMON_ARGS = ["model_validate=true"]

 SOLVERS = {
-    "nseq": "smt.string_solver=nseq",
-    "seq":  "smt.string_solver=seq",
+    "nseq": ["smt.string_solver=nseq"],
+    "nseq_np": ["smt.string_solver=nseq", "smt.nseq.parikh=false"],
+    "seq":  ["smt.string_solver=seq"],
 }


@ -79,12 +80,12 @@ def _parse_result(output: str) -> str:
    return "timeout"


-def run_z3(z3_bin: str, smt_file: Path, solver_arg: str, timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]:
-    """Run z3 on a file with the given solver argument.
+def run_z3(z3_bin: str, smt_file: Path, solver_args: list[str], timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]:
+    """Run z3 on a file with the given solver arguments.
    Returns (result, elapsed) where result is 'sat', 'unsat', or 'timeout'/'error'.
    """
    timeout_ms = timeout_s * 1000
-    cmd = [z3_bin, f"-t:{timeout_ms}", solver_arg] + COMMON_ARGS + [str(smt_file)]
+    cmd = [z3_bin, f"-t:{timeout_ms}"] + solver_args + COMMON_ARGS + [str(smt_file)]
    start = time.monotonic()
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True,
@ -118,10 +119,19 @@ def run_zipt(zipt_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT) ->
        return f"error:{e}", elapsed


-def classify(res_nseq: str, res_seq: str) -> str:
+def classify(res_nseq: str, res_seq: str, res_nseq_np: str | None = None) -> str:
    """Classify a pair of results into a category."""
    timed_nseq = res_nseq == "timeout"
    timed_seq  = res_seq  == "timeout"
+    
+    if res_nseq_np:
+        timed_nseq_np = res_nseq_np == "timeout"
+        if timed_nseq and timed_seq and timed_nseq_np: return "all_timeout"
+        if not timed_nseq and not timed_seq and not timed_nseq_np:
+            if res_nseq == "unknown" or res_seq == "unknown" or res_nseq_np == "unknown":
+                return "all_terminate_unknown_involved"
+            if res_nseq == res_seq == res_nseq_np: return "all_agree"
+            return "diverge"

    if timed_nseq and timed_seq:
        return "both_timeout"
@ -138,10 +148,15 @@ def classify(res_nseq: str, res_seq: str) -> str:


 def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
-                 zipt_bin: str | None = None) -> dict:
+                 zipt_bin: str | None = None, run_nseq_np: bool = False) -> dict:
    res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"], timeout_s)
    res_seq,  t_seq  = run_z3(z3_bin, smt_file, SOLVERS["seq"], timeout_s)
-    cat = classify(res_nseq, res_seq)
+    
+    res_nseq_np, t_nseq_np = None, None
+    if run_nseq_np:
+        res_nseq_np, t_nseq_np = run_z3(z3_bin, smt_file, SOLVERS["nseq_np"], timeout_s)
+
+    cat = classify(res_nseq, res_seq, res_nseq_np)
    smtlib_status = read_smtlib_status(smt_file)
    status = determine_status(res_nseq, res_seq, smtlib_status)
    result = {
@ -155,6 +170,8 @@ def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
        "status":         status,
        "zipt":           None,
        "t_zipt":         None,
+        "nseq_np":        res_nseq_np,
+        "t_nseq_np":      t_nseq_np,
    }
    if zipt_bin:
        res_zipt, t_zipt = run_zipt(zipt_bin, smt_file, timeout_s)
@ -173,6 +190,8 @@ def main():
                        help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})")
    parser.add_argument("--zipt", metavar="PATH", default=None,
                        help="Path to ZIPT binary (optional; if omitted, ZIPT is not benchmarked)")
+    parser.add_argument("--no-parikh", action="store_true",
+                        help="Also run nseq with nseq.parikh=false")
    parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file")
    args = parser.parse_args()

@ -190,31 +209,39 @@ def main():
        print(f"No {args.ext} files found under {root}", file=sys.stderr)
        sys.exit(1)

-    solvers_label = "nseq, seq" + (", zipt" if zipt_bin else "")
+    solvers_label = "nseq, seq"
+    if args.no_parikh: solvers_label += ", nseq_np"
+    if zipt_bin: solvers_label += ", zipt"
    print(f"Found {len(files)} files. Solvers: {solvers_label}. "
          f"Workers: {args.jobs}, timeout: {timeout_s}s\n")

    results = []
    with ThreadPoolExecutor(max_workers=args.jobs) as pool:
-        futures = {pool.submit(process_file, z3_bin, f, timeout_s, zipt_bin): f for f in files}
+        futures = {pool.submit(process_file, z3_bin, f, timeout_s, zipt_bin, args.no_parikh): f for f in files}
        done = 0
        for fut in as_completed(futures):
            done += 1
            r = fut.result()
            results.append(r)
+            np_part = ""
+            if args.no_parikh:
+                np_part = f"  nseq_np={r['nseq_np']:8s} ({r['t_nseq_np']:.1f}s)"
            zipt_part = ""
            if zipt_bin:
                zipt_part = f"  zipt={r['zipt']:8s} ({r['t_zipt']:.1f}s)"
            print(f"[{done:4d}/{len(files)}] {r['category']:35s}  nseq={r['nseq']:8s} ({r['t_nseq']:.1f}s)  "
-                  f"seq={r['seq']:8s} ({r['t_seq']:.1f}s){zipt_part}  {r['file'].name}")
+                  f"seq={r['seq']:8s} ({r['t_seq']:.1f}s){np_part}{zipt_part}  {r['file'].name}")

    # ── Summary ──────────────────────────────────────────────────────────────
    categories = {
+        "all_timeout":                    [],
        "both_timeout":                   [],
        "only_seq_terminates":            [],
        "only_nseq_terminates":           [],
        "both_agree":                     [],
        "both_terminate_unknown_involved":[],
+        "all_terminate_unknown_involved":[],
+        "all_agree":                      [],
        "diverge":                        [],
    }
    for r in results:
@ -283,9 +310,15 @@ def main():
    if args.csv:
        import csv
        csv_path = Path(args.csv)
-        fieldnames = ["file", "nseq", "seq", "t_nseq", "t_seq", "category", "smtlib_status", "status"]
+        fieldnames = ["file", "nseq", "seq"]
+        if args.no_parikh:
+            fieldnames.append("nseq_np")
+        fieldnames.extend(["t_nseq", "t_seq"])
+        if args.no_parikh:
+            fieldnames.append("t_nseq_np")
+        fieldnames.extend(["category", "smtlib_status", "status"])
        if zipt_bin:
-            fieldnames[4:4] = ["zipt", "t_zipt"]
+            fieldnames.extend(["zipt", "t_zipt"])
        with csv_path.open("w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
--- a/src/params/smt_params.cpp
+++ b/src/params/smt_params.cpp
@ -54,6 +54,7 @@ void smt_params::updt_local_params(params_ref const & _p) {
    m_logic = _p.get_sym("logic", m_logic);
    m_string_solver = p.string_solver();
    m_nseq_max_depth = p.nseq_max_depth();
+    m_nseq_parikh = p.nseq_parikh();
    m_up_persist_clauses = p.up_persist_clauses();
    validate_string_solver(m_string_solver);
    if (_p.get_bool("arith.greatest_error_pivot", false))
--- a/src/params/smt_params.h
+++ b/src/params/smt_params.h
@ -249,6 +249,7 @@ struct smt_params : public preprocessor_params,
    // -----------------------------------
    symbol m_string_solver;
    unsigned m_nseq_max_depth = 0;
+    bool m_nseq_parikh = true;

    smt_params(params_ref const & p = params_ref()):
        m_string_solver(symbol("auto")){
--- a/src/params/smt_params_helper.pyg
+++ b/src/params/smt_params_helper.pyg
@ -124,6 +124,7 @@ def_module_params(module_name='smt',
                          ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'),
                          ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'),
                          ('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'),
+                          ('nseq.parikh', BOOL, True, 'enable Parikh image checks in nseq solver'),
                          ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
                          ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
                          ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -715,6 +715,7 @@ namespace seq {
        unsigned                      m_run_idx = 0;
        unsigned                      m_depth_bound = 0;
        unsigned                      m_max_search_depth = 0;
+        bool                          m_parikh_enabled = true;
        unsigned                      m_next_mem_id = 0;
        unsigned                      m_fresh_cnt = 0;
        unsigned                      m_num_input_eqs = 0;
@ -796,6 +797,9 @@ namespace seq {

        // maximum overall search depth (0 = unlimited)
        void set_max_search_depth(unsigned d) { m_max_search_depth = d; }
+        
+        // enable/disable Parikh image verification constraints
+        void set_parikh_enabled(bool e) { m_parikh_enabled = e; }

        // set a cancellation callback; solve() checks this periodically
        void set_cancel_fn(std::function<bool()> fn) { m_cancel_fn = std::move(fn); }
@ -960,9 +964,11 @@ namespace seq {
        // mirrors ZIPT's GPowerIntrModifier
        bool apply_gpower_intr(nielsen_node* node);

-        // helper for apply_gpower_intr: fires the substitution
+        // helper for apply_gpower_intr: fires the substitution.
+        // `fwd=true` uses left-to-right decomposition; `fwd=false` mirrors ZIPT's
+        // backward (right-to-left) direction.
        bool fire_gpower_intro(nielsen_node* node, str_eq const& eq,
-                               euf::snode* var, euf::snode_vector const& ground_prefix_orig);
+                               euf::snode* var, euf::snode_vector const& ground_prefix_orig, bool fwd);

        // regex variable split: for str_mem x·s ∈ R where x is a variable,
        // split using minterms: x → ε, or x → c·x' for each minterm c.
@ -986,11 +992,13 @@ namespace seq {
        // find the first power token in any str_eq at this node
        euf::snode* find_power_token(nielsen_node* node) const;

-        // find a power token facing a constant (char) head
-        bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
+        // find a power token facing a constant (char/non-var) token at either end
+        // of an equation; returns orientation via `fwd` (true=head, false=tail).
+        bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out, bool& fwd) const;

-        // find a power token facing a variable head
-        bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
+        // find a power token facing a variable token at either end of an
+        // equation; returns orientation via `fwd` (true=head, false=tail).
+        bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out, bool& fwd) const;

        // -----------------------------------------------
        // Integer feasibility subsolver methods
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -416,6 +416,7 @@ namespace smt {
        ++m_num_final_checks;

        m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth);
+        m_nielsen.set_parikh_enabled(get_fparams().m_nseq_parikh);
        IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";);

        // here the actual Nielsen solving happens
--- a/src/test/seq_nielsen.cpp
+++ b/src/test/seq_nielsen.cpp
@ -731,6 +731,56 @@ static void test_const_nielsen_priority_over_eq_split() {
    SASSERT(root->outgoing().size() == 2);
 }

+// test const_nielsen tail direction: x·A = w·y
+// forward heads are vars (x,w), so forward const_nielsen doesn't apply.
+// backward tails are char/var (A,y), so RTL const_nielsen must fire.
+static void test_const_nielsen_tail_char_var() {
+    std::cout << "test_const_nielsen_tail_char_var\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    dummy_simple_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* w = sg.mk_var(symbol("w"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* lhs = sg.mk_concat(x, a); // x·A
+    euf::snode* rhs = sg.mk_concat(w, y); // w·y
+
+    ng.add_str_eq(lhs, rhs);
+    seq::nielsen_node* root = ng.root();
+
+    bool extended = ng.generate_extensions(root);
+    SASSERT(extended);
+    SASSERT(root->outgoing().size() == 2);
+
+    bool saw_empty = false;
+    bool saw_tail = false;
+    for (seq::nielsen_edge* e : root->outgoing()) {
+        SASSERT(e->subst().size() == 1);
+        seq::nielsen_subst const& s = e->subst()[0];
+        SASSERT(s.m_var == y);
+        if (s.m_replacement && s.m_replacement->is_empty()) {
+            saw_empty = true;
+            SASSERT(e->is_progress());
+        }
+        else {
+            euf::snode_vector toks;
+            s.m_replacement->collect_tokens(toks);
+            SASSERT(toks.size() == 2);
+            SASSERT(toks[0]->is_var() && toks[0]->id() == y->id());
+            SASSERT(toks[1]->is_char() && toks[1]->id() == a->id());
+            saw_tail = true;
+            SASSERT(!e->is_progress());
+        }
+    }
+    SASSERT(saw_empty && saw_tail);
+}
+
 // test: both sides start with vars → var_nielsen (3 children), not const_nielsen
 static void test_const_nielsen_not_applicable_both_vars() {
    std::cout << "test_const_nielsen_not_applicable_both_vars\n";
@ -1606,6 +1656,35 @@ static void test_simplify_prefix_cancel() {
    SASSERT(node->str_eqs()[0].m_rhs->is_var());
 }

+// test simplify_and_init: suffix cancellation of matching chars (RTL)
+static void test_simplify_suffix_cancel_rtl() {
+    std::cout << "test_simplify_suffix_cancel_rtl\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    dummy_simple_solver solver;
+    seq::nielsen_graph ng(sg, solver);
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+
+    // x·A = y·A → suffix cancel A (RTL) → x = y
+    euf::snode* xa = sg.mk_concat(x, a);
+    euf::snode* ya = sg.mk_concat(y, a);
+
+    seq::nielsen_node* node = ng.mk_node();
+    seq::dep_tracker dep; dep.insert(0);
+    node->add_str_eq(seq::str_eq(xa, ya, dep));
+
+    auto sr = node->simplify_and_init(ng);
+    SASSERT(sr == seq::simplify_result::proceed);
+    SASSERT(node->str_eqs().size() == 1);
+    SASSERT(node->str_eqs()[0].m_lhs->is_var());
+    SASSERT(node->str_eqs()[0].m_rhs->is_var());
+}
+
 // test simplify_and_init: symbol clash at first position
 static void test_simplify_symbol_clash() {
    std::cout << "test_simplify_symbol_clash\n";
@ -1831,6 +1910,44 @@ static void test_simplify_brzozowski_sat() {
    SASSERT(sr == seq::simplify_result::satisfied);
 }

+// test simplify_and_init: backward Brzozowski consumes a trailing char (RTL)
+static void test_simplify_brzozowski_rtl_suffix() {
+    std::cout << "test_simplify_brzozowski_rtl_suffix\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+
+    dummy_simple_solver solver;    seq::nielsen_graph ng(sg, solver);
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* xa = sg.mk_concat(x, a);
+    euf::snode* e = sg.mk_empty();
+
+    expr_ref ch_b(seq.str.mk_char('B'), m);
+    expr_ref unit_b(seq.str.mk_unit(ch_b), m);
+    expr_ref ch_a(seq.str.mk_char('A'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref ba(seq.str.mk_concat(unit_b, unit_a), m);
+    expr_ref to_re_ba(seq.re.mk_to_re(ba), m);
+    euf::snode* regex = sg.mk(to_re_ba);
+
+    // x·"A" ∈ to_re("BA") → RTL consume trailing 'A' → x ∈ to_re("B")
+    seq::nielsen_node* node = ng.mk_node();
+    seq::dep_tracker dep; dep.insert(0);
+    node->add_str_mem(seq::str_mem(xa, regex, e, 0, dep));
+
+    auto sr = node->simplify_and_init(ng);
+    SASSERT(sr == seq::simplify_result::proceed);
+    SASSERT(node->str_mems().size() == 1);
+    SASSERT(node->str_mems()[0].m_str->is_var());
+    SASSERT(node->str_mems()[0].m_str->id() == x->id());
+
+    euf::snode* deriv_b = sg.brzozowski_deriv(node->str_mems()[0].m_regex, sg.mk_char('B'));
+    SASSERT(deriv_b && deriv_b->is_nullable());
+}
+
 // test simplify_and_init: multiple eqs with mixed status
 static void test_simplify_multiple_eqs() {
    std::cout << "test_simplify_multiple_eqs\n";
@ -3541,6 +3658,7 @@ void tst_seq_nielsen() {
    test_const_nielsen_solve_sat();
    test_const_nielsen_solve_unsat();
    test_const_nielsen_priority_over_eq_split();
+    test_const_nielsen_tail_char_var();
    test_const_nielsen_not_applicable_both_vars();
    test_const_nielsen_multi_char_solve();
    test_var_nielsen_basic();
@ -3574,6 +3692,7 @@ void tst_seq_nielsen() {
    test_solve_children_failed_reason();
    test_solve_eval_idx_tracking();
    test_simplify_prefix_cancel();
+    test_simplify_suffix_cancel_rtl();
    test_simplify_symbol_clash();
    test_simplify_empty_propagation();
    test_simplify_empty_vs_char();
@ -3583,6 +3702,7 @@ void tst_seq_nielsen() {
    test_simplify_regex_infeasible();
    test_simplify_nullable_removal();
    test_simplify_brzozowski_sat();
+    test_simplify_brzozowski_rtl_suffix();
    test_simplify_multiple_eqs();
    test_det_cancel_child_eq();
    test_const_nielsen_child_substitutions();