From baf118c8388294d253e5e8de81ccb046e3ec6bfe Mon Sep 17 00:00:00 2001 From: Krystine Sherwin <93062060+KrystalDelusion@users.noreply.github.com> Date: Wed, 9 Jul 2025 09:59:22 +1200 Subject: [PATCH 1/2] Try to remove database on -f If the database is open (based on the presence of certain files), skip deletion. There is a (very) small window where another process *could* try to open the database at the same time that it's being deleted, but it will then fail during the database setup with `sqlite3.OperationalError: disk I/O error`, but given the failure is immediate I think it's fine. --- sbysrc/sby.py | 8 +++++++- sbysrc/sby_status.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sbysrc/sby.py b/sbysrc/sby.py index 0deefc1..843fabd 100644 --- a/sbysrc/sby.py +++ b/sbysrc/sby.py @@ -22,7 +22,7 @@ import json, os, sys, shutil, tempfile, re from sby_cmdline import parser_func from sby_core import SbyConfig, SbyTask, SbyAbort, SbyTaskloop, process_filename, dress_message from sby_jobserver import SbyJobClient, process_jobserver_environment -from sby_status import SbyStatusDb +from sby_status import SbyStatusDb, remove_db, FileInUseError import time, platform, click release_version = 'unknown SBY version' @@ -464,6 +464,12 @@ def start_task(taskloop, taskname): print("*", file=gitignore) with open(f"{my_workdir}/status.path", "w") as status_path: print(my_status_db, file=status_path) + if os.path.exists(f"{my_workdir}/{my_status_db}") and opt_force: + try: + remove_db(f"{my_workdir}/{my_status_db}") + except FileInUseError: + # don't delete an open database + pass junit_ts_name = os.path.basename(sbyfile[:-4]) if sbyfile is not None else workdir if workdir is not None else "stdin" junit_tc_name = taskname if taskname is not None else "default" diff --git a/sbysrc/sby_status.py b/sbysrc/sby_status.py index 26352ec..dcd41e6 100644 --- a/sbysrc/sby_status.py +++ b/sbysrc/sby_status.py @@ -100,6 +100,10 @@ def transaction(method: Fn) -> Fn: return wrapper # type: ignore +class FileInUseError(Exception): + def __init__(self, *args, file: Path|str = "file"): + super().__init__(f"Found {file}, try again later", *args) + class SbyStatusDb: def __init__(self, path: Path, task, timeout: float = 5.0, live_csv = False): @@ -529,3 +533,11 @@ def filter_latest_task_ids(all_tasks: dict[int, dict[str]]): for task_id, task_dict in all_tasks.items(): latest[task_dict["workdir"]] = task_id return list(latest.values()) + +def remove_db(path): + path = Path(path) + lock_exts = [".sqlite-wal", ".sqlite-shm"] + for lock_file in [path.with_suffix(ext) for ext in lock_exts]: + if lock_file.exists(): + raise FileInUseError(file=lock_file) + os.remove(path) From a251ec052444099748a4b194d40e4c0624528355 Mon Sep 17 00:00:00 2001 From: Krystine Sherwin <93062060+KrystalDelusion@users.noreply.github.com> Date: Wed, 9 Jul 2025 09:59:23 +1200 Subject: [PATCH 2/2] Handle unreliable lock files --- sbysrc/sby_status.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/sbysrc/sby_status.py b/sbysrc/sby_status.py index dcd41e6..e0aca1f 100644 --- a/sbysrc/sby_status.py +++ b/sbysrc/sby_status.py @@ -537,7 +537,35 @@ def filter_latest_task_ids(all_tasks: dict[int, dict[str]]): def remove_db(path): path = Path(path) lock_exts = [".sqlite-wal", ".sqlite-shm"] + maybe_locked = False for lock_file in [path.with_suffix(ext) for ext in lock_exts]: if lock_file.exists(): - raise FileInUseError(file=lock_file) - os.remove(path) + # lock file may be a false positive if it wasn't cleaned up + maybe_locked = True + break + + if not maybe_locked: + # safe to delete + os.remove(path) + return + + # test database directly + with sqlite3.connect(path, isolation_level="EXCLUSIVE", timeout=1) as con: + cur = con.cursor() + # single result rows + cur.row_factory = lambda _, r: r[0] + + # changing journal_mode is disallowed if there are multiple connections + try: + cur.execute("PRAGMA journal_mode=DELETE") + except sqlite3.OperationalError as err: + if "database is locked" in err.args[0]: + raise FileInUseError(file=path) + else: + raise + + # no other connections, delete all tables + drop_script = cur.execute("SELECT name FROM sqlite_master WHERE type = 'table';").fetchall() + for table in drop_script: + print(table) + cur.execute(f"DROP TABLE {table}")