Questions Regarding Image Attachments

If a record in the table is deleted, will the associated attachments and images be cleaned up? Furthermore, if I compress an image and upload the new version, will the old image be cleared out?

When you delete a record with an image in it, the file is NOT deleted. You must delete the file using the file management in the base.

Can I use a script or modify the configuration to batch process and remove unreferenced attachments?

Will the --rm-fs option clean up all orphaned attachments that are no longer linked to any table entries?

Yes, you can write a script for the job.

We are also working on a plugin that can do the job. This plugin will be released in the foreseeable future.

cat > /root/script/cleanup_unreferenced_base_assets.py <<'PY'
#!/usr/bin/env python3
from seatable_api import Base
import argparse, csv, json, os, sys, urllib.parse, urllib.request, urllib.error
from collections import defaultdict

def norm_path(p, base_uuid=""):
    if not p:
        return ""
    p = str(p).strip()
    if p.startswith("http://") or p.startswith("https://"):
        decoded = urllib.parse.unquote(p)
        markers = [
            f"/asset-preview/{base_uuid}/",
            f"/asset/{base_uuid}/",
            f"/dtable-asset/{base_uuid}/",
        ]
        for m in markers:
            if m in decoded:
                p = decoded.split(m, 1)[1]
                break
    p = urllib.parse.unquote(p)
    p = p.split("?", 1)[0]
    p = p.lstrip("/")
    return p

def parent_and_name(path):
    path = "/" + path.strip("/")
    parent, name = path.rsplit("/", 1)
    return parent or "/", name

def auth_base(server, base_api_token):
    base = Base(base_api_token, server)
    base.auth()
    return base

def iter_rows(base, table_name, page_size=1000):
    start = 0
    while True:
        rows = base.list_rows(table_name, start=start, limit=page_size)
        if not rows:
            break
        yield from rows
        if len(rows) < page_size:
            break
        start += page_size

def collect_referenced(base):
    base_uuid = base.dtable_uuid
    referenced = defaultdict(list)
    metadata = base.get_metadata()

    for table in metadata.get("tables", []):
        table_name = table["name"]
        cols = [
            (col["name"], col.get("type"))
            for col in table.get("columns", [])
            if col.get("type") in ("file", "image")
        ]
        if not cols:
            continue

        for row in iter_rows(base, table_name):
            row_id = row.get("_id", "")
            for col_name, col_type in cols:
                value = row.get(col_name) or []

                if col_type == "file":
                    for item in value:
                        if isinstance(item, dict) and item.get("url"):
                            path = norm_path(item.get("url"), base_uuid)
                            referenced[path].append({
                                "table": table_name,
                                "row_id": row_id,
                                "column": col_name,
                                "type": "file",
                                "name": item.get("name", ""),
                                "size": item.get("size", ""),
                            })

                if col_type == "image":
                    for url in value:
                        path = norm_path(url, base_uuid)
                        referenced[path].append({
                            "table": table_name,
                            "row_id": row_id,
                            "column": col_name,
                            "type": "image",
                            "name": "",
                            "size": "",
                        })

    return referenced

def request_json(method, url, account_token, body=None):
    data = None
    headers = {
        "Authorization": f"Bearer {account_token}",
        "Accept": "application/json",
    }
    if body is not None:
        data = json.dumps(body, ensure_ascii=False).encode("utf-8")
        headers["Content-Type"] = "application/json"

    req = urllib.request.Request(url, data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            raw = resp.read().decode("utf-8")
            return json.loads(raw) if raw else {}
    except urllib.error.HTTPError as e:
        detail = e.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {e.code} {e.reason}: {detail}") from e

def list_assets(server, account_token, base_uuid):
    assets = []

    def list_dir(parent_dir):
        url = (
            server.rstrip("/")
            + f"/api/v2.1/dtable-asset/{base_uuid}/"
            + "?parent_dir="
            + urllib.parse.quote(parent_dir, safe="")
        )
        data = request_json("GET", url, account_token)
        for item in data.get("dirent_list", []):
            name = item.get("obj_name") or item.get("name")
            if not name:
                continue

            child = parent_dir.rstrip("/") + "/" + name if parent_dir != "/" else "/" + name

            if item.get("is_file") is True:
                assets.append({
                    "path": norm_path(child, base_uuid),
                    "size": item.get("file_size") or item.get("size") or "",
                    "last_update": item.get("last_update", ""),
                })
            else:
                list_dir(child)

    list_dir("/")
    return assets

def write_tsv(path, header, rows):
    with open(path, "w", encoding="utf-8", newline="") as f:
        w = csv.writer(f, delimiter="\t")
        w.writerow(header)
        w.writerows(rows)

def delete_assets(server, account_token, base_uuid, unreferenced):
    grouped = defaultdict(list)
    for item in unreferenced:
        parent, name = parent_and_name(item["path"])
        grouped[parent].append(name)

    deleted = 0
    for parent, names in grouped.items():
        for i in range(0, len(names), 100):
            batch = names[i:i + 100]
            url = server.rstrip("/") + f"/api/v2.1/dtable-asset/{base_uuid}/batch-delete-assets/"
            body = {
                "parent_path": parent,
                "asset_names": batch,
            }
            request_json("DELETE", url, account_token, body)
            deleted += len(batch)
            print(f"deleted {len(batch)} from {parent}", file=sys.stderr)

    return deleted

def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--server", default=os.environ.get("SEATABLE_SERVER", "https://table.fsghro.com"))
    ap.add_argument("--account-token", default=os.environ.get("ACCOUNT_TOKEN"))
    ap.add_argument("--base-api-token", default=os.environ.get("BASE_API_TOKEN"))
    ap.add_argument("--out-dir", default="/root/script")
    ap.add_argument("--execute", action="store_true", help="actually delete unreferenced assets")
    ap.add_argument("--yes", action="store_true", help="required together with --execute")
    args = ap.parse_args()

    if not args.account_token:
        raise SystemExit("missing ACCOUNT_TOKEN")
    if not args.base_api_token:
        raise SystemExit("missing BASE_API_TOKEN")

    os.makedirs(args.out_dir, exist_ok=True)

    base = auth_base(args.server, args.base_api_token)
    base_uuid = base.dtable_uuid

    print(f"base_uuid: {base_uuid}", file=sys.stderr)
    print("collecting referenced files from tables...", file=sys.stderr)
    referenced = collect_referenced(base)

    print("listing all base assets...", file=sys.stderr)
    assets = list_assets(args.server, args.account_token, base_uuid)

    asset_paths = {a["path"] for a in assets}
    ref_paths = set(referenced.keys())

    unreferenced = [a for a in assets if a["path"] not in ref_paths]
    missing_assets = sorted(ref_paths - asset_paths)

    write_tsv(
        os.path.join(args.out_dir, "base_referenced_assets.tsv"),
        ["path", "reference_count", "tables", "columns", "sample_rows"],
        [
            [
                path,
                len(refs),
                ",".join(sorted({r["table"] for r in refs})),
                ",".join(sorted({r["column"] for r in refs})),
                ",".join(r["row_id"] for r in refs[:20]),
            ]
            for path, refs in sorted(referenced.items())
        ],
    )

    write_tsv(
        os.path.join(args.out_dir, "base_all_assets.tsv"),
        ["path", "size", "last_update", "is_referenced", "reference_count"],
        [
            [
                a["path"],
                a["size"],
                a["last_update"],
                "yes" if a["path"] in referenced else "no",
                len(referenced.get(a["path"], [])),
            ]
            for a in sorted(assets, key=lambda x: x["path"])
        ],
    )

    write_tsv(
        os.path.join(args.out_dir, "base_unreferenced_assets.tsv"),
        ["path", "size", "last_update"],
        [[a["path"], a["size"], a["last_update"]] for a in sorted(unreferenced, key=lambda x: x["path"])],
    )

    write_tsv(
        os.path.join(args.out_dir, "base_referenced_but_missing_assets.tsv"),
        ["path"],
        [[p] for p in missing_assets],
    )

    print(f"all assets: {len(assets)}")
    print(f"referenced paths: {len(referenced)}")
    print(f"unreferenced assets: {len(unreferenced)}")
    print(f"referenced but missing in asset dir: {len(missing_assets)}")
    print(f"wrote: {args.out_dir}/base_all_assets.tsv")
    print(f"wrote: {args.out_dir}/base_referenced_assets.tsv")
    print(f"wrote: {args.out_dir}/base_unreferenced_assets.tsv")

    if not args.execute:
        print("DRY RUN: nothing deleted. Add --execute --yes to delete unreferenced assets.")
        return

    if not args.yes:
        raise SystemExit("refusing to delete without --yes")

    deleted = delete_assets(args.server, args.account_token, base_uuid, unreferenced)
    print(f"deleted assets: {deleted}")

if __name__ == "__main__":
    main()
PY

chmod +x /root/script/cleanup_unreferenced_base_assets.py

export SEATABLE_SERVER="https://abc.test.com"
export ACCOUNT_TOKEN="your account_token"
export BASE_API_TOKEN="your base_api_token"

dry run
python3 /root/script/cleanup_unreferenced_base_assets.py 

python3 /root/script/cleanup_unreferenced_base_assets.py --execute --yes

Tested and works. Nice. Thanks AI。

Is this code usable? Are there any hidden risks I should know about?

Please understand that I (or anyone from the SeaTable team) will not review an AI-generated script of this size.

If someone else on the forum is willing to commit the time, you are lucky.