If a record in the table is deleted, will the associated attachments and images be cleaned up? Furthermore, if I compress an image and upload the new version, will the old image be cleared out?
When you delete a record with an image in it, the file is NOT deleted. You must delete the file using the file management in the base.
Can I use a script or modify the configuration to batch process and remove unreferenced attachments?
Will the --rm-fs option clean up all orphaned attachments that are no longer linked to any table entries?
Yes, you can write a script for the job.
We are also working on a plugin that can do the job. This plugin will be released in the foreseeable future.
cat > /root/script/cleanup_unreferenced_base_assets.py <<'PY'
#!/usr/bin/env python3
from seatable_api import Base
import argparse, csv, json, os, sys, urllib.parse, urllib.request, urllib.error
from collections import defaultdict
def norm_path(p, base_uuid=""):
if not p:
return ""
p = str(p).strip()
if p.startswith("http://") or p.startswith("https://"):
decoded = urllib.parse.unquote(p)
markers = [
f"/asset-preview/{base_uuid}/",
f"/asset/{base_uuid}/",
f"/dtable-asset/{base_uuid}/",
]
for m in markers:
if m in decoded:
p = decoded.split(m, 1)[1]
break
p = urllib.parse.unquote(p)
p = p.split("?", 1)[0]
p = p.lstrip("/")
return p
def parent_and_name(path):
path = "/" + path.strip("/")
parent, name = path.rsplit("/", 1)
return parent or "/", name
def auth_base(server, base_api_token):
base = Base(base_api_token, server)
base.auth()
return base
def iter_rows(base, table_name, page_size=1000):
start = 0
while True:
rows = base.list_rows(table_name, start=start, limit=page_size)
if not rows:
break
yield from rows
if len(rows) < page_size:
break
start += page_size
def collect_referenced(base):
base_uuid = base.dtable_uuid
referenced = defaultdict(list)
metadata = base.get_metadata()
for table in metadata.get("tables", []):
table_name = table["name"]
cols = [
(col["name"], col.get("type"))
for col in table.get("columns", [])
if col.get("type") in ("file", "image")
]
if not cols:
continue
for row in iter_rows(base, table_name):
row_id = row.get("_id", "")
for col_name, col_type in cols:
value = row.get(col_name) or []
if col_type == "file":
for item in value:
if isinstance(item, dict) and item.get("url"):
path = norm_path(item.get("url"), base_uuid)
referenced[path].append({
"table": table_name,
"row_id": row_id,
"column": col_name,
"type": "file",
"name": item.get("name", ""),
"size": item.get("size", ""),
})
if col_type == "image":
for url in value:
path = norm_path(url, base_uuid)
referenced[path].append({
"table": table_name,
"row_id": row_id,
"column": col_name,
"type": "image",
"name": "",
"size": "",
})
return referenced
def request_json(method, url, account_token, body=None):
data = None
headers = {
"Authorization": f"Bearer {account_token}",
"Accept": "application/json",
}
if body is not None:
data = json.dumps(body, ensure_ascii=False).encode("utf-8")
headers["Content-Type"] = "application/json"
req = urllib.request.Request(url, data=data, headers=headers, method=method)
try:
with urllib.request.urlopen(req, timeout=60) as resp:
raw = resp.read().decode("utf-8")
return json.loads(raw) if raw else {}
except urllib.error.HTTPError as e:
detail = e.read().decode("utf-8", errors="replace")
raise RuntimeError(f"HTTP {e.code} {e.reason}: {detail}") from e
def list_assets(server, account_token, base_uuid):
assets = []
def list_dir(parent_dir):
url = (
server.rstrip("/")
+ f"/api/v2.1/dtable-asset/{base_uuid}/"
+ "?parent_dir="
+ urllib.parse.quote(parent_dir, safe="")
)
data = request_json("GET", url, account_token)
for item in data.get("dirent_list", []):
name = item.get("obj_name") or item.get("name")
if not name:
continue
child = parent_dir.rstrip("/") + "/" + name if parent_dir != "/" else "/" + name
if item.get("is_file") is True:
assets.append({
"path": norm_path(child, base_uuid),
"size": item.get("file_size") or item.get("size") or "",
"last_update": item.get("last_update", ""),
})
else:
list_dir(child)
list_dir("/")
return assets
def write_tsv(path, header, rows):
with open(path, "w", encoding="utf-8", newline="") as f:
w = csv.writer(f, delimiter="\t")
w.writerow(header)
w.writerows(rows)
def delete_assets(server, account_token, base_uuid, unreferenced):
grouped = defaultdict(list)
for item in unreferenced:
parent, name = parent_and_name(item["path"])
grouped[parent].append(name)
deleted = 0
for parent, names in grouped.items():
for i in range(0, len(names), 100):
batch = names[i:i + 100]
url = server.rstrip("/") + f"/api/v2.1/dtable-asset/{base_uuid}/batch-delete-assets/"
body = {
"parent_path": parent,
"asset_names": batch,
}
request_json("DELETE", url, account_token, body)
deleted += len(batch)
print(f"deleted {len(batch)} from {parent}", file=sys.stderr)
return deleted
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--server", default=os.environ.get("SEATABLE_SERVER", "https://table.fsghro.com"))
ap.add_argument("--account-token", default=os.environ.get("ACCOUNT_TOKEN"))
ap.add_argument("--base-api-token", default=os.environ.get("BASE_API_TOKEN"))
ap.add_argument("--out-dir", default="/root/script")
ap.add_argument("--execute", action="store_true", help="actually delete unreferenced assets")
ap.add_argument("--yes", action="store_true", help="required together with --execute")
args = ap.parse_args()
if not args.account_token:
raise SystemExit("missing ACCOUNT_TOKEN")
if not args.base_api_token:
raise SystemExit("missing BASE_API_TOKEN")
os.makedirs(args.out_dir, exist_ok=True)
base = auth_base(args.server, args.base_api_token)
base_uuid = base.dtable_uuid
print(f"base_uuid: {base_uuid}", file=sys.stderr)
print("collecting referenced files from tables...", file=sys.stderr)
referenced = collect_referenced(base)
print("listing all base assets...", file=sys.stderr)
assets = list_assets(args.server, args.account_token, base_uuid)
asset_paths = {a["path"] for a in assets}
ref_paths = set(referenced.keys())
unreferenced = [a for a in assets if a["path"] not in ref_paths]
missing_assets = sorted(ref_paths - asset_paths)
write_tsv(
os.path.join(args.out_dir, "base_referenced_assets.tsv"),
["path", "reference_count", "tables", "columns", "sample_rows"],
[
[
path,
len(refs),
",".join(sorted({r["table"] for r in refs})),
",".join(sorted({r["column"] for r in refs})),
",".join(r["row_id"] for r in refs[:20]),
]
for path, refs in sorted(referenced.items())
],
)
write_tsv(
os.path.join(args.out_dir, "base_all_assets.tsv"),
["path", "size", "last_update", "is_referenced", "reference_count"],
[
[
a["path"],
a["size"],
a["last_update"],
"yes" if a["path"] in referenced else "no",
len(referenced.get(a["path"], [])),
]
for a in sorted(assets, key=lambda x: x["path"])
],
)
write_tsv(
os.path.join(args.out_dir, "base_unreferenced_assets.tsv"),
["path", "size", "last_update"],
[[a["path"], a["size"], a["last_update"]] for a in sorted(unreferenced, key=lambda x: x["path"])],
)
write_tsv(
os.path.join(args.out_dir, "base_referenced_but_missing_assets.tsv"),
["path"],
[[p] for p in missing_assets],
)
print(f"all assets: {len(assets)}")
print(f"referenced paths: {len(referenced)}")
print(f"unreferenced assets: {len(unreferenced)}")
print(f"referenced but missing in asset dir: {len(missing_assets)}")
print(f"wrote: {args.out_dir}/base_all_assets.tsv")
print(f"wrote: {args.out_dir}/base_referenced_assets.tsv")
print(f"wrote: {args.out_dir}/base_unreferenced_assets.tsv")
if not args.execute:
print("DRY RUN: nothing deleted. Add --execute --yes to delete unreferenced assets.")
return
if not args.yes:
raise SystemExit("refusing to delete without --yes")
deleted = delete_assets(args.server, args.account_token, base_uuid, unreferenced)
print(f"deleted assets: {deleted}")
if __name__ == "__main__":
main()
PY
chmod +x /root/script/cleanup_unreferenced_base_assets.py
export SEATABLE_SERVER="https://abc.test.com"
export ACCOUNT_TOKEN="your account_token"
export BASE_API_TOKEN="your base_api_token"
dry run
python3 /root/script/cleanup_unreferenced_base_assets.py
python3 /root/script/cleanup_unreferenced_base_assets.py --execute --yes
Tested and works. Nice. Thanks AI。
Is this code usable? Are there any hidden risks I should know about?
Please understand that I (or anyone from the SeaTable team) will not review an AI-generated script of this size.
If someone else on the forum is willing to commit the time, you are lucky.