"""API datasets (ml.mebboat.it/api/datasets). Upload/list/get/download/delete. Storage: MinIO bucket "ml" con key "datasets/." Postgres db "ml" tabella "datasets" """ from __future__ import annotations import json import uuid from typing import Optional from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile from core import db, minio_client from core.auth import require_auth router = APIRouter(prefix="/api/datasets", tags=["datasets"]) # Bucket MinIO fisso per tutti i dataset (no prefix nelle key). BUCKET = "ml.datasets" _EXT = {"csv": "csv", "json": "json", "netcdf": "nc"} def _row(r) -> dict: if r is None: return None d = dict(r) # asyncpg ritorna JSONB come dict giĆ ; date/time come datetime for k in ("created_at", "updated_at", "start_date", "end_date"): if d.get(k) is not None and hasattr(d[k], "isoformat"): d[k] = d[k].isoformat() return d @router.get("") async def list_datasets( type: Optional[str] = Query(None), tags: Optional[str] = Query(None), mine: Optional[int] = Query(None), search: Optional[str] = Query(None), user=Depends(require_auth), ): where = [] args: list = [] if type: args.append(type) where.append(f"type = ${len(args)}") if tags: tag_arr = [t.strip() for t in tags.split(",") if t.strip()] if tag_arr: args.append(tag_arr) where.append(f"tags && ${len(args)}") if mine and user.get("username"): args.append(user["username"]) where.append(f"created_by = ${len(args)}") if search: args.append(f"%{search}%") where.append(f"(nome ILIKE ${len(args)} OR description ILIKE ${len(args)})") sql = "SELECT * FROM datasets" if where: sql += " WHERE " + " AND ".join(where) sql += " ORDER BY created_at DESC LIMIT 500" rows = await db.fetch(sql, *args) return {"count": len(rows), "datasets": [_row(r) for r in rows]} @router.post("", status_code=201) async def upload_dataset( file: UploadFile = File(...), metadata: str = Form("{}"), user=Depends(require_auth), ): try: meta = json.loads(metadata or "{}") except json.JSONDecodeError: raise HTTPException(400, "metadata must be valid JSON") fmt = meta.get("format") or meta.get("type") or "csv" if fmt not in ("csv", "json", "netcdf"): fmt = "csv" ext = _EXT[fmt] ds_id = str(uuid.uuid4()) file_key = f"{ds_id}.{ext}" data = await file.read() minio_client.put_bytes(file_key, data, content_type=file.content_type or "application/octet-stream", bucket=BUCKET) created_by = user.get("username") or meta.get("created_by") or "unknown" row = await db.fetchrow( """ INSERT INTO datasets ( id, file_key, nome, description, tags, type, format, notes, created_by, size_bytes, copernicus_id ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) RETURNING * """, uuid.UUID(ds_id), file_key, meta.get("nome") or file.filename or file_key, meta.get("description"), meta.get("tags") or [], meta.get("dataset_type") or "custom", fmt, meta.get("notes"), created_by, len(data), meta.get("copernicus_id") or meta.get("copernicus_dataset_id"), ) return _row(row) @router.get("/{dataset_id}") async def get_dataset(dataset_id: str, user=Depends(require_auth)): row = await db.fetchrow("SELECT * FROM datasets WHERE id = $1", uuid.UUID(dataset_id)) if not row: raise HTTPException(404, "not found") return _row(row) @router.get("/{dataset_id}/download") async def download_dataset(dataset_id: str, user=Depends(require_auth)): row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", uuid.UUID(dataset_id)) if not row: raise HTTPException(404, "not found") url = minio_client.presigned_get(row["file_key"], 3600, bucket=BUCKET) return {"url": url, "expires_in": 3600} @router.patch("/{dataset_id}") async def patch_dataset(dataset_id: str, body: dict, user=Depends(require_auth)): allowed = {"nome", "description", "tags", "notes"} sets = [] args: list = [] for k, v in body.items(): if k in allowed: args.append(v) sets.append(f"{k} = ${len(args)}") if not sets: raise HTTPException(400, "no fields to update") # Trigger updated_at non presente nel DB: lo aggiorniamo manualmente. sets.append("updated_at = NOW()") args.append(uuid.UUID(dataset_id)) row = await db.fetchrow( f"UPDATE datasets SET {', '.join(sets)} WHERE id = ${len(args)} RETURNING *", *args, ) if not row: raise HTTPException(404, "not found") return _row(row) @router.delete("/{dataset_id}", status_code=204) async def delete_dataset(dataset_id: str, user=Depends(require_auth)): row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", uuid.UUID(dataset_id)) if not row: raise HTTPException(404, "not found") minio_client.remove(row["file_key"], bucket=BUCKET) await db.execute("DELETE FROM datasets WHERE id = $1", uuid.UUID(dataset_id)) return None