Files
OLD-server-architecture/ml/routers/datasets.py
Giuseppe Raffa 0ce879aa44 feat: Add new API endpoints and HTML pages for ML model management
- Implemented HTML pages for datasets, models, training, testing, and results.
- Created API endpoints for managing repositories, results, tests, and training sessions.
- Added functionality for streaming training progress via Server-Sent Events (SSE).
- Introduced a Dockerfile for the ML runner with necessary dependencies.
- Developed an SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI layout and navigation.
- Established a layout template for consistent HTML structure across pages.
- Added JavaScript for dynamic interactions on the models page.
- Implemented WebSocket handling for real-time communication with kiosk devices and controllers.
- Implemented model registration and management API at /api/models
- Added Gitea proxy API for repository interactions at /api/repos
- Created results API for listing and comparing training results at /api/results
- Developed training management API for enqueueing and retrieving training jobs at /api/trainings
- Introduced SSE endpoint for live training progress updates
- Added HTML pages for models, datasets, and training management
- Created a Dockerfile for the ML runner with necessary dependencies
- Developed SDK for user code execution within the runner container
- Enhanced CSS styles for improved UI/UX
- Implemented WebSocket communication for real-time device and controller interactions in the kiosk system
2026-04-28 09:24:38 +02:00

161 lines
5.2 KiB
Python

"""API datasets (ml.mebboat.it/api/datasets).
Upload/list/get/download/patch/delete. Storage:
MinIO bucket "ml.datasets" with object key "<uuid>.<ext>"
Postgres db "ml", table "datasets"
"""
from __future__ import annotations
import json
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
from core import db, minio_client
from core.auth import require_auth
# Router for every dataset endpoint; all routes below are mounted under /api/datasets.
router = APIRouter(prefix="/api/datasets", tags=["datasets"])
# Single fixed MinIO bucket for all datasets (object keys carry no prefix).
BUCKET = "ml.datasets"
# Accepted dataset format name -> file extension used when building the object key.
_EXT = {"csv": "csv", "json": "json", "netcdf": "nc"}
def _row(r) -> Optional[dict]:
    """Convert a database record into a JSON-friendly dict.

    Args:
        r: A mapping-like record (anything ``dict()`` accepts), or ``None``.

    Returns:
        ``None`` when ``r`` is ``None``; otherwise a new dict copy of ``r`` in
        which the ``created_at``, ``updated_at``, ``start_date`` and
        ``end_date`` values are replaced by their ``isoformat()`` string
        whenever they are set and expose that method. The input is not
        mutated.
    """
    if r is None:
        return None
    d = dict(r)
    # Temporal columns arrive as date/datetime objects (per the original note
    # about asyncpg); they are the only values converted for JSON output.
    for k in ("created_at", "updated_at", "start_date", "end_date"):
        value = d.get(k)
        if value is not None and hasattr(value, "isoformat"):
            d[k] = value.isoformat()
    return d
@router.get("")
async def list_datasets(
    type: Optional[str] = Query(None),
    tags: Optional[str] = Query(None),
    mine: Optional[int] = Query(None),
    search: Optional[str] = Query(None),
    user=Depends(require_auth),
):
    """List datasets, newest first, with optional filters (at most 500 rows).

    Args:
        type: When set, keep only rows whose ``type`` column equals it.
        tags: Comma-separated tag names; keep rows whose ``tags`` array
            overlaps (``&&``) the given ones. Blank entries are ignored.
        mine: When truthy and the authenticated user has a ``username``,
            keep only rows whose ``created_by`` equals that username.
        search: Case-insensitive substring matched against ``nome`` or
            ``description`` via ``ILIKE``.
        user: Authenticated user mapping supplied by ``require_auth``.

    Returns:
        ``{"count": <number of rows>, "datasets": [<row dict>, ...]}``.
    """
    params: list = []
    clauses: list = []

    def bind(value) -> str:
        # Register a query argument and return its positional placeholder.
        params.append(value)
        return f"${len(params)}"

    if type:
        clauses.append(f"type = {bind(type)}")

    wanted_tags = (
        [piece.strip() for piece in tags.split(",") if piece.strip()] if tags else []
    )
    if wanted_tags:
        clauses.append(f"tags && {bind(wanted_tags)}")

    if mine and user.get("username"):
        clauses.append(f"created_by = {bind(user['username'])}")

    if search:
        # One placeholder is shared by both ILIKE comparisons.
        slot = bind(f"%{search}%")
        clauses.append(f"(nome ILIKE {slot} OR description ILIKE {slot})")

    where_sql = " WHERE " + " AND ".join(clauses) if clauses else ""
    query = "SELECT * FROM datasets" + where_sql + " ORDER BY created_at DESC LIMIT 500"

    records = await db.fetch(query, *params)
    return {"count": len(records), "datasets": [_row(rec) for rec in records]}
@router.post("", status_code=201)
async def upload_dataset(
    file: UploadFile = File(...),
    metadata: str = Form("{}"),
    user=Depends(require_auth),
):
    """Store an uploaded file in MinIO and register it in the ``datasets`` table.

    Args:
        file: The uploaded dataset file (multipart form field).
        metadata: JSON object sent as a form field. Keys read here:
            ``format``/``type`` (csv, json or netcdf; anything else falls
            back to csv), ``nome``, ``description``, ``tags``,
            ``dataset_type``, ``notes``, ``created_by``, and
            ``copernicus_id``/``copernicus_dataset_id``.
        user: Authenticated user mapping supplied by ``require_auth``.

    Returns:
        The inserted row as a JSON-friendly dict.

    Raises:
        HTTPException: 400 when ``metadata`` is not valid JSON or is valid
            JSON but not an object.
    """
    try:
        meta = json.loads(metadata or "{}")
    except json.JSONDecodeError:
        raise HTTPException(400, "metadata must be valid JSON") from None
    # Valid JSON that is not an object (e.g. "[]" or "null") has no .get();
    # reject it up front instead of failing later with a 500.
    if not isinstance(meta, dict):
        raise HTTPException(400, "metadata must be a JSON object")

    fmt = meta.get("format") or meta.get("type") or "csv"
    # _EXT is the single source of truth for accepted formats; unknown or
    # non-string values silently fall back to csv, as before.
    if not isinstance(fmt, str) or fmt not in _EXT:
        fmt = "csv"
    ext = _EXT[fmt]

    ds_id = str(uuid.uuid4())
    file_key = f"{ds_id}.{ext}"
    data = await file.read()
    minio_client.put_bytes(
        file_key,
        data,
        content_type=file.content_type or "application/octet-stream",
        bucket=BUCKET,
    )

    # The authenticated username wins over any client-supplied value.
    created_by = user.get("username") or meta.get("created_by") or "unknown"
    try:
        row = await db.fetchrow(
            """
            INSERT INTO datasets (
                id, file_key, nome, description, tags, type, format, notes,
                created_by, size_bytes, copernicus_id
            ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
            RETURNING *
            """,
            uuid.UUID(ds_id),
            file_key,
            meta.get("nome") or file.filename or file_key,
            meta.get("description"),
            meta.get("tags") or [],
            meta.get("dataset_type") or "custom",
            fmt,
            meta.get("notes"),
            created_by,
            len(data),
            meta.get("copernicus_id") or meta.get("copernicus_dataset_id"),
        )
    except Exception:
        # The object is already in storage; without a row nothing references
        # it. Remove it on a best-effort basis, then surface the original
        # database error (a cleanup failure must not mask it).
        try:
            minio_client.remove(file_key, bucket=BUCKET)
        except Exception:
            pass
        raise
    return _row(row)
@router.get("/{dataset_id}")
async def get_dataset(dataset_id: str, user=Depends(require_auth)):
    """Return the metadata row of a single dataset.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        user: Authenticated user mapping supplied by ``require_auth``.

    Returns:
        The dataset row as a JSON-friendly dict.

    Raises:
        HTTPException: 404 when ``dataset_id`` is not a well-formed UUID or
            no row has that id.
    """
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        # A malformed id cannot match any row; answer 404 rather than letting
        # the ValueError become a 500.
        raise HTTPException(404, "not found") from None
    row = await db.fetchrow("SELECT * FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    return _row(row)
@router.get("/{dataset_id}/download")
async def download_dataset(dataset_id: str, user=Depends(require_auth)):
    """Return a presigned MinIO download URL for a dataset's file.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        user: Authenticated user mapping supplied by ``require_auth``.

    Returns:
        ``{"url": <presigned URL>, "expires_in": 3600}``.

    Raises:
        HTTPException: 404 when ``dataset_id`` is not a well-formed UUID or
            no row has that id.
    """
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        # A malformed id cannot match any row; answer 404 rather than letting
        # the ValueError become a 500.
        raise HTTPException(404, "not found") from None
    row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    # Same value is passed to the presigner and reported to the client
    # (presumably seconds -- confirm against minio_client.presigned_get).
    expires_in = 3600
    url = minio_client.presigned_get(row["file_key"], expires_in, bucket=BUCKET)
    return {"url": url, "expires_in": expires_in}
@router.patch("/{dataset_id}")
async def patch_dataset(dataset_id: str, body: dict, user=Depends(require_auth)):
    """Update the editable metadata fields of a dataset.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        body: JSON object; only the keys ``nome``, ``description``, ``tags``
            and ``notes`` are applied, any other key is ignored.
        user: Authenticated user mapping supplied by ``require_auth``.

    Returns:
        The updated row as a JSON-friendly dict.

    Raises:
        HTTPException: 400 when ``body`` contains no updatable field; 404
            when ``dataset_id`` is not a well-formed UUID or no row has
            that id.
    """
    allowed = {"nome", "description", "tags", "notes"}
    sets = []
    args: list = []
    for k, v in body.items():
        # Only allow-listed column names are interpolated into the SQL text;
        # the values always travel as bound parameters.
        if k in allowed:
            args.append(v)
            sets.append(f"{k} = ${len(args)}")
    if not sets:
        raise HTTPException(400, "no fields to update")
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        # A malformed id cannot match any row; answer 404 rather than letting
        # the ValueError become a 500.
        raise HTTPException(404, "not found") from None
    # The DB has no trigger maintaining updated_at, so it is set manually.
    sets.append("updated_at = NOW()")
    args.append(ds_uuid)
    row = await db.fetchrow(
        f"UPDATE datasets SET {', '.join(sets)} WHERE id = ${len(args)} RETURNING *",
        *args,
    )
    if not row:
        raise HTTPException(404, "not found")
    return _row(row)
@router.delete("/{dataset_id}", status_code=204)
async def delete_dataset(dataset_id: str, user=Depends(require_auth)):
    """Delete a dataset's stored file and its metadata row.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        user: Authenticated user mapping supplied by ``require_auth``.

    Returns:
        ``None`` (the route responds with 204).

    Raises:
        HTTPException: 404 when ``dataset_id`` is not a well-formed UUID or
            no row has that id.
    """
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        # A malformed id cannot match any row; answer 404 rather than letting
        # the ValueError become a 500.
        raise HTTPException(404, "not found") from None
    row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    # The stored object is removed first and the row second, so a failure in
    # either step leaves the row in place and the request can be retried.
    minio_client.remove(row["file_key"], bucket=BUCKET)
    await db.execute("DELETE FROM datasets WHERE id = $1", ds_uuid)
    return None