feat: Add new API endpoints and HTML pages for ML model management
- Implemented HTML pages for datasets, models, training, testing, and results.
- Created API endpoints for managing repositories, results, tests, and training sessions.
- Added functionality for streaming training progress via Server-Sent Events (SSE).
- Introduced a Dockerfile for the ML runner with necessary dependencies.
- Developed an SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI layout and navigation.
- Established a layout template for consistent HTML structure across pages.
- Added JavaScript for dynamic interactions on the models page.
- Implemented WebSocket handling for real-time communication with kiosk devices and controllers.
- Implemented model registration and management API at /api/models
- Added Gitea proxy API for repository interactions at /api/repos
- Created results API for listing and comparing training results at /api/results
- Developed training management API for enqueueing and retrieving training jobs at /api/trainings
- Introduced SSE endpoint for live training progress updates
- Added HTML pages for models, datasets, and training management
- Created a Dockerfile for the ML runner with necessary dependencies
- Developed SDK for user code execution within the runner container
- Enhanced CSS styles for improved UI/UX
- Implemented WebSocket communication for real-time device and controller interactions in the kiosk system
This commit is contained in:
160
ml/routers/datasets.py
Normal file
160
ml/routers/datasets.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""API datasets (ml.mebboat.it/api/datasets).
|
||||
|
||||
Upload/list/get/download/delete. Storage:
|
||||
MinIO bucket "ml.datasets" with key "<uuid>.<ext>" (no key prefix)
|
||||
Postgres db "ml", table "datasets"
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
|
||||
|
||||
from core import db, minio_client
|
||||
from core.auth import require_auth
|
||||
|
||||
# Router for the dataset endpoints; every route below is mounted under
# /api/datasets and tagged "datasets".
router = APIRouter(prefix="/api/datasets", tags=["datasets"])
|
||||
|
||||
# Fixed MinIO bucket for all datasets (no prefix in the keys).
|
||||
BUCKET = "ml.datasets"  # passed as bucket= to the minio_client calls in this module
|
||||
# Maps each accepted dataset format name to the file extension used when
# building the object key on upload.
_EXT = {"csv": "csv", "json": "json", "netcdf": "nc"}
|
||||
|
||||
|
||||
def _row(r) -> dict:
|
||||
if r is None:
|
||||
return None
|
||||
d = dict(r)
|
||||
# asyncpg ritorna JSONB come dict già; date/time come datetime
|
||||
for k in ("created_at", "updated_at", "start_date", "end_date"):
|
||||
if d.get(k) is not None and hasattr(d[k], "isoformat"):
|
||||
d[k] = d[k].isoformat()
|
||||
return d
|
||||
|
||||
|
||||
@router.get("")
async def list_datasets(
    type: Optional[str] = Query(None),
    tags: Optional[str] = Query(None),
    mine: Optional[int] = Query(None),
    search: Optional[str] = Query(None),
    user=Depends(require_auth),
):
    """List datasets, newest first, optionally filtered.

    Args:
        type: When set, keep only rows whose ``type`` column equals it.
        tags: Comma-separated tag names; blank entries are ignored. Rows are
            kept when their ``tags`` column matches the SQL ``&&`` operator
            against the resulting list.
        mine: When truthy (and the authenticated user has a ``username``),
            keep only rows whose ``created_by`` equals that username.
        search: Substring matched case-insensitively (``ILIKE``) against
            ``nome`` and ``description``.
        user: Authenticated user mapping injected by ``require_auth``.

    Returns:
        ``{"count": <number of rows>, "datasets": [<row dict>, ...]}``,
        capped at 500 rows.
    """
    params: list = []
    clauses: list = []

    def bind(value) -> str:
        # Register a query argument and return its positional placeholder.
        params.append(value)
        return f"${len(params)}"

    if type:
        clauses.append(f"type = {bind(type)}")

    wanted_tags = [t.strip() for t in tags.split(",") if t.strip()] if tags else []
    if wanted_tags:
        clauses.append(f"tags && {bind(wanted_tags)}")

    if mine and user.get("username"):
        clauses.append(f"created_by = {bind(user['username'])}")

    if search:
        # The same placeholder is used for both columns.
        pattern = bind(f"%{search}%")
        clauses.append(f"(nome ILIKE {pattern} OR description ILIKE {pattern})")

    query = "SELECT * FROM datasets"
    if clauses:
        query += " WHERE " + " AND ".join(clauses)
    query += " ORDER BY created_at DESC LIMIT 500"

    records = await db.fetch(query, *params)
    return {"count": len(records), "datasets": [_row(rec) for rec in records]}
|
||||
|
||||
|
||||
@router.post("", status_code=201)
async def upload_dataset(
    file: UploadFile = File(...),
    metadata: str = Form("{}"),
    user=Depends(require_auth),
):
    """Store an uploaded file in MinIO and register it in the ``datasets`` table.

    Args:
        file: The uploaded file; it is read fully into memory.
        metadata: JSON object (as a string) with optional keys ``format`` /
            ``type``, ``nome``, ``description``, ``tags``, ``dataset_type``,
            ``notes``, ``created_by`` and ``copernicus_id`` /
            ``copernicus_dataset_id``.
        user: Authenticated user mapping injected by ``require_auth``.

    Returns:
        The inserted row as a dict (see ``_row``).

    Raises:
        HTTPException: 400 when ``metadata`` is not valid JSON, is not a JSON
            object, or carries a ``tags`` value that is not a list.
    """
    try:
        meta = json.loads(metadata or "{}")
    except json.JSONDecodeError:
        raise HTTPException(400, "metadata must be valid JSON")
    # Valid JSON can still be a list/string/number; everything below relies
    # on dict access, so reject those up front instead of failing with a 500.
    if not isinstance(meta, dict):
        raise HTTPException(400, "metadata must be a JSON object")

    tags = meta.get("tags") or []
    if not isinstance(tags, list):
        raise HTTPException(400, "tags must be a list")

    # Unknown or non-string formats silently fall back to csv.
    fmt = meta.get("format") or meta.get("type") or "csv"
    if not isinstance(fmt, str) or fmt not in _EXT:
        fmt = "csv"
    ext = _EXT[fmt]

    ds_id = str(uuid.uuid4())
    file_key = f"{ds_id}.{ext}"

    data = await file.read()
    minio_client.put_bytes(
        file_key,
        data,
        content_type=file.content_type or "application/octet-stream",
        bucket=BUCKET,
    )

    created_by = user.get("username") or meta.get("created_by") or "unknown"
    try:
        row = await db.fetchrow(
            """
            INSERT INTO datasets (
                id, file_key, nome, description, tags, type, format, notes,
                created_by, size_bytes, copernicus_id
            ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
            RETURNING *
            """,
            uuid.UUID(ds_id),
            file_key,
            meta.get("nome") or file.filename or file_key,
            meta.get("description"),
            tags,
            meta.get("dataset_type") or "custom",
            fmt,
            meta.get("notes"),
            created_by,
            len(data),
            meta.get("copernicus_id") or meta.get("copernicus_dataset_id"),
        )
    except Exception:
        # The object is already stored; without a row nothing references it,
        # so remove it (best effort) before propagating the original error.
        try:
            minio_client.remove(file_key, bucket=BUCKET)
        except Exception:
            import logging

            logging.getLogger(__name__).exception(
                "failed to remove orphaned object %s from bucket %s", file_key, BUCKET
            )
        raise
    return _row(row)
|
||||
|
||||
|
||||
@router.get("/{dataset_id}")
async def get_dataset(dataset_id: str, user=Depends(require_auth)):
    """Return a single dataset row by id.

    Args:
        dataset_id: Dataset UUID in string form.
        user: Authenticated user mapping injected by ``require_auth``.

    Returns:
        The row as a dict (see ``_row``).

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID,
            404 when no row has that id.
    """
    # uuid.UUID raises ValueError on malformed input; surface that as a
    # client error instead of an unhandled 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None

    row = await db.fetchrow("SELECT * FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    return _row(row)
|
||||
|
||||
|
||||
@router.get("/{dataset_id}/download")
async def download_dataset(dataset_id: str, user=Depends(require_auth)):
    """Return a presigned download URL for a dataset's stored file.

    Args:
        dataset_id: Dataset UUID in string form.
        user: Authenticated user mapping injected by ``require_auth``.

    Returns:
        ``{"url": <presigned URL>, "expires_in": 3600}``.

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID,
            404 when no row has that id.
    """
    # uuid.UUID raises ValueError on malformed input; surface that as a
    # client error instead of an unhandled 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None

    row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")

    # Presumably seconds, given the "expires_in" response field; confirm
    # against minio_client.presigned_get.
    ttl = 3600
    url = minio_client.presigned_get(row["file_key"], ttl, bucket=BUCKET)
    return {"url": url, "expires_in": ttl}
|
||||
|
||||
|
||||
@router.patch("/{dataset_id}")
async def patch_dataset(dataset_id: str, body: dict, user=Depends(require_auth)):
    """Update the editable fields of a dataset.

    Only ``nome``, ``description``, ``tags`` and ``notes`` are applied; any
    other key in ``body`` is ignored. ``updated_at`` is set to ``NOW()``.

    Args:
        dataset_id: Dataset UUID in string form.
        body: Mapping of field name to new value.
        user: Authenticated user mapping injected by ``require_auth``.

    Returns:
        The updated row as a dict (see ``_row``).

    Raises:
        HTTPException: 400 when ``body`` has no editable field or
            ``dataset_id`` is not a valid UUID, 404 when no row has that id.
    """
    allowed = {"nome", "description", "tags", "notes"}
    updates = [(col, val) for col, val in body.items() if col in allowed]
    if not updates:
        raise HTTPException(400, "no fields to update")

    # uuid.UUID raises ValueError on malformed input; surface that as a
    # client error instead of an unhandled 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None

    # Column names come only from the allow-list above, so interpolating
    # them into the statement is safe; values always go through placeholders.
    sets = [f"{col} = ${i}" for i, (col, _) in enumerate(updates, start=1)]
    args: list = [val for _, val in updates]

    # There is no updated_at trigger in the DB, so bump it manually.
    sets.append("updated_at = NOW()")
    args.append(ds_uuid)

    row = await db.fetchrow(
        f"UPDATE datasets SET {', '.join(sets)} WHERE id = ${len(args)} RETURNING *",
        *args,
    )
    if not row:
        raise HTTPException(404, "not found")
    return _row(row)
|
||||
|
||||
|
||||
@router.delete("/{dataset_id}", status_code=204)
async def delete_dataset(dataset_id: str, user=Depends(require_auth)):
    """Delete a dataset row and its stored object.

    Args:
        dataset_id: Dataset UUID in string form.
        user: Authenticated user mapping injected by ``require_auth``.

    Returns:
        ``None`` (the route responds with 204).

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID,
            404 when no row has that id.
    """
    # uuid.UUID raises ValueError on malformed input; surface that as a
    # client error instead of an unhandled 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None

    # Delete the row first, in a single statement that also yields the key.
    # If the database rejects the delete, the object is left untouched, so a
    # surviving row never points at a file that has already been removed.
    row = await db.fetchrow(
        "DELETE FROM datasets WHERE id = $1 RETURNING file_key", ds_uuid
    )
    if not row:
        raise HTTPException(404, "not found")

    minio_client.remove(row["file_key"], bucket=BUCKET)
    return None
|
||||
Reference in New Issue
Block a user