- Implemented HTML pages for datasets, models, training, testing, and results. - Created API endpoints for managing repositories, results, tests, and training sessions. - Added functionality for streaming training progress via Server-Sent Events (SSE). - Introduced a Dockerfile for the ML runner with necessary dependencies. - Developed an SDK for user code execution within the runner container. - Enhanced CSS styles for improved UI layout and navigation. - Established a layout template for consistent HTML structure across pages. - Added JavaScript for dynamic interactions on the models page. - Implemented WebSocket handling for real-time communication with kiosk devices and controllers. - Implemented model registration and management API at /api/models - Added Gitea proxy API for repository interactions at /api/repos - Created results API for listing and comparing training results at /api/results - Developed training management API for enqueueing and retrieving training jobs at /api/trainings - Introduced SSE endpoint for live training progress updates - Added HTML pages for models, datasets, and training management - Created a Dockerfile for the ML runner with necessary dependencies - Developed SDK for user code execution within the runner container - Enhanced CSS styles for improved UI/UX - Implemented WebSocket communication for real-time device and controller interactions in the kiosk system
161 lines
5.2 KiB
Python
161 lines
5.2 KiB
Python
"""API datasets (ml.mebboat.it/api/datasets).
|
|
|
|
Upload/list/get/download/patch/delete. Storage:
  - MinIO bucket "ml.datasets", object key "<uuid>.<ext>" (no key prefix)
  - Postgres database "ml", table "datasets"
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import uuid
|
|
from typing import Optional
|
|
|
|
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
|
|
|
|
from core import db, minio_client
|
|
from core.auth import require_auth
|
|
|
|
# Single fixed MinIO bucket shared by every dataset (object keys carry no prefix).
BUCKET = "ml.datasets"

# Dataset format name -> file extension used when building the object key.
_EXT = {
    "csv": "csv",
    "json": "json",
    "netcdf": "nc",
}

# Router collecting every dataset endpoint under /api/datasets.
router = APIRouter(prefix="/api/datasets", tags=["datasets"])
|
|
|
|
|
|
def _row(r) -> Optional[dict]:
    """Convert a database record into a plain dict with ISO-formatted dates.

    Args:
        r: A mapping-like record (anything ``dict()`` accepts), or ``None``.

    Returns:
        ``None`` when ``r`` is ``None``; otherwise a new dict built from the
        record in which the ``created_at``, ``updated_at``, ``start_date``
        and ``end_date`` values are replaced by their ``isoformat()`` string.
        The input record itself is not modified.
    """
    if r is None:
        return None
    d = dict(r)
    # Per the original note, JSONB columns are expected to arrive already
    # decoded as dicts and date/time columns as datetime objects
    # (presumably depends on how the asyncpg pool is configured -- verify).
    for k in ("created_at", "updated_at", "start_date", "end_date"):
        value = d.get(k)
        # NULLs and values without ``isoformat`` (e.g. strings) pass through.
        if value is not None and hasattr(value, "isoformat"):
            d[k] = value.isoformat()
    return d
|
|
|
|
|
|
@router.get("")
async def list_datasets(
    type: Optional[str] = Query(None),
    tags: Optional[str] = Query(None),
    mine: Optional[int] = Query(None),
    search: Optional[str] = Query(None),
    user=Depends(require_auth),
):
    """List datasets, newest first, with optional filters (max 500 rows).

    Args:
        type: Keep only rows whose ``type`` column equals this value.
        tags: Comma-separated tags; a row matches when its ``tags`` overlap
            the given list (SQL ``&&``). Blank entries are ignored.
        mine: When truthy, keep only rows created by the authenticated user
            (applied only if the user dict carries a ``username``).
        search: Case-insensitive substring matched against ``nome`` and
            ``description``. The text is matched literally.
        user: Authenticated user injected by ``require_auth``.

    Returns:
        ``{"count": <number of rows>, "datasets": [<row dict>, ...]}``.
    """
    where: list = []
    args: list = []

    def bind(value) -> str:
        """Register a bound value and return its ``$n`` placeholder."""
        args.append(value)
        return f"${len(args)}"

    if type:
        where.append(f"type = {bind(type)}")
    if tags:
        tag_arr = [t.strip() for t in tags.split(",") if t.strip()]
        if tag_arr:
            where.append(f"tags && {bind(tag_arr)}")
    if mine and user.get("username"):
        where.append(f"created_by = {bind(user['username'])}")
    if search:
        # Escape LIKE metacharacters so user text such as "50%" or "a_b" is
        # matched literally instead of acting as a wildcard. Backslash is
        # PostgreSQL's default LIKE escape character, so it goes first.
        escaped = (
            search.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        placeholder = bind(f"%{escaped}%")
        where.append(
            f"(nome ILIKE {placeholder} OR description ILIKE {placeholder})"
        )

    sql = "SELECT * FROM datasets"
    if where:
        sql += " WHERE " + " AND ".join(where)
    sql += " ORDER BY created_at DESC LIMIT 500"
    rows = await db.fetch(sql, *args)
    return {"count": len(rows), "datasets": [_row(r) for r in rows]}
|
|
|
|
|
|
@router.post("", status_code=201)
async def upload_dataset(
    file: UploadFile = File(...),
    metadata: str = Form("{}"),
    user=Depends(require_auth),
):
    """Store an uploaded file in MinIO and register it in the datasets table.

    Args:
        file: The uploaded dataset file (read fully into memory).
        metadata: JSON object encoded as a form string. Keys read here:
            ``format``/``type``, ``nome``, ``description``, ``tags``,
            ``dataset_type``, ``notes``, ``created_by``, ``copernicus_id``/
            ``copernicus_dataset_id``.
        user: Authenticated user injected by ``require_auth``.

    Returns:
        The inserted row as a dict (see ``_row``), sent with HTTP 201.

    Raises:
        HTTPException: 400 when ``metadata`` is not valid JSON, is not a JSON
            object, or ``tags`` is not a list of strings.
    """
    try:
        meta = json.loads(metadata or "{}")
    except json.JSONDecodeError:
        raise HTTPException(400, "metadata must be valid JSON") from None
    # Valid JSON that is not an object ([], null, "x", 3) has no ``.get`` and
    # would otherwise crash with an AttributeError (HTTP 500).
    if not isinstance(meta, dict):
        raise HTTPException(400, "metadata must be a JSON object")

    # Validate before uploading so a bad value cannot leave a stored blob.
    tags = meta.get("tags") or []
    if not isinstance(tags, list) or not all(isinstance(t, str) for t in tags):
        raise HTTPException(400, "tags must be a list of strings")

    # Unknown or missing formats silently fall back to CSV.
    fmt = meta.get("format") or meta.get("type") or "csv"
    if not isinstance(fmt, str) or fmt not in _EXT:
        fmt = "csv"
    ds_id = str(uuid.uuid4())
    file_key = f"{ds_id}.{_EXT[fmt]}"

    data = await file.read()
    minio_client.put_bytes(
        file_key,
        data,
        content_type=file.content_type or "application/octet-stream",
        bucket=BUCKET,
    )

    # The authenticated username wins; metadata is only a fallback.
    created_by = user.get("username") or meta.get("created_by") or "unknown"
    try:
        row = await db.fetchrow(
            """
            INSERT INTO datasets (
                id, file_key, nome, description, tags, type, format, notes,
                created_by, size_bytes, copernicus_id
            ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
            RETURNING *
            """,
            uuid.UUID(ds_id),
            file_key,
            meta.get("nome") or file.filename or file_key,
            meta.get("description"),
            tags,
            meta.get("dataset_type") or "custom",
            fmt,
            meta.get("notes"),
            created_by,
            len(data),
            meta.get("copernicus_id") or meta.get("copernicus_dataset_id"),
        )
    except Exception:
        # The blob is already stored: remove it so a failed insert does not
        # leave an orphan object. Cleanup is best-effort and logged; the
        # original database error is the one that propagates.
        import logging

        try:
            minio_client.remove(file_key, bucket=BUCKET)
        except Exception:
            logging.getLogger(__name__).exception(
                "could not remove orphan object %s after failed insert", file_key
            )
        raise
    return _row(row)
|
|
|
|
|
|
@router.get("/{dataset_id}")
async def get_dataset(dataset_id: str, user=Depends(require_auth)):
    """Return the metadata row of a single dataset.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        user: Authenticated user injected by ``require_auth``.

    Returns:
        The dataset row as a dict (see ``_row``).

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID, 404 when
            no row has that id.
    """
    # uuid.UUID raises ValueError on malformed input; without this guard a
    # bad path segment would surface as an HTTP 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None
    row = await db.fetchrow("SELECT * FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    return _row(row)
|
|
|
|
|
|
@router.get("/{dataset_id}/download")
async def download_dataset(dataset_id: str, user=Depends(require_auth)):
    """Return a presigned MinIO URL for downloading the dataset file.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        user: Authenticated user injected by ``require_auth``.

    Returns:
        ``{"url": <presigned URL>, "expires_in": 3600}``.

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID, 404 when
            no row has that id.
    """
    # uuid.UUID raises ValueError on malformed input; without this guard a
    # bad path segment would surface as an HTTP 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None
    row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    # One value feeds both the presign call and the response so they cannot
    # drift apart (presumably seconds -- confirm against minio_client).
    expires_in = 3600
    url = minio_client.presigned_get(row["file_key"], expires_in, bucket=BUCKET)
    return {"url": url, "expires_in": expires_in}
|
|
|
|
|
|
@router.patch("/{dataset_id}")
async def patch_dataset(dataset_id: str, body: dict, user=Depends(require_auth)):
    """Update the editable metadata fields of a dataset.

    Only ``nome``, ``description``, ``tags`` and ``notes`` are applied; any
    other key in ``body`` is ignored.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        body: JSON object with the fields to change.
        user: Authenticated user injected by ``require_auth``.

    Returns:
        The updated row as a dict (see ``_row``).

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID or ``body``
            holds no editable field, 404 when no row has that id.
    """
    # NOTE(review): no ownership check here -- any authenticated user can
    # edit any dataset. Confirm this is intended.
    # uuid.UUID raises ValueError on malformed input; without this guard a
    # bad path segment would surface as an HTTP 500.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None

    allowed = {"nome", "description", "tags", "notes"}
    sets = []
    args: list = []
    for k, v in body.items():
        # Column names go into the SQL text, so only whitelisted keys are
        # accepted; the values always travel as bound parameters.
        if k in allowed:
            args.append(v)
            sets.append(f"{k} = ${len(args)}")
    if not sets:
        raise HTTPException(400, "no fields to update")
    # The database has no updated_at trigger, so it is refreshed manually.
    sets.append("updated_at = NOW()")
    args.append(ds_uuid)
    row = await db.fetchrow(
        f"UPDATE datasets SET {', '.join(sets)} WHERE id = ${len(args)} RETURNING *",
        *args,
    )
    if not row:
        raise HTTPException(404, "not found")
    return _row(row)
|
|
|
|
|
|
@router.delete("/{dataset_id}", status_code=204)
async def delete_dataset(dataset_id: str, user=Depends(require_auth)):
    """Delete a dataset: first its MinIO object, then its database row.

    Args:
        dataset_id: Dataset UUID taken from the URL path.
        user: Authenticated user injected by ``require_auth``.

    Returns:
        ``None`` (HTTP 204, empty body).

    Raises:
        HTTPException: 400 when ``dataset_id`` is not a valid UUID, 404 when
            no row has that id.
    """
    # NOTE(review): no ownership check here -- any authenticated user can
    # delete any dataset. Confirm this is intended.
    # uuid.UUID raises ValueError on malformed input; without this guard a
    # bad path segment would surface as an HTTP 500. Parsed once, used twice.
    try:
        ds_uuid = uuid.UUID(dataset_id)
    except ValueError:
        raise HTTPException(400, "invalid dataset id") from None
    row = await db.fetchrow("SELECT file_key FROM datasets WHERE id = $1", ds_uuid)
    if not row:
        raise HTTPException(404, "not found")
    # The object goes first: if the row deletion then fails, the row is still
    # there and the whole request can be retried.
    minio_client.remove(row["file_key"], bucket=BUCKET)
    await db.execute("DELETE FROM datasets WHERE id = $1", ds_uuid)
    return None
|