feat: Add new API endpoints and HTML pages for ML model management

- Implemented HTML pages for datasets, models, training, testing, and results.
- Created API endpoints for managing repositories, results, tests, and training sessions.
- Added functionality for streaming training progress via Server-Sent Events (SSE).
- Introduced a Dockerfile for the ML runner with necessary dependencies.
- Developed an SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI layout and navigation.
- Established a layout template for consistent HTML structure across pages.
- Added JavaScript for dynamic interactions on the models page.
- Implemented WebSocket handling for real-time communication with kiosk devices and controllers.
- Implemented model registration and management API at /api/models
- Added Gitea proxy API for repository interactions at /api/repos
- Created results API for listing and comparing training results at /api/results
- Developed training management API for enqueueing and retrieving training jobs at /api/trainings
- Introduced SSE endpoint for live training progress updates
- Added HTML pages for models, datasets, and training management
- Created a Dockerfile for the ML runner with necessary dependencies
- Developed SDK for user code execution within the runner container
- Enhanced CSS styles for improved UI/UX
- Implemented WebSocket communication for real-time device and controller interactions in the kiosk system
This commit is contained in:
Giuseppe Raffa
2026-04-28 09:24:38 +02:00
parent ee478e52ef
commit 0ce879aa44
81 changed files with 7491 additions and 746 deletions

View File

@@ -7,6 +7,7 @@ Flusso:
import json
import os
import threading
import uuid
from typing import Any, Dict
@@ -24,6 +25,13 @@ API_URL = os.getenv("API_SERVICE_URL", "http://api:3003")
# TTL per lo stato dei job: 48 ore (i job completati vengono puliti automaticamente)
_JOB_TTL = 48 * 3600
# Maximum number of concurrent Copernicus downloads. The SDK's subset() calls
# are CPU- and memory-intensive (xarray + netCDF + pandas conversion) and the
# server has limited resources. Without a semaphore, N users clicking at the
# same time saturate the RAM and get the process OOM-killed.
#
# The value is read from MARINE_DOWNLOAD_CONCURRENCY (default: 2) and clamped
# to a minimum of 1: a value of 0 would create a semaphore with no free slots,
# so every job would block forever while acquiring it, and a negative value
# would make BoundedSemaphore raise ValueError at import time.
_DOWNLOAD_CONCURRENCY = max(1, int(os.getenv("MARINE_DOWNLOAD_CONCURRENCY", "2")))
_download_semaphore = threading.BoundedSemaphore(_DOWNLOAD_CONCURRENCY)
def _job_key(session_id: str) -> str:
"""Genera la chiave Redis per un job."""
@@ -42,7 +50,7 @@ def _set_job(session_id: str, **kwargs):
if job is None:
return
job.update(kwargs)
cache_set(_job_key(session_id), job, _JOB_TTL)
cache_set(_job_key(session_id), job, _JOB_TTL, disk_ttl=0)
def _run_download(session_id: str, req: DownloadJobRequest, username: str, user_token: str):
@@ -55,20 +63,26 @@ def _run_download(session_id: str, req: DownloadJobRequest, username: str, user_
_set_job(session_id, progress=pct, message=msg)
try:
_set_job(session_id, status="downloading", progress=5, message="Scarico da Copernicus Marine...")
_set_job(session_id, status="queued", progress=2, message="In coda (max concorrenti raggiunto)...")
# Scarica dati dal catalogo Copernicus
df = copernicus.download_dataset(
dataset_id=req.dataset_id,
variables=req.variables,
min_longitude=req.min_longitude,
max_longitude=req.max_longitude,
min_latitude=req.min_latitude,
max_latitude=req.max_latitude,
start_datetime=req.start_date,
end_datetime=req.end_date,
progress_callback=progress,
)
# Acquisisce uno slot di download (blocca se già al limite). Garantisce
# che il numero di chiamate Copernicus simultanee non superi
# MARINE_DOWNLOAD_CONCURRENCY, proteggendo CPU/RAM del server.
with _download_semaphore:
_set_job(session_id, status="downloading", progress=5, message="Scarico da Copernicus Marine...")
# Scarica dati dal catalogo Copernicus
df = copernicus.download_dataset(
dataset_id=req.dataset_id,
variables=req.variables,
min_longitude=req.min_longitude,
max_longitude=req.max_longitude,
min_latitude=req.min_latitude,
max_latitude=req.max_latitude,
start_datetime=req.start_date,
end_datetime=req.end_date,
progress_callback=progress,
)
_set_job(session_id, status="converting", progress=80, message="Creo il file...")
@@ -85,7 +99,7 @@ def _run_download(session_id: str, req: DownloadJobRequest, username: str, user_
"created_by": username,
"type": req.format,
"notes": req.notes,
"copernicus_dataset_id": req.dataset_id,
"copernicus_id": req.dataset_id,
"variables": req.variables,
"variable_renames": req.variable_renames,
"bbox": [req.min_longitude, req.min_latitude, req.max_longitude, req.max_latitude],
@@ -129,7 +143,7 @@ async def new_download_session(
"message": "In coda",
"dataset_id": None,
}
cache_set(_job_key(session_id), initial_state, _JOB_TTL)
cache_set(_job_key(session_id), initial_state, _JOB_TTL, disk_ttl=0)
# Avvia il download in background
background_tasks.add_task(_run_download, session_id, req, user["username"], user["token"])