feat: Add new API endpoints and HTML pages for ML model management

- Implemented HTML pages for datasets, models, training, testing, and results.
- Created API endpoints for managing repositories, results, tests, and training sessions.
- Added functionality for streaming training progress via Server-Sent Events (SSE).
- Introduced a Dockerfile for the ML runner with necessary dependencies.
- Developed an SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI layout and navigation.
- Established a layout template for consistent HTML structure across pages.
- Added JavaScript for dynamic interactions on the models page.
- Implemented WebSocket handling for real-time communication with kiosk devices and controllers.
- Implemented model registration and management API at /api/models.
- Added Gitea proxy API for repository interactions at /api/repos.
- Created results API for listing and comparing training results at /api/results.
- Developed training management API for enqueueing and retrieving training jobs at /api/trainings.
- Introduced SSE endpoint for live training progress updates.
- Added HTML pages for models, datasets, and training management.
- Created a Dockerfile for the ML runner with necessary dependencies.
- Developed SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI/UX.
- Implemented WebSocket communication for real-time device and controller interactions in the kiosk system.
2026-04-28 09:24:38 +02:00
parent ee478e52ef
commit 0ce879aa44
81 changed files with 7491 additions and 746 deletions
--- a/copernicus/routers/jobs.py
+++ b/copernicus/routers/jobs.py
@@ -7,6 +7,7 @@ Flusso:

 import json
 import os
+import threading
 import uuid
 from typing import Any, Dict

@@ -24,6 +25,13 @@ API_URL = os.getenv("API_SERVICE_URL", "http://api:3003")
 # TTL per lo stato dei job: 48 ore (i job completati vengono puliti automaticamente)
 _JOB_TTL = 48 * 3600

+# Limite di download Copernicus concorrenti. Le subset() dell'SDK sono
+# CPU + memoria intensive (xarray + netCDF + pandas conversion) e sul server
+# le risorse sono limitate. Senza semaforo, N utenti che cliccano insieme
+# saturano la RAM e fanno OOM-kill del processo.
+_DOWNLOAD_CONCURRENCY = int(os.getenv("MARINE_DOWNLOAD_CONCURRENCY", "2"))
+_download_semaphore = threading.BoundedSemaphore(_DOWNLOAD_CONCURRENCY)
+

 def _job_key(session_id: str) -> str:
    """Genera la chiave Redis per un job."""
@@ -42,7 +50,7 @@ def _set_job(session_id: str, **kwargs):
    if job is None:
        return
    job.update(kwargs)
-    cache_set(_job_key(session_id), job, _JOB_TTL)
+    cache_set(_job_key(session_id), job, _JOB_TTL, disk_ttl=0)


 def _run_download(session_id: str, req: DownloadJobRequest, username: str, user_token: str):
@@ -55,20 +63,26 @@ def _run_download(session_id: str, req: DownloadJobRequest, username: str, user_
        _set_job(session_id, progress=pct, message=msg)

    try:
-        _set_job(session_id, status="downloading", progress=5, message="Scarico da Copernicus Marine...")
+        _set_job(session_id, status="queued", progress=2, message="In coda (max concorrenti raggiunto)...")

-        # Scarica dati dal catalogo Copernicus
-        df = copernicus.download_dataset(
-            dataset_id=req.dataset_id,
-            variables=req.variables,
-            min_longitude=req.min_longitude,
-            max_longitude=req.max_longitude,
-            min_latitude=req.min_latitude,
-            max_latitude=req.max_latitude,
-            start_datetime=req.start_date,
-            end_datetime=req.end_date,
-            progress_callback=progress,
-        )
+        # Acquisisce uno slot di download (blocca se già al limite). Garantisce
+        # che il numero di chiamate Copernicus simultanee non superi
+        # MARINE_DOWNLOAD_CONCURRENCY, proteggendo CPU/RAM del server.
+        with _download_semaphore:
+            _set_job(session_id, status="downloading", progress=5, message="Scarico da Copernicus Marine...")
+
+            # Scarica dati dal catalogo Copernicus
+            df = copernicus.download_dataset(
+                dataset_id=req.dataset_id,
+                variables=req.variables,
+                min_longitude=req.min_longitude,
+                max_longitude=req.max_longitude,
+                min_latitude=req.min_latitude,
+                max_latitude=req.max_latitude,
+                start_datetime=req.start_date,
+                end_datetime=req.end_date,
+                progress_callback=progress,
+            )

        _set_job(session_id, status="converting", progress=80, message="Creo il file...")

@@ -85,7 +99,7 @@ def _run_download(session_id: str, req: DownloadJobRequest, username: str, user_
            "created_by":            username,
            "type":                  req.format,
            "notes":                 req.notes,
-            "copernicus_dataset_id": req.dataset_id,
+            "copernicus_id":         req.dataset_id,
            "variables":             req.variables,
            "variable_renames":      req.variable_renames,
            "bbox":                  [req.min_longitude, req.min_latitude, req.max_longitude, req.max_latitude],
@@ -129,7 +143,7 @@ async def new_download_session(
        "message": "In coda",
        "dataset_id": None,
    }
-    cache_set(_job_key(session_id), initial_state, _JOB_TTL)
+    cache_set(_job_key(session_id), initial_state, _JOB_TTL, disk_ttl=0)

    # Avvia il download in background
    background_tasks.add_task(_run_download, session_id, req, user["username"], user["token"])