feat: Add new API endpoints and HTML pages for ML model management

- Implemented HTML pages for datasets, models, training, testing, and results.
- Created API endpoints for managing repositories, results, tests, and training sessions.
- Added functionality for streaming training progress via Server-Sent Events (SSE).
- Introduced a Dockerfile for the ML runner with necessary dependencies.
- Developed an SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI layout and navigation.
- Established a layout template for consistent HTML structure across pages.
- Added JavaScript for dynamic interactions on the models page.
- Implemented WebSocket handling for real-time communication with kiosk devices and controllers.
- Implemented model registration and management API at /api/models
- Added Gitea proxy API for repository interactions at /api/repos
- Created results API for listing and comparing training results at /api/results
- Developed training management API for enqueueing and retrieving training jobs at /api/trainings
- Introduced SSE endpoint for live training progress updates
- Added HTML pages for models, datasets, and training management
- Created a Dockerfile for the ML runner with necessary dependencies
- Developed SDK for user code execution within the runner container
- Enhanced CSS styles for improved UI/UX
- Implemented WebSocket communication for real-time device and controller interactions in the kiosk system
This commit is contained in:
Giuseppe Raffa
2026-04-28 09:24:38 +02:00
parent ee478e52ef
commit 0ce879aa44
81 changed files with 7491 additions and 746 deletions

View File

@@ -1,125 +1,292 @@
"""
Redis Keys:
- marine:catalog:full → lista dei dataset completo (TTL 1h)
- marine:catalog:search:{hash} → risultati ricerca (TTL 30min)
- marine:job:{session_id} → stato job download (TTL 48h)
Cache two-tier per il servizio Marine.
L1 = Redis (RAM): scadenza 2 ore, velocissima, condivisa tra processi.
L2 = SQLite+disco: persistente (200GB), fallback quando Redis non c'è
o quando L1 è scaduta. Scadenza configurabile (default 30 giorni).
Flusso lettura:
1. Prova L1 (Redis). Se hit → ritorna.
2. Prova L2 (SQLite). Se hit non scaduta → ritorna E ripopola L1 (re-warm).
3. Miss totale → None.
Flusso scrittura:
Scrive in entrambi i tier contemporaneamente.
Chiavi standard:
- marine:catalog:full → lista completa dataset Copernicus
- marine:catalog:search:{hash} → risultati ricerca utente
- marine:job:{session_id} → stato job download (solo Redis, ephemeri)
"""
import gzip
import json
import os
import logging
import os
import sqlite3
import threading
import time
from pathlib import Path
from typing import Any, Optional
import redis
logger = logging.getLogger(__name__)
# Configurazione Redis da variabili ambiente
# ── Config ───────────────────────────────────────────────────────────────
REDIS_HOST = os.getenv("REDIS_HOST", "meb-redis")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
# Pool di connessioni condiviso (thread-safe, riutilizzabile)
# Il volume persistente è montato dal container, default /app/cache
CACHE_DIR = Path(os.getenv("CACHE_DIR", "/app/cache"))
CACHE_DB = CACHE_DIR / "catalog.sqlite"
BLOB_DIR = CACHE_DIR / "blobs"
# TTL default
DEFAULT_REDIS_TTL = 2 * 3600 # 2 ore (L1)
DEFAULT_DISK_TTL = 30 * 24 * 3600 # 30 giorni (L2)
# Soglia sopra la quale il valore va in un file su disco invece che in sqlite
BLOB_THRESHOLD_BYTES = 64 * 1024 # 64 KB
# ── Stato globale ────────────────────────────────────────────────────────
_pool: Optional[redis.ConnectionPool] = None
_client: Optional[redis.Redis] = None
_redis_disabled = False
_sqlite_lock = threading.Lock()
_sqlite_initialized = False
def _get_client() -> Optional[redis.Redis]:
"""Restituisce il client Redis singleton con connection pool.
Ritorna None se Redis non è raggiungibile."""
global _pool, _client
# ── Redis (L1) ───────────────────────────────────────────────────────────
def _get_redis() -> Optional[redis.Redis]:
global _pool, _client, _redis_disabled
if _redis_disabled:
return None
if _client is not None:
return _client
try:
_pool = redis.ConnectionPool(
host=REDIS_HOST,
port=REDIS_PORT,
# Decodifica automatica delle risposte in stringhe UTF-8
decode_responses=True,
# Massimo 5 connessioni nel pool (VPS 1-core, non serve di più)
decode_responses=False, # tratto blob binari (gzip)
max_connections=5,
# Timeout connessione e socket per evitare blocchi
socket_connect_timeout=3,
socket_timeout=3,
# Riprova automaticamente se la connessione viene interrotta
retry_on_timeout=True,
)
_client = redis.Redis(connection_pool=_pool)
# Test connessione
_client.ping()
logger.info("[Redis] Connessione stabilita per il servizio Marine")
logger.info("[Cache] Redis L1 connesso")
return _client
except Exception as e:
logger.warning(f"[Redis] Non disponibile, la cache è disabilitata: {e}")
logger.warning(f"[Cache] Redis non disponibile, uso solo disco: {e}")
_redis_disabled = True
_client = None
return None
def cache_get(key: str) -> Optional[Any]:
"""Legge un valore dalla cache Redis.
# ── SQLite (L2) ──────────────────────────────────────────────────────────
def _ensure_sqlite() -> sqlite3.Connection:
"""Apre/crea il db SQLite su disco. Crea anche la dir blob."""
global _sqlite_initialized
CACHE_DIR.mkdir(parents=True, exist_ok=True)
BLOB_DIR.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(CACHE_DB), timeout=5.0, isolation_level=None)
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA synchronous=NORMAL")
if not _sqlite_initialized:
conn.execute("""
CREATE TABLE IF NOT EXISTS cache (
key TEXT PRIMARY KEY,
expires_at INTEGER NOT NULL,
is_blob INTEGER NOT NULL DEFAULT 0,
value BLOB,
blob_path TEXT,
size_bytes INTEGER NOT NULL,
updated_at INTEGER NOT NULL
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_expires ON cache(expires_at)")
_sqlite_initialized = True
return conn
Args:
key: Chiave Redis (es. 'marine:catalog:full')
Returns:
Il valore deserializzato da JSON, oppure None se non trovato o errore
"""
def _blob_path(key: str) -> Path:
# Nome file safe: solo caratteri alfanumerici + hash per unicità
safe = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in key)
return BLOB_DIR / f"{safe}.json.gz"
def _disk_get(key: str) -> Optional[Any]:
try:
client = _get_client()
if client is None:
with _sqlite_lock:
conn = _ensure_sqlite()
row = conn.execute(
"SELECT expires_at, is_blob, value, blob_path FROM cache WHERE key = ?",
(key,)
).fetchone()
if row is None:
return None
data = client.get(key)
if data is None:
expires_at, is_blob, value, blob_path = row
if expires_at < int(time.time()):
# Scaduta: la elimino in lazy
_disk_delete(key)
return None
return json.loads(data)
if is_blob:
data = Path(blob_path).read_bytes()
else:
data = value
return json.loads(gzip.decompress(data).decode("utf-8"))
except Exception as e:
logger.warning(f"[Redis] Errore lettura chiave '{key}': {e}")
logger.warning(f"[Cache] Errore lettura disco '{key}': {e}")
return None
def cache_set(key: str, value: Any, ttl: int = 3600) -> bool:
"""Scrive un valore nella cache Redis con TTL.
Args:
key: Chiave Redis
value: Valore da serializzare in JSON
ttl: Tempo di vita in secondi (default: 1 ora)
Returns:
True se scritto con successo, False altrimenti
"""
def _disk_set(key: str, raw_gz: bytes, ttl: int) -> None:
try:
client = _get_client()
if client is None:
return False
serialized = json.dumps(value)
client.setex(key, ttl, serialized)
return True
expires_at = int(time.time()) + ttl
updated_at = int(time.time())
size = len(raw_gz)
if size > BLOB_THRESHOLD_BYTES:
path = _blob_path(key)
path.write_bytes(raw_gz)
with _sqlite_lock:
conn = _ensure_sqlite()
conn.execute(
"INSERT OR REPLACE INTO cache(key, expires_at, is_blob, value, blob_path, size_bytes, updated_at) "
"VALUES(?,?,?,?,?,?,?)",
(key, expires_at, 1, None, str(path), size, updated_at)
)
else:
with _sqlite_lock:
conn = _ensure_sqlite()
conn.execute(
"INSERT OR REPLACE INTO cache(key, expires_at, is_blob, value, blob_path, size_bytes, updated_at) "
"VALUES(?,?,?,?,?,?,?)",
(key, expires_at, 0, raw_gz, None, size, updated_at)
)
except Exception as e:
logger.warning(f"[Redis] Errore scrittura chiave '{key}': {e}")
logger.warning(f"[Cache] Errore scrittura disco '{key}': {e}")
def _disk_delete(key: str) -> None:
try:
with _sqlite_lock:
conn = _ensure_sqlite()
row = conn.execute("SELECT blob_path FROM cache WHERE key = ?", (key,)).fetchone()
conn.execute("DELETE FROM cache WHERE key = ?", (key,))
if row and row[0]:
try:
Path(row[0]).unlink(missing_ok=True)
except Exception:
pass
except Exception as e:
logger.warning(f"[Cache] Errore delete disco '{key}': {e}")
# ── API pubblica ─────────────────────────────────────────────────────────
def cache_get(key: str) -> Optional[Any]:
"""Legge L1 → L2. Se L2 hit, ripopola L1 (re-warm)."""
# L1
client = _get_redis()
if client is not None:
try:
raw = client.get(key)
if raw is not None:
return json.loads(gzip.decompress(raw).decode("utf-8"))
except Exception as e:
logger.warning(f"[Cache] Errore Redis '{key}': {e}")
# L2
value = _disk_get(key)
if value is not None and client is not None:
# Re-warm L1 con TTL standard
try:
raw_gz = gzip.compress(json.dumps(value).encode("utf-8"))
client.setex(key, DEFAULT_REDIS_TTL, raw_gz)
except Exception:
pass
return value
def cache_set(key: str, value: Any, ttl: int = DEFAULT_REDIS_TTL, disk_ttl: Optional[int] = None) -> bool:
"""Scrive in L1 (ttl) e L2 (disk_ttl, default 30 giorni).
Per chiavi ephemere (es. job state) passa disk_ttl=0 per saltare il disco."""
if disk_ttl is None:
disk_ttl = DEFAULT_DISK_TTL
try:
serialized = json.dumps(value).encode("utf-8")
raw_gz = gzip.compress(serialized)
except Exception as e:
logger.warning(f"[Cache] Errore serializzazione '{key}': {e}")
return False
ok = False
# L1
client = _get_redis()
if client is not None:
try:
client.setex(key, ttl, raw_gz)
ok = True
except Exception as e:
logger.warning(f"[Cache] Errore scrittura Redis '{key}': {e}")
# L2
if disk_ttl > 0:
_disk_set(key, raw_gz, disk_ttl)
ok = True
return ok
def cache_delete(key: str) -> bool:
"""Elimina una chiave dalla cache Redis.
client = _get_redis()
if client is not None:
try:
client.delete(key)
except Exception:
pass
_disk_delete(key)
return True
Args:
key: Chiave Redis da eliminare
Returns:
True se eliminata, False altrimenti
"""
def cache_stats() -> dict:
"""Ritorna statistiche della cache: utile per /health e debug."""
stats = {"redis": False, "disk": {"entries": 0, "bytes": 0, "blobs": 0}}
if _get_redis() is not None:
stats["redis"] = True
try:
client = _get_client()
if client is None:
return False
with _sqlite_lock:
conn = _ensure_sqlite()
row = conn.execute(
"SELECT COUNT(*), COALESCE(SUM(size_bytes),0), COALESCE(SUM(is_blob),0) FROM cache"
).fetchone()
stats["disk"]["entries"] = row[0]
stats["disk"]["bytes"] = row[1]
stats["disk"]["blobs"] = row[2]
except Exception:
pass
return stats
client.delete(key)
return True
def cache_sweep() -> int:
"""Rimuove voci scadute su disco (da chiamare periodicamente). Ritorna numero eliminate."""
try:
now = int(time.time())
with _sqlite_lock:
conn = _ensure_sqlite()
rows = conn.execute(
"SELECT key, blob_path FROM cache WHERE expires_at < ?", (now,)
).fetchall()
conn.execute("DELETE FROM cache WHERE expires_at < ?", (now,))
for _, path in rows:
if path:
try:
Path(path).unlink(missing_ok=True)
except Exception:
pass
return len(rows)
except Exception as e:
logger.warning(f"[Redis] Errore eliminazione chiave '{key}': {e}")
return False
logger.warning(f"[Cache] Errore sweep: {e}")
return 0