feat: Add new API endpoints and HTML pages for ML model management

- Implemented HTML pages for datasets, models, training, testing, and results.
- Created API endpoints for managing repositories, results, tests, and training sessions.
- Added functionality for streaming training progress via Server-Sent Events (SSE).
- Introduced a Dockerfile for the ML runner with necessary dependencies.
- Developed an SDK for user code execution within the runner container.
- Enhanced CSS styles for improved UI layout and navigation.
- Established a layout template for consistent HTML structure across pages.
- Added JavaScript for dynamic interactions on the models page.
- Implemented WebSocket handling for real-time communication with kiosk devices and controllers.
- Implemented model registration and management API at /api/models
- Added Gitea proxy API for repository interactions at /api/repos
- Created results API for listing and comparing training results at /api/results
- Developed training management API for enqueueing and retrieving training jobs at /api/trainings
- Introduced SSE endpoint for live training progress updates
- Added HTML pages for models, datasets, and training management
- Created a Dockerfile for the ML runner with necessary dependencies
- Developed SDK for user code execution within the runner container
- Enhanced CSS styles for improved UI/UX
- Implemented WebSocket communication for real-time device and controller interactions in the kiosk system
2026-04-28 09:24:38 +02:00
parent ee478e52ef
commit 0ce879aa44
81 changed files with 7491 additions and 746 deletions
--- a/ml/core/influx_client.py
+++ b/ml/core/influx_client.py
@@ -0,0 +1,75 @@
+"""Client InfluxDB (influxdb-client sync wrapper in thread-pool per async).
+
+Le scritture usano il batching async dell'SDK invece di SYNCHRONOUS.
+Le metriche di training arrivano in burst (logs container, stats loop ogni 5s):
+con SYNCHRONOUS ogni write era una HTTP request bloccante. Con WriteOptions
+batched, l'SDK accumula i Point e fa flush periodico in background, senza
+perdere durabilità (flush forzato a fine training).
+"""
+from __future__ import annotations
+
+import asyncio
+from typing import Iterable, Optional
+
+from influxdb_client import InfluxDBClient, Point, WriteOptions
+
+from core.config import settings
+
+_client: Optional[InfluxDBClient] = None
+_write_api = None
+
+
+def client() -> InfluxDBClient:
+    """Return the module-wide InfluxDB client, creating it on first call.
+
+    The first call also builds the shared batching write API and stores it in
+    the module-level ``_write_api``; later calls return the cached client
+    unchanged.
+    """
+    global _client, _write_api
+    if _client is not None:
+        return _client
+
+    _client = InfluxDBClient(
+        url=settings.influx_url,
+        token=settings.influx_token,
+        org=settings.influx_org,
+    )
+    # Batched writes: points are buffered and flushed in the background
+    # instead of issuing one blocking HTTP request per write.
+    # NOTE(review): the interval values are presumably milliseconds; confirm
+    # against the WriteOptions documentation.
+    batching = WriteOptions(
+        batch_size=200,
+        flush_interval=2_000,
+        jitter_interval=200,
+        retry_interval=2_000,
+        max_retries=3,
+    )
+    _write_api = _client.write_api(write_options=batching)
+    return _client
+
+
+def _wa():
+    """Return the shared write API, making sure the client exists first."""
+    # client() is called only for its side effect of populating _write_api
+    # on first use; its return value is not needed here.
+    client()
+    return _write_api
+
+
+async def write_points(points: Iterable[Point]) -> None:
+    """Hand a collection of points to the shared write API.
+
+    ``points`` is materialised into a list on the calling task, then the
+    write call runs in a worker thread via ``asyncio.to_thread``. The target
+    bucket and organisation are read from ``settings``.
+    """
+    write_api = _wa()
+    batch = [*points]
+    await asyncio.to_thread(
+        write_api.write,
+        settings.influx_bucket,
+        settings.influx_org,
+        batch,
+    )
+
+
+async def flush() -> None:
+    """Force a flush of the batched write buffer.
+
+    Call this at the end of a training run so that every collected metric is
+    persisted. It does nothing when the write API has never been created.
+    Failures are logged and not propagated: flushing stays best-effort and
+    must not abort the caller.
+    """
+    import logging  # local import keeps this block self-contained
+
+    write_api = _write_api
+    if write_api is None:
+        return
+    try:
+        # NOTE(review): in some influxdb-client releases WriteApi.flush() is
+        # an empty stub. Confirm it really drains the batch buffer; if it
+        # does not, WriteApi.close() is the call that flushes.
+        await asyncio.to_thread(write_api.flush)
+    except Exception:
+        # Previously swallowed silently, which hid lost metrics.
+        logging.getLogger(__name__).warning(
+            "InfluxDB flush failed; buffered metrics may be lost",
+            exc_info=True,
+        )
+
+
+async def query_flux(flux: str) -> list[dict]:
+    """Run a Flux query and return its records as plain dictionaries.
+
+    The blocking query runs in a worker thread. Each record becomes a dict
+    with the keys ``time`` (ISO-8601 string or ``None``), ``measurement``,
+    ``field``, ``value`` and ``tags``. ``tags`` holds every record column
+    whose name does not start with ``_`` and is not ``result`` or ``table``.
+
+    Columns that the query has dropped (for example ``_time`` after an
+    aggregate) are reported as ``None`` instead of raising ``KeyError``.
+    """
+    c = client()
+
+    def _q() -> list[dict]:
+        tables = c.query_api().query(flux, org=settings.influx_org)
+        out = []
+        for table in tables:
+            for r in table.records:
+                # Read from the raw column mapping: the FluxRecord get_*()
+                # accessors index it directly and raise KeyError when a
+                # column is absent (see the influxdb-client FluxRecord API).
+                cols = r.values
+                ts = cols.get("_time")
+                out.append({
+                    "time": ts.isoformat() if ts else None,
+                    "measurement": cols.get("_measurement"),
+                    "field": cols.get("_field"),
+                    "value": cols.get("_value"),
+                    "tags": {
+                        k: v
+                        for k, v in cols.items()
+                        if not k.startswith("_") and k not in ("result", "table")
+                    },
+                })
+        return out
+
+    return await asyncio.to_thread(_q)