fix: strip markdown code fences from LLM JSON responses

Claude wraps JSON in ```json ... ``` fences even when instructed to return raw JSON. This caused all TopicResult validations to fail with "Invalid JSON at line 1 column 1". Strip fences before parsing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 15:10:39 +01:00
parent 5c06b1a342
commit 5870b5eede
1 changed files with 18 additions and 2 deletions
--- a/backend/services/llm_service.py
+++ b/backend/services/llm_service.py
@@ -12,6 +12,7 @@ from __future__ import annotations
 import json
 import logging
 import random
+import re
 import time
 from typing import Type, TypeVar

@@ -111,9 +112,10 @@ class LLMService:
                    elapsed,
                )

-                # Valida con Pydantic
+                # Rimuovi eventuali code fences markdown e valida con Pydantic
+                clean_text = self._strip_code_fences(raw_text)
                try:
-                    result = response_schema.model_validate_json(raw_text)
+                    result = response_schema.model_validate_json(clean_text)
                    # Pausa inter-request dopo chiamata riuscita
                    time.sleep(self._inter_request_delay)
                    return result
@@ -259,6 +261,20 @@ class LLMService:
    # Metodi privati
    # ---------------------------------------------------------------------------

+    @staticmethod
+    def _strip_code_fences(text: str) -> str:
+        """Return ``text`` without its enclosing markdown code fence, if any.
+
+        Any fence info string (json, JSON, json5, ...) is dropped with the fence.
+        """
+        stripped = text.strip()
+        # Tag = word ending the fence line, else an inline "json" (case-insensitive).
+        pattern = r"^```(?:[\w+-]*(?=[ \t]*\r?\n)|json)?\s*(.*?)\s*```$"
+        match = re.match(pattern, stripped, re.DOTALL | re.IGNORECASE)
+        if match:
+            return match.group(1).strip()
+        return stripped
+
    @staticmethod
    def _parse_retry_after(error: anthropic.RateLimitError) -> float:
        """Estrae il valore retry-after dall'eccezione RateLimitError.