feat(04-01): integrazione Unsplash in pipeline + CSVBuilder + export

- CSVBuilder.build_csv() e build_csv_content() accettano image_url_map opzionale
- _resolve_image() risolve keyword->URL Unsplash con fallback keyword originale
- _build_rows() chiama _resolve_image per cover, slides e cta image keywords
- JobStatus ha campo image_url_map con persistenza su disco JSON
- GenerationPipeline._resolve_unsplash_keywords() chiamato dopo batch LLM
- Carica unsplash_api_key da settings.json, crea UnsplashService, chiama resolve_keywords
- image_url_map salvato nel job JSON per riuso in export con edits
- Export router recupera image_url_map dal job JSON e passa a build_csv_content
- generate_single NON risolve Unsplash (velocità e riuso map job originale)
This commit is contained in:
Michele
2026-03-09 08:10:06 +01:00
parent afba4c5e9e
commit 9e7205eca2
3 changed files with 157 additions and 9 deletions

View File

@@ -18,6 +18,7 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal, Optional
from backend.config import DATA_PATH
from backend.schemas.calendar import CalendarRequest, CalendarResponse, CalendarSlot
from backend.schemas.generate import (
GenerateResponse,
@@ -30,6 +31,7 @@ from backend.services.csv_builder import CSVBuilder
from backend.services.format_selector import FormatSelector
from backend.services.llm_service import LLMService
from backend.services.prompt_service import PromptService
from backend.services.unsplash_service import UnsplashService
logger = logging.getLogger(__name__)
@@ -69,6 +71,8 @@ class JobStatus:
calendar: Optional[CalendarResponse] = None
error: Optional[str] = None
campagna: str = ""
image_url_map: Optional[dict[str, str]] = None
"""Mappa keyword->URL Unsplash risolta dopo la generazione batch. None se Unsplash non configurato."""
# ---------------------------------------------------------------------------
@@ -372,6 +376,18 @@ class GenerationPipeline:
job.results.append(post_result)
job.completed += 1
# Risolvi keyword immagine via Unsplash (se API key configurata)
image_url_map: Optional[dict[str, str]] = None
try:
image_url_map = await self._resolve_unsplash_keywords(job)
job.image_url_map = image_url_map
except Exception as e:
logger.warning(
"Unsplash resolution fallita | job_id=%s | errore=%s — continuo con keyword testuali",
job_id,
str(e),
)
# Genera CSV con i risultati
success_results = [r for r in job.results if r.status == "success"]
if success_results:
@@ -381,12 +397,14 @@ class GenerationPipeline:
calendar=calendar,
job_id=job_id,
output_dir=self._outputs_path,
image_url_map=image_url_map,
)
logger.info(
"CSV generato | job_id=%s | success=%d/%d",
"CSV generato | job_id=%s | success=%d/%d | url_unsplash=%d",
job_id,
len(success_results),
job.total,
len(image_url_map) if image_url_map else 0,
)
# Salva metadata job su disco per persistenza
@@ -499,6 +517,90 @@ class GenerationPipeline:
# Altrimenti usa la mappa formato -> prompt
return _FORMAT_TO_PROMPT.get(formato, _DEFAULT_PROMPT)
# ---------------------------------------------------------------------------
# Integrazione Unsplash
# ---------------------------------------------------------------------------
async def _resolve_unsplash_keywords(
    self,
    job: JobStatus,
) -> Optional[dict[str, str]]:
    """Resolve the image keywords of the generated posts into Unsplash URLs.

    Reads ``config/settings.json`` under ``DATA_PATH`` to look up
    ``unsplash_api_key``. When the key is missing, blank, or the settings
    file cannot be read, no resolution is attempted and ``None`` is
    returned so that callers keep using the textual keywords.

    Keywords are collected from every successful result in ``job.results``:
    - ``cover_image_keyword``
    - ``image_keyword`` of each slide
    - ``cta_image_keyword``

    Args:
        job: JobStatus holding the generated results.

    Returns:
        The ``{keyword: url}`` mapping returned by the Unsplash service, or
        ``None`` when Unsplash is not configured, no keyword was found, or
        the service returned an empty mapping.

    Raises:
        Any exception raised by ``UnsplashService.resolve_keywords``
        propagates to the caller; the service is closed in every case.
    """
    import json as _json

    # Load settings to check whether an Unsplash API key is configured.
    settings_path = DATA_PATH / "config" / "settings.json"
    unsplash_api_key: Optional[str] = None
    if settings_path.exists():
        try:
            data = _json.loads(settings_path.read_text(encoding="utf-8"))
            unsplash_api_key = data.get("unsplash_api_key")
        except Exception as e:
            # Deliberately best-effort: an unreadable or malformed settings
            # file must not abort the job, it only disables Unsplash.
            logger.warning("Errore lettura settings per Unsplash: %s", str(e))

    # A key made only of whitespace (or padded by a stray newline) would be
    # sent to the API as-is; normalise it so a blank key counts as "not set".
    if isinstance(unsplash_api_key, str):
        unsplash_api_key = unsplash_api_key.strip()

    if not unsplash_api_key:
        logger.debug("unsplash_api_key non configurata — skip risoluzione keyword")
        return None

    # Collect every image keyword from the successful posts.
    keywords: list[str] = []
    for post_result in job.results:
        if post_result.status != "success" or post_result.post is None:
            continue
        post = post_result.post
        if post.cover_image_keyword:
            keywords.append(post.cover_image_keyword)
        # `or ()` guards against a post whose slides attribute is None.
        keywords.extend(
            slide.image_keyword
            for slide in (post.slides or ())
            if slide.image_keyword
        )
        if post.cta_image_keyword:
            keywords.append(post.cta_image_keyword)

    if not keywords:
        logger.debug("Nessuna keyword immagine trovata nei post")
        return None

    # Order-preserving de-duplication: the same keyword commonly recurs
    # across posts and only needs to be looked up once.
    unique_keywords = list(dict.fromkeys(keywords))

    logger.info(
        "Avvio risoluzione Unsplash | job_id=%s | keyword_totali=%d",
        job.job_id,
        len(keywords),
    )

    # Create the service and make sure it is closed whatever happens.
    unsplash_cache_path = DATA_PATH / "unsplash_cache.json"
    unsplash = UnsplashService(
        api_key=unsplash_api_key,
        cache_path=unsplash_cache_path,
    )
    try:
        image_url_map = await unsplash.resolve_keywords(unique_keywords)
        logger.info(
            "Risoluzione Unsplash completata | job_id=%s | risolte=%d/%d",
            job.job_id,
            len(image_url_map),
            len(unique_keywords),
        )
        # An empty mapping is reported as None, like "not configured".
        return image_url_map if image_url_map else None
    finally:
        await unsplash.close()
# ---------------------------------------------------------------------------
# Persistenza su disco
# ---------------------------------------------------------------------------
@@ -523,6 +625,7 @@ class GenerationPipeline:
"error": job.error,
"results": [r.model_dump() for r in job.results],
"calendar": job.calendar.model_dump() if job.calendar else None,
"image_url_map": job.image_url_map,
}
with open(job_path, "w", encoding="utf-8") as f:
@@ -566,6 +669,7 @@ class GenerationPipeline:
calendar=calendar,
error=data.get("error"),
campagna=data.get("campagna", ""),
image_url_map=data.get("image_url_map"),
)
# Metti in memoria per accesso futuro