From 9e7205eca211370455f4b59d966fb126be2999d3 Mon Sep 17 00:00:00 2001 From: Michele Date: Mon, 9 Mar 2026 08:10:06 +0100 Subject: [PATCH] feat(04-01): integrazione Unsplash in pipeline + CSVBuilder + export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CSVBuilder.build_csv() e build_csv_content() accettano image_url_map opzionale - _resolve_image() risolve keyword->URL Unsplash con fallback keyword originale - _build_rows() chiama _resolve_image per cover, slides e cta image keywords - JobStatus ha campo image_url_map con persistenza su disco JSON - GenerationPipeline._resolve_unsplash_keywords() chiamato dopo batch LLM - Carica unsplash_api_key da settings.json, crea UnsplashService, chiama resolve_keywords - image_url_map salvato nel job JSON per riuso in export con edits - Export router recupera image_url_map dal job JSON e passa a build_csv_content - generate_single NON risolve Unsplash (velocità e riuso map job originale) --- backend/routers/export.py | 13 ++- backend/services/csv_builder.py | 47 +++++++++-- backend/services/generation_pipeline.py | 106 +++++++++++++++++++++++- 3 files changed, 157 insertions(+), 9 deletions(-) diff --git a/backend/routers/export.py b/backend/routers/export.py index d48574a..39c89bb 100644 --- a/backend/routers/export.py +++ b/backend/routers/export.py @@ -106,23 +106,34 @@ async def download_csv_with_edits( # Carica il calendario dal JSON del job import json + from typing import Optional from backend.schemas.calendar import CalendarResponse try: with open(job_path, "r", encoding="utf-8") as f: job_data = json.load(f) calendar = CalendarResponse.model_validate(job_data["calendar"]) + # Recupera image_url_map se presente (risoluzione Unsplash originale) + image_url_map: Optional[dict[str, str]] = job_data.get("image_url_map") except Exception as e: raise HTTPException( status_code=500, detail=f"Errore nel caricamento del job: {str(e)}", ) - # Genera il CSV con i dati modificati + if image_url_map: + logger.info( + "Uso image_url_map dal job originale | job_id=%s | url_count=%d", + job_id, + len(image_url_map), + ) + + # Genera il CSV con i dati modificati (+ URL Unsplash se disponibili) csv_content = _csv_builder.build_csv_content( posts=request.results, calendar=calendar, job_id=job_id, + image_url_map=image_url_map, ) # Salva anche su disco come versione edited diff --git a/backend/services/csv_builder.py b/backend/services/csv_builder.py index ed705be..4c6a12c 100644 --- a/backend/services/csv_builder.py +++ b/backend/services/csv_builder.py @@ -6,6 +6,7 @@ Caratteristiche: - Mappa GeneratedPost + CalendarSlot -> riga CSV - Filtra solo PostResult con status="success" - Scrive su disco in OUTPUTS_PATH/{job_id}.csv +- Supporta image_url_map opzionale: risolve keyword -> URL Unsplash nelle colonne _image_keyword """ from __future__ import annotations @@ -14,7 +15,7 @@ import csv import io import logging from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from backend.constants import CANVA_FIELDS from backend.schemas.generate import PostResult @@ -35,6 +36,7 @@ class CSVBuilder: calendar: "CalendarResponse", job_id: str, output_dir: Path, + image_url_map: Optional[dict[str, str]] = None, ) -> Path: """Genera e scrive il CSV su disco. @@ -42,11 +44,16 @@ class CSVBuilder: GeneratedPost + CalendarSlot alle 33 colonne CANVA_FIELDS, e scrive con encoding utf-8-sig per compatibilità Excel. + Se image_url_map è fornita, le colonne _image_keyword contengono + URL Unsplash reali invece delle keyword testuali originali. + Args: posts: Lista di PostResult (include success e failed). calendar: CalendarResponse con i metadati degli slot. job_id: Identificatore univoco del job (usato come nome file). output_dir: Directory dove scrivere il file CSV. + image_url_map: Mappa opzionale {keyword: url_unsplash}. Se None, + usa le keyword testuali originali. Returns: Path del file CSV scritto su disco. @@ -54,7 +61,7 @@ class CSVBuilder: output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / f"{job_id}.csv" - rows = self._build_rows(posts, calendar) + rows = self._build_rows(posts, calendar, image_url_map) # CRITICO: encoding utf-8-sig (BOM) per Excel + caratteri italiani with open(output_path, "w", newline="", encoding="utf-8-sig") as f: @@ -62,10 +69,12 @@ class CSVBuilder: writer.writeheader() writer.writerows(rows) + url_count = len(image_url_map) if image_url_map else 0 logger.info( - "CSV scritto | job_id=%s | righe_success=%d | path=%s", + "CSV scritto | job_id=%s | righe_success=%d | url_unsplash=%d | path=%s", job_id, len(rows), + url_count, output_path, ) return output_path @@ -75,21 +84,26 @@ class CSVBuilder: posts: list[PostResult], calendar: "CalendarResponse", job_id: str, + image_url_map: Optional[dict[str, str]] = None, ) -> str: """Genera il CSV come stringa (senza scrivere su disco). Usato per preview e per la route POST /export/{job_id}/csv con dati modificati inline dall'utente. + Se image_url_map è fornita, le colonne _image_keyword contengono + URL Unsplash reali invece delle keyword testuali originali. + Args: posts: Lista di PostResult (include success e failed). calendar: CalendarResponse con i metadati degli slot. job_id: Identificatore univoco del job. + image_url_map: Mappa opzionale {keyword: url_unsplash}. Returns: Stringa CSV con encoding utf-8-sig (BOM). """ - rows = self._build_rows(posts, calendar) + rows = self._build_rows(posts, calendar, image_url_map) output = io.StringIO() # Aggiungi BOM manualmente per compatibilità Excel @@ -103,19 +117,38 @@ class CSVBuilder: # Metodi privati # --------------------------------------------------------------------------- + def _resolve_image(self, keyword: str, image_url_map: Optional[dict[str, str]]) -> str: + """Risolve una keyword immagine in URL Unsplash se disponibile. + + Args: + keyword: Keyword immagine originale. + image_url_map: Mappa {keyword: url} o None. + + Returns: + URL Unsplash se disponibile nella mappa, altrimenti la keyword originale. + """ + if image_url_map and keyword in image_url_map: + return image_url_map[keyword] + return keyword + def _build_rows( self, posts: list[PostResult], calendar: "CalendarResponse", + image_url_map: Optional[dict[str, str]] = None, ) -> list[dict[str, str]]: """Costruisce la lista di righe CSV dai risultati. Filtra solo i post con status="success" e mappa i dati GeneratedPost + CalendarSlot alle colonne CANVA_FIELDS. + Se image_url_map è fornita, le colonne _image_keyword vengono + risolte in URL Unsplash quando disponibili. + Args: posts: Lista completa di PostResult. calendar: CalendarResponse con i metadati degli slot. + image_url_map: Mappa opzionale {keyword: url_unsplash}. Returns: Lista di dict con chiavi = CANVA_FIELDS. @@ -152,7 +185,7 @@ class CSVBuilder: # --- Cover slide (3 colonne) --- row["cover_title"] = post.cover_title row["cover_subtitle"] = post.cover_subtitle - row["cover_image_keyword"] = post.cover_image_keyword + row["cover_image_keyword"] = self._resolve_image(post.cover_image_keyword, image_url_map) # --- Slide centrali s2-s7 (6 slide x 3 colonne = 18 colonne) --- slide_labels = ["s2", "s3", "s4", "s5", "s6", "s7"] @@ -161,7 +194,7 @@ class CSVBuilder: slide = post.slides[idx] row[f"{label}_headline"] = slide.headline row[f"{label}_body"] = slide.body - row[f"{label}_image_keyword"] = slide.image_keyword + row[f"{label}_image_keyword"] = self._resolve_image(slide.image_keyword, image_url_map) else: # Fallback se slides ha meno di 6 elementi (non dovrebbe accadere) row[f"{label}_headline"] = "" @@ -171,7 +204,7 @@ class CSVBuilder: # --- CTA slide (3 colonne) --- row["cta_text"] = post.cta_text row["cta_subtext"] = post.cta_subtext - row["cta_image_keyword"] = post.cta_image_keyword + row["cta_image_keyword"] = self._resolve_image(post.cta_image_keyword, image_url_map) # --- Caption Instagram (1 colonna) --- row["caption_instagram"] = post.caption_instagram diff --git a/backend/services/generation_pipeline.py b/backend/services/generation_pipeline.py index f2840c7..5c8a33f 100644 --- a/backend/services/generation_pipeline.py +++ b/backend/services/generation_pipeline.py @@ -18,6 +18,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Literal, Optional +from backend.config import DATA_PATH from backend.schemas.calendar import CalendarRequest, CalendarResponse, CalendarSlot from backend.schemas.generate import ( GenerateResponse, @@ -30,6 +31,7 @@ from backend.services.csv_builder import CSVBuilder from backend.services.format_selector import FormatSelector from backend.services.llm_service import LLMService from backend.services.prompt_service import PromptService +from backend.services.unsplash_service import UnsplashService logger = logging.getLogger(__name__) @@ -69,6 +71,8 @@ class JobStatus: calendar: Optional[CalendarResponse] = None error: Optional[str] = None campagna: str = "" + image_url_map: Optional[dict[str, str]] = None + """Mappa keyword->URL Unsplash risolta dopo la generazione batch. None se Unsplash non configurato.""" # --------------------------------------------------------------------------- @@ -372,6 +376,18 @@ class GenerationPipeline: job.results.append(post_result) job.completed += 1 + # Risolvi keyword immagine via Unsplash (se API key configurata) + image_url_map: Optional[dict[str, str]] = None + try: + image_url_map = await self._resolve_unsplash_keywords(job) + job.image_url_map = image_url_map + except Exception as e: + logger.warning( + "Unsplash resolution fallita | job_id=%s | errore=%s — continuo con keyword testuali", + job_id, + str(e), + ) + # Genera CSV con i risultati success_results = [r for r in job.results if r.status == "success"] if success_results: @@ -381,12 +397,14 @@ class GenerationPipeline: calendar=calendar, job_id=job_id, output_dir=self._outputs_path, + image_url_map=image_url_map, ) logger.info( - "CSV generato | job_id=%s | success=%d/%d", + "CSV generato | job_id=%s | success=%d/%d | url_unsplash=%d", job_id, len(success_results), job.total, + len(image_url_map) if image_url_map else 0, ) # Salva metadata job su disco per persistenza @@ -499,6 +517,90 @@ class GenerationPipeline: # Altrimenti usa la mappa formato -> prompt return _FORMAT_TO_PROMPT.get(formato, _DEFAULT_PROMPT) + # --------------------------------------------------------------------------- + # Integrazione Unsplash + # --------------------------------------------------------------------------- + + async def _resolve_unsplash_keywords( + self, + job: JobStatus, + ) -> Optional[dict[str, str]]: + """Risolve le keyword immagine dei post in URL Unsplash. + + Carica la settings da disco per verificare se unsplash_api_key e' configurata. + Se non e' configurata, ritorna None (nessuna risoluzione, usa keyword testuali). + + Estrae tutte le keyword uniche dai PostResult success: + - cover_image_keyword + - slide.image_keyword per ogni slide + - cta_image_keyword + + Args: + job: JobStatus con i risultati generati. + + Returns: + Dizionario {keyword: url} per le keyword risolte, o None se Unsplash non configurato. + """ + import json as _json + + # Carica settings per controllare unsplash_api_key + settings_path = DATA_PATH / "config" / "settings.json" + unsplash_api_key: Optional[str] = None + + if settings_path.exists(): + try: + data = _json.loads(settings_path.read_text(encoding="utf-8")) + unsplash_api_key = data.get("unsplash_api_key") + except Exception as e: + logger.warning("Errore lettura settings per Unsplash: %s", str(e)) + + if not unsplash_api_key: + logger.debug("unsplash_api_key non configurata — skip risoluzione keyword") + return None + + # Estrai tutte le keyword uniche dai post success + keywords: list[str] = [] + for post_result in job.results: + if post_result.status != "success" or post_result.post is None: + continue + post = post_result.post + if post.cover_image_keyword: + keywords.append(post.cover_image_keyword) + for slide in post.slides: + if slide.image_keyword: + keywords.append(slide.image_keyword) + if post.cta_image_keyword: + keywords.append(post.cta_image_keyword) + + if not keywords: + logger.debug("Nessuna keyword immagine trovata nei post") + return None + + logger.info( + "Avvio risoluzione Unsplash | job_id=%s | keyword_totali=%d", + job.job_id, + len(keywords), + ) + + # Crea e usa UnsplashService + unsplash_cache_path = DATA_PATH / "unsplash_cache.json" + unsplash = UnsplashService( + api_key=unsplash_api_key, + cache_path=unsplash_cache_path, + ) + + try: + image_url_map = await unsplash.resolve_keywords(keywords) + logger.info( + "Risoluzione Unsplash completata | job_id=%s | risolte=%d/%d", + job.job_id, + len(image_url_map), + len(set(keywords)), + ) + return image_url_map if image_url_map else None + finally: + await unsplash.close() + # --------------------------------------------------------------------------- # Persistenza su disco # --------------------------------------------------------------------------- @@ -523,6 +625,7 @@ class GenerationPipeline: "error": job.error, "results": [r.model_dump() for r in job.results], "calendar": job.calendar.model_dump() if job.calendar else None, + "image_url_map": job.image_url_map, } with open(job_path, "w", encoding="utf-8") as f: @@ -566,6 +669,7 @@ class GenerationPipeline: calendar=calendar, error=data.get("error"), campagna=data.get("campagna", ""), + image_url_map=data.get("image_url_map"), ) # Metti in memoria per accesso futuro