itsulu-blog-publisher/addons/itsulu_blog_publisher/services/llm_router.py
Nicholas Riegel 0fc4febabf Reorganize codebase into Odoo addon structure per ARCHITECTURE.md
Restructure project files to follow the addon layout:
- Move models to addons/itsulu_blog_publisher/models/
- Move services (LLM providers, routers) to addons/itsulu_blog_publisher/services/
- Move wizards to addons/itsulu_blog_publisher/wizards/
- Move views (XML templates) to addons/itsulu_blog_publisher/views/
- Move data (cron, mail templates) to addons/itsulu_blog_publisher/data/
- Move security (ACL) to addons/itsulu_blog_publisher/security/
- Move tests and factories to addons/itsulu_blog_publisher/tests/
- Move BDD features to addons/itsulu_blog_publisher/features/
- Create __init__.py files for all Python packages

This enables proper Odoo module discovery and import structure.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-05-29 12:11:42 -04:00

345 lines
14 KiB
Python

# -*- coding: utf-8 -*-
"""
LLM Router — itsulu_blog_publisher
===================================
Central dispatch layer. Picks the right provider, builds one structured-JSON
prompt, calls the provider, validates the response, and returns an LLMResponse.
Design decisions
----------------
* Single API call per blog post — the LLM returns a JSON object containing
title, body HTML, SEO fields, tags, social copy, and cited sources.
This is the primary mechanism for reducing token usage vs. CoWork.
* No provider logic lives here — each provider is an isolated class.
* All secrets are read from ir.config_parameter at call time, never cached
in Python memory across requests (Odoo workers can be long-lived).
* Anthropic Pro user account tokens (claude.ai/api) are supported — they
use the same Anthropic API endpoint but may have different rate limits;
the router treats them identically to API keys.
"""
import json
import logging
import time
from dataclasses import dataclass, field
from typing import List, Optional
from odoo.exceptions import UserError
_logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Response dataclass — NOT an Odoo model
# ---------------------------------------------------------------------------
@dataclass
class SocialCopy:
    """Social media copy for one blog post, one text field per platform slot.

    Every field defaults to the empty string.
    """

    # X/Twitter: two alternative posts
    twitter_a: str = ""
    twitter_b: str = ""
    # BlueSky: two alternative posts
    bluesky_a: str = ""
    bluesky_b: str = ""
    # Fediverse/Mastodon and LinkedIn: one post each
    mastodon: str = ""
    linkedin: str = ""
@dataclass
class SourceRef:
    """One cited source: a display title and its URL (both default to '')."""

    title: str = ""
    url: str = ""
@dataclass
class LLMResponse:
    """Structured result of one generation call."""

    # Blog post content
    title: str = ""
    body_html: str = ""
    # SEO fields
    meta_title: str = ""
    meta_description: str = ""
    meta_keywords: str = ""
    tags: List[str] = field(default_factory=list)
    # Social copy and cited sources; factories give each instance its own objects
    social: SocialCopy = field(default_factory=SocialCopy)
    sources: List[SourceRef] = field(default_factory=list)
    # Bookkeeping
    tokens_used: int = 0
    raw_text: str = ""  # full LLM response for debugging
# ---------------------------------------------------------------------------
# Prompt builder
# ---------------------------------------------------------------------------
# Default structured-JSON user prompt, injected when no override (e.g. from a
# schedule) is supplied. Stored here as a fallback: LLMRouter._build_prompt
# uses it only when the DB-editable ir.config_parameter value
# "itsulu_blog_publisher.user_prompt_template" is empty; the DB version takes
# precedence.
#
# The text is rendered with str.format(), which fills {blog_name}, {topic},
# {date}, {tone} and {enabled_platforms}. Every literal brace is therefore
# doubled: the JSON schema braces come out as single braces, and {{URL}}
# reaches the model as the placeholder {URL}.
DEFAULT_USER_PROMPT_TEMPLATE = """
You are writing a blog post for {blog_name}, a professional technology and AI services company.
TOPIC / FOCUS: {topic}
DATE: {date}
TONE: {tone}
SOCIAL MEDIA PLATFORMS TO GENERATE (only generate copy for enabled platforms):
{enabled_platforms}
Search the internet for recent, authoritative sources relevant to this topic.
Cite at least 3 real URLs in the blog body and include them in the sources list.
Return ONLY a valid JSON object — no markdown fences, no preamble — with this exact schema:
{{
"title": "<compelling post title, 6-12 words>",
"body_html": "<full HTML blog post body, minimum 800 words, using <h2>, <h3>, <p>, <ul>, <strong>. Cite sources inline as <a href='URL'>anchor text</a>>",
"meta_title": "<SEO title, 50-60 chars, includes primary keyword>",
"meta_description": "<SEO description, 140-155 chars, compelling, includes CTA>",
"meta_keywords": "<5-8 comma-separated keywords>",
"tags": ["<tag1>", "<tag2>", "<tag3>", "<tag4>", "<tag5>"],
"social": {{
"twitter_a": "<Post A for X/Twitter. Max 240 chars excluding URL. Hook stat or question. End with URL placeholder {{URL}}>",
"twitter_b": "<Post B for X/Twitter. Different angle. Max 240 chars excluding URL. End with URL placeholder {{URL}}>",
"bluesky_a": "<Post A for BlueSky. Max 290 chars excluding URL. End with URL placeholder {{URL}}>",
"bluesky_b": "<Post B for BlueSky. Different angle. Max 290 chars. End with URL placeholder {{URL}}>",
"mastodon": "<Fediverse/Mastodon post. Max 480 chars excluding URL. Thoughtful, community-oriented. End with URL placeholder {{URL}}>",
"linkedin": "<LinkedIn post. 200-600 words. Professional. Data-driven. Include insight + CTA. End with URL placeholder {{URL}}>"
}},
"sources": [
{{"title": "<source title>", "url": "<full https URL>"}},
{{"title": "<source title>", "url": "<full https URL>"}},
{{"title": "<source title>", "url": "<full https URL>"}}
]
}}
CRITICAL RULES:
- meta_title MUST be <= 60 characters
- meta_description MUST be <= 155 characters
- twitter_a and twitter_b MUST be <= 240 characters each (excluding the URL placeholder)
- bluesky_a and bluesky_b MUST be <= 290 characters each (excluding the URL placeholder)
- mastodon MUST be <= 480 characters (excluding the URL placeholder)
- linkedin MUST be >= 150 characters
- All sources must be real, verifiable URLs
- Only include social fields for enabled platforms; set others to empty string ""
- body_html must be complete, well-structured HTML — no Lorem Ipsum
- Output ONLY the JSON object, nothing else
"""
# Bullet list naming every supported social field. build_enabled_platforms_text
# returns it verbatim when no known platform is enabled. The backslash after
# the opening quotes keeps the text from starting with a blank line.
ENABLED_PLATFORMS_ALL = """\
- twitter_a (X/Twitter Post A)
- twitter_b (X/Twitter Post B)
- bluesky_a (BlueSky Post A)
- bluesky_b (BlueSky Post B)
- mastodon (Fediverse/Mastodon)
- linkedin (LinkedIn)"""
def build_enabled_platforms_text(platforms: dict) -> str:
    """Render the enabled entries of a {platform: bool} mapping as prompt bullets.

    Keys that are not known social fields, and keys whose value is falsy, are
    ignored. Bullets follow the iteration order of ``platforms``. When nothing
    qualifies, the full platform list is returned so that copy is generated
    for every platform.
    """
    display_names = {
        'twitter_a': 'twitter_a (X/Twitter Post A)',
        'twitter_b': 'twitter_b (X/Twitter Post B)',
        'bluesky_a': 'bluesky_a (BlueSky Post A)',
        'bluesky_b': 'bluesky_b (BlueSky Post B)',
        'mastodon': 'mastodon (Fediverse/Mastodon)',
        'linkedin': 'linkedin (LinkedIn)',
    }
    bullet_lines = []
    for key, is_on in platforms.items():
        if is_on and key in display_names:
            bullet_lines.append(f'- {display_names[key]}')
    if bullet_lines:
        return '\n'.join(bullet_lines)
    # Fallback: generate all
    return ENABLED_PLATFORMS_ALL
# ---------------------------------------------------------------------------
# Router
# ---------------------------------------------------------------------------
# Provider keys accepted by LLMRouter.__init__; LLMRouter.generate has one
# dispatch branch per key.
SUPPORTED_PROVIDERS = ('anthropic', 'openai', 'gemini', 'ollama')
class LLMRouter:
    """
    Dispatch one structured-JSON generation call to the selected LLM provider.

    Usage::

        router = LLMRouter(env, provider='anthropic', model='claude-sonnet-4-20250514')
        response = router.generate(
            topic='AI Governance in Enterprise',
            blog_name='ITSulu Insights',
            tone='professional',
            enabled_platforms={'twitter_a': True, 'linkedin': True, ...},
            system_prompt='...',   # optional override
            user_prompt='...',     # optional override (replaces template entirely)
        )
        # response is an LLMResponse dataclass
    """

    def __init__(self, env, provider: str, model: str):
        """
        :param env: Odoo environment; used to read ``ir.config_parameter``.
        :param provider: Provider key, matched case-insensitively (after
            trimming) against SUPPORTED_PROVIDERS.
        :param model: Model identifier passed to the provider (trimmed).
        :raises UserError: if the provider is not in SUPPORTED_PROVIDERS.
        """
        self.env = env
        # ``or ''`` lets an unset (None) value reach the UserError below
        # instead of failing with AttributeError on .lower()/.strip().
        self.provider = (provider or '').lower().strip()
        self.model = (model or '').strip()
        if self.provider not in SUPPORTED_PROVIDERS:
            raise UserError(
                f"LLM provider '{self.provider}' is not configured. "
                f"Supported providers: {', '.join(SUPPORTED_PROVIDERS)}."
            )

    def _get_param(self, key: str) -> str:
        """Read ``itsulu_blog_publisher.<key>`` from ir.config_parameter ('' if unset)."""
        return self.env['ir.config_parameter'].sudo().get_param(
            f'itsulu_blog_publisher.{key}', default=''
        )

    def _require_param(self, key: str, human_name: str) -> str:
        """Return the trimmed parameter value.

        :raises UserError: if the value is empty or whitespace-only.
        """
        value = self._get_param(key)
        if not value or not value.strip():
            raise UserError(
                f"Missing configuration: {human_name} is required for provider '{self.provider}'. "
                f"Go to Settings → Blog Publisher to configure it."
            )
        return value.strip()

    def _build_prompt(self, topic: str, blog_name: str, tone: str,
                      enabled_platforms: dict) -> str:
        """Substitute template variables into the user prompt.

        The DB-stored ``user_prompt_template`` parameter is used when set,
        otherwise DEFAULT_USER_PROMPT_TEMPLATE.

        :raises UserError: if the template cannot be rendered by str.format
            (unknown placeholder or unescaped literal brace).
        """
        import datetime

        template = (
            self._get_param('user_prompt_template') or DEFAULT_USER_PROMPT_TEMPLATE
        )
        platforms_text = build_enabled_platforms_text(enabled_platforms or {})
        try:
            return template.format(
                topic=topic,
                blog_name=blog_name,
                date=datetime.date.today().strftime('%B %d, %Y'),
                tone=tone or 'professional and informative',
                enabled_platforms=platforms_text,
            )
        except (KeyError, IndexError, ValueError) as exc:
            # The template is editable in the database and contains a JSON
            # schema, so a stray single brace is an easy mistake. Report it
            # as a configuration error rather than an unhandled traceback.
            raise UserError(
                "The user prompt template could not be rendered "
                f"({type(exc).__name__}: {exc}). "
                "Write literal braces as '{{' and '}}'; the only placeholders are "
                "{topic}, {blog_name}, {date}, {tone} and {enabled_platforms}."
            ) from exc

    def _get_system_prompt(self) -> str:
        """Return the DB-stored ``system_prompt`` parameter, or the built-in default."""
        default = (
            "You are an expert technology content writer for a professional AI and IT services "
            "company. You write clear, data-driven, SEO-optimised blog posts. You always cite "
            "real, verifiable sources. You respond ONLY with valid JSON — no markdown, no "
            "preamble, no explanation."
        )
        return self._get_param('system_prompt') or default

    @staticmethod
    def _as_text(value) -> str:
        """Coerce a decoded JSON value to str; any falsy value becomes ''."""
        if not value:
            return ''
        return value if isinstance(value, str) else str(value)

    def _parse_response(self, raw_text: str, tokens_used: int) -> LLMResponse:
        """Parse and validate the JSON blob from the LLM.

        Over-long fields are trimmed rather than rejected. Values of an
        unexpected JSON type are coerced or skipped so that a partly
        malformed response cannot crash with AttributeError/TypeError.

        :param raw_text: Text returned by the provider.
        :param tokens_used: Token count reported by the provider; stored as-is.
        :return: Populated LLMResponse.
        :raises UserError: if the text is not valid JSON, is not a JSON
            object, or lacks a required field.
        """
        raw_text = raw_text or ''
        # Strip potential markdown fences the model may add despite instructions
        text = raw_text.strip()
        if text.startswith('```'):
            lines = text.split('\n')
            # Drop first line (```json or ```) and last line (```)
            text = '\n'.join(lines[1:-1]) if lines[-1].strip() == '```' else '\n'.join(lines[1:])
            text = text.strip()
        try:
            data = json.loads(text)
        except json.JSONDecodeError as exc:
            _logger.error("LLM returned non-JSON response: %s", raw_text[:500])
            raise UserError(
                f"The LLM returned an invalid response (not valid JSON). "
                f"Error: {exc}. Raw start: {raw_text[:200]}"
            ) from exc

        # json.loads also accepts arrays and scalars; everything below needs a mapping.
        if not isinstance(data, dict):
            _logger.error("LLM returned JSON that is not an object: %s", raw_text[:500])
            raise UserError(
                "The LLM returned an invalid response (valid JSON, but not a JSON object: "
                f"got {type(data).__name__}). Raw start: {raw_text[:200]}"
            )

        # --- Validate required fields ---
        required = ['title', 'body_html', 'meta_title', 'meta_description',
                    'meta_keywords', 'tags']
        missing = [f for f in required if not data.get(f)]
        if missing:
            raise UserError(
                f"LLM response is missing required fields: {', '.join(missing)}. "
                f"This usually means the model did not follow the JSON schema."
            )

        # --- Enforce character limits (trim rather than error) ---
        as_text = self._as_text
        meta_title = as_text(data.get('meta_title'))[:60]
        meta_desc = as_text(data.get('meta_description'))[:155]

        # Models sometimes return keywords as an array instead of a
        # comma-separated string; normalise to the string form.
        keywords = data.get('meta_keywords')
        if isinstance(keywords, (list, tuple)):
            keywords = ', '.join(str(item).strip() for item in keywords if item)

        social_data = data.get('social')
        if not isinstance(social_data, dict):
            social_data = {}
        # These hard caps are looser than the limits requested in the prompt
        # (240/290/480) — presumably headroom for the URL placeholder.
        social = SocialCopy(
            twitter_a=as_text(social_data.get('twitter_a'))[:280],
            twitter_b=as_text(social_data.get('twitter_b'))[:280],
            bluesky_a=as_text(social_data.get('bluesky_a'))[:300],
            bluesky_b=as_text(social_data.get('bluesky_b'))[:300],
            mastodon=as_text(social_data.get('mastodon'))[:500],
            linkedin=as_text(social_data.get('linkedin')),
        )

        # Keep only well-formed entries: a mapping whose url is an http(s) string.
        raw_sources = data.get('sources')
        if not isinstance(raw_sources, (list, tuple)):
            raw_sources = []
        sources = []
        for entry in raw_sources:
            if not isinstance(entry, dict):
                continue
            url = entry.get('url')
            if isinstance(url, str) and url.startswith('http'):
                sources.append(SourceRef(title=as_text(entry.get('title')), url=url))

        # A bare string would otherwise be iterated character by character.
        raw_tags = data.get('tags')
        if isinstance(raw_tags, str):
            raw_tags = raw_tags.split(',')
        elif not isinstance(raw_tags, (list, tuple)):
            raw_tags = []
        cleaned_tags = (str(t).strip() for t in raw_tags if t)
        tags = [tag for tag in cleaned_tags if tag]

        return LLMResponse(
            title=as_text(data.get('title'))[:200],
            body_html=as_text(data.get('body_html')),
            meta_title=meta_title,
            meta_description=meta_desc,
            meta_keywords=as_text(keywords)[:255],
            tags=tags,
            social=social,
            sources=sources,
            tokens_used=tokens_used,
            raw_text=raw_text,
        )

    def _build_provider(self):
        """Instantiate the provider client for ``self.provider``.

        Each provider module is imported inside its own branch, so only the
        selected provider's module is loaded. The credential (or base URL for
        Ollama) is read from ir.config_parameter at call time.

        :raises UserError: if the required parameter is missing.
        """
        if self.provider == 'anthropic':
            from .anthropic_provider import AnthropicProvider
            api_key = self._require_param('anthropic_api_key', 'Anthropic API Key')
            return AnthropicProvider(api_key=api_key, model=self.model)
        if self.provider == 'openai':
            from .openai_provider import OpenAIProvider
            api_key = self._require_param('openai_api_key', 'OpenAI API Key')
            return OpenAIProvider(api_key=api_key, model=self.model)
        if self.provider == 'gemini':
            from .gemini_provider import GeminiProvider
            api_key = self._require_param('gemini_api_key', 'Google Gemini API Key')
            return GeminiProvider(api_key=api_key, model=self.model)
        if self.provider == 'ollama':
            from .ollama_provider import OllamaProvider
            base_url = self._require_param('ollama_base_url', 'Ollama / Open WebUI Base URL')
            return OllamaProvider(base_url=base_url, model=self.model)
        # Guarded by __init__ but keep for safety
        raise UserError(f"provider not configured: '{self.provider}'")

    def generate(self, topic: str, blog_name: str = 'ITSulu Insights',
                 tone: str = 'professional', enabled_platforms: Optional[dict] = None,
                 system_prompt: Optional[str] = None,
                 user_prompt: Optional[str] = None) -> LLMResponse:
        """
        Execute a single structured LLM call and return an LLMResponse.

        :param topic: The topic/focus for the blog post.
        :param blog_name: Name of the target blog (used in prompt).
        :param tone: Writing tone hint for the LLM.
        :param enabled_platforms: dict of {platform_key: bool}.
        :param system_prompt: Override the system prompt entirely.
        :param user_prompt: Override the user prompt entirely (skips template building).
        :return: LLMResponse dataclass.
        :raises UserError: on configuration error or LLM failure.
        """
        _logger.info(
            "LLMRouter.generate: provider=%s model=%s topic=%r",
            self.provider, self.model, (topic or '')[:80]
        )
        sys_prompt = system_prompt or self._get_system_prompt()
        usr_prompt = user_prompt or self._build_prompt(
            topic=topic,
            blog_name=blog_name,
            tone=tone,
            enabled_platforms=enabled_platforms or {},
        )

        # The timer covers provider construction as well as the call itself.
        start = time.monotonic()
        provider = self._build_provider()
        raw_text, tokens_used = provider.generate(
            system_prompt=sys_prompt,
            user_prompt=usr_prompt,
        )
        elapsed = time.monotonic() - start
        _logger.info(
            "LLMRouter.generate: completed in %.1fs, tokens=%d",
            elapsed, tokens_used
        )
        return self._parse_response(raw_text=raw_text, tokens_used=tokens_used)