meinantrag/meinantrag.py

#!/usr/bin/env python3
"""
MeinAntrag - A web application to generate prefilled government requests
"""

import falcon
import json
import requests
from urllib.parse import urlencode, parse_qs
import os
import sys
from jinja2 import Environment, FileSystemLoader
import google.generativeai as genai
import re
from io import BytesIO
from datetime import datetime

try:
    from docx import Document
    from docx.shared import Pt, Inches
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

# Setup logging
# The log level is taken from the LOG_LEVEL environment variable (default
# "INFO") and upper-cased so that e.g. "debug" is accepted as well.
import logging
logging.basicConfig(
    level=os.environ.get('LOG_LEVEL', 'INFO').upper(),
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Module-level logger shared by the functions and resources in this file.
logger = logging.getLogger(__name__)

# Base URL used to build canonical links, the robots.txt sitemap reference and
# the sitemap entries; overridable through MEINANTRAG_BASE_URL.
SITE_BASE_URL = os.environ.get("MEINANTRAG_BASE_URL", "http://localhost:8000")


def resolve_env_value(value):
    """
    Resolve a configuration value that may point at a file.

    A value starting with ``file:`` is treated as a path; the file is read as
    UTF-8 and its content is returned with surrounding whitespace stripped.
    Any other value (including ``None`` and the empty string) is returned
    unchanged.

    Returns ``None`` when the referenced file cannot be read or decoded; the
    failure is logged as a warning instead of being raised.
    """
    if not value or not value.startswith("file:"):
        return value

    file_path = value[len("file:"):]
    try:
        # Explicit encoding: do not depend on the process locale.
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read().strip()
    except (OSError, UnicodeDecodeError) as e:
        # An undecodable file is handled like an unreadable one.
        logger.warning("Failed to read credential file %s: %s", file_path, e)
        return None


class BaseTemplateResource:
    """Shared base for resources that render templates from disk."""

    def _get_template_dir(self):
        """Return the directory the templates should be loaded from.

        Candidates are tried in this order; the first existing one wins:

        1. the path in the ``MEINANTRAG_TEMPLATES_DIR`` environment variable,
        2. a ``templates`` directory next to this file,
        3. when this file lives below ``/nix/store/``: first
           ``../share/meinantrag/templates`` relative to this file, then
           ``share/meinantrag/templates`` below the store entry itself,
        4. the first ``templates`` directory found while walking
           ``/nix/store`` that contains an ``index.html``.

        If nothing matches, the (non-existing) ``templates`` path next to this
        file is returned.
        """
        override = os.environ.get("MEINANTRAG_TEMPLATES_DIR")
        if override and os.path.exists(override):
            return override

        here = os.path.dirname(os.path.abspath(__file__))
        local_templates = os.path.join(here, "templates")
        if os.path.exists(local_templates):
            return local_templates

        # Packaged (Nix) layouts; failures here are deliberately ignored so
        # the remaining fallbacks still get their turn.
        try:
            if "/nix/store/" in here:
                relative_share = os.path.join(
                    here, "..", "share", "meinantrag", "templates"
                )
                if os.path.exists(relative_share):
                    return relative_share

                # First path component after the store prefix, i.e. the
                # store entry this file belongs to.
                store_entry = here.partition("/nix/store/")[2].split("/")[0]
                entry_share = os.path.join(
                    f"/nix/store/{store_entry}", "share", "meinantrag", "templates"
                )
                if os.path.exists(entry_share):
                    return entry_share
        except Exception:
            pass

        # Last resort: scan the whole store for a templates directory that
        # ships an index.html.
        for current, subdirs, _files in os.walk("/nix/store"):
            if "templates" not in subdirs:
                continue
            found = os.path.join(current, "templates")
            if "index.html" in os.listdir(found):
                return found

        return local_templates


class MeinAntragApp(BaseTemplateResource):
    """Falcon resource serving the landing page (``index.html``)."""

    def __init__(self):
        """Resolve the template directory and create the Jinja2 environment."""
        template_dir = self._get_template_dir()
        # Use the module logger (not print) so the message follows the
        # configured log format and LOG_LEVEL like the rest of the file.
        logger.info("Using template directory: %s", template_dir)
        self.jinja_env = Environment(loader=FileSystemLoader(template_dir))

    def on_get(self, req, resp):
        """Serve the main page as UTF-8 HTML.

        The template receives the page title, the meta description and the
        canonical URL built from ``SITE_BASE_URL``.
        """
        template = self.jinja_env.get_template("index.html")
        resp.content_type = "text/html; charset=utf-8"
        resp.text = template.render(
            meta_title="MeinAntrag – Anträge an die Karlsruher Stadtverwaltung",
            meta_description="Erstelle einfach Vorlagen für Anfragen oder Anträge an die Karlsruher Stadtverwaltung zu deinem persönlichen Thema und schicke diese direkt an eine Stadtratsfraktion!",
            canonical_url=f"{SITE_BASE_URL}/",
        )


class ImpressumResource(BaseTemplateResource):
    """Falcon resource serving the legal notice page (``impressum.html``)."""

    def __init__(self):
        """Create a Jinja2 environment rooted at the resolved template directory."""
        self.jinja_env = Environment(
            loader=FileSystemLoader(self._get_template_dir())
        )

    def on_get(self, req, resp):
        """Render the Impressum page as UTF-8 HTML.

        ``noindex=True`` is handed to the template; presumably the template
        uses it to emit a robots "noindex" hint — verify in impressum.html.
        """
        page = self.jinja_env.get_template("impressum.html")
        context = {
            "meta_title": "Impressum – MeinAntrag",
            "meta_description": "Impressum für MeinAntrag.",
            "canonical_url": f"{SITE_BASE_URL}/impressum",
            "noindex": True,
        }
        resp.content_type = "text/html; charset=utf-8"
        resp.text = page.render(**context)


class DatenschutzResource(BaseTemplateResource):
    """Falcon resource serving the privacy policy page (``datenschutz.html``)."""

    def __init__(self):
        """Create a Jinja2 environment rooted at the resolved template directory."""
        self.jinja_env = Environment(
            loader=FileSystemLoader(self._get_template_dir())
        )

    def on_get(self, req, resp):
        """Render the privacy policy page as UTF-8 HTML.

        ``noindex=True`` is handed to the template; presumably the template
        uses it to emit a robots "noindex" hint — verify in datenschutz.html.
        """
        page = self.jinja_env.get_template("datenschutz.html")
        context = {
            "meta_title": "Datenschutz – MeinAntrag",
            "meta_description": "Datenschutzerklärung für MeinAntrag. Keine Cookies, es werden nur Anfragen an die FragDenStaat-API gestellt.",
            "canonical_url": f"{SITE_BASE_URL}/datenschutz",
            "noindex": True,
        }
        resp.content_type = "text/html; charset=utf-8"
        resp.text = page.render(**context)


class GenerateAntragResource:
    """Falcon resource that drafts a council motion/inquiry via the Gemini API.

    ``POST`` expects the form fields ``anliegen`` (required, the citizen's
    concern) and ``party_id`` (optional) and answers with a JSON document
    containing the generated title, demand, justification and e-mail body.
    """

    def __init__(self):
        """Configure the Gemini client from ``GOOGLE_GEMINI_API_KEY``.

        The variable may hold the key itself or a ``file:<path>`` reference
        (see ``resolve_env_value``). Without a usable key ``self.model`` is
        ``None`` and ``on_post`` answers with HTTP 500.
        """
        api_key = resolve_env_value(os.environ.get("GOOGLE_GEMINI_API_KEY"))
        if api_key:
            genai.configure(api_key=api_key)
            self.model = genai.GenerativeModel("gemini-2.5-pro")
        else:
            self.model = None

    def _remove_markdown(self, text):
        """Remove common Markdown markup from *text* and strip the result.

        Handles bold/italic markers (``**``, ``*``, ``__``, ``_``), a leading
        ``/`` or ``#`` run at the start of a line, inline code and links
        (the link text is kept). Falsy input is returned unchanged.

        NOTE(review): this helper is not called by ``on_post``; the ``_..._``
        rule also matches underscores inside identifiers or file names.
        """
        if not text:
            return text

        # Remove bold/italic markdown: **text** or *text* or __text__ or _text_
        text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
        text = re.sub(r"\*(.+?)\*", r"\1", text)
        text = re.sub(r"__(.+?)__", r"\1", text)
        text = re.sub(r"_(.+?)_", r"\1", text)

        # Remove line-leading markers: "/" or one or more "#"
        text = re.sub(r"^/\s*", "", text, flags=re.MULTILINE)
        text = re.sub(r"^#+\s*", "", text, flags=re.MULTILINE)

        # Remove other markdown elements
        text = re.sub(r"`(.+?)`", r"\1", text)  # Code
        text = re.sub(r"\[(.+?)\]\(.+?\)", r"\1", text)  # Links

        return text.strip()

    @staticmethod
    def _extract_json_text(text):
        """Return *text* without surrounding whitespace or a Markdown code fence.

        The text is stripped first, so a fence preceded by blank lines or
        spaces is still recognised. An optional ``json`` language tag after
        the opening fence is accepted in any letter case.
        """
        text = (text or "").strip()
        if not text.startswith("```"):
            return text

        text = text[3:]
        if text[:4].lower() == "json":
            text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def on_post(self, req, resp):
        """Generate the motion text for the submitted concern.

        Responses (always JSON):
        - 500 when no Gemini model is configured or anything fails,
        - 400 when ``anliegen`` is missing or blank,
        - 200 with ``success``, ``title``, ``demand``, ``justification``,
          ``email_body`` and ``party_name`` (the submitted ``party_id``).
        """
        try:
            if not self.model:
                resp.status = falcon.HTTP_500
                resp.content_type = "application/json"
                resp.text = json.dumps(
                    {"success": False, "error": "Gemini API key not configured"}
                )
                return

            # Method 1: parameters Falcon has already parsed.
            anliegen = req.get_param("anliegen", default="") or ""
            party_id = req.get_param("party_id", default="") or ""

            # Method 2: parse the URL-encoded request body ourselves.
            if not anliegen:
                try:
                    # Use bounded_stream if available, otherwise stream
                    stream = getattr(req, "bounded_stream", req.stream)
                    form = parse_qs(stream.read().decode("utf-8"))
                    anliegen = form.get("anliegen", [""])[0]
                    # Keep a party_id that was already found via get_param.
                    party_id = form.get("party_id", [party_id])[0]
                except Exception as e:
                    # Best effort: an unreadable body ends in the 400 below.
                    logger.warning("Error parsing form data: %s", e)

            anliegen = anliegen.strip()
            party_id = party_id.strip()

            if not anliegen:
                resp.status = falcon.HTTP_400
                resp.content_type = "application/json"
                resp.text = json.dumps(
                    {"success": False, "error": "Anliegen-Feld ist erforderlich"}
                )
                return

            # Create prompt for Gemini
            prompt = """Erzeuge aus dem folgenden Anliegen-Text je nach Anliegen eine Anfrage oder einen Antrag an die Karlsruher Stadtverwaltung im Namen einer Stadtratsfraktion.

Der Antrag soll im sachlichen, offiziellen Ton einer Fraktion verfasst sein - KEINE persönliche Anrede, KEINE "ich" oder "wir" Formulierungen. Verwende die dritte Person oder Passiv-Formulierungen.

Gib das Ergebnis ALS GÜLTIGES JSON mit den folgenden Keys zurück:
{
  "antragstitel": "PRÄGNANTER Titel (max. 8-10 Wörter)",
  "forderung": "Forderungstext oder Liste von Forderungen",
  "begruendung": "Begründung/Sachverhalt",
  "mail_recipient": "Empfänger-E-Mail (z.B. 'fraktion@example.com')",
  "mail_subject": "Betreff für die E-Mail",
  "mail_body": "Höflicher E-Mail-Text in der ersten Person",
  "filename": "Dateiname (z.B. 'antragsentwurf_...docx')"
}

WICHTIG:
- ANTWORTE NUR MIT GÜLTIGEM JSON, KEINERLEI ERKLÄRUNGEN ODER ZUSÄTZLICHEN TEXT!
- Der Titel soll PRÄGNANT, EINFACH und EINPRÄGSAM sein - maximal 8-10 Wörter.
- Der Dateiname soll knapp sein, z.B. 'antragsentwurf_...docx'.
- Keine Markdown-Formatierung im Text.
- Sachlicher, offizieller Ton einer Fraktion, keine persönlichen Formulierungen im Antrag.
- Der E-Mail-Text soll persönlich sein (ich-Form).

Anliegen: """
            prompt += anliegen

            # Call Gemini API
            response = self.model.generate_content(prompt)
            raw_text = response.text
            logger.debug("Gemini raw response: %s", raw_text)

            # The model sometimes wraps its JSON in a Markdown code fence.
            gemini_data = json.loads(self._extract_json_text(raw_text))
            logger.debug("Parsed Gemini data: %s", gemini_data)
            if not isinstance(gemini_data, dict):
                # Fail with a clear message instead of an AttributeError.
                raise ValueError("Gemini response is not a JSON object")

            # A JSON null for mail_body must not break the whole response.
            email_text = str(gemini_data.get("mail_body") or "").strip()

            # NOTE(review): the prompt allows "forderung" to be a list; it is
            # passed through unchanged here — confirm the client handles that.
            resp.content_type = "application/json"
            resp.text = json.dumps(
                {
                    "success": True,
                    "title": gemini_data.get("antragstitel", ""),
                    "demand": gemini_data.get("forderung", ""),
                    "justification": gemini_data.get("begruendung", ""),
                    "email_body": email_text,
                    "party_name": party_id,
                }
            )

        except Exception as e:
            logger.exception("Error generating antrag: %s", e)
            resp.status = falcon.HTTP_500
            resp.content_type = "application/json"
            # NOTE(review): the raw exception text is returned to the client.
            resp.text = json.dumps({"success": False, "error": str(e)})


class GenerateWordResource:
    """Falcon resource that fills the Word template and returns a .docx file.

    ``POST`` expects the form fields ``title``, ``demand``, ``justification``
    and ``party_name`` and answers with the generated document as attachment.
    """

    def __init__(self):
        """Locate ``antrag_vorlage.docx``.

        Looks in ``assets`` next to this file first and falls back to
        ``../assets``; the fallback path is stored even if it does not exist
        (``_generate_word`` then starts from an empty document).
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        self.template_path = os.path.join(script_dir, "assets", "antrag_vorlage.docx")
        # Fallback if not in assets
        if not os.path.exists(self.template_path):
            assets_dir = os.path.join(script_dir, "..", "assets")
            self.template_path = os.path.join(assets_dir, "antrag_vorlage.docx")

    @staticmethod
    def _text_runs(text):
        """Split *text* into the run texts to add to a paragraph.

        Every non-blank line is stripped and kept; consecutive lines are
        separated by a ``"\\n"`` entry. Blank lines are dropped, and no
        separator is emitted before the first or after the last line, so a
        trailing newline in the input does not leave a dangling line break.
        """
        stripped = (line.strip() for line in (text or "").split("\n"))
        lines = [line for line in stripped if line]

        runs = []
        for index, line in enumerate(lines):
            if index:
                runs.append("\n")
            runs.append(line)
        return runs

    @staticmethod
    def _replace_in_runs(paragraph, placeholder, replacement):
        """Replace *placeholder* inside each run of *paragraph* that contains it.

        Working run by run keeps the formatting of the run; a placeholder
        that is split across several runs is not matched.
        """
        for run in paragraph.runs:
            if placeholder in run.text:
                run.text = run.text.replace(placeholder, replacement)

    @staticmethod
    def _replace_in_element(element, search_text, replace_text):
        """Replace *search_text* in the text and tail of *element* and all descendants."""
        for node in element.iter():
            if node.text and search_text in node.text:
                node.text = node.text.replace(search_text, replace_text)
            if node.tail and search_text in node.tail:
                node.tail = node.tail.replace(search_text, replace_text)

    def _set_multiline(self, paragraph, text):
        """Replace the content of *paragraph* with *text*, one run per line.

        The ``"\\n"`` separator runs are expected to be rendered as line
        breaks by python-docx (see its ``Run.text`` documentation).
        """
        paragraph.clear()
        for piece in self._text_runs(text):
            paragraph.add_run(piece)

    def _fill_paragraph(
        self,
        paragraph,
        title,
        demand,
        justification,
        party_name,
        current_date,
        *,
        replace_party,
    ):
        """Fill the placeholders found in one paragraph.

        Placeholders: ``FRAKTION`` (only when *replace_party* is true),
        ``XX.XX.XXXX`` (date), ``ANTRAGSTITEL`` (bold title), ``ANTRAGSTEXT``
        (demand) and ``BEGRÜNDUNGSTEXT`` (justification). Detection uses the
        paragraph text as it was before any replacement, so inserted text is
        never re-scanned by this method.
        """
        full_text = paragraph.text
        if not full_text:
            return

        if replace_party and "FRAKTION" in full_text:
            self._replace_in_runs(paragraph, "FRAKTION", party_name)

        if "XX.XX.XXXX" in full_text:
            self._replace_in_runs(paragraph, "XX.XX.XXXX", current_date)

        if "ANTRAGSTITEL" in full_text:
            paragraph.clear()
            paragraph.add_run(title).bold = True

        if "ANTRAGSTEXT" in full_text:
            self._set_multiline(paragraph, demand)

        if "BEGRÜNDUNGSTEXT" in full_text:
            self._set_multiline(paragraph, justification)

    def _generate_word(self, title, demand, justification, party_name=""):
        """Generate a Word document using the template.

        Loads the template (or an empty document if the template file is
        missing), fills the placeholders in body paragraphs, in the whole
        document XML plus header/footer parts (``FRAKTION`` only) and in
        top-level table cells, and returns the result as a ``BytesIO``
        positioned at the start.
        """
        if os.path.exists(self.template_path):
            doc = Document(self.template_path)
        else:
            # Fallback: create new document if template not found
            doc = Document()

        # Current date in DD.MM.YYYY format
        current_date = datetime.now().strftime("%d.%m.%Y")

        # NOTE(review): body paragraphs blank the FRAKTION placeholder even
        # for an empty party name, while tables, text boxes, headers and
        # footers keep it — behaviour preserved, confirm which is intended.
        for paragraph in doc.paragraphs:
            self._fill_paragraph(
                paragraph,
                title,
                demand,
                justification,
                party_name,
                current_date,
                replace_party=True,
            )

        # Text boxes are not reachable through doc.paragraphs, so FRAKTION is
        # additionally replaced in the raw XML of the document and of the
        # header/footer parts.
        # NOTE(review): this pass runs after the user's text was inserted, so
        # a literal "FRAKTION" in that text (or in party_name) is substituted
        # again — confirm whether that can occur in practice.
        try:
            document_part = doc.part
            if party_name:
                self._replace_in_element(
                    document_part.element, "FRAKTION", party_name
                )
                for rel in document_part.rels.values():
                    if "header" in rel.target_ref or "footer" in rel.target_ref:
                        try:
                            self._replace_in_element(
                                rel.target_part.element, "FRAKTION", party_name
                            )
                        except Exception as e:
                            # Best effort: skip parts that cannot be edited.
                            logger.debug(
                                "Skipping header/footer part %s: %s",
                                rel.target_ref,
                                e,
                            )
        except Exception as e:
            # If text box access fails, continue with other replacements
            logger.warning("Could not replace in text boxes: %s", e)

        # Also check tables for placeholders
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        self._fill_paragraph(
                            paragraph,
                            title,
                            demand,
                            justification,
                            party_name,
                            current_date,
                            replace_party=bool(party_name),
                        )

        # Save to buffer
        buffer = BytesIO()
        doc.save(buffer)
        buffer.seek(0)
        return buffer

    def on_post(self, req, resp):
        """Generate Word document from form data.

        Answers with the .docx as attachment ``antrag.docx``; when python-docx
        is missing or anything fails, answers with HTTP 500 and a JSON error.
        """
        try:
            if not DOCX_AVAILABLE:
                resp.status = falcon.HTTP_500
                resp.content_type = "application/json"
                resp.text = json.dumps(
                    {"success": False, "error": "python-docx not installed"}
                )
                return

            # Parameters Falcon has already parsed
            title = req.get_param("title", default="") or ""
            demand = req.get_param("demand", default="") or ""
            justification = req.get_param("justification", default="") or ""
            party_name = req.get_param("party_name", default="") or ""

            # If empty, parse the URL-encoded request body ourselves
            if not title:
                try:
                    stream = getattr(req, "bounded_stream", req.stream)
                    form = parse_qs(stream.read().decode("utf-8"))
                    title = form.get("title", [""])[0]
                    # Keep values already found via get_param as defaults.
                    demand = form.get("demand", [demand])[0]
                    justification = form.get("justification", [justification])[0]
                    party_name = form.get("party_name", [party_name])[0]
                except Exception as e:
                    # Best effort: continue with whatever was found so far.
                    logger.warning("Error parsing form data: %s", e)

            # Generate Word document
            word_buffer = self._generate_word(title, demand, justification, party_name)

            # Return Word document
            resp.content_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            resp.set_header("Content-Disposition", 'attachment; filename="antrag.docx"')
            resp.data = word_buffer.read()

        except Exception as e:
            logger.exception("Error generating Word document: %s", e)
            resp.status = falcon.HTTP_500
            resp.content_type = "application/json"
            resp.text = json.dumps({"success": False, "error": str(e)})


class RobotsResource:
    """Falcon resource serving ``robots.txt``."""

    def on_get(self, req, resp):
        """Allow all user agents on all paths and point to the sitemap."""
        resp.content_type = "text/plain; charset=utf-8"
        robots_lines = (
            "User-agent: *",
            "Allow: /",
            f"Sitemap: {SITE_BASE_URL}/sitemap.xml",
        )
        # Every line, including the last one, ends with a newline.
        resp.text = "".join(f"{line}\n" for line in robots_lines)


class SitemapResource:
    """Falcon resource serving ``sitemap.xml``."""

    def on_get(self, req, resp):
        """Return a sitemap listing the start, Impressum and Datenschutz pages.

        ``SITE_BASE_URL`` comes from the environment, so it is XML-escaped
        before being placed into the ``<loc>`` elements; for a base URL
        without ``&``, ``<`` or ``>`` the output is unchanged.
        """
        from xml.sax.saxutils import escape

        base_url = escape(SITE_BASE_URL)
        url_entries = "\n".join(
            f"  <url><loc>{base_url}{path}</loc></url>"
            for path in ("/", "/impressum", "/datenschutz")
        )

        resp.content_type = "application/xml; charset=utf-8"
        resp.text = (
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
            f"{url_entries}\n"
            "</urlset>\n"
        )


# Create Falcon application
# (module-level object; the __main__ block below serves it via wsgiref)
app = falcon.App()

# Discover static assets directory
# Order: MEINANTRAG_STATIC_DIR, then "assets" next to this file, then "assets"
# in the current working directory, then ../share/meinantrag/assets.
STATIC_DIR = os.environ.get("MEINANTRAG_STATIC_DIR")
if not STATIC_DIR:
    # Prefer local assets folder in development (relative to this file)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(script_dir, "assets")
    if os.path.isdir(candidate):
        STATIC_DIR = candidate
    else:
        # Try current working directory (useful when running packaged binary from project root)
        cwd_candidate = os.path.join(os.getcwd(), "assets")
        if os.path.isdir(cwd_candidate):
            STATIC_DIR = cwd_candidate
        else:
            # Fallback to packaged location under share
            # (not checked for existence here; see the isdir guard below)
            STATIC_DIR = os.path.join(script_dir, "..", "share", "meinantrag", "assets")

# Add routes
# Resources are instantiated once at import time and shared by all requests.
meinantrag = MeinAntragApp()
impressum = ImpressumResource()
datenschutz = DatenschutzResource()
generate_antrag = GenerateAntragResource()
generate_word = GenerateWordResource()
robots = RobotsResource()
sitemap = SitemapResource()

app.add_route("/", meinantrag)
app.add_route("/impressum", impressum)
app.add_route("/datenschutz", datenschutz)
app.add_route("/api/generate-antrag", generate_antrag)
app.add_route("/api/generate-word", generate_word)
app.add_route("/robots.txt", robots)
app.add_route("/sitemap.xml", sitemap)

# Static file route
# Only registered when the directory exists, so a missing assets folder does
# not add a /static route at all.
if STATIC_DIR and os.path.isdir(STATIC_DIR):
    app.add_static_route("/static", STATIC_DIR)

# Development entry point: serve the app with the standard library's WSGI
# reference server on localhost:8000 when this file is run as a script.
if __name__ == "__main__":
    import wsgiref.simple_server

    print("Starting MeinAntrag web application...")
    print("Open your browser and navigate to: http://localhost:8000")
    print(f"Serving static assets from: {STATIC_DIR}")

    httpd = wsgiref.simple_server.make_server("localhost", 8000, app)
    # Blocks until the process is interrupted.
    httpd.serve_forever()