Skip to content

Agents & Services

API reference – agents and services

This section is generated automatically from Python docstrings using mkdocstrings.

Agents

Bases: BaseAgent

Agent for generating personalized feedback to candidates.

Source code in agents/feedback_agent.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class FeedbackAgent(BaseAgent):
    """Agent for generating personalized feedback to candidates.

    Calls Azure OpenAI directly (no LangChain) and expects the model to
    return a single JSON object of the form ``{"html_content": "<html...>"}``
    containing the complete feedback email.
    """

    def __init__(
        self,
        model_name: str = settings.azure_openai_gpt_deployment,
        temperature: float = 0.7,
        api_key: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
    ):
        """
        Initialize Feedback Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name to use for generation.
            temperature: Sampling temperature (0.7 favors varied phrasing).
            api_key: Optional explicit API key; otherwise resolved by BaseAgent.
            timeout: Per-request timeout in seconds.
            max_retries: Retry count for transient API errors.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions (we no longer rely on LangChain parsers)
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "html_content": "<!DOCTYPE html>\\n<html>...complete email HTML...</html>"\n'
            "}\n\n"
            "- Do NOT return a JSON schema, OpenAPI schema, or description of fields.\n"
            "- Do NOT wrap the JSON in markdown code fences.\n"
            "- Do NOT include any extra top-level keys besides html_content.\n"
            "- The html_content value must be a valid HTML email as a single string."
        )

        # Store prompt template for later use
        self.prompt_template = FEEDBACK_GENERATION_PROMPT

    def generate_feedback(
        self,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        output_format: FeedbackFormat = FeedbackFormat.HTML,
        candidate_id: Optional[int] = None,
        recruitment_stage: Optional[str] = None,
    ) -> CandidateFeedback:
        """
        Generate personalized feedback for a candidate using Azure OpenAI.

        Args:
            cv_data: Parsed CV data for the candidate.
            hr_feedback: HR evaluation the email is based on.
            job_offer: Optional job offer context for the prompt.
            output_format: Desired output format (HTML by default).
            candidate_id: Optional candidate id used for response tracking.
            recruitment_stage: Optional stage label; defaults to
                "Pierwsza selekcja" when not provided.

        Returns:
            CandidateFeedback holding the generated HTML email.

        Raises:
            ValueError: If the model returns an empty response.
            RuntimeError: If the model output cannot be parsed into feedback.
        """
        # Format all inputs as prompt-ready strings.
        cv_data_str = self._format_cv_data(cv_data)
        # Extraction note is included only for feedback generation.
        hr_feedback_str = self._format_hr_feedback(hr_feedback, include_extraction_note=True)
        job_offer_str = (
            self._format_job_offer(job_offer)
            if job_offer
            else "No job offer information provided"
        )

        candidate_name = cv_data.full_name
        format_str = (
            output_format.value if isinstance(output_format, FeedbackFormat) else str(output_format)
        )
        recruitment_stage_str = recruitment_stage or "Pierwsza selekcja"

        # Build prompt with format instructions and output format
        prompt_text = self.prompt_template.format(
            cv_data=cv_data_str,
            hr_feedback=hr_feedback_str,
            job_offer=job_offer_str,
            candidate_name=candidate_name,
            recruitment_stage=recruitment_stage_str,
            format_instructions=self.format_instructions,
            output_format=format_str,
        )

        logger.info(
            f"Generating feedback for candidate: {candidate_name} (stage: {recruitment_stage_str})"
        )

        # Call LLM via adapter; transport errors propagate to the caller.
        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=4000,
        )

        if not raw_text:
            raise ValueError("Empty response from model")

        # Track model response (with token usage and cost). A tracking failure
        # must not discard an otherwise valid model output, so it is only
        # logged (previously it triggered a redundant fallback parse path).
        try:
            self._save_model_response(
                agent_type="feedback_generator",
                input_data={
                    "cv_data": cv_data_str,
                    "hr_feedback": hr_feedback_str,
                    "job_offer": job_offer_str,
                    "candidate_name": candidate_name,
                    "recruitment_stage": recruitment_stage_str,
                },
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata={"temperature": self.temperature},
                response=response,  # Pass response to extract tokens and costs
            )
        except Exception as tracking_error:
            logger.warning(f"Failed to track model response: {tracking_error}")

        # Parse JSON into CandidateFeedback. We intentionally do NOT re-run
        # the same parser after a parse failure (it would fail again
        # deterministically); instead we wrap the error with context.
        try:
            feedback = self._parse_feedback_from_text(raw_text)
        except Exception as parse_error:
            raise RuntimeError(f"Failed to parse feedback: {parse_error}") from parse_error

        logger.info(f"Feedback generated successfully for {candidate_name}")
        return feedback

    def _parse_feedback_from_text(self, text: str) -> CandidateFeedback:
        """
        Parse CandidateFeedback from raw model text, handling common JSON issues.

        Falls back to treating the whole text as HTML when no JSON object can
        be extracted.

        Raises:
            ValueError: On empty input or a missing/invalid html_content field.
        """
        if not text:
            raise ValueError("Empty response from model")

        # Try to parse JSON
        try:
            data = parse_json_safe(text, fallback_to_extraction=True)
        except ValueError:
            # No JSON at all – treat full text as HTML
            logger.warning("No JSON detected in model output, using raw text as HTML.")
            return CandidateFeedback(html_content=self._wrap_html_if_needed(text))

        # parse_json_safe may yield a non-object (e.g. a JSON array); treat
        # that the same as "no JSON" instead of crashing on .get().
        if not isinstance(data, dict):
            logger.warning("Model output JSON is not an object, using raw text as HTML.")
            return CandidateFeedback(html_content=self._wrap_html_if_needed(text))

        # Validate html_content
        html = data.get("html_content")
        if not html or not isinstance(html, str):
            raise ValueError("html_content field is required and must be a non-empty string")

        return CandidateFeedback(html_content=html)

    @staticmethod
    def _wrap_html_if_needed(content: str) -> str:
        """If content is not full HTML, wrap it in a minimal HTML template."""
        lower = content.lower()
        if "<html" in lower or "<body" in lower:
            return content

        return (
            "<!DOCTYPE html>\n"
            '<html lang="pl">\n'
            "<head>\n"
            '    <meta charset="UTF-8">\n'
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
            "    <title>Odpowiedź na aplikację</title>\n"
            "</head>\n"
            '<body style="font-family: Arial, sans-serif; line-height: 1.6; color: #333; '
            'max-width: 600px; margin: 0 auto; padding: 20px;">\n'
            f"    {content}\n"
            "</body>\n"
            "</html>"
        )

__init__(model_name=settings.azure_openai_gpt_deployment, temperature=0.7, api_key=None, timeout=120, max_retries=2)

Initialize Feedback Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/feedback_agent.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    model_name: str = settings.azure_openai_gpt_deployment,
    temperature: float = 0.7,
    api_key: Optional[str] = None,
    timeout: int = 120,
    max_retries: int = 2,
):
    """
    Initialize Feedback Agent using Azure OpenAI SDK (no LangChain).
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Hand-written output contract; no LangChain parser is involved anymore.
    instruction_lines = [
        "Return ONLY a single JSON object with the following structure:",
        "{",
        '  "html_content": "<!DOCTYPE html>\\n<html>...complete email HTML...</html>"',
        "}",
        "",
        "- Do NOT return a JSON schema, OpenAPI schema, or description of fields.",
        "- Do NOT wrap the JSON in markdown code fences.",
        "- Do NOT include any extra top-level keys besides html_content.",
        "- The html_content value must be a valid HTML email as a single string.",
    ]
    self.format_instructions = "\n".join(instruction_lines)

    # Keep the generation prompt around for later use by generate_feedback().
    self.prompt_template = FEEDBACK_GENERATION_PROMPT

generate_feedback(cv_data, hr_feedback, job_offer=None, output_format=FeedbackFormat.HTML, candidate_id=None, recruitment_stage=None)

Generate personalized feedback for a candidate using Azure OpenAI.

Source code in agents/feedback_agent.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def generate_feedback(
    self,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    output_format: FeedbackFormat = FeedbackFormat.HTML,
    candidate_id: Optional[int] = None,
    recruitment_stage: Optional[str] = None,
) -> CandidateFeedback:
    """
    Generate personalized feedback for a candidate using Azure OpenAI.

    Args:
        cv_data: Parsed CV data for the candidate.
        hr_feedback: HR evaluation the email is based on.
        job_offer: Optional job offer context for the prompt.
        output_format: Desired output format (HTML by default).
        candidate_id: Optional candidate id used for response tracking.
        recruitment_stage: Optional stage label; defaults to
            "Pierwsza selekcja" when not provided.

    Returns:
        CandidateFeedback holding the generated HTML email.

    Raises:
        ValueError: If the model returns an empty response.
        RuntimeError: If the model output cannot be parsed into feedback.
    """
    # Format all inputs as prompt-ready strings.
    cv_data_str = self._format_cv_data(cv_data)
    # Extraction note is included only for feedback generation.
    hr_feedback_str = self._format_hr_feedback(hr_feedback, include_extraction_note=True)
    job_offer_str = (
        self._format_job_offer(job_offer)
        if job_offer
        else "No job offer information provided"
    )

    candidate_name = cv_data.full_name
    format_str = (
        output_format.value if isinstance(output_format, FeedbackFormat) else str(output_format)
    )
    recruitment_stage_str = recruitment_stage or "Pierwsza selekcja"

    # Build prompt with format instructions and output format
    prompt_text = self.prompt_template.format(
        cv_data=cv_data_str,
        hr_feedback=hr_feedback_str,
        job_offer=job_offer_str,
        candidate_name=candidate_name,
        recruitment_stage=recruitment_stage_str,
        format_instructions=self.format_instructions,
        output_format=format_str,
    )

    logger.info(
        f"Generating feedback for candidate: {candidate_name} (stage: {recruitment_stage_str})"
    )

    # Call LLM via adapter; transport errors propagate to the caller.
    raw_text, response = self._chat(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a careful JSON-producing assistant. "
                    "You must follow the format_instructions exactly."
                ),
            },
            {"role": "user", "content": prompt_text},
        ],
        max_completion_tokens=4000,
    )

    if not raw_text:
        raise ValueError("Empty response from model")

    # Track model response (with token usage and cost). A tracking failure
    # must not discard an otherwise valid model output, so it is only
    # logged (previously it triggered a redundant fallback parse path).
    try:
        self._save_model_response(
            agent_type="feedback_generator",
            input_data={
                "cv_data": cv_data_str,
                "hr_feedback": hr_feedback_str,
                "job_offer": job_offer_str,
                "candidate_name": candidate_name,
                "recruitment_stage": recruitment_stage_str,
            },
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata={"temperature": self.temperature},
            response=response,  # Pass response to extract tokens and costs
        )
    except Exception as tracking_error:
        logger.warning(f"Failed to track model response: {tracking_error}")

    # Parse JSON into CandidateFeedback. We intentionally do NOT re-run the
    # same parser after a parse failure (it would fail again
    # deterministically); instead we wrap the error with context.
    try:
        feedback = self._parse_feedback_from_text(raw_text)
    except Exception as parse_error:
        raise RuntimeError(f"Failed to parse feedback: {parse_error}") from parse_error

    logger.info(f"Feedback generated successfully for {candidate_name}")
    return feedback

Bases: BaseAgent

Agent for validating candidate feedback emails.

Source code in agents/validation_agent.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class FeedbackValidatorAgent(BaseAgent):
    """Agent for validating candidate feedback emails.

    Asks the model to return a ValidationResult-shaped JSON object. Any
    failure in the validation pipeline yields a rejection, so the system
    fails safe rather than sending an unvalidated email.
    """

    def __init__(
        self,
        model_name: str = "gpt-4.1-nano",
        temperature: float = 0.0,
        api_key: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
    ):
        """
        Initialize Feedback Validator Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name for validation.
            temperature: Sampling temperature (0.0 for deterministic checks).
            api_key: Optional explicit API key; otherwise resolved by BaseAgent.
            timeout: Per-request timeout in seconds.
            max_retries: Retry count for transient API errors.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions describing ValidationResult JSON schema
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "status": "approved" | "rejected",\n'
            '  "is_approved": true | false,\n'
            '  "reasoning": "short explanation",\n'
            '  "issues_found": ["issue 1", "issue 2"],\n'
            '  "ethical_concerns": ["concern 1", "concern 2"],\n'
            '  "factual_errors": ["error 1", "error 2"],\n'
            '  "suggestions": ["suggestion 1", "suggestion 2"]\n'
            "}\n\n"
            "- Do NOT return a JSON schema or description.\n"
            "- Do NOT wrap the JSON in markdown code fences.\n"
            "- All list fields must be valid JSON arrays of strings."
        )

        # Store prompt template
        self.prompt_template = VALIDATION_PROMPT

    def validate_feedback(
        self,
        html_content: str,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        candidate_id: Optional[int] = None,
        validation_number: Optional[int] = None,
    ) -> ValidationResult:
        """
        Validate feedback email using Azure OpenAI (no LangChain).

        Never raises: if validation itself fails for any reason, a REJECTED
        ValidationResult is returned so the email is not sent unchecked.

        Args:
            html_content: The candidate feedback email HTML to validate.
            cv_data: Parsed CV data used as factual reference.
            hr_feedback: HR evaluation used as factual reference.
            job_offer: Optional job offer context.
            candidate_id: Optional candidate id for response tracking.
            validation_number: Optional attempt counter stored in metadata.
        """
        logger.info(f"Validating feedback email for: {cv_data.full_name}")

        # Format data for prompt
        cv_data_str = self._format_cv_data(cv_data)
        hr_feedback_str = self._format_hr_feedback(hr_feedback)
        job_offer_str = (
            self._format_job_offer(job_offer)
            if job_offer
            else "No job offer information provided"
        )

        # Build prompt with format instructions
        prompt_text = self.prompt_template.format(
            html_content=html_content,
            cv_data=cv_data_str,
            hr_feedback=hr_feedback_str,
            job_offer=job_offer_str,
            format_instructions=self.format_instructions,
        )

        # Run validation via LLM adapter (the "validating..." line was
        # previously logged twice; it is now logged only once above).
        try:
            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a careful JSON-producing validation assistant. "
                            "You must follow the format_instructions exactly."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
                max_completion_tokens=2000,
            )

            if not raw_text:
                raise ValueError("Empty response from model")

            # Track model response (with token usage and cost)
            metadata = {"temperature": self.temperature}
            if validation_number is not None:
                metadata["validation_number"] = validation_number

            self._save_model_response(
                agent_type="validator",
                input_data={
                    "html_content": html_content,
                    "cv_data": cv_data_str,
                    "hr_feedback": hr_feedback_str,
                    "job_offer": job_offer_str,
                },
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata=metadata,
                response=response,  # Pass response to extract tokens and costs
            )

            validation_result = self._parse_validation_from_text(raw_text)
            logger.info(
                f"Validation completed for {cv_data.full_name}: {validation_result.status.value}"
            )
            return validation_result
        except Exception as e:
            logger.error(f"Failed to validate feedback: {str(e)}", exc_info=True)

            # On validation failure, reject by default for safety
            return ValidationResult(
                status=ValidationStatus.REJECTED,
                is_approved=False,
                reasoning=f"Validation process failed: {str(e)}. Email rejected for safety.",
                issues_found=["Validation process error"],
                ethical_concerns=[],
                factual_errors=[],
                suggestions=["Please review the validation process and try again."],
            )

    def _parse_validation_from_text(self, text: str) -> ValidationResult:
        """
        Parse ValidationResult from raw model text, handling common JSON issues.

        Raises:
            ValueError: On empty input or when the output is not a JSON object
                (the caller converts this into a safe rejection).
        """
        if not text:
            raise ValueError("Empty response from model")

        # Parse JSON with fallback extraction
        data = parse_json_safe(text, fallback_to_extraction=True)
        # parse_json_safe may yield a bare array/scalar; reject explicitly
        # instead of crashing with AttributeError on .get().
        if not isinstance(data, dict):
            raise ValueError("Model output is not a JSON object")

        # Map to ValidationResult, with fail-safe defaults (rejection)
        status_str = data.get("status", "rejected")
        status = (
            ValidationStatus(status_str)
            if status_str in ("approved", "rejected")
            else ValidationStatus.REJECTED
        )
        is_approved = bool(data.get("is_approved", False))
        reasoning = data.get("reasoning") or "No reasoning provided."

        def ensure_str_list(value):
            # Normalize None / scalar / list into a list of strings.
            if isinstance(value, list):
                return [str(v) for v in value]
            if not value:
                return []
            return [str(value)]

        return ValidationResult(
            status=status,
            is_approved=is_approved,
            reasoning=str(reasoning),
            issues_found=ensure_str_list(data.get("issues_found") or []),
            ethical_concerns=ensure_str_list(data.get("ethical_concerns") or []),
            factual_errors=ensure_str_list(data.get("factual_errors") or []),
            suggestions=ensure_str_list(data.get("suggestions") or []),
        )

__init__(model_name='gpt-4.1-nano', temperature=0.0, api_key=None, timeout=120, max_retries=2)

Initialize Feedback Validator Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/validation_agent.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def __init__(
    self,
    model_name: str = "gpt-4.1-nano",
    temperature: float = 0.0,
    api_key: Optional[str] = None,
    timeout: int = 120,
    max_retries: int = 2,
):
    """
    Initialize Feedback Validator Agent using Azure OpenAI SDK (no LangChain).
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Hand-written contract describing the expected ValidationResult JSON.
    schema_lines = [
        "Return ONLY a single JSON object with the following structure:",
        "{",
        '  "status": "approved" | "rejected",',
        '  "is_approved": true | false,',
        '  "reasoning": "short explanation",',
        '  "issues_found": ["issue 1", "issue 2"],',
        '  "ethical_concerns": ["concern 1", "concern 2"],',
        '  "factual_errors": ["error 1", "error 2"],',
        '  "suggestions": ["suggestion 1", "suggestion 2"]',
        "}",
        "",
        "- Do NOT return a JSON schema or description.",
        "- Do NOT wrap the JSON in markdown code fences.",
        "- All list fields must be valid JSON arrays of strings.",
    ]
    self.format_instructions = "\n".join(schema_lines)

    # Keep the validation prompt around for validate_feedback().
    self.prompt_template = VALIDATION_PROMPT

validate_feedback(html_content, cv_data, hr_feedback, job_offer=None, candidate_id=None, validation_number=None)

Validate feedback email using Azure OpenAI (no LangChain).

Source code in agents/validation_agent.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def validate_feedback(
    self,
    html_content: str,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    candidate_id: Optional[int] = None,
    validation_number: Optional[int] = None,
) -> ValidationResult:
    """
    Validate feedback email using Azure OpenAI (no LangChain).

    Never raises: if validation itself fails for any reason, a REJECTED
    ValidationResult is returned so the email is not sent unchecked.

    Args:
        html_content: The candidate feedback email HTML to validate.
        cv_data: Parsed CV data used as factual reference.
        hr_feedback: HR evaluation used as factual reference.
        job_offer: Optional job offer context.
        candidate_id: Optional candidate id for response tracking.
        validation_number: Optional attempt counter stored in metadata.
    """
    logger.info(f"Validating feedback email for: {cv_data.full_name}")

    # Format data for prompt
    cv_data_str = self._format_cv_data(cv_data)
    hr_feedback_str = self._format_hr_feedback(hr_feedback)
    job_offer_str = (
        self._format_job_offer(job_offer)
        if job_offer
        else "No job offer information provided"
    )

    # Build prompt with format instructions
    prompt_text = self.prompt_template.format(
        html_content=html_content,
        cv_data=cv_data_str,
        hr_feedback=hr_feedback_str,
        job_offer=job_offer_str,
        format_instructions=self.format_instructions,
    )

    # Run validation via LLM adapter (the "validating..." line was
    # previously logged twice; it is now logged only once above).
    try:
        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing validation assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=2000,
        )

        if not raw_text:
            raise ValueError("Empty response from model")

        # Track model response (with token usage and cost)
        metadata = {"temperature": self.temperature}
        if validation_number is not None:
            metadata["validation_number"] = validation_number

        self._save_model_response(
            agent_type="validator",
            input_data={
                "html_content": html_content,
                "cv_data": cv_data_str,
                "hr_feedback": hr_feedback_str,
                "job_offer": job_offer_str,
            },
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata=metadata,
            response=response,  # Pass response to extract tokens and costs
        )

        validation_result = self._parse_validation_from_text(raw_text)
        logger.info(
            f"Validation completed for {cv_data.full_name}: {validation_result.status.value}"
        )
        return validation_result
    except Exception as e:
        logger.error(f"Failed to validate feedback: {str(e)}", exc_info=True)

        # On validation failure, reject by default for safety
        return ValidationResult(
            status=ValidationStatus.REJECTED,
            is_approved=False,
            reasoning=f"Validation process failed: {str(e)}. Email rejected for safety.",
            issues_found=["Validation process error"],
            ethical_concerns=[],
            factual_errors=[],
            suggestions=["Please review the validation process and try again."],
        )

Bases: BaseAgent

Agent for correcting candidate feedback emails based on validation feedback.

Source code in agents/correction_agent.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class FeedbackCorrectionAgent(BaseAgent):
    """Agent for correcting candidate feedback emails based on validation feedback.

    Takes an HTML email that failed validation together with the validator's
    findings, asks the model for a corrected version, and parses the response
    into a CorrectedFeedback object with defensive fallbacks for malformed JSON.
    """

    def __init__(
        self,
        model_name: str = "gpt-4.1-nano",
        temperature: float = 0.3,
        api_key: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
    ):
        """
        Initialize Feedback Correction Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name to use for corrections.
            temperature: Sampling temperature for the correction model.
            api_key: Optional API key override; BaseAgent supplies defaults.
            timeout: Request timeout in seconds.
            max_retries: SDK-level retry count for transient failures.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions describing CorrectedFeedback schema.
        # Appended verbatim to the prompt instead of using a parser library.
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "html_content": "<!DOCTYPE html>\\n<html>...corrected email HTML...</html>",\n'
            '  "corrections_made": ["correction 1", "correction 2"],\n'
            '  "explanation": "short explanation of changes"\n'
            "}\n\n"
            "- corrections_made must be a list of strings, each describing one change.\n"
            "- Do NOT return a JSON schema or description.\n"
            "- Do NOT wrap the JSON in markdown code fences (no ```json).\n"
            '- CRITICAL for valid JSON: Inside the "html_content" string use escaped newlines only (\\n). '
            "Do NOT put literal line breaks inside the JSON string—that breaks parsing. "
            'Example: use "...Cześć,\\n\\nDziękujemy..." not "...Cześć,\n\nDziękujemy...".'
        )

        # Store prompt template
        self.prompt_template = CORRECTION_PROMPT

    def correct_feedback(
        self,
        original_html: str,
        validation_result: ValidationResult,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        candidate_id: Optional[int] = None,
        correction_number: Optional[int] = None,
    ) -> CorrectedFeedback:
        """
        Correct feedback email based on validation feedback using Azure OpenAI.

        Args:
            original_html: The HTML email that failed validation.
            validation_result: Validator output (reasoning, issues, concerns, errors).
            cv_data: Parsed candidate CV used to ground the correction.
            hr_feedback: HR evaluation of the candidate.
            job_offer: Optional job offer context; a placeholder string is used if absent.
            candidate_id: Optional candidate id used when tracking the model response.
            correction_number: Optional ordinal of this correction attempt (metadata only).

        Returns:
            CorrectedFeedback with the corrected HTML and the list of changes made.

        Raises:
            Exception: If the model call fails, returns an empty response,
                or the response cannot be parsed.
        """
        logger.info(f"Correcting feedback email for: {cv_data.full_name}")

        # Format data for prompt
        cv_data_str = self._format_cv_data(cv_data)
        hr_feedback_str = self._format_hr_feedback(hr_feedback)

        if job_offer:
            job_offer_str = self._format_job_offer(job_offer)
        else:
            job_offer_str = "No job offer information provided"

        # Format validation feedback as bullet lists (or "None" placeholders)
        issues_str = (
            "\n".join([f"- {issue}" for issue in validation_result.issues_found])
            if validation_result.issues_found
            else "None"
        )
        ethical_str = (
            "\n".join([f"- {concern}" for concern in validation_result.ethical_concerns])
            if validation_result.ethical_concerns
            else "None"
        )
        factual_str = (
            "\n".join([f"- {error}" for error in validation_result.factual_errors])
            if validation_result.factual_errors
            else "None"
        )

        # Build prompt with format instructions
        prompt_text = self.prompt_template.format(
            original_html=original_html,
            validation_reasoning=validation_result.reasoning,
            issues_found=issues_str,
            ethical_concerns=ethical_str,
            factual_errors=factual_str,
            cv_data=cv_data_str,
            hr_feedback=hr_feedback_str,
            job_offer=job_offer_str,
            format_instructions=self.format_instructions,
        )

        # Run correction (the start of this call was already logged above;
        # the previous duplicate log line here has been removed)
        try:
            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a careful JSON-producing correction assistant. "
                            "You must follow the format_instructions exactly."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
                max_completion_tokens=3000,
            )

            if not raw_text:
                raise ValueError("Empty response from model")

            # Track model response (with token usage and cost)
            metadata = {"temperature": self.temperature}
            if correction_number is not None:
                metadata["correction_number"] = correction_number

            self._save_model_response(
                agent_type="corrector",
                input_data={
                    "original_html": original_html,
                    "validation_reasoning": validation_result.reasoning,
                    "issues_found": issues_str,
                    "ethical_concerns": ethical_str,
                    "factual_errors": factual_str,
                    "cv_data": cv_data_str,
                    "hr_feedback": hr_feedback_str,
                    "job_offer": job_offer_str,
                },
                output_data=raw_text,
                candidate_id=candidate_id,
                response=response,  # Pass response to extract tokens and costs
                metadata=metadata,
            )

            corrected_feedback = self._parse_correction_from_text(raw_text)
            logger.info(
                f"Correction completed for {cv_data.full_name}. "
                f"Corrections made: {len(corrected_feedback.corrections_made)}"
            )
            return corrected_feedback
        except Exception as e:
            error_msg = f"Failed to correct feedback: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise Exception(error_msg) from e

    def _parse_correction_from_text(self, text: str) -> CorrectedFeedback:
        """
        Parse CorrectedFeedback from raw model text, handling common JSON issues.

        Fallback order: no JSON at all -> wrap raw text as HTML; JSON that is
        not an object -> wrap raw text as HTML; object that fails Pydantic
        validation on html_content -> wrap the html_content value; anything
        else re-raises as ValueError.
        """
        if not text:
            raise ValueError("Empty response from model")

        original_text = text

        # Try to parse JSON
        try:
            data = parse_json_safe(text, fallback_to_extraction=True)
        except ValueError:
            # No JSON at all – treat as plain HTML correction without metadata
            logger.warning("No JSON detected in correction output, using raw text as HTML.")
            return CorrectedFeedback(
                html_content=self._wrap_html_if_needed(original_text),
                corrections_made=["Used raw model output as HTML (no structured JSON)"],
                explanation="Model did not return valid JSON; raw output was wrapped as HTML.",
            )

        # Guard: valid JSON that is not an object (e.g. a bare list or string)
        # would crash the .get() calls below — fall back like the no-JSON case.
        if not isinstance(data, dict):
            logger.warning("Correction output JSON is not an object; using raw text as HTML.")
            return CorrectedFeedback(
                html_content=self._wrap_html_if_needed(original_text),
                corrections_made=["Used raw model output as HTML (no structured JSON)"],
                explanation="Model did not return a JSON object; raw output was wrapped as HTML.",
            )

        # Validate and coerce via Pydantic (ensures types + html_content is valid HTML)
        try:
            return CorrectedFeedback.model_validate(data)
        except PydanticValidationError as e:
            # If html_content failed (e.g. plain text without tags), wrap and retry
            html_raw = data.get("html_content") if isinstance(data.get("html_content"), str) else ""
            if html_raw and ("html_content" in str(e).lower() or "markup" in str(e).lower()):
                wrapped = self._wrap_html_if_needed(html_raw)
                logger.info(
                    "html_content did not pass HTML validation (e.g. plain text); wrapping in template."
                )
                raw_corrections = data.get("corrections_made")
                corrections = (
                    [str(c) for c in raw_corrections]
                    if isinstance(raw_corrections, list)
                    else [str(raw_corrections)] if raw_corrections else []
                )
                return CorrectedFeedback(
                    html_content=wrapped,
                    corrections_made=corrections,
                    explanation=str(data.get("explanation") or ""),
                )
            raise ValueError(f"CorrectedFeedback validation failed: {e}") from e

    @staticmethod
    def _wrap_html_if_needed(content: str) -> str:
        """If content is not full HTML, wrap it in a minimal HTML template."""
        lower = content.lower()
        # Already a full document (or at least has a body) — pass through untouched
        if "<html" in lower or "<body" in lower:
            return content

        return (
            "<!DOCTYPE html>\n"
            '<html lang="pl">\n'
            "<head>\n"
            '    <meta charset="UTF-8">\n'
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
            "    <title>Odpowiedź na aplikację</title>\n"
            "</head>\n"
            '<body style="font-family: Arial, sans-serif; line-height: 1.6; color: #333; '
            'max-width: 600px; margin: 0 auto; padding: 20px;">\n'
            f"    {content}\n"
            "</body>\n"
            "</html>"
        )

__init__(model_name='gpt-4.1-nano', temperature=0.3, api_key=None, timeout=120, max_retries=2)

Initialize Feedback Correction Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/correction_agent.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    model_name: str = "gpt-4.1-nano",
    temperature: float = 0.3,
    api_key: Optional[str] = None,
    timeout: int = 120,
    max_retries: int = 2,
):
    """
    Initialize Feedback Correction Agent using Azure OpenAI SDK (no LangChain).

    Args:
        model_name: Azure OpenAI deployment name to use for corrections.
        temperature: Sampling temperature for the correction model.
        api_key: Optional API key override; BaseAgent supplies defaults.
        timeout: Request timeout in seconds.
        max_retries: SDK-level retry count for transient failures.
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Static format instructions describing CorrectedFeedback schema
    # (appended verbatim to the prompt in place of a parser library)
    self.format_instructions = (
        "Return ONLY a single JSON object with the following structure:\n"
        "{\n"
        '  "html_content": "<!DOCTYPE html>\\n<html>...corrected email HTML...</html>",\n'
        '  "corrections_made": ["correction 1", "correction 2"],\n'
        '  "explanation": "short explanation of changes"\n'
        "}\n\n"
        "- corrections_made must be a list of strings, each describing one change.\n"
        "- Do NOT return a JSON schema or description.\n"
        "- Do NOT wrap the JSON in markdown code fences (no ```json).\n"
        '- CRITICAL for valid JSON: Inside the "html_content" string use escaped newlines only (\\n). '
        "Do NOT put literal line breaks inside the JSON string—that breaks parsing. "
        'Example: use "...Cześć,\\n\\nDziękujemy..." not "...Cześć,\n\nDziękujemy...".'
    )

    # Store prompt template
    self.prompt_template = CORRECTION_PROMPT

correct_feedback(original_html, validation_result, cv_data, hr_feedback, job_offer=None, candidate_id=None, correction_number=None)

Correct feedback email based on validation feedback using Azure OpenAI.

Source code in agents/correction_agent.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def correct_feedback(
    self,
    original_html: str,
    validation_result: ValidationResult,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    candidate_id: Optional[int] = None,
    correction_number: Optional[int] = None,
) -> CorrectedFeedback:
    """
    Correct feedback email based on validation feedback using Azure OpenAI.

    Args:
        original_html: The HTML email that failed validation.
        validation_result: Validator output (reasoning, issues, concerns, errors).
        cv_data: Parsed candidate CV used to ground the correction.
        hr_feedback: HR evaluation of the candidate.
        job_offer: Optional job offer context; a placeholder string is used if absent.
        candidate_id: Optional candidate id used when tracking the model response.
        correction_number: Optional ordinal of this correction attempt (metadata only).

    Returns:
        CorrectedFeedback with the corrected HTML and the list of changes made.

    Raises:
        Exception: If the model call fails, returns an empty response,
            or the response cannot be parsed.
    """
    logger.info(f"Correcting feedback email for: {cv_data.full_name}")

    # Format data for prompt
    cv_data_str = self._format_cv_data(cv_data)
    hr_feedback_str = self._format_hr_feedback(hr_feedback)

    if job_offer:
        job_offer_str = self._format_job_offer(job_offer)
    else:
        job_offer_str = "No job offer information provided"

    # Format validation feedback as bullet lists (or "None" placeholders)
    issues_str = (
        "\n".join([f"- {issue}" for issue in validation_result.issues_found])
        if validation_result.issues_found
        else "None"
    )
    ethical_str = (
        "\n".join([f"- {concern}" for concern in validation_result.ethical_concerns])
        if validation_result.ethical_concerns
        else "None"
    )
    factual_str = (
        "\n".join([f"- {error}" for error in validation_result.factual_errors])
        if validation_result.factual_errors
        else "None"
    )

    # Build prompt with format instructions
    prompt_text = self.prompt_template.format(
        original_html=original_html,
        validation_reasoning=validation_result.reasoning,
        issues_found=issues_str,
        ethical_concerns=ethical_str,
        factual_errors=factual_str,
        cv_data=cv_data_str,
        hr_feedback=hr_feedback_str,
        job_offer=job_offer_str,
        format_instructions=self.format_instructions,
    )

    # Run correction
    try:
        # NOTE(review): duplicate of the log line at the top of this method
        logger.info(f"Correcting feedback email for: {cv_data.full_name}")

        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing correction assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=3000,
        )

        if not raw_text:
            raise ValueError("Empty response from model")

        # Track model response (with token usage and cost)
        metadata = {"temperature": self.temperature}
        if correction_number is not None:
            metadata["correction_number"] = correction_number

        self._save_model_response(
            agent_type="corrector",
            input_data={
                "original_html": original_html,
                "validation_reasoning": validation_result.reasoning,
                "issues_found": issues_str,
                "ethical_concerns": ethical_str,
                "factual_errors": factual_str,
                "cv_data": cv_data_str,
                "hr_feedback": hr_feedback_str,
                "job_offer": job_offer_str,
            },
            output_data=raw_text,
            candidate_id=candidate_id,
            response=response,  # Pass response to extract tokens and costs
            metadata=metadata,
        )

        corrected_feedback = self._parse_correction_from_text(raw_text)
        logger.info(
            f"Correction completed for {cv_data.full_name}. "
            f"Corrections made: {len(corrected_feedback.corrections_made)}"
        )
        return corrected_feedback
    except Exception as e:
        error_msg = f"Failed to correct feedback: {str(e)}"
        logger.error(error_msg, exc_info=True)
        raise Exception(error_msg) from e

Bases: BaseAgent

Agent for parsing CV information from PDF files.

Source code in agents/cv_parser_agent.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
class CVParserAgent(BaseAgent):
    """Agent for parsing CV information from PDF files.

    Extracts raw text from a PDF (OCR, when enabled, is handled by the PDF
    reader — the LLM vision model is no longer used here), asks an Azure
    OpenAI model to return structured JSON, and coerces the model's
    loosely-shaped output into the CVData schema.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        temperature: Optional[float] = None,
        api_key: Optional[str] = None,
        vision_model_name: Optional[str] = None,
        use_ocr: bool = True,
        timeout: int = 240,
        max_retries: int = 2,
    ):
        """
        Initialize CV Parser Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name; defaults to settings.openai_model.
            temperature: Sampling temperature; defaults to settings.openai_temperature,
                then clamped to a minimum of 1.0 (see below).
            api_key: Optional API key override; BaseAgent supplies defaults.
            vision_model_name: Kept for logging only; no vision model is created.
            use_ocr: Whether the PDF reader should apply OCR during text extraction.
            timeout: Request timeout in seconds.
            max_retries: SDK-level retry count for transient failures.
        """
        # Store model names and config for logging
        model_name_to_use = model_name or settings.openai_model
        temperature_to_use = temperature if temperature is not None else settings.openai_temperature

        # Ensure temperature is at least 1.0 for Azure (some deployments reject 0.0)
        # NOTE(review): this also overrides any caller-requested value below 1.0
        if temperature_to_use is None or temperature_to_use < 1.0:
            temperature_to_use = 1.0

        super().__init__(model_name_to_use, temperature_to_use, api_key, timeout, max_retries)

        # OCR: we no longer use an LLM-based vision model here; rely on pdf_reader defaults
        self.use_ocr = use_ocr
        self.vision_model = None
        self.vision_model_name = vision_model_name

        # We don't use PydanticOutputParser anymore; parsing is done manually via JSON + _transform_llm_response.

    def _transform_llm_response(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Transform LLM response to match Pydantic model structure.
        Handles common field name mismatches.

        Args:
            data: Raw JSON object returned by the model (shape varies by run).

        Returns:
            A dict whose keys match the CVData schema, ready for CVData(**...).
        """
        transformed = {}

        # Handle nested personal_information
        # NOTE(review): assumes personal_information is a dict — TODO confirm upstream JSON shape
        if "personal_information" in data:
            personal = data["personal_information"]
            transformed["full_name"] = personal.get("full_name", "")
            transformed["email"] = personal.get("email") or personal.get("email_address")
            transformed["phone"] = personal.get("phone") or personal.get("phone_number")
            transformed["location"] = personal.get("location")
            transformed["linkedin"] = personal.get("linkedin")
            transformed["github"] = personal.get("github")
            transformed["portfolio"] = personal.get("portfolio") or personal.get(
                "portfolio_website"
            )
        else:
            transformed["full_name"] = data.get("full_name", "")
            transformed["email"] = data.get("email")
            transformed["phone"] = data.get("phone")
            transformed["location"] = data.get("location")
            transformed["linkedin"] = data.get("linkedin")
            transformed["github"] = data.get("github")
            transformed["portfolio"] = data.get("portfolio")

        # Handle summary
        transformed["summary"] = data.get("summary") or data.get("professional_summary")

        # Transform education (accepts either institution/institution_name, degree/degree_obtained)
        education_list = data.get("education", [])
        transformed["education"] = []
        for edu in education_list:
            transformed["education"].append(
                {
                    "institution": edu.get("institution") or edu.get("institution_name", ""),
                    "degree": edu.get("degree") or edu.get("degree_obtained", ""),
                    "field_of_study": edu.get("field_of_study"),
                    "start_date": edu.get("start_date"),
                    "end_date": edu.get("end_date"),
                    "gpa": edu.get("gpa"),
                    "honors": edu.get("honors"),
                }
            )

        # Transform experience (accepts experience/work_experience key variants)
        experience_list = data.get("experience", []) or data.get("work_experience", [])
        transformed["experience"] = []
        for exp in experience_list:
            # achievements may arrive as a string, a list, or something else entirely
            achievements = exp.get("achievements") or exp.get("key_achievements")
            if isinstance(achievements, str):
                achievements = [achievements] if achievements else []
            elif not isinstance(achievements, list):
                achievements = []

            transformed["experience"].append(
                {
                    "company": exp.get("company") or exp.get("company_name", ""),
                    "position": exp.get("position") or exp.get("job_title", ""),
                    "start_date": exp.get("start_date"),
                    "end_date": exp.get("end_date"),
                    "description": exp.get("description") or exp.get("job_description"),
                    "achievements": achievements,
                }
            )

        # Transform skills - handle both list and dict formats
        skills_data = data.get("skills", [])
        transformed["skills"] = []

        if isinstance(skills_data, dict):
            # Handle dict format: {"technical_skills": [...], "language_skills": [...], "soft_skills": [...]}
            for skill_name in skills_data.get("technical_skills", []):
                transformed["skills"].append(
                    {"name": skill_name, "category": "Technical", "proficiency": None}
                )
            for skill_name in skills_data.get("language_skills", []):
                transformed["skills"].append(
                    {"name": skill_name, "category": "Language", "proficiency": None}
                )
            for skill_name in skills_data.get("soft_skills", []):
                transformed["skills"].append(
                    {"name": skill_name, "category": "Soft", "proficiency": None}
                )
        elif isinstance(skills_data, list):
            # Handle list format (entries may be plain strings or dicts)
            for skill in skills_data:
                if isinstance(skill, str):
                    transformed["skills"].append(
                        {"name": skill, "category": None, "proficiency": None}
                    )
                elif isinstance(skill, dict):
                    transformed["skills"].append(
                        {
                            "name": skill.get("name", ""),
                            "category": skill.get("category"),
                            "proficiency": skill.get("proficiency"),
                        }
                    )

        # Transform certifications (accepts name/certification_name etc. variants)
        certs_list = data.get("certifications", [])
        transformed["certifications"] = []
        for cert in certs_list:
            transformed["certifications"].append(
                {
                    "name": cert.get("name") or cert.get("certification_name", ""),
                    "issuer": cert.get("issuer") or cert.get("issuing_organization"),
                    "date": cert.get("date") or cert.get("date_obtained"),
                    "expiry_date": cert.get("expiry_date"),
                }
            )

        # Transform languages
        languages_list = data.get("languages", [])
        transformed["languages"] = []
        for lang in languages_list:
            transformed["languages"].append(
                {"language": lang.get("language", ""), "proficiency": lang.get("proficiency", "")}
            )

        # Transform additional_info - convert dict to string if needed
        additional_info = data.get("additional_info") or data.get("additional_information")
        if additional_info:
            if isinstance(additional_info, dict):
                # Convert dict to formatted string (one labelled line per section)
                parts = []
                if additional_info.get("hobbies"):
                    hobbies = additional_info["hobbies"]
                    if isinstance(hobbies, list):
                        parts.append(f"Hobbies: {', '.join(hobbies)}")
                    else:
                        parts.append(f"Hobbies: {hobbies}")

                if additional_info.get("projects"):
                    projects = additional_info["projects"]
                    if isinstance(projects, list):
                        parts.append(f"Projects: {'; '.join(projects)}")
                    else:
                        parts.append(f"Projects: {projects}")

                if additional_info.get("awards"):
                    awards = additional_info["awards"]
                    if isinstance(awards, list):
                        parts.append(f"Awards: {', '.join(awards)}")
                    else:
                        parts.append(f"Awards: {awards}")

                if additional_info.get("other_activities"):
                    activities = additional_info["other_activities"]
                    if isinstance(activities, list):
                        parts.append(f"Other Activities: {', '.join(activities)}")
                    else:
                        parts.append(f"Other Activities: {activities}")

                transformed["additional_info"] = "\n".join(parts) if parts else None
            elif isinstance(additional_info, str):
                transformed["additional_info"] = additional_info
            else:
                transformed["additional_info"] = str(additional_info) if additional_info else None
        else:
            transformed["additional_info"] = None

        return transformed

    def parse_cv_from_pdf(
        self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
    ) -> CVData:
        """
        Parse CV from PDF file and return structured data.

        When use_ocr is enabled, OCR is delegated to the PDF reader
        (self.vision_model is always None here; the LLM-based vision model
        is no longer used for OCR).

        Args:
            pdf_path: Path to the PDF file
            verbose: If True, print progress messages
            candidate_id: Optional candidate id used when tracking the model response

        Returns:
            CVData object with parsed information

        Raises:
            Exception: If the LLM call fails, returns an empty response,
                or its output cannot be parsed into CVData.
        """
        import time

        start_time = time.time()

        logger.info("=" * 80)
        logger.info("CV PARSING PROCESS STARTED")
        logger.info("=" * 80)
        logger.info(f"PDF file: {pdf_path}")
        logger.info(f"OCR enabled: {self.use_ocr}")
        logger.info(f"Vision model: {self.vision_model_name if self.vision_model_name else 'N/A'}")
        logger.info(f"Text model: {self.model_name}")

        if verbose:
            print("\n" + "=" * 80)
            print("STEP 1: EXTRACTING TEXT FROM PDF")
            print("=" * 80)
            print("  📄 Extracting text from PDF...")

        logger.info("Step 1: Starting text extraction from PDF...")
        extraction_start = time.time()

        # Extract text from PDF using vision model OCR if available
        # (self.vision_model is None, so pdf_reader's own OCR path is used)
        cv_text = extract_text_from_pdf(
            pdf_path,
            vision_model=self.vision_model if self.use_ocr else None,
            use_ocr=self.use_ocr,
            verbose=verbose,
        )

        extraction_time = time.time() - extraction_start
        logger.info(
            f"Step 1 completed: Extracted {len(cv_text)} characters in {extraction_time:.2f}s"
        )

        if verbose:
            print(f"  ✅ Extracted {len(cv_text)} characters of text ({extraction_time:.2f}s)")
            print("\n" + "=" * 80)
            print("STEP 2: PARSING STRUCTURED DATA WITH LLM")
            print("=" * 80)
            print("  🤖 Parsing structured data...")

        # Limit text length to avoid token limits
        # gpt-5 models may have issues with very long prompts, so use smaller limit
        max_text_length = 10000 if "gpt-5" in self.model_name.lower() else 15000

        if len(cv_text) > max_text_length:
            logger.warning(
                f"Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters "
                f"(model: {self.model_name})"
            )
            if verbose:
                print(
                    f"  ⚠️ Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters..."
                )
            cv_text = cv_text[:max_text_length] + "\n\n[... text was truncated due to length ...]"

        # Estimate prompt length (CV text + format instructions + prompt template)
        # Format instructions are typically ~500-1000 chars, prompt template ~500-800 chars
        estimated_prompt_length = len(cv_text) + 1500
        logger.info(
            f"Step 2: Preparing to send CV text ({len(cv_text)} chars) to LLM for parsing. "
            f"Estimated total prompt length: ~{estimated_prompt_length} chars"
        )

        if estimated_prompt_length > 20000 and "gpt-5" in self.model_name.lower():
            logger.warning(
                f"Warning: Prompt may be too long for {self.model_name}. "
                f"Consider using gpt-4o-mini or gpt-4.1-nano for better reliability with long CVs."
            )

        # Run LLM via Azure OpenAI
        try:
            if verbose:
                print("    ⏳ Sending request to LLM (this may take 30-120 seconds)...")
                print("    💡 Tip: LLM is analyzing the CV and extracting structured information")

            logger.info("Step 2: Sending request to LLM for structured parsing...")
            parsing_start = time.time()

            # Build prompt text
            prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an expert CV parser. "
                            "You must return valid JSON matching the CVData schema."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
            )

            if not raw_text or not str(raw_text).strip():
                raise Exception(
                    f"Model returned empty response for CV parsing. "
                    f"Check Azure deployment '{self.model_name}' and API availability."
                )
            parsing_time = time.time() - parsing_start
            logger.info(f"Step 2 completed: LLM parsing successful in {parsing_time:.2f}s")

            # Track model response (with token usage and cost);
            # input is truncated to keep stored payloads small
            self._save_model_response(
                agent_type="cv_parser",
                input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata={"temperature": self.temperature, "parsing_time": parsing_time},
                response=response,  # Pass response to extract tokens and costs
            )

            # Parse raw_text into CVData
            parsed_data = self._parse_cv_from_text_raw(raw_text)

            if verbose:
                print(f"    ✅ Received response from LLM ({parsing_time:.2f}s)")

            total_time = time.time() - start_time
            logger.info(f"CV parsing completed successfully in {total_time:.2f}s total")
            logger.info("=" * 80)

            return parsed_data
        except Exception as e:
            error_type = type(e).__name__
            error_msg = str(e)
            logger.error(f"Failed to parse CV: {error_type}: {error_msg}", exc_info=True)
            raise Exception(f"Failed to parse CV: {error_msg}") from e

    def parse_cv_from_text(self, cv_text: str, candidate_id: Optional[int] = None) -> CVData:
        """
        Parse CV from text content.

        Unlike parse_cv_from_pdf, this does no length limiting or progress
        output — the text is sent to the model as-is.

        Args:
            cv_text: CV text content
            candidate_id: Optional candidate id used when tracking the model response

        Returns:
            CVData object with parsed information

        Raises:
            Exception: If the LLM call fails or its output cannot be parsed.
        """
        # Run LLM via adapter
        try:
            # Build prompt text
            prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an expert CV parser. "
                            "You must return valid JSON matching the CVData schema."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
            )
            if not raw_text:
                raise ValueError("Empty response from CV parser model")

            self._save_model_response(
                agent_type="cv_parser",
                input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata={"temperature": self.temperature},
                response=response,
            )

            return self._parse_cv_from_text_raw(raw_text)
        except Exception as e:
            raise Exception(f"Failed to parse CV text: {str(e)}") from e

    def _parse_cv_from_text_raw(self, text: str) -> CVData:
        """
        Parse CVData from raw model text, using JSON + _transform_llm_response.

        NOTE(review): unlike the correction agent, this uses plain json.loads
        (after stripping code fences) with no extraction fallback — confirm
        that is intentional.
        """
        if not text:
            raise ValueError("Empty response from CV parser model")

        # Strip code fences
        cleaned_text = strip_code_fences(text)

        # Try to parse JSON and transform
        try:
            data = json.loads(cleaned_text)
            transformed_data = self._transform_llm_response(data)
            return CVData(**transformed_data)
        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse CV data from model output: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Failed to transform CV data: {str(e)}") from e

__init__(model_name=None, temperature=None, api_key=None, vision_model_name=None, use_ocr=True, timeout=240, max_retries=2)

Initialize CV Parser Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/cv_parser_agent.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    api_key: Optional[str] = None,
    vision_model_name: Optional[str] = None,
    use_ocr: bool = True,
    timeout: int = 240,
    max_retries: int = 2,
):
    """
    Initialize CV Parser Agent using Azure OpenAI SDK (no LangChain).
    """
    # Fall back to configured defaults when the caller supplies nothing
    chosen_model = model_name if model_name else settings.openai_model
    chosen_temperature = (
        settings.openai_temperature if temperature is None else temperature
    )

    # Azure quirk: clamp temperature up to 1.0 (some deployments reject lower values)
    if chosen_temperature is None or chosen_temperature < 1.0:
        chosen_temperature = 1.0

    super().__init__(chosen_model, chosen_temperature, api_key, timeout, max_retries)

    # No LLM-based vision model is created here; OCR relies on pdf_reader defaults
    self.use_ocr = use_ocr
    self.vision_model = None
    self.vision_model_name = vision_model_name

parse_cv_from_pdf(pdf_path, verbose=False, candidate_id=None)

Parse CV from PDF file and return structured data. Uses vision model as OCR if enabled.

Parameters:

Name Type Description Default
pdf_path str

Path to the PDF file

required
verbose bool

If True, print progress messages

False

Returns:

Type Description
CVData

CVData object with parsed information

Source code in agents/cv_parser_agent.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
def parse_cv_from_pdf(
    self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
) -> CVData:
    """
    Parse CV from PDF file and return structured data.
    Uses vision model as OCR if enabled.

    Args:
        pdf_path: Path to the PDF file
        verbose: If True, print progress messages
        candidate_id: Optional candidate ID used when persisting the model response

    Returns:
        CVData object with parsed information

    Raises:
        Exception: if text extraction or LLM parsing fails
    """
    import time

    start_time = time.time()

    logger.info("=" * 80)
    logger.info("CV PARSING PROCESS STARTED")
    logger.info("=" * 80)
    logger.info(f"PDF file: {pdf_path}")
    logger.info(f"OCR enabled: {self.use_ocr}")
    logger.info(f"Vision model: {self.vision_model_name if self.vision_model_name else 'N/A'}")
    logger.info(f"Text model: {self.model_name}")

    if verbose:
        print("\n" + "=" * 80)
        print("STEP 1: EXTRACTING TEXT FROM PDF")
        print("=" * 80)
        print("  📄 Extracting text from PDF...")

    logger.info("Step 1: Starting text extraction from PDF...")
    extraction_start = time.time()

    # Extract text from PDF using vision model OCR if available
    # NOTE(review): self.vision_model is set to None in __init__, so the OCR
    # path effectively falls back to pdf_reader defaults — confirm intended.
    cv_text = extract_text_from_pdf(
        pdf_path,
        vision_model=self.vision_model if self.use_ocr else None,
        use_ocr=self.use_ocr,
        verbose=verbose,
    )

    extraction_time = time.time() - extraction_start
    logger.info(
        f"Step 1 completed: Extracted {len(cv_text)} characters in {extraction_time:.2f}s"
    )

    if verbose:
        print(f"  ✅ Extracted {len(cv_text)} characters of text ({extraction_time:.2f}s)")
        print("\n" + "=" * 80)
        print("STEP 2: PARSING STRUCTURED DATA WITH LLM")
        print("=" * 80)
        print("  🤖 Parsing structured data...")

    # Limit text length to avoid token limits
    # gpt-5 models may have issues with very long prompts, so use smaller limit
    max_text_length = 10000 if "gpt-5" in self.model_name.lower() else 15000

    if len(cv_text) > max_text_length:
        logger.warning(
            f"Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters "
            f"(model: {self.model_name})"
        )
        if verbose:
            print(
                f"  ⚠️ Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters..."
            )
        cv_text = cv_text[:max_text_length] + "\n\n[... text was truncated due to length ...]"

    # Estimate prompt length (CV text + format instructions + prompt template)
    # Format instructions are typically ~500-1000 chars, prompt template ~500-800 chars
    estimated_prompt_length = len(cv_text) + 1500
    logger.info(
        f"Step 2: Preparing to send CV text ({len(cv_text)} chars) to LLM for parsing. "
        f"Estimated total prompt length: ~{estimated_prompt_length} chars"
    )

    if estimated_prompt_length > 20000 and "gpt-5" in self.model_name.lower():
        logger.warning(
            f"Warning: Prompt may be too long for {self.model_name}. "
            f"Consider using gpt-4o-mini or gpt-4.1-nano for better reliability with long CVs."
        )

    # Run LLM via Azure OpenAI
    try:
        if verbose:
            print("    ⏳ Sending request to LLM (this may take 30-120 seconds)...")
            print("    💡 Tip: LLM is analyzing the CV and extracting structured information")

        logger.info("Step 2: Sending request to LLM for structured parsing...")
        parsing_start = time.time()

        # Build prompt text
        prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert CV parser. "
                        "You must return valid JSON matching the CVData schema."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
        )

        # Whitespace-only output counts as empty (model/deployment misconfiguration)
        if not raw_text or not str(raw_text).strip():
            raise Exception(
                f"Model returned empty response for CV parsing. "
                f"Check Azure deployment '{self.model_name}' and API availability."
            )
        parsing_time = time.time() - parsing_start
        logger.info(f"Step 2 completed: LLM parsing successful in {parsing_time:.2f}s")

        # Track model response (with token usage and cost)
        self._save_model_response(
            agent_type="cv_parser",
            input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata={"temperature": self.temperature, "parsing_time": parsing_time},
            response=response,  # Pass response to extract tokens and costs
        )

        # Parse raw_text into CVData
        parsed_data = self._parse_cv_from_text_raw(raw_text)

        if verbose:
            print(f"    ✅ Received response from LLM ({parsing_time:.2f}s)")

        total_time = time.time() - start_time
        logger.info(f"CV parsing completed successfully in {total_time:.2f}s total")
        logger.info("=" * 80)

        return parsed_data
    except Exception as e:
        error_type = type(e).__name__
        error_msg = str(e)
        logger.error(f"Failed to parse CV: {error_type}: {error_msg}", exc_info=True)
        raise Exception(f"Failed to parse CV: {error_msg}") from e

parse_cv_from_text(cv_text, candidate_id=None)

Parse CV from text content.

Parameters:

Name Type Description Default
cv_text str

CV text content

required

Returns:

Type Description
CVData

CVData object with parsed information

Source code in agents/cv_parser_agent.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
def parse_cv_from_text(self, cv_text: str, candidate_id: Optional[int] = None) -> CVData:
    """
    Parse CV from text content.

    Args:
        cv_text: CV text content
        candidate_id: Optional candidate ID used when persisting the model response

    Returns:
        CVData object with parsed information

    Raises:
        Exception: if the LLM call fails or the response cannot be parsed
    """
    # Run LLM via adapter
    try:
        # Build prompt text
        prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert CV parser. "
                        "You must return valid JSON matching the CVData schema."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
        )
        # Treat whitespace-only output as empty — consistent with the check
        # used by parse_cv_from_pdf for the same failure mode.
        if not raw_text or not str(raw_text).strip():
            raise ValueError("Empty response from CV parser model")

        # Track model response (token usage and cost accounting)
        self._save_model_response(
            agent_type="cv_parser",
            input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata={"temperature": self.temperature},
            response=response,
        )

        return self._parse_cv_from_text_raw(raw_text)
    except Exception as e:
        raise Exception(f"Failed to parse CV text: {str(e)}") from e

Bases: BaseAgent

AI agent for classifying incoming emails.

Source code in agents/email_classifier_agent.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
class EmailClassifierAgent(BaseAgent):
    """AI agent for classifying incoming emails.

    Categories produced: 'iod' (data-protection/GDPR requests),
    'consent_yes' / 'consent_no' (consent for other recruitments),
    and 'default' (everything else, routed to HR).
    """

    # Critical IOD keywords - at least a few of these must be present for IOD classification
    CRITICAL_IOD_KEYWORDS = [
        "rodo",
        "iod",
        "dpo",
        "gdpr",
        "dane osobowe",
        "ochrona danych",
        "uodo",
        "organ nadzorczy",
        "profilowanie",
        "automatyczna decyzja",
    ]

    def __init__(
        self,
        model_name: str = settings.azure_openai_gpt_deployment,
        temperature: float = 0.1,  # Low temperature for consistent classification
        api_key: Optional[str] = None,
        timeout: int = 30,
        max_retries: int = 2,
    ):
        """
        Initialize Email Classifier Agent using Azure OpenAI SDK (no LangChain).
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions instead of LangChain parser
        # (spliced into the prompt via the {format_instructions} placeholder)
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "category": "iod" | "consent_yes" | "consent_no" | "default",\n'
            '  "confidence": 0.0-1.0,\n'
            '  "reasoning": "short explanation",\n'
            '  "keywords_found": ["keyword1", "keyword2"]\n'
            "}\n\n"
            "- Do NOT return a JSON schema.\n"
            "- Do NOT wrap the JSON in markdown code fences."
        )

        # Build prompt
        self.prompt_template = self._create_prompt_template()

    def _create_prompt_template(self) -> str:
        """Create prompt template for email classification.

        Placeholders filled at classification time: {from_email}, {subject},
        {body}, {format_instructions}. Double braces are literal JSON examples.
        """
        return """You are an email classification system for a recruitment department.

Your task is to classify incoming emails from candidates into one of the following categories:

1. **IOD** - Emails related to data protection, GDPR, RODO, privacy rights, or requests to IOD/DPO
   - Keywords that indicate IOD: RODO, IOD, DPO, GDPR, dane osobowe, ochrona danych, sprzeciw, zgoda (in context of data), wycofanie zgody, skarga, UODO, Organ Nadzorczy, profilowanie, automatyczna decyzja, sztuczna inteligencja (in context of data processing), AI (in context of data processing)
   - Examples: "Chcę wycofać zgodę na przetwarzanie danych", "Składam sprzeciw wobec profilowania", "Mam pytanie dotyczące RODO"

2. **consent_yes** - Emails expressing consent to be considered for other positions
   - Keywords: zgoda, zgadzam się, wyrażam zgodę, wyrażam zgodę na udział, wyrażam zgodę na udział w innych rekrutacjach, wyrażam zgodę na udział w innych, wyrażam zgodę na udział w rekrutacjach, tak, chcę, zainteresowany, rozważenie, inne oferty, inne stanowiska, inne pozycje, inne rekrutacje, udział w innych rekrutacjach, udział w innych, udział w rekrutacjach, mogę brać udział, mogę uczestniczyć, jestem zainteresowany innymi ofertami
   - Examples:
     * "Wyrażam zgodę na udział w innych rekrutacjach"
     * "Wyrażam zgodę na udział w innych"
     * "Zgadzam się na rozważenie mojej kandydatury w kontekście innych stanowisk"
     * "Chcę być brany pod uwagę przy innych ofertach"
     * "Tak, wyrażam zgodę"
     * "Jestem zainteresowany innymi ofertami"
     * "Mogę brać udział w innych rekrutacjach"

3. **consent_no** - Emails refusing consent to be considered for other positions
   - Keywords: nie zgadzam się, nie wyrażam zgody, nie wyrażam zgody na udział, nie wyrażam zgody na udział w innych rekrutacjach, odmawiam, nie, nie chcę, wycofuję zgodę, nie jestem zainteresowany, nie rozważaj, nie chcę brać udziału, nie chcę uczestniczyć, nie jestem zainteresowany innymi ofertami, nie wyrażam zgody na udział w innych, nie wyrażam zgody na udział w rekrutacjach
   - Examples:
     * "Nie wyrażam zgody na udział w innych rekrutacjach"
     * "Nie wyrażam zgody na udział w innych"
     * "Nie wyrażam zgody"
     * "Nie wyrażam zgody na rozważenie w innych rekrutacjach"
     * "Wycofuję zgodę na inne oferty"
     * "Nie jestem zainteresowany innymi ofertami"
     * "Nie chcę brać udziału w innych rekrutacjach"

4. **default** - All other emails that should go to HR department
   - Regular questions, follow-ups, general inquiries, etc.

CRITICAL RULES:
- For IOD classification: The email MUST contain at least 2-3 of the critical IOD keywords listed above
- If an email mentions "zgoda" but in context of consent for other positions (not data protection), classify as consent_yes/consent_no, NOT IOD
- If an email mentions "AI" or "sztuczna inteligencja" but in context of job requirements or skills, classify as default, NOT IOD
- Be precise - only classify as IOD if it's clearly about data protection/privacy rights
- For consent classification, look for explicit statements about being considered for other positions
- IMPORTANT: If email contains phrases like "wyrażam zgodę na udział w innych rekrutacjach" or "wyrażam zgodę na udział w innych" → classify as consent_yes
- IMPORTANT: If email contains phrases like "nie wyrażam zgody na udział w innych rekrutacjach" or "nie wyrażam zgody na udział w innych" → classify as consent_no
- Pay attention to Polish language variations: "udział w innych rekrutacjach", "udział w innych", "udział w rekrutacjach" all mean consent for other positions

Email to classify:
From: {from_email}
Subject: {subject}
Body: {body}

Provide your classification in the following JSON format:
{format_instructions}

IMPORTANT: Return ACTUAL DATA VALUES, not a schema description.
Example of correct output:
{{
  "category": "iod",
  "confidence": 0.95,
  "reasoning": "Email contains multiple IOD keywords: RODO, dane osobowe, and requests information about data processing",
  "keywords_found": ["rodo", "dane osobowe", "przetwarzanie danych"]
}}

DO NOT return:
{{
  "description": "Email classification model",
  "properties": {{...}},
  "required": [...]
}}
"""

    def classify_email(self, from_email: str, subject: str, body: str) -> EmailClassification:
        """
        Classify email using AI.

        Args:
            from_email: Sender's email address
            subject: Email subject
            body: Email body text

        Returns:
            EmailClassification object (falls back to 'default' on any error)
        """
        try:
            # Format prompt
            prompt = self.prompt_template.format(
                from_email=from_email,
                subject=subject,
                body=body[:5000],  # Limit body length to avoid token limits
                format_instructions=self.format_instructions,
            )

            # Get classification via LLM adapter
            result_text, _ = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an email classification assistant. "
                            "You must respond with JSON exactly as described in the format_instructions."
                        ),
                    },
                    {"role": "user", "content": prompt},
                ],
                max_completion_tokens=500,
            )

            # Parse response
            classification = self._parse_classification_from_text(result_text or "")

            # Validate IOD classification - must have at least ONE critical keyword
            # (guards against the model hallucinating an IOD label)
            if classification.category == "iod":
                body_lower = body.lower()
                subject_lower = subject.lower()
                text_lower = f"{subject_lower} {body_lower}"

                found_keywords = [
                    keyword
                    for keyword in self.CRITICAL_IOD_KEYWORDS
                    if keyword.lower() in text_lower
                ]

                # Require at least ONE strong IOD keyword in the actual email text
                if len(found_keywords) < 1:
                    logger.warning(
                        "IOD classification rejected - no critical IOD keywords found in email text. "
                        "Reclassifying as 'default'."
                    )
                    # Reclassify as default
                    classification.category = "default"
                    classification.reasoning = (
                        "Originally classified as IOD by the model, but no critical IOD keywords were found "
                        "in the email subject/body. Reclassified as default (HR)."
                    )
                    classification.confidence = 0.7  # moderate confidence for the rule-based override

            logger.info(
                f"Email classified as '{classification.category}' "
                f"(confidence: {classification.confidence:.2f}, keywords: {classification.keywords_found})"
            )

            return classification

        except Exception as e:
            logger.error(f"Error classifying email: {str(e)}", exc_info=True)
            # Fallback to default classification
            return EmailClassification(
                category="default",
                confidence=0.5,
                reasoning=f"Classification failed: {str(e)}. Defaulting to HR routing.",
                keywords_found=[],
            )

    def _parse_classification_from_text(self, text: str) -> EmailClassification:
        """
        Parse EmailClassification from raw model text, handling common JSON issues.

        Raises:
            ValueError: if the model returned an empty response.
        """
        if not text:
            raise ValueError("Empty response from model")

        # Parse JSON with fallback extraction
        data = parse_json_safe(text, fallback_to_extraction=True)

        return EmailClassification(**data)

__init__(model_name=settings.azure_openai_gpt_deployment, temperature=0.1, api_key=None, timeout=30, max_retries=2)

Initialize Email Classifier Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/email_classifier_agent.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def __init__(
    self,
    model_name: str = settings.azure_openai_gpt_deployment,
    temperature: float = 0.1,  # Low temperature for consistent classification
    api_key: Optional[str] = None,
    timeout: int = 30,
    max_retries: int = 2,
):
    """
    Initialize Email Classifier Agent using Azure OpenAI SDK (no LangChain).
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Plain-text output contract shown to the model (replaces a LangChain parser)
    self.format_instructions = "\n".join(
        [
            "Return ONLY a single JSON object with the following structure:",
            "{",
            '  "category": "iod" | "consent_yes" | "consent_no" | "default",',
            '  "confidence": 0.0-1.0,',
            '  "reasoning": "short explanation",',
            '  "keywords_found": ["keyword1", "keyword2"]',
            "}",
            "",
            "- Do NOT return a JSON schema.",
            "- Do NOT wrap the JSON in markdown code fences.",
        ]
    )

    # The prompt is static, so build it once up front
    self.prompt_template = self._create_prompt_template()

classify_email(from_email, subject, body)

Classify email using AI.

Parameters:

Name Type Description Default
from_email str

Sender's email address

required
subject str

Email subject

required
body str

Email body text

required

Returns:

Type Description
EmailClassification

EmailClassification object

Source code in agents/email_classifier_agent.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def classify_email(self, from_email: str, subject: str, body: str) -> EmailClassification:
    """
    Classify an incoming email with the LLM.

    Args:
        from_email: Sender's email address
        subject: Email subject
        body: Email body text

    Returns:
        EmailClassification object (falls back to 'default' on any error)
    """
    try:
        # Fill in the prompt template; cap body size to stay within token limits
        filled_prompt = self.prompt_template.format(
            from_email=from_email,
            subject=subject,
            body=body[:5000],
            format_instructions=self.format_instructions,
        )

        # Ask the LLM for a JSON classification
        raw_reply, _ = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an email classification assistant. "
                        "You must respond with JSON exactly as described in the format_instructions."
                    ),
                },
                {"role": "user", "content": filled_prompt},
            ],
            max_completion_tokens=500,
        )

        result = self._parse_classification_from_text(raw_reply or "")

        # Guard against model hallucination: an 'iod' label must be backed by
        # at least one critical keyword actually present in the email text.
        if result.category == "iod":
            haystack = f"{subject.lower()} {body.lower()}"
            hits = [kw for kw in self.CRITICAL_IOD_KEYWORDS if kw.lower() in haystack]

            if not hits:
                logger.warning(
                    "IOD classification rejected - no critical IOD keywords found in email text. "
                    "Reclassifying as 'default'."
                )
                # Downgrade to the HR default with moderate confidence
                result.category = "default"
                result.reasoning = (
                    "Originally classified as IOD by the model, but no critical IOD keywords were found "
                    "in the email subject/body. Reclassified as default (HR)."
                )
                result.confidence = 0.7

        logger.info(
            f"Email classified as '{result.category}' "
            f"(confidence: {result.confidence:.2f}, keywords: {result.keywords_found})"
        )

        return result

    except Exception as e:
        logger.error(f"Error classifying email: {str(e)}", exc_info=True)
        # Any failure degrades gracefully to HR routing
        return EmailClassification(
            category="default",
            confidence=0.5,
            reasoning=f"Classification failed: {str(e)}. Defaulting to HR routing.",
            keywords_found=[],
        )

Bases: BaseAgent

Agent that classifies email inquiries and decides how to respond.

Can decide:

- "direct_answer" — can answer based on basic knowledge
- "rag_answer" — must use RAG from the vector database
- "forward_to_hr" — forward to HR (specific, sensitive, or human-intervention questions)

Source code in agents/query_classifier_agent.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
class QueryClassifierAgent(BaseAgent):
    """
    Agent that classifies email inquiries and decides how to respond.

    Can decide:
    - "direct_answer" - can answer based on basic knowledge
    - "rag_answer" - must use RAG from the vector database
    - "forward_to_hr" - forward to HR (specific, sensitive, or human-intervention questions)
    """

    def __init__(self, model_name: Optional[str] = None, temperature: float = 0.3):
        """
        Initialize the classifier.

        Args:
            model_name: Model/deployment name; falls back to the configured
                Azure OpenAI GPT deployment when not provided.
            temperature: Sampling temperature (kept low for stable classification).
        """
        from config import settings

        model_name = model_name or settings.azure_openai_gpt_deployment
        super().__init__(model_name=model_name, temperature=temperature)

        # Basic knowledge for the agent (can answer without RAG)
        self.basic_knowledge = """
PODSTAWOWA WIEDZA O REKRUTACJI:

1. Proces rekrutacji:
- Pierwsza selekcja (screening) - weryfikacja CV
- Rozmowa HR - ocena kompetencji miękkich
- Ocena techniczna - dla stanowisk technicznych
- Weryfikacja wiedzy
- Rozmowa finalna

2. Komunikacja:
- Odpowiedzi w ciągu 5 dni roboczych
- Profesjonalne i empatyczne komunikaty
- Konstruktywny feedback

3. Zgoda na inne rekrutacje:
- Dobrowolna i można ją wycofać
- Informujemy o nowych ofertach jeśli zgoda wyrażona

4. Feedback:
- Zawsze konstruktywny
- Zawiera mocne strony i obszary do rozwoju
- Motywujący i wspierający

5. Decyzje:
- Akceptacja - przejście do kolejnego etapu
- Odrzucenie - generowanie feedbacku i wysłanie emaila
"""

        # RAG knowledge base description – so the agent knows when to use it
        self.rag_knowledge_description = """
DODATKOWA BAZA WIEDZY (RAG – vektorowa baza dokumentów):

Ta baza zawiera przede wszystkim TREŚCI FORMALNE I POLITYKI firmy, w szczególności:
- rodo_ai_act.txt – fragmenty dokumentów dotyczących RODO, ochrony danych osobowych,
  wykorzystania AI w rekrutacji, podstawy prawne, obowiązki informacyjne itp.
- polityka_rekrutacji.txt – wewnętrzna polityka rekrutacyjna firmy: zasady procesu,
  standardy komunikacji z kandydatami, przechowywania danych, okresy retencji itp.
- informacje_o_firmie.txt – ogólne informacje o firmie, misja, wartości, opis działalności.

RAG jest szczególnie przydatny gdy:
- pytanie dotyczy RODO, ochrony danych, AI Act, podstaw prawnych i formalnych obowiązków,
- pytanie dotyczy wewnętrznych procedur lub polityki rekrutacyjnej,
- kandydat pyta o „jak firma przetwarza dane”, „jak długo przechowujecie CV”, „jak działa AI w rekrutacji”.

Jeśli pytanie DOTYCZY powyższych obszarów, preferuj użycie \"rag_answer\".
Jeśli po użyciu RAG nadal nie ma wystarczających, jednoznacznych informacji – wtedy przekaż sprawę do HR (forward_to_hr).
"""

    def classify_query(self, email_subject: str, email_body: str, sender_email: str) -> Dict:
        """
        Classify the inquiry and decide how to respond.

        Returns:
            Dict with keys:
            - action: "direct_answer" | "rag_answer" | "forward_to_hr"
            - reasoning: decision justification
            - confidence: confidence level (0.0-1.0)
            - suggested_response: response suggestion (if action="direct_answer")
        """
        prompt = self._create_classification_prompt(email_subject, email_body, sender_email)

        try:
            result_text, _ = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert in classifying email inquiries in the recruitment process. You analyze inquiries and decide on the best way to respond.",
                    },
                    {"role": "user", "content": prompt},
                ],
                # response_format is only understood by some providers; pass through
                response_format={"type": "json_object"},
            )

            result_text = (result_text or "").strip()
            # Robustness: some providers ignore response_format and wrap the
            # JSON in markdown code fences; strip them before parsing.
            if result_text.startswith("```"):
                result_text = result_text.strip("`")
                if result_text.startswith("json"):
                    result_text = result_text[4:]
                result_text = result_text.strip()
            result = json.loads(result_text)

            # Validate result: any unknown action is routed to HR for safety.
            if result.get("action") not in ["direct_answer", "rag_answer", "forward_to_hr"]:
                result["action"] = "forward_to_hr"
                result["reasoning"] = "Invalid classification - forwarded to HR for safety"
                result["confidence"] = 0.0

            # CRITICAL VALIDATION: Different thresholds for different actions
            # For rag_answer: allow trying even with lower confidence (0.5+), as RAG may find the answer
            # For direct_answer: require higher confidence (0.7+)
            confidence = result.get("confidence", 0.0)
            try:
                confidence = float(confidence)
            except (ValueError, TypeError):
                confidence = 0.0
            # BUG FIX: always write back the normalized float so downstream
            # consumers never see a raw string/None confidence from the model.
            result["confidence"] = confidence

            action = result.get("action", "forward_to_hr")
            if action == "rag_answer" and confidence < 0.5:
                # For rag_answer: threshold 0.5 (lower, as RAG may find the answer)
                result["action"] = "forward_to_hr"
                result["reasoning"] = (
                    f"Confidence level ({confidence}) is too low for rag_answer (< 0.5). Forwarded to HR for safety."
                )
            elif action == "direct_answer" and confidence < 0.7:
                # For direct_answer: threshold 0.7.
                # BUG FIX: the original condition (action != "rag_answer") also
                # matched "forward_to_hr" and clobbered the model's specific
                # reasoning with a generic low-confidence message; only demote
                # direct_answer here.
                result["action"] = "forward_to_hr"
                result["reasoning"] = (
                    f"Confidence level ({confidence}) is below required (0.7). Forwarded to HR for safety."
                )

            return result

        except Exception as e:
            # On error, safer to forward to HR
            return {
                "action": "forward_to_hr",
                "reasoning": f"Error during classification: {str(e)}",
                "confidence": 0.0,
            }

    def _create_classification_prompt(
        self, email_subject: str, email_body: str, sender_email: str
    ) -> str:
        """Create prompt for query classification."""
        return f"""
You are analyzing an email inquiry from a candidate in the recruitment process.

AGENT'S BASIC KNOWLEDGE:
{self.basic_knowledge}

RAG KNOWLEDGE BASE (VECTOR DOCUMENTS AVAILABLE FOR YOU):
{self.rag_knowledge_description}

EMAIL:
Subject: {email_subject}
From: {sender_email}
Content: {email_body}

TASK:
Decide how to best respond to this inquiry. You have 3 options:

⚠️ CRITICAL RULES (BALANCE BETWEEN SAFETY AND USEFULNESS):
- Jeśli możesz odpowiedzieć na podstawie PODSTAWOWEJ WIEDZY lub dokumentów RAG z wysoką pewnością (confidence blisko 1.0),
  wybierz odpowiednio "direct_answer" lub "rag_answer".
- Jeśli po przeanalizowaniu treści nadal masz poważne wątpliwości lub temat dotyczy indywidualnej sytuacji kandydata,
  przekaż sprawę do HR (forward_to_hr).

1. "direct_answer" - You can answer based on basic knowledge (recruitment process, general information, standard procedures)
   - ⚠️ Use when: you are highly confident (confidence is high, np. >= 0.7)
   - ⚠️ Use ONLY when: the question concerns standard procedures that are clearly defined in basic knowledge
   - Examples: "What are the recruitment stages?", "How can I express consent for other recruitments?"
   - ❌ DO NOT use for: questions about details that may vary, questions requiring interpretation

2. "rag_answer" - You must use RAG from vector database (detailed information from company documents)
   - ⚠️ Prefer this option when: the question touches GDPR/RODO, AI Act, data protection, internal recruitment policy,
     or other topics that are TYPICZNIE opisane w dokumentach (regulaminy, polityki, oficjalne zasady).
   - ⚠️ Use when: the question requires detailed knowledge from documents and you reasonably expect the documents to contain the answer.
   - Examples:
     * "Jak dokładnie przetwarzacie moje dane w procesie rekrutacji?"
     * "Jak długo przechowujecie CV?"
     * "Jakie są wymagania RODO w kontekście rekrutacji?"
     * "Jak używacie AI w procesie rekrutacji i jakie są zasady?"
   - Jeśli po skorzystaniu z RAG odpowiedź nadal nie jest wystarczająco jednoznaczna lub pełna – wtedy lepiej wybrać forward_to_hr.

3. "forward_to_hr" - Forward to HR (ALWAYS when you are not 100% certain)
   - ⚠️ Use ALWAYS when:
     * You have serious doubts and confidence is low (np. < 0.7)
     * The question concerns a specific candidate application (status, decision, details)
     * The question is sensitive or requires access to candidate data
     * The question should not be handled by AI
     * RAG documents do not contain sufficiently clear / reliable answer
     * The question requires interpretation or subjective assessment
   - Examples: "What is the status of my application?", "Why was I rejected?", "I want to change data in my CV", "What are the details of AI Act?" (if you are not certain that documents contain the answer)

RETURN JSON in format:
{{
    "action": "direct_answer" | "rag_answer" | "forward_to_hr",
    "reasoning": "Detailed justification of the decision",
    "confidence": 0.0-1.0,
    "suggested_response": "Response suggestion (only if action='direct_answer', otherwise null)"
}}
"""

classify_query(email_subject, email_body, sender_email)

Classify the inquiry and decide how to respond.

Returns:

Type Description
Dict

Dict with the keys:

  • action: "direct_answer" | "rag_answer" | "forward_to_hr"
  • reasoning: decision justification
  • confidence: confidence level (0.0-1.0)
  • suggested_response: response suggestion (if action="direct_answer")
Source code in agents/query_classifier_agent.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def classify_query(self, email_subject: str, email_body: str, sender_email: str) -> Dict:
    """
    Classify the inquiry and decide how to respond.

    Returns:
        Dict with keys:
        - action: "direct_answer" | "rag_answer" | "forward_to_hr"
        - reasoning: decision justification
        - confidence: confidence level (0.0-1.0)
        - suggested_response: response suggestion (if action="direct_answer")
    """
    classification_prompt = self._create_classification_prompt(
        email_subject, email_body, sender_email
    )

    try:
        raw_reply, _ = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert in classifying email inquiries in the recruitment process. You analyze inquiries and decide on the best way to respond.",
                },
                {"role": "user", "content": classification_prompt},
            ],
            # response_format is only understood by some providers; pass through
            response_format={"type": "json_object"},
        )

        result = json.loads((raw_reply or "").strip())

        # Any unrecognized action is routed to HR as the safe default.
        if result.get("action") not in ("direct_answer", "rag_answer", "forward_to_hr"):
            result["action"] = "forward_to_hr"
            result["reasoning"] = "Invalid classification - forwarded to HR for safety"
            result["confidence"] = 0.0

        # Normalize confidence to a float; anything unparsable counts as 0.0.
        try:
            confidence = float(result.get("confidence", 0.0))
        except (ValueError, TypeError):
            confidence = 0.0

        # Per-action confidence thresholds: rag_answer may proceed at >= 0.5
        # (RAG might still locate the answer); everything else needs >= 0.7.
        action = result.get("action", "forward_to_hr")
        too_uncertain_for_rag = action == "rag_answer" and confidence < 0.5
        too_uncertain_otherwise = action != "rag_answer" and confidence < 0.7

        if too_uncertain_for_rag:
            result["action"] = "forward_to_hr"
            result["reasoning"] = (
                f"Confidence level ({confidence}) is too low for rag_answer (< 0.5). Forwarded to HR for safety."
            )
            result["confidence"] = confidence
        elif too_uncertain_otherwise:
            result["action"] = "forward_to_hr"
            result["reasoning"] = (
                f"Confidence level ({confidence}) is below required (0.7). Forwarded to HR for safety."
            )
            result["confidence"] = confidence

        return result

    except Exception as e:
        # Any failure means we forward to HR instead of guessing.
        return {
            "action": "forward_to_hr",
            "reasoning": f"Error during classification: {str(e)}",
            "confidence": 0.0,
        }

Bases: BaseAgent

Agent that generates responses to email inquiries. Can use basic knowledge or RAG from the vector database.

Source code in agents/query_responder_agent.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class QueryResponderAgent(BaseAgent):
    """
    Agent that generates responses to email inquiries.
    Can use basic knowledge or RAG from the vector database.
    """

    def __init__(self, model_name: Optional[str] = None, temperature: float = 0.7):
        """
        Initialize the responder agent.

        Args:
            model_name: Model name; falls back to the configured OpenAI model.
            temperature: Sampling temperature (higher for more natural replies).
        """
        from config import settings

        model_name = model_name or settings.openai_model
        super().__init__(model_name=model_name, temperature=temperature)

        # Basic knowledge (in Polish) injected into every response prompt.
        self.basic_knowledge = """
PODSTAWOWA WIEDZA O REKRUTACJI:

1. Proces rekrutacji:
- Pierwsza selekcja (screening) - weryfikacja CV i podstawowych wymagań
- Rozmowa HR - ocena kompetencji miękkich, motywacji, dopasowania kulturowego
- Ocena techniczna - testy, zadania praktyczne (dla stanowisk technicznych)
- Weryfikacja wiedzy - sprawdzenie kompetencji merytorycznych
- Rozmowa finalna - spotkanie z przełożonym, negocjacje warunków

2. Komunikacja z kandydatami:
- Odpowiedzi na aplikacje w ciągu 5 dni roboczych
- Wszystkie komunikaty są profesjonalne, przyjazne i empatyczne
- Informacja zwrotna zawsze zawiera konstruktywne uwagi
- Unikamy słów "odrzucenie", "odmowa" - używamy łagodniejszych sformułowań

3. Zgoda na inne rekrutacje:
- Kandydaci mogą wyrazić zgodę na rozważenie ich kandydatury w innych rekrutacjach
- Zgoda jest dobrowolna i można ją wycofać w każdej chwili
- Jeśli kandydat wyraził zgodę, informujemy go o nowych, odpowiednich ofertach

4. Feedback:
- Zawsze konstruktywny i wspierający
- Zawiera mocne strony kandydata
- Wskazuje obszary do rozwoju w sposób empatyczny
- Zachęca do dalszego rozwoju zawodowego

5. Podpis w emailach:
- "Z wyrazami szacunku\n\nDział HR"
"""

    def generate_response(
        self,
        email_subject: str,
        email_body: str,
        sender_email: str,
        rag_context: Optional[List[Dict]] = None,
    ) -> Optional[str]:
        """
        Generate a response to an email inquiry.

        Args:
            email_subject: Email subject
            email_body: Email body content
            sender_email: Sender email address
            rag_context: RAG context (list of documents from the vector database)

        Returns:
            Generated response or None if the agent is not confident (then forward to HR)
        """
        prompt = self._create_response_prompt(email_subject, email_body, sender_email, rag_context)

        try:
            response_text, _ = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an HR department assistant. You respond to candidate inquiries in a professional, friendly, and helpful manner. You MUST always write responses in POLISH (Polish language). Always end your response with the signature 'Z wyrazami szacunku\n\nDział HR'. If you are not certain of the answer, return the special value: 'FORWARD_TO_HR'.",
                    },
                    {"role": "user", "content": prompt},
                ],
            )

            response_text = (response_text or "").strip()

            # Forward-to-HR signal anywhere in the reply means "not confident".
            # (The substring check subsumes the previous exact-equality check.)
            if "FORWARD_TO_HR" in response_text.upper():
                logger.info("Agent returned FORWARD_TO_HR signal - not confident enough to answer")
                return None

            # Check if response contains uncertainty phrases (in Polish);
            # any hit means the model hedged and we should escalate instead.
            uncertainty_phrases = [
                "nie posiadamy szczegółowych informacji",
                "chociaż nie posiadamy",
                "nie mamy dokładnych informacji",
                "nie jesteśmy w stanie",
                "nie możemy udzielić",
                "przekazaliśmy do działu hr",
                "przekazaliśmy je do działu hr",
                "przekazaliśmy do hr",
                "przekazaliśmy je do hr",
                "dziękujemy za pańskie zapytanie. przekazaliśmy",
                "przekazaliśmy je do działu",
                "skontaktuje się z państwem w najkrótszym możliwym terminie",  # Typical phrase when forwarding to HR
            ]

            response_lower = response_text.lower()
            found_phrases = [p for p in uncertainty_phrases if p in response_lower]

            if found_phrases:
                logger.warning(
                    f"Response contains uncertainty phrases - agent not confident enough. Phrases found: {found_phrases}"
                )
                return None

            # Add privacy policy link to the end of response (after signature)
            response_text = self._add_privacy_link(response_text)

            return response_text

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return None

    def _create_response_prompt(
        self,
        email_subject: str,
        email_body: str,
        sender_email: str,
        rag_context: Optional[List[Dict]] = None,
    ) -> str:
        """Create prompt for generating response, optionally embedding RAG documents."""
        context_section_english = ""
        if rag_context:
            context_section_english = "\n\nADDITIONAL CONTEXT FROM KNOWLEDGE BASE:\n"
            for i, doc in enumerate(rag_context, 1):
                context_section_english += f"\n--- Document {i} ---\n"
                context_section_english += (
                    f"Source: {doc.get('metadata', {}).get('source', 'N/A')}\n"
                )
                context_section_english += f"Content: {doc.get('document', '')}\n"

        return f"""
You are an HR assistant responding to candidate inquiries in the recruitment process.

BASIC KNOWLEDGE:
{self.basic_knowledge}
{context_section_english}

EMAIL:
Subject: {email_subject}
From: {sender_email}
Content: {email_body}

TASK:
Generate a professional, friendly, and helpful response to this inquiry.

DECISION RULES:
1. If RAG context is provided and contains relevant information → Answer based on RAG context (you can be confident)
2. If question can be answered from basic knowledge → Answer from basic knowledge
3. If question requires specific candidate data or personal information → Return "FORWARD_TO_HR"
4. If question requires interpretation or subjective assessment → Return "FORWARD_TO_HR"
5. If RAG context is empty or irrelevant → Return "FORWARD_TO_HR"

REQUIREMENTS:
1. If RAG context is provided, use it to answer the question - you can be confident if RAG found relevant documents
2. The answer must be factually accurate based on basic knowledge and RAG context (if available)
3. Use professional but friendly tone
4. Be empathetic and supportive
5. ❌ NEVER answer in style: "Although we do not have detailed information..." - this means you should forward to HR
6. ❌ NEVER answer in style: "we do not have detailed information" - this indicates lack of certainty
7. ❌ NEVER answer if question requires specific candidate data or personal information
8. ⚠️ CRITICAL: If you cannot answer based on available context, return ONLY: "FORWARD_TO_HR" (without any other text, no Polish text, just these exact words)
9. ⚠️ CRITICAL: DO NOT generate a response saying "we forwarded to HR" or "we will forward to HR" - if you cannot answer, return ONLY "FORWARD_TO_HR"
10. ⚠️ CRITICAL: DO NOT write "Dziękujemy za Pańskie zapytanie. Przekazaliśmy je do działu HR..." - if you cannot answer, return ONLY "FORWARD_TO_HR"
11. Always end the response with: "Z wyrazami szacunku\n\nDział HR" (ONLY if you are answering, not if returning FORWARD_TO_HR)
12. ⚠️ CRITICAL: The response MUST be written in POLISH (Polish language) - this is mandatory (ONLY if you are answering, not if returning FORWARD_TO_HR)
13. If the question concerns a specific candidate application, suggest direct contact with HR

LANGUAGE REQUIREMENT (ONLY if answering, not if returning FORWARD_TO_HR):
- You MUST write the ENTIRE response in POLISH (Polish language)
- Use natural, conversational Polish
- Professional but friendly tone
- All content must be in Polish, including greetings, explanations, and closing
- The response should sound natural and human-like in Polish

REMEMBER:
- If you have RAG context with relevant information → Answer in Polish (you can be confident)
- If you can answer from basic knowledge → Answer in Polish
- If you cannot answer based on available context → Return ONLY "FORWARD_TO_HR" (no Polish text, no explanation)

ODPOWIEDŹ:
"""

    def _add_privacy_link(self, response_text: str) -> str:
        """Append the privacy-policy note right after the standard HR signature.

        If the signature is absent, the text is returned unchanged.
        """
        from config import settings

        if "Z wyrazami szacunku" not in response_text:
            return response_text

        # Prefer the dedicated privacy-policy URL, fall back to the company
        # website, then to the placeholder example URL.
        url = settings.privacy_policy_url or settings.company_website
        if url:
            privacy_text = f"\n\nInformacje o przetwarzaniu danych osobowych, w tym wykorzystaniu narzędzi AI znajdziesz na naszej stronie internetowe: {url}"
        else:
            privacy_text = '\n\nInformacje o przetwarzaniu danych osobowych, w tym wykorzystaniu narzędzi AI znajdziesz na naszej stronie internetowe: "https://www.example.com/privacy".'

        # Insert privacy text after signature
        return response_text.replace(
            "Z wyrazami szacunku\n\nDział HR", f"Z wyrazami szacunku\n\nDział HR{privacy_text}"
        )

generate_response(email_subject, email_body, sender_email, rag_context=None)

Generate a response to an email inquiry.

Parameters:

Name Type Description Default

email_subject (str) – Email subject. Required.
email_body (str) – Email body content. Required.
sender_email (str) – Sender email address. Required.
rag_context (Optional[List[Dict]]) – RAG context (list of documents from the vector database). Default: None.

Returns:

Type Description
Optional[str]

Generated response or None if the agent is not confident (then forward to HR)

Source code in agents/query_responder_agent.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def generate_response(
    self,
    email_subject: str,
    email_body: str,
    sender_email: str,
    rag_context: Optional[List[Dict]] = None,
) -> Optional[str]:
    """
    Generate a response to an email inquiry.

    Args:
        email_subject: Email subject
        email_body: Email body content
        sender_email: Sender email address
        rag_context: RAG context (list of documents from the vector database)

    Returns:
        Generated response or None if the agent is not confident (then forward to HR)
    """
    prompt = self._create_response_prompt(email_subject, email_body, sender_email, rag_context)

    system_instructions = "You are an HR department assistant. You respond to candidate inquiries in a professional, friendly, and helpful manner. You MUST always write responses in POLISH (Polish language). Always end your response with the signature 'Z wyrazami szacunku\n\nDział HR'. If you are not certain of the answer, return the special value: 'FORWARD_TO_HR'."

    try:
        reply, _ = self._chat(
            messages=[
                {"role": "system", "content": system_instructions},
                {"role": "user", "content": prompt},
            ],
        )

        reply = (reply or "").strip()

        # The escalation token anywhere in the reply means the model declined
        # to answer; hand the thread over to a human.
        if "FORWARD_TO_HR" in reply.upper():
            logger.info("Agent returned FORWARD_TO_HR signal - not confident enough to answer")
            return None

        # Polish hedging phrases that signal the model is guessing rather
        # than answering; any match forces escalation to HR.
        uncertainty_phrases = [
            "nie posiadamy szczegółowych informacji",
            "chociaż nie posiadamy",
            "nie mamy dokładnych informacji",
            "nie jesteśmy w stanie",
            "nie możemy udzielić",
            "przekazaliśmy do działu hr",
            "przekazaliśmy je do działu hr",
            "przekazaliśmy do hr",
            "przekazaliśmy je do hr",
            "dziękujemy za pańskie zapytanie. przekazaliśmy",
            "przekazaliśmy je do działu",
            "skontaktuje się z państwem w najkrótszym możliwym terminie",  # Typical phrase when forwarding to HR
        ]

        lowered = reply.lower()
        if any(phrase in lowered for phrase in uncertainty_phrases):
            logger.warning(
                f"Response contains uncertainty phrases - agent not confident enough. Phrases found: {[p for p in uncertainty_phrases if p in lowered]}"
            )
            return None

        # Confident answer: append the privacy-policy note after the signature.
        return self._add_privacy_link(reply)

    except Exception as e:
        logger.error(f"Error generating response: {str(e)}")
        return None

Bases: BaseAgent

Agent for validating RAG-generated responses to candidate inquiries.

Source code in agents/rag_response_validator_agent.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
class RAGResponseValidatorAgent(BaseAgent):
    """Agent for validating RAG-generated responses to candidate inquiries.

    Asks an LLM to judge a generated reply against the original email and the
    RAG source documents, expecting a strict JSON verdict that is parsed into
    a ValidationResult. Any failure anywhere in the pipeline yields a
    rejected verdict, so unvalidated content is never approved by accident.
    """

    def __init__(
        self,
        model_name: str = "gpt-4o",
        temperature: float = 0.0,
        api_key: Optional[str] = None,
        timeout: int = 60,
        max_retries: int = 2,
    ):
        """
        Initialize RAG Response Validator Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Model/deployment name forwarded to BaseAgent.
            temperature: Sampling temperature; 0.0 keeps verdicts deterministic.
            api_key: Optional API key override.
            timeout: Request timeout in seconds.
            max_retries: Retry count for failed LLM calls.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions describing ValidationResult JSON schema
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "status": "approved" | "rejected",\n'
            '  "is_approved": true | false,\n'
            '  "reasoning": "short explanation",\n'
            '  "issues_found": ["issue 1", "issue 2"],\n'
            '  "ethical_concerns": ["concern 1", "concern 2"],\n'
            '  "factual_errors": ["error 1", "error 2"],\n'
            '  "suggestions": ["suggestion 1", "suggestion 2"]\n'
            "}\n\n"
            "- Do NOT return a JSON schema or description.\n"
            "- Do NOT wrap the JSON in markdown code fences.\n"
            "- All list fields must be valid JSON arrays of strings."
        )

        # Store prompt template
        self.prompt_template = RAG_RESPONSE_VALIDATION_PROMPT

    def validate_rag_response(
        self,
        generated_response: str,
        email_subject: str,
        email_body: str,
        sender_email: str,
        rag_sources: List[Dict],
        validation_number: Optional[int] = None,
    ) -> ValidationResult:
        """
        Validate RAG-generated response using Azure OpenAI (no LangChain).

        Args:
            generated_response: The AI-generated response to validate
            email_subject: Original email subject
            email_body: Original email body
            sender_email: Sender's email address
            rag_sources: List of RAG source documents used to generate the response
            validation_number: Optional validation number for tracking

        Returns:
            ValidationResult object; a rejected result is returned on any
            processing error (fail-closed behavior).
        """
        logger.info(f"Validating RAG response for inquiry from {sender_email}")

        # Format RAG sources for prompt
        rag_sources_str = self._format_rag_sources(rag_sources)

        # Build prompt with format instructions
        prompt_text = self.prompt_template.format(
            generated_response=generated_response,
            email_subject=email_subject,
            email_body=email_body,
            sender_email=sender_email,
            rag_sources=rag_sources_str,
            format_instructions=self.format_instructions,
        )

        # Run validation via LLM adapter
        try:
            # Fix: removed a duplicated logger.info call that repeated the
            # "Validating RAG response..." message already logged above,
            # which produced two identical log lines per validation.
            raw_text, _response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a careful JSON-producing validation assistant. "
                            "You must follow the format_instructions exactly."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
                max_completion_tokens=2000,
            )

            if not raw_text:
                raise ValueError("Empty response from RAG validator model")

            # Track model response (optional - can be extended to save to database)
            # For now, just log it
            logger.debug(f"Validation response: {raw_text[:500]}...")

            validation_result = self._parse_validation_from_text(raw_text)
            logger.info(
                f"Validation completed for {sender_email}: {validation_result.status.value} "
                f"(is_approved: {validation_result.is_approved})"
            )
            return validation_result
        except Exception as e:
            error_msg = f"Failed to validate RAG response: {str(e)}"
            logger.error(error_msg, exc_info=True)

            # On validation failure, reject by default for safety
            return ValidationResult(
                status=ValidationStatus.REJECTED,
                is_approved=False,
                reasoning=f"Validation process failed: {str(e)}. Response rejected for safety.",
                issues_found=["Validation process error"],
                ethical_concerns=[],
                factual_errors=[],
                suggestions=["Please review the validation process and try again."],
            )

    def _format_rag_sources(self, rag_sources: List[Dict]) -> str:
        """Format RAG sources into a numbered plain-text listing for the prompt.

        Args:
            rag_sources: Items with optional "metadata" (source name, score)
                and "document" (content) keys.

        Returns:
            Human-readable text block, or a placeholder when no sources exist.
        """
        if not rag_sources:
            return "No RAG sources provided."

        formatted = []
        for i, source in enumerate(rag_sources, 1):
            metadata = source.get("metadata", {})
            document = source.get("document", "")
            source_name = metadata.get("source", "Unknown source")
            score = metadata.get("score", None)

            source_text = f"--- Source {i}: {source_name} ---\n"
            # Robustness fix: only format the score when it is numeric, so a
            # malformed metadata entry (e.g. score stored as a string) cannot
            # crash the whole validation call with a format error.
            if isinstance(score, (int, float)):
                source_text += f"Relevance score: {score:.4f}\n"
            source_text += f"Content:\n{document}\n"
            formatted.append(source_text)

        return "\n".join(formatted)

    def _parse_validation_from_text(self, text: str) -> ValidationResult:
        """
        Parse ValidationResult from raw model text, handling common JSON issues.

        Args:
            text: Raw model output expected to contain one JSON object.

        Returns:
            ValidationResult with missing fields defaulted to safe values
            (status defaults to rejected, is_approved to False).

        Raises:
            ValueError: If the model returned no text at all.
        """
        if not text:
            raise ValueError("Empty response from model")

        # Parse JSON with fallback extraction
        data = parse_json_safe(text, fallback_to_extraction=True)

        # Coerce any scalar/None value into a list of strings.
        def ensure_str_list(value):
            if isinstance(value, list):
                return [str(v) for v in value]
            if not value:
                return []
            return [str(value)]

        # Map to ValidationResult, with sensible defaults. Note that "status"
        # and "is_approved" come from the model independently and may disagree;
        # downstream code reads is_approved.
        status_str = data.get("status", "rejected")
        status = (
            ValidationStatus(status_str)
            if status_str in ("approved", "rejected")
            else ValidationStatus.REJECTED
        )
        is_approved = bool(data.get("is_approved", False))
        reasoning = data.get("reasoning") or "No reasoning provided."

        return ValidationResult(
            status=status,
            is_approved=is_approved,
            reasoning=str(reasoning),
            issues_found=ensure_str_list(data.get("issues_found")),
            ethical_concerns=ensure_str_list(data.get("ethical_concerns")),
            factual_errors=ensure_str_list(data.get("factual_errors")),
            suggestions=ensure_str_list(data.get("suggestions")),
        )

__init__(model_name='gpt-4o', temperature=0.0, api_key=None, timeout=60, max_retries=2)

Initialize RAG Response Validator Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/rag_response_validator_agent.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    model_name: str = "gpt-4o",
    temperature: float = 0.0,
    api_key: Optional[str] = None,
    timeout: int = 60,
    max_retries: int = 2,
):
    """Set up the validator agent on top of the Azure OpenAI base agent.

    Builds the static JSON-format instructions the model must follow and
    stores the validation prompt template for later formatting.
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Assemble the static format instructions (the ValidationResult JSON
    # shape) line by line; the joined result matches the original literal.
    instruction_lines = [
        "Return ONLY a single JSON object with the following structure:",
        "{",
        '  "status": "approved" | "rejected",',
        '  "is_approved": true | false,',
        '  "reasoning": "short explanation",',
        '  "issues_found": ["issue 1", "issue 2"],',
        '  "ethical_concerns": ["concern 1", "concern 2"],',
        '  "factual_errors": ["error 1", "error 2"],',
        '  "suggestions": ["suggestion 1", "suggestion 2"]',
        "}",
        "",
        "- Do NOT return a JSON schema or description.",
        "- Do NOT wrap the JSON in markdown code fences.",
        "- All list fields must be valid JSON arrays of strings.",
    ]
    self.format_instructions = "\n".join(instruction_lines)

    # Keep the prompt template on the instance for validate_rag_response.
    self.prompt_template = RAG_RESPONSE_VALIDATION_PROMPT

validate_rag_response(generated_response, email_subject, email_body, sender_email, rag_sources, validation_number=None)

Validate RAG-generated response using Azure OpenAI (no LangChain).

Parameters:

Name Type Description Default
generated_response str

The AI-generated response to validate

required
email_subject str

Original email subject

required
email_body str

Original email body

required
sender_email str

Sender's email address

required
rag_sources List[Dict]

List of RAG source documents used to generate the response

required
validation_number Optional[int]

Optional validation number for tracking

None

Returns:

Type Description
ValidationResult

ValidationResult object

Source code in agents/rag_response_validator_agent.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def validate_rag_response(
    self,
    generated_response: str,
    email_subject: str,
    email_body: str,
    sender_email: str,
    rag_sources: List[Dict],
    validation_number: Optional[int] = None,
) -> ValidationResult:
    """
    Validate RAG-generated response using Azure OpenAI (no LangChain).

    Args:
        generated_response: The AI-generated response to validate
        email_subject: Original email subject
        email_body: Original email body
        sender_email: Sender's email address
        rag_sources: List of RAG source documents used to generate the response
        validation_number: Optional validation number for tracking

    Returns:
        ValidationResult object; a rejected result is returned on any
        processing error (fail-closed behavior).
    """
    logger.info(f"Validating RAG response for inquiry from {sender_email}")

    # Format RAG sources for prompt
    rag_sources_str = self._format_rag_sources(rag_sources)

    # Build prompt with format instructions
    prompt_text = self.prompt_template.format(
        generated_response=generated_response,
        email_subject=email_subject,
        email_body=email_body,
        sender_email=sender_email,
        rag_sources=rag_sources_str,
        format_instructions=self.format_instructions,
    )

    # Run validation via LLM adapter
    try:
        # Fix: removed a duplicated logger.info call that repeated the
        # "Validating RAG response..." message already logged above,
        # which produced two identical log lines per validation.
        raw_text, _response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing validation assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=2000,
        )

        if not raw_text:
            raise ValueError("Empty response from RAG validator model")

        # Track model response (optional - can be extended to save to database)
        # For now, just log it
        logger.debug(f"Validation response: {raw_text[:500]}...")

        validation_result = self._parse_validation_from_text(raw_text)
        logger.info(
            f"Validation completed for {sender_email}: {validation_result.status.value} "
            f"(is_approved: {validation_result.is_approved})"
        )
        return validation_result
    except Exception as e:
        error_msg = f"Failed to validate RAG response: {str(e)}"
        logger.error(error_msg, exc_info=True)

        # On validation failure, reject by default for safety
        return ValidationResult(
            status=ValidationStatus.REJECTED,
            is_approved=False,
            reasoning=f"Validation process failed: {str(e)}. Response rejected for safety.",
            issues_found=["Validation process error"],
            ethical_concerns=[],
            factual_errors=[],
            suggestions=["Please review the validation process and try again."],
        )

Services

Service for generating candidate feedback.

Source code in services/feedback_service.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
class FeedbackService:
    """Service for generating candidate feedback.

    Orchestrates a FeedbackAgent that produces the feedback and, optionally,
    a validator/corrector agent pair that review and repair it in a bounded
    validate -> correct loop. Timing and success metrics are recorded via
    the module-level ``metrics_service`` when it is available.
    """

    def __init__(
        self,
        feedback_agent: FeedbackAgent,
        validator_agent: Optional[FeedbackValidatorAgent] = None,
        correction_agent: Optional[FeedbackCorrectionAgent] = None,
        max_validation_iterations: int = 3,
    ):
        """
        Initialize feedback service.

        Args:
            feedback_agent: Initialized FeedbackAgent instance
            validator_agent: Optional FeedbackValidatorAgent instance (if None, validation is skipped)
            correction_agent: Optional FeedbackCorrectionAgent instance (if None, correction is skipped)
            max_validation_iterations: Maximum number of validation-correction cycles (default: 3)
        """
        self.agent = feedback_agent
        self.validator = validator_agent
        self.corrector = correction_agent
        self.max_iterations = max_validation_iterations
        # Outcome of the most recent validation run; updated by
        # generate_feedback so callers can inspect it afterwards.
        self.validation_failed = False
        self.validation_error_info: Optional[Dict[str, Any]] = None
        logger.info("FeedbackService initialized")

    def generate_feedback(
        self,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        output_format: FeedbackFormat = FeedbackFormat.HTML,
        save_to_file: bool = False,
        output_dir: Optional[Path] = None,
        enable_validation: bool = True,
        candidate_id: Optional[int] = None,
        recruitment_stage: Optional[str] = None,
    ) -> tuple[CandidateFeedback, bool, Optional[Dict[str, Any]]]:
        """
        Generate personalized feedback for candidate with optional validation and correction.

        Args:
            cv_data: Parsed CV data
            hr_feedback: HR evaluation feedback
            job_offer: Optional job offer with requirements
            output_format: Feedback format (default: HTML)
            save_to_file: Whether to save feedback to file
            output_dir: Directory to save feedback file (default: current directory)
            enable_validation: Whether to enable validation and correction (default: True)
            candidate_id: Optional candidate ID recorded in metrics metadata and
                forwarded to the generation/validation agents
            recruitment_stage: Optional recruitment stage label recorded in
                metrics metadata and forwarded to the generation agent

        Returns:
            Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])
            - is_validated: True if feedback was validated successfully, False if validation failed
            - error_info: Dict with error details if validation failed, None otherwise

        Raises:
            LLMError: If feedback generation fails
        """
        logger.info(f"Generating feedback for: {cv_data.full_name} (format: {output_format.value})")

        # Start timing for overall feedback generation
        generation_start_time = time.time()

        try:
            # Step 1: Generate initial feedback (with timing)
            feedback_start_time = time.time()
            feedback = self.agent.generate_feedback(
                cv_data,
                hr_feedback,
                job_offer,
                output_format=output_format,
                candidate_id=candidate_id,
                recruitment_stage=recruitment_stage,
            )
            feedback_duration = time.time() - feedback_start_time

            # Record timing metric for feedback generation
            if metrics_service:
                metrics_service.record_timing(
                    metric_type="feedback_generation",
                    operation_name="generate_feedback",
                    duration_seconds=feedback_duration,
                    metadata={"candidate_id": candidate_id, "recruitment_stage": recruitment_stage},
                )

            logger.info(
                f"Successfully generated initial feedback for: {cv_data.full_name} (took {feedback_duration:.2f}s)"
            )

            # Step 2: Validate and correct if validation is enabled
            # (requires BOTH validator and corrector; otherwise the initial
            # feedback is treated as validated)
            is_validated = True
            validation_error_info = None
            if enable_validation and self.validator and self.corrector:
                feedback, is_validated, validation_error_info = self._validate_and_correct(
                    feedback, cv_data, hr_feedback, job_offer, candidate_id=candidate_id
                )
                # Store validation status for potential error saving
                self.validation_failed = not is_validated
                self.validation_error_info = validation_error_info

            # Calculate total generation time
            total_duration = time.time() - generation_start_time

            # Record total timing metric
            if metrics_service:
                metrics_service.record_timing(
                    metric_type="feedback_generation",
                    operation_name="total_feedback_generation",
                    duration_seconds=total_duration,
                    metadata={
                        "candidate_id": candidate_id,
                        "recruitment_stage": recruitment_stage,
                        "is_validated": is_validated,
                        "validation_enabled": enable_validation,
                    },
                )

                # Record success metric
                metrics_service.record_success(
                    metric_type="feedback_generation",
                    operation_name="generate_feedback",
                    success=is_validated,
                    metadata={"candidate_id": candidate_id},
                )

            logger.info(
                f"Feedback generation completed for: {cv_data.full_name} (total: {total_duration:.2f}s, validated: {is_validated})"
            )

            if save_to_file and feedback.html_content:
                html_path = self._save_feedback_html(feedback, cv_data, output_dir)
                logger.info(f"HTML feedback saved to: {html_path}")

            return feedback, is_validated, validation_error_info

        except Exception as e:
            error_msg = f"Failed to generate feedback: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise LLMError(error_msg) from e

    def _validate_and_correct(
        self,
        feedback: CandidateFeedback,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        candidate_id: Optional[int] = None,
    ) -> tuple[CandidateFeedback, bool, Optional[Dict[str, Any]]]:
        """
        Validate feedback and correct if needed.

        Each loop iteration performs one validation and, if the feedback is
        rejected and iterations remain, one correction pass. The loop exits
        early with success as soon as a validation approves the feedback.

        Args:
            feedback: Initial CandidateFeedback object
            cv_data: Parsed CV data
            hr_feedback: HR evaluation feedback
            job_offer: Optional job offer information
            candidate_id: Optional candidate ID

        Returns:
            Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])
            - is_validated: True if feedback was approved, False if validation failed after max iterations
            - error_info: Dict with error details if validation failed, None otherwise
        """
        assert self.validator is not None and self.corrector is not None  # caller ensures both set
        current_feedback = feedback
        iteration = 0
        all_validation_results = []
        validation_start_time = time.time()

        while iteration < self.max_iterations:
            iteration += 1
            logger.info(
                f"Validation iteration {iteration}/{self.max_iterations} for {cv_data.full_name}"
            )

            # Validate current feedback (with timing)
            validation_iter_start = time.time()
            validation_result = self.validator.validate_feedback(
                current_feedback.html_content,
                cv_data,
                hr_feedback,
                job_offer,
                candidate_id=candidate_id,
                validation_number=iteration,
            )
            validation_iter_duration = time.time() - validation_iter_start

            # Record timing metric for validation
            if metrics_service:
                metrics_service.record_timing(
                    metric_type="validation",
                    operation_name="validate_feedback",
                    duration_seconds=validation_iter_duration,
                    metadata={
                        "candidate_id": candidate_id,
                        "iteration": iteration,
                        "is_approved": validation_result.is_approved,
                    },
                )

            # Store validation result
            all_validation_results.append(
                {
                    "iteration": iteration,
                    "is_approved": validation_result.is_approved,
                    "status": validation_result.status.value,
                    "reasoning": validation_result.reasoning,
                    "issues_found": validation_result.issues_found,
                    "ethical_concerns": validation_result.ethical_concerns,
                    "factual_errors": validation_result.factual_errors,
                    "suggestions": validation_result.suggestions,
                }
            )

            if validation_result.is_approved:
                logger.info(
                    f"Feedback approved after {iteration} iteration(s) for {cv_data.full_name}"
                )
                return current_feedback, True, None

            # Feedback was rejected, need to correct
            logger.warning(
                f"Feedback rejected in iteration {iteration} for {cv_data.full_name}. "
                f"Reason: {validation_result.reasoning}"
            )

            if iteration >= self.max_iterations:
                logger.error(
                    f"Maximum validation iterations ({self.max_iterations}) reached for {cv_data.full_name}. "
                    f"Validation failed - feedback will not be sent."
                )
                # Return error info with summary
                # (corrections are counted via the "correction_number" key
                # added to a validation entry after a successful correction)
                total_validations = len(all_validation_results)
                total_corrections = sum(
                    1 for r in all_validation_results if "correction_number" in r
                )
                error_info = {
                    "validation_results": all_validation_results,
                    "final_feedback_html": current_feedback.html_content,
                    "max_iterations_reached": True,
                    "total_validations": total_validations,
                    "total_corrections": total_corrections,
                    "last_validation_number": total_validations,
                    "last_correction_number": total_corrections,
                }
                return current_feedback, False, error_info

            # Correct the feedback (with timing)
            try:
                correction_start = time.time()
                corrected = self.corrector.correct_feedback(
                    current_feedback.html_content,
                    validation_result,
                    cv_data,
                    hr_feedback,
                    job_offer,
                    candidate_id=candidate_id,
                    correction_number=iteration,
                )
                correction_duration = time.time() - correction_start

                # Record timing metric for correction
                if metrics_service:
                    metrics_service.record_timing(
                        metric_type="validation",
                        operation_name="correct_feedback",
                        duration_seconds=correction_duration,
                        metadata={"candidate_id": candidate_id, "iteration": iteration},
                    )

                # Update current feedback with corrected version
                current_feedback = CandidateFeedback(html_content=corrected.html_content)

                # Add correction number to the last validation result
                if all_validation_results:
                    all_validation_results[-1]["correction_number"] = iteration
                    all_validation_results[-1]["corrections_made"] = corrected.corrections_made

                logger.info(
                    f"Feedback corrected for {cv_data.full_name} (correction #{iteration}). "
                    f"Corrections made: {', '.join(corrected.corrections_made)}"
                )
            except Exception as e:
                logger.error(f"Failed to correct feedback in iteration {iteration}: {str(e)}")
                # If correction fails, return error info
                total_validations = len(all_validation_results)
                total_corrections = sum(
                    1 for r in all_validation_results if "correction_number" in r
                )
                error_info = {
                    "validation_results": all_validation_results,
                    "final_feedback_html": current_feedback.html_content,
                    "correction_failed": True,
                    "correction_error": str(e),
                    "total_validations": total_validations,
                    "total_corrections": total_corrections,
                    "last_validation_number": total_validations,
                    "failed_correction_number": iteration,
                }
                return current_feedback, False, error_info

        # Should not reach here, but return current feedback as fallback
        total_validation_duration = time.time() - validation_start_time

        # Record total validation timing
        if metrics_service:
            metrics_service.record_timing(
                metric_type="validation",
                operation_name="total_validation_cycle",
                duration_seconds=total_validation_duration,
                metadata={"candidate_id": candidate_id, "iterations": len(all_validation_results)},
            )

        total_validations = len(all_validation_results)
        total_corrections = sum(1 for r in all_validation_results if "correction_number" in r)
        return (
            current_feedback,
            False,
            {
                "validation_results": all_validation_results,
                "total_validations": total_validations,
                "total_corrections": total_corrections,
            },
        )

    # _save_feedback method removed - we only use HTML format now

    def _save_feedback_html(
        self, feedback: CandidateFeedback, cv_data: CVData, output_dir: Optional[Path] = None
    ) -> Path:
        """Save feedback to HTML file.

        The filename is derived from the candidate's full name with spaces
        replaced by underscores.

        Args:
            feedback: Feedback whose HTML rendering should be written.
            cv_data: Parsed CV data (used for the filename).
            output_dir: Target directory; defaults to the current working
                directory.

        Returns:
            Path to the written HTML file.
        """
        if output_dir is None:
            # NOTE(review): the cwd branch does not call mkdir (the explicit
            # directory branch does) — presumably cwd always exists; confirm.
            output_dir = Path.cwd()
        else:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        filename = f"feedback_{cv_data.full_name.replace(' ', '_')}.html"
        output_path = output_dir / filename

        html_content = format_feedback_as_html(feedback)

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(html_content)

        return output_path

    def get_feedback_html(
        self,
        feedback: CandidateFeedback,
        consent_for_other_positions: Optional[bool] = None,
    ) -> str:
        """
        Get HTML formatted version of feedback.

        Args:
            feedback: CandidateFeedback object
            consent_for_other_positions: Optional boolean indicating if candidate consented to other positions
                (None is treated the same as False)

        Returns:
            HTML formatted string
        """
        return format_feedback_as_html(
            feedback,
            consent_for_other_positions=(
                consent_for_other_positions if consent_for_other_positions is not None else False
            ),
        )

__init__(feedback_agent, validator_agent=None, correction_agent=None, max_validation_iterations=3)

Initialize feedback service.

Parameters:

Name Type Description Default
feedback_agent FeedbackAgent

Initialized FeedbackAgent instance

required
validator_agent Optional[FeedbackValidatorAgent]

Optional FeedbackValidatorAgent instance (if None, validation is skipped)

None
correction_agent Optional[FeedbackCorrectionAgent]

Optional FeedbackCorrectionAgent instance (if None, correction is skipped)

None
max_validation_iterations int

Maximum number of validation-correction cycles (default: 3)

3
Source code in services/feedback_service.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def __init__(
    self,
    feedback_agent: FeedbackAgent,
    validator_agent: Optional[FeedbackValidatorAgent] = None,
    correction_agent: Optional[FeedbackCorrectionAgent] = None,
    max_validation_iterations: int = 3,
):
    """
    Build the feedback service around its collaborating agents.

    Args:
        feedback_agent: Initialized FeedbackAgent instance
        validator_agent: Optional FeedbackValidatorAgent instance (if None, validation is skipped)
        correction_agent: Optional FeedbackCorrectionAgent instance (if None, correction is skipped)
        max_validation_iterations: Maximum number of validation-correction cycles (default: 3)
    """
    # Outcome of the most recent validation run; updated by generate_feedback.
    self.validation_failed = False
    self.validation_error_info: Optional[Dict[str, Any]] = None
    # Collaborators: validator/corrector may be None, which disables that step.
    self.agent = feedback_agent
    self.validator = validator_agent
    self.corrector = correction_agent
    self.max_iterations = max_validation_iterations
    logger.info("FeedbackService initialized")

generate_feedback(cv_data, hr_feedback, job_offer=None, output_format=FeedbackFormat.HTML, save_to_file=False, output_dir=None, enable_validation=True, candidate_id=None, recruitment_stage=None)

Generate personalized feedback for candidate with optional validation and correction.

Parameters:

Name Type Description Default
cv_data CVData

Parsed CV data

required
hr_feedback HRFeedback

HR evaluation feedback

required
job_offer Optional[JobOffer]

Optional job offer with requirements

None
output_format FeedbackFormat

Feedback format (default: HTML)

HTML
save_to_file bool

Whether to save feedback to file

False
output_dir Optional[Path]

Directory to save feedback file (default: current directory)

None
enable_validation bool

Whether to enable validation and correction (default: True)

True
candidate_id Optional[int]

Optional candidate identifier attached to the agent call and recorded metrics

None
recruitment_stage Optional[str]

Optional recruitment stage label attached to the agent call and recorded metrics

None

Returns:

Type Description
CandidateFeedback

Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])

bool
  • is_validated: True if feedback was validated successfully, False if validation failed
Optional[Dict[str, Any]]
  • error_info: Dict with error details if validation failed, None otherwise

Raises:

Type Description
LLMError

If feedback generation fails

Source code in services/feedback_service.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def generate_feedback(
    self,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    output_format: FeedbackFormat = FeedbackFormat.HTML,
    save_to_file: bool = False,
    output_dir: Optional[Path] = None,
    enable_validation: bool = True,
    candidate_id: Optional[int] = None,
    recruitment_stage: Optional[str] = None,
) -> tuple[CandidateFeedback, bool, Optional[Dict[str, Any]]]:
    """
    Generate personalized feedback for candidate with optional validation and correction.

    Args:
        cv_data: Parsed CV data
        hr_feedback: HR evaluation feedback
        job_offer: Optional job offer with requirements
        output_format: Feedback format (default: HTML)
        save_to_file: Whether to save feedback to file
        output_dir: Directory to save feedback file (default: current directory)
        enable_validation: Whether to enable validation and correction (default: True)
        candidate_id: Optional candidate identifier; forwarded to the agent and
            attached to recorded metrics metadata
        recruitment_stage: Optional recruitment stage label; forwarded to the
            agent and attached to recorded metrics metadata

    Returns:
        Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])
        - is_validated: True if feedback was validated successfully, False if validation failed
        - error_info: Dict with error details if validation failed, None otherwise

    Raises:
        LLMError: If feedback generation fails (any exception raised inside this
            method is wrapped into LLMError)
    """
    logger.info(f"Generating feedback for: {cv_data.full_name} (format: {output_format.value})")

    # Start timing for overall feedback generation
    generation_start_time = time.time()

    try:
        # Step 1: Generate initial feedback (with timing)
        feedback_start_time = time.time()
        feedback = self.agent.generate_feedback(
            cv_data,
            hr_feedback,
            job_offer,
            output_format=output_format,
            candidate_id=candidate_id,
            recruitment_stage=recruitment_stage,
        )
        feedback_duration = time.time() - feedback_start_time

        # Record timing metric for feedback generation
        # (metrics_service may be falsy/None, in which case metrics are skipped)
        if metrics_service:
            metrics_service.record_timing(
                metric_type="feedback_generation",
                operation_name="generate_feedback",
                duration_seconds=feedback_duration,
                metadata={"candidate_id": candidate_id, "recruitment_stage": recruitment_stage},
            )

        logger.info(
            f"Successfully generated initial feedback for: {cv_data.full_name} (took {feedback_duration:.2f}s)"
        )

        # Step 2: Validate and correct if validation is enabled
        # Note: validation runs only when BOTH a validator and a corrector
        # are configured on this service.
        is_validated = True
        validation_error_info = None
        if enable_validation and self.validator and self.corrector:
            feedback, is_validated, validation_error_info = self._validate_and_correct(
                feedback, cv_data, hr_feedback, job_offer, candidate_id=candidate_id
            )
            # Store validation status for potential error saving
            self.validation_failed = not is_validated
            self.validation_error_info = validation_error_info

        # Calculate total generation time
        total_duration = time.time() - generation_start_time

        # Record total timing metric
        if metrics_service:
            metrics_service.record_timing(
                metric_type="feedback_generation",
                operation_name="total_feedback_generation",
                duration_seconds=total_duration,
                metadata={
                    "candidate_id": candidate_id,
                    "recruitment_stage": recruitment_stage,
                    "is_validated": is_validated,
                    "validation_enabled": enable_validation,
                },
            )

            # Record success metric
            metrics_service.record_success(
                metric_type="feedback_generation",
                operation_name="generate_feedback",
                success=is_validated,
                metadata={"candidate_id": candidate_id},
            )

        logger.info(
            f"Feedback generation completed for: {cv_data.full_name} (total: {total_duration:.2f}s, validated: {is_validated})"
        )

        # Persist the HTML only when it was actually produced.
        if save_to_file and feedback.html_content:
            html_path = self._save_feedback_html(feedback, cv_data, output_dir)
            logger.info(f"HTML feedback saved to: {html_path}")

        return feedback, is_validated, validation_error_info

    except Exception as e:
        # Wrap every failure into the domain-level LLMError for callers.
        error_msg = f"Failed to generate feedback: {str(e)}"
        logger.error(error_msg, exc_info=True)
        raise LLMError(error_msg) from e

get_feedback_html(feedback, consent_for_other_positions=None)

Get HTML formatted version of feedback.

Parameters:

Name Type Description Default
feedback CandidateFeedback

CandidateFeedback object

required
consent_for_other_positions Optional[bool]

Optional boolean indicating if candidate consented to other positions

None

Returns:

Type Description
str

HTML formatted string

Source code in services/feedback_service.py
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
def get_feedback_html(
    self,
    feedback: CandidateFeedback,
    consent_for_other_positions: Optional[bool] = None,
) -> str:
    """
    Render the given feedback object as an HTML string.

    Args:
        feedback: CandidateFeedback object to render
        consent_for_other_positions: Whether the candidate agreed to be
            considered for other positions; None is treated as False

    Returns:
        HTML formatted string
    """
    # Normalize the tri-state flag: an explicit True/False is forwarded
    # unchanged, a missing value (None) falls back to False.
    consent = (
        False
        if consent_for_other_positions is None
        else consent_for_other_positions
    )
    return format_feedback_as_html(feedback, consent_for_other_positions=consent)

Service for processing CV documents.

Source code in services/cv_service.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class CVService:
    """Service for processing CV documents."""

    def __init__(self, parser_agent: CVParserAgent):
        """
        Initialize CV service.

        Args:
            parser_agent: Initialized CVParserAgent instance
        """
        self.parser = parser_agent
        logger.info("CVService initialized")

    def process_cv_from_pdf(
        self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
    ) -> CVData:
        """
        Process CV from PDF file with proper error handling.

        Args:
            pdf_path: Path to PDF file
            verbose: Enable verbose logging
            candidate_id: Optional candidate identifier forwarded to the parser agent

        Returns:
            Parsed CVData object

        Raises:
            PDFReadError: If PDF cannot be read or the path is not a .pdf file
            ValidationError: If CV data is invalid
            LLMError: If LLM processing fails
        """
        pdf_path_obj = Path(pdf_path)

        # Validate file exists
        if not pdf_path_obj.exists():
            error_msg = f"PDF file not found: {pdf_path}"
            logger.error(error_msg)
            raise PDFReadError(error_msg)

        # Idiomatic inequality check (was `not ... == ".pdf"`); same behavior.
        if pdf_path_obj.suffix.lower() != ".pdf":
            error_msg = f"File is not a PDF: {pdf_path}"
            logger.error(error_msg)
            raise PDFReadError(error_msg)

        logger.info(f"Processing CV from PDF: {pdf_path}")
        logger.info(f"File size: {pdf_path_obj.stat().st_size / 1024:.2f} KB")
        logger.info(f"Verbose mode: {verbose}")

        try:
            logger.info("Starting CV parsing process...")
            cv_data = self.parser.parse_cv_from_pdf(
                str(pdf_path_obj), verbose=verbose, candidate_id=candidate_id
            )
            logger.info(f"Successfully parsed CV for: {cv_data.full_name}")
            logger.info(
                f"Extracted data: {len(cv_data.education)} education entries, {len(cv_data.experience)} experience entries, {len(cv_data.skills)} skills"
            )
            return cv_data

        except Exception as e:
            error_msg = f"Failed to process CV: {str(e)}"
            logger.error(error_msg, exc_info=True)

            # NOTE(review): best-effort classification by message text is
            # fragile — consider catching concrete exception types instead.
            if "PDF" in str(e) or "pdf" in str(e):
                raise PDFReadError(error_msg) from e
            elif "validation" in str(e).lower() or "pydantic" in str(e).lower():
                raise ValidationError(error_msg) from e
            else:
                raise LLMError(error_msg) from e

__init__(parser_agent)

Initialize CV service.

Parameters:

Name Type Description Default
parser_agent CVParserAgent

Initialized CVParserAgent instance

required
Source code in services/cv_service.py
15
16
17
18
19
20
21
22
23
def __init__(self, parser_agent: CVParserAgent):
    """
    Initialize CV service.

    Args:
        parser_agent: Initialized CVParserAgent instance
    """
    self.parser = parser_agent
    logger.info("CVService initialized")

process_cv_from_pdf(pdf_path, verbose=False, candidate_id=None)

Process CV from PDF file with proper error handling.

Parameters:

Name Type Description Default
pdf_path str

Path to PDF file

required
verbose bool

Enable verbose logging

False
candidate_id Optional[int]

Optional candidate identifier forwarded to the parser agent

None

Returns:

Type Description
CVData

Parsed CVData object

Raises:

Type Description
PDFReadError

If PDF cannot be read

ValidationError

If CV data is invalid

LLMError

If LLM processing fails

Source code in services/cv_service.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def process_cv_from_pdf(
    self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
) -> CVData:
    """
    Process CV from PDF file with proper error handling.

    Args:
        pdf_path: Path to PDF file
        verbose: Enable verbose logging
        candidate_id: Optional candidate identifier forwarded to the parser agent

    Returns:
        Parsed CVData object

    Raises:
        PDFReadError: If PDF cannot be read or the path is not a .pdf file
        ValidationError: If CV data is invalid
        LLMError: If LLM processing fails
    """
    pdf_path_obj = Path(pdf_path)

    # Validate file exists
    if not pdf_path_obj.exists():
        error_msg = f"PDF file not found: {pdf_path}"
        logger.error(error_msg)
        raise PDFReadError(error_msg)

    # Idiomatic inequality check (was `not ... == ".pdf"`); same behavior.
    if pdf_path_obj.suffix.lower() != ".pdf":
        error_msg = f"File is not a PDF: {pdf_path}"
        logger.error(error_msg)
        raise PDFReadError(error_msg)

    logger.info(f"Processing CV from PDF: {pdf_path}")
    logger.info(f"File size: {pdf_path_obj.stat().st_size / 1024:.2f} KB")
    logger.info(f"Verbose mode: {verbose}")

    try:
        logger.info("Starting CV parsing process...")
        cv_data = self.parser.parse_cv_from_pdf(
            str(pdf_path_obj), verbose=verbose, candidate_id=candidate_id
        )
        logger.info(f"Successfully parsed CV for: {cv_data.full_name}")
        logger.info(
            f"Extracted data: {len(cv_data.education)} education entries, {len(cv_data.experience)} experience entries, {len(cv_data.skills)} skills"
        )
        return cv_data

    except Exception as e:
        error_msg = f"Failed to process CV: {str(e)}"
        logger.error(error_msg, exc_info=True)

        # NOTE(review): best-effort classification by message text is
        # fragile — consider catching concrete exception types instead.
        if "PDF" in str(e) or "pdf" in str(e):
            raise PDFReadError(error_msg) from e
        elif "validation" in str(e).lower() or "pydantic" in str(e).lower():
            raise ValidationError(error_msg) from e
        else:
            raise LLMError(error_msg) from e

Service for routing classified emails to appropriate departments.

Source code in services/email_router.py
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
class EmailRouter:
    """Service for routing classified emails to appropriate departments."""

    def __init__(
        self,
        email_username: str,
        email_password: str,
        smtp_host: str,
        smtp_port: int,
        smtp_use_tls: bool = True,
        iod_email: Optional[str] = None,
        hr_email: Optional[str] = None,
    ):
        """
        Initialize email router.

        All AI collaborators (Azure OpenAI client, query classifier/responder,
        RAG validator) are initialized best-effort: any failure is logged and
        the corresponding attribute is left as None, which downstream methods
        treat as "forward to HR / use defaults" rather than raising.

        Args:
            email_username: Email username for sending emails
            email_password: Email password or app password
            smtp_host: SMTP server hostname
            smtp_port: SMTP server port
            smtp_use_tls: Whether to use TLS (True for port 587, False for port 465)
            iod_email: Email address for IOD department
            hr_email: Email address for HR department
        """
        self.email_username = email_username
        self.email_password = email_password
        self.smtp_host = smtp_host
        self.smtp_port = smtp_port
        self.smtp_use_tls = smtp_use_tls
        self.iod_email = iod_email
        self.hr_email = hr_email
        # Track processed emails to prevent duplicates (using Message-ID or UID)
        # Use insertion-ordered dict to preserve recency when trimming
        # (dicts keep insertion order since Python 3.7, so oldest keys come first).
        self.processed_emails: dict[str, None] = {}
        self.max_processed_emails = 1000  # Maximum number of processed emails to track

        # Initialize AI client for ticket priority/deadline determination.
        # Pre-declare all optional collaborators so attributes always exist,
        # even when the try-blocks below fail.
        self.ai_client: Any = None
        self.model_name: Optional[str] = None
        self.query_classifier: Optional[QueryClassifierAgent] = None
        self.query_responder: Optional[QueryResponderAgent] = None
        self.rag_validator: Optional[RAGResponseValidatorAgent] = None
        self.rag_db: Optional[QdrantRAG] = None
        try:
            # Imported lazily so the router still works when the SDK is absent.
            from openai import AzureOpenAI

            self.ai_client = AzureOpenAI(
                api_version=settings.azure_openai_api_version,
                azure_endpoint=settings.azure_openai_endpoint,
                api_key=settings.api_key,
            )
            self.model_name = settings.openai_model
            logger.info("AI client initialized for ticket priority/deadline determination")
        except Exception as e:
            logger.warning(
                f"Failed to initialize AI client: {e}. Will use default priority/deadline."
            )
            self.ai_client = None
            self.model_name = None

        # Initialize AI agents for query handling; on any failure all three are
        # reset together so _handle_general_query falls back to HR forwarding.
        try:
            self.query_classifier = QueryClassifierAgent(
                model_name=settings.azure_openai_gpt_deployment
            )
            self.query_responder = QueryResponderAgent(
                model_name=settings.azure_openai_gpt_deployment
            )
            self.rag_validator = RAGResponseValidatorAgent(
                model_name=settings.azure_openai_gpt_deployment
            )
            # Initialize RAG (will be lazy-loaded when needed)
            self.rag_db = None
            logger.info("Query classification agents initialized")
        except Exception as e:
            logger.warning(
                f"Failed to initialize query agents: {e}. Will forward all queries to HR."
            )
            self.query_classifier = None
            self.query_responder = None
            self.rag_validator = None
            self.rag_db = None

    def route_email(self, email_data: Dict, classification: str) -> bool:
        """
        Route email based on classification.

        Deduplicates by Message-ID (preferred), UID, or a composite of
        sender/subject/date, then dispatches to the matching handler.
        Note: an email is marked processed *before* routing, so a failed
        routing attempt is not retried on the next poll (by design, to
        avoid duplicate tickets/acknowledgments).

        Args:
            email_data: Email dictionary
            classification: Email classification ('iod', 'consent_yes', 'consent_no', 'default')

        Returns:
            True if routing was successful (or the email was a duplicate).
        """
        try:
            # Normalize identifiers to strings; str() of missing keys yields "".
            uid = str(email_data.get("uid", ""))
            message_id = str(email_data.get("message_id", ""))

            # Prefer Message-ID, fall back to UID, then a composite identifier
            # (the composite is always non-empty thanks to the defaults).
            if message_id:
                email_id = message_id
            elif uid:
                email_id = f"uid:{uid}"
            else:
                email_id = (
                    f"{email_data.get('from_email', 'unknown')}:"
                    f"{email_data.get('subject', 'no-subject')}:"
                    f"{email_data.get('date', 'no-date')}"
                )

            if email_id in self.processed_emails:
                logger.warning(
                    f"Email {email_id} already processed (classification: {classification}, "
                    f"from: {email_data.get('from_email', 'unknown')}), skipping duplicate"
                )
                return True  # Return True to avoid reprocessing
            self.processed_emails[email_id] = None
            logger.debug(
                f"Marking email {email_id} as processed (classification: {classification})"
            )
            # Keep only the newest half once the cap is exceeded; dict preserves
            # insertion order, so the slice drops the oldest entries first.
            if len(self.processed_emails) > self.max_processed_emails:
                keep_count = self.max_processed_emails // 2
                for stale_key in list(self.processed_emails)[:-keep_count]:
                    del self.processed_emails[stale_key]
                logger.debug(
                    f"Trimmed processed_emails to {len(self.processed_emails)} entries"
                )

            # Dispatch to the classification-specific handler.
            if classification == "iod":
                return self._route_to_iod(email_data)
            if classification in ("consent_yes", "consent_no"):
                return self._handle_consent(email_data, classification)
            # default - general query
            return self._handle_general_query(email_data)
        except Exception as e:
            logger.error(f"Error routing email: {str(e)}", exc_info=True)
            return False

    def _route_to_iod(self, email_data: Dict) -> bool:
        """Route email to IOD department, create ticket, and send acknowledgment to sender.

        Workflow:
          1. Find the most recent candidate matching the sender's address.
          2. Create a HIGH-priority IOD ticket with a 7-day deadline, unless a
             ticket already exists for this Message-ID (duplicate guard).
          3. Forward the email to the IOD mailbox (if configured) and send a
             Polish acknowledgment back to the sender.

        Returns:
            True if the email was already processed, or if either the forward
            to IOD or the sender acknowledgment succeeded; False on error.
        """
        try:
            # Imported locally to avoid import cycles with the database layer.
            from datetime import datetime, timedelta
            from database.models import (
                create_ticket,
                TicketDepartment,
                TicketPriority,
            )

            # Create ticket for IOD (defaults are Polish placeholder strings).
            from_email = email_data.get("from_email", "Nieznany")
            email_subject = email_data.get("subject", "Brak tematu")
            email_body = email_data.get("body", "Brak treści")

            # Try to find related candidate (get the latest one if multiple exist)
            related_candidate_id: Optional[int] = None
            try:
                from database.models import get_all_candidates

                # Get all candidates with this email and take the latest one (most recent created_at or highest ID)
                all_candidates = get_all_candidates()
                candidates_with_email = [
                    c for c in all_candidates if c.email.lower() == from_email.lower()
                ]
                if candidates_with_email:
                    # Sort by created_at DESC (most recent first), then by ID DESC as fallback
                    # Use a tuple for sorting: (created_at timestamp, id) - both descending
                    latest_candidate = max(
                        candidates_with_email,
                        key=lambda c: (c.created_at.timestamp() if c.created_at else 0, c.id or 0),
                    )
                    related_candidate_id = latest_candidate.id
                    logger.info(
                        f"Found candidate {related_candidate_id} ({latest_candidate.full_name}) for email {from_email} (selected latest from {len(candidates_with_email)} candidates)"
                    )
            except Exception as e:
                # Candidate linkage is best-effort; the ticket is created regardless.
                logger.warning(f"Error finding candidate for email {from_email}: {str(e)}")
                pass

            # Check if ticket already exists for this email (prevent duplicates)
            message_id = email_data.get("message_id")
            existing_ticket = None
            if message_id:
                try:
                    from database.models import get_all_tickets

                    all_tickets = get_all_tickets()
                    # Check if ticket with same related_email_id already exists
                    # NOTE: linear scan over all tickets — fine for small ticket
                    # volumes; a DB-side lookup would scale better.
                    existing_ticket = next(
                        (t for t in all_tickets if t.related_email_id == message_id), None
                    )
                    if existing_ticket:
                        logger.info(
                            f"Ticket #{existing_ticket.id} already exists for email {message_id}, skipping duplicate ticket creation"
                        )
                except Exception as e:
                    logger.warning(f"Error checking for existing ticket: {str(e)}")

            # Create ticket only if it doesn't exist
            if not existing_ticket:
                # Fixed HIGH priority / 7-day deadline for IOD incidents
                # (the AI-based _determine_ticket_priority_and_deadline is not
                # used on this path).
                deadline = datetime.now() + timedelta(days=7)
                ticket_description = (
                    f"Email od: {from_email}\n"
                    f"Temat: {email_subject}\n\n"
                    f"Treść wiadomości:\n{email_body}\n\n"
                    f"Message-ID: {message_id or 'Brak'}"
                )

                try:
                    ticket = create_ticket(
                        department=TicketDepartment.IOD,
                        priority=TicketPriority.HIGH,
                        description=ticket_description,
                        deadline=deadline,
                        related_candidate_id=related_candidate_id,
                        related_email_id=message_id,
                    )
                    logger.info(f"Created ticket #{ticket.id} for IOD incident from {from_email}")
                except Exception as e:
                    # Ticket creation failure is non-fatal: the email is still
                    # forwarded and the sender still gets an acknowledgment.
                    logger.warning(f"Failed to create ticket for IOD: {str(e)}")

            # Check if acknowledgment was already sent (by checking if ticket exists)
            # If ticket exists, it means this email was already processed
            if existing_ticket:
                logger.info(
                    f"Email {message_id or 'with UID'} already processed (ticket #{existing_ticket.id} exists), skipping duplicate processing"
                )
                return True  # Return True to avoid reprocessing

            # Forward email to IOD
            subject = f"[IOD] {email_subject}"
            # Ensure body is not empty - if email_body is empty, add default message
            if not email_body or not email_body.strip():
                email_body = "(Brak treści w oryginalnej wiadomości)"
                logger.warning(f"Empty email body from {from_email}, using default message")

            body = f"""
Email otrzymany od: {from_email}
Data: {email_data.get('date', 'Nieznana')}
Temat: {email_subject}

---
Treść wiadomości:
---

{email_body}

---
Oryginalny Message-ID: {email_data.get('message_id', 'Brak')}
In-Reply-To: {email_data.get('in_reply_to', 'Brak')}
"""

            # Forwarding is skipped (success=False) when no IOD mailbox is configured.
            success = (
                self._send_email(
                    to_email=self.iod_email, subject=subject, body=body, reply_to=from_email
                )
                if self.iod_email
                else False
            )

            # Send acknowledgment to original sender (always, even if forwarding to IOD failed)
            # Ticket was created, so candidate should be notified
            ack_subject = "Re: " + email_subject
            ack_body = """
Dziękujemy za kontakt.

Twoja wiadomość została przekazana do działu Inspektora Ochrony Danych (IOD) w celu rozpatrzenia.

Otrzymasz odpowiedź w najkrótszym możliwym terminie.

Z wyrazami szacunku

Dział HR
"""
            ack_success = self._send_email(to_email=from_email, subject=ack_subject, body=ack_body)

            # Log all four forward/ack outcome combinations explicitly.
            if success:
                if ack_success:
                    logger.info(f"Email routed to IOD and acknowledgment sent to {from_email}")
                else:
                    logger.warning(
                        f"Email routed to IOD, but failed to send acknowledgment to {from_email}"
                    )
            else:
                if ack_success:
                    logger.warning(
                        f"Failed to send email to IOD for {from_email}, but acknowledgment sent to candidate"
                    )
                else:
                    logger.error(f"Failed to send email to IOD and acknowledgment to {from_email}")

            # Return True if either forwarding or acknowledgment succeeded (ticket was created)
            return success or ack_success

        except Exception as e:
            logger.error(f"Error routing email to IOD: {str(e)}", exc_info=True)
            return False

    def _handle_consent(self, email_data: Dict, classification: str) -> bool:
        """Handle consent email and update database.

        Resolution order for the candidate:
          1. By Message-ID of the reply (In-Reply-To / Message-ID matched
             against previously sent feedback emails).
          2. By sender email address.
          3. If neither matches, the email is forwarded to HR for manual handling.

        On success the candidate's consent_for_other_positions flag is updated
        (True for 'consent_yes', False otherwise), the candidate receives a
        Polish acknowledgment, and HR is notified of the change.

        Args:
            email_data: Parsed email dictionary.
            classification: Either 'consent_yes' or 'consent_no'.

        Returns:
            True if consent was recorded (or the email was handed to HR
            successfully); False on error or when no sender address exists.
        """
        try:
            from_email = email_data.get("from_email", "")
            if not from_email:
                logger.warning("Cannot handle consent - no email address")
                return False

            # 1) Try to link candidate by Message-ID (reply to our feedback)
            candidate = None
            in_reply_to = email_data.get("in_reply_to") or email_data.get("message_id")
            feedback_email = None

            if in_reply_to:
                # Normalize Message-ID (keep as stored in DB, usually with <>)
                possible_ids = {in_reply_to.strip()}
                # Sometimes the client strips angle brackets – try without them too
                if in_reply_to.startswith("<") and in_reply_to.endswith(">"):
                    possible_ids.add(in_reply_to[1:-1].strip())

                for mid in possible_ids:
                    try:
                        feedback_email = get_feedback_email_by_message_id(mid)
                        if feedback_email:
                            break
                    except Exception as e:
                        logger.warning(f"Error looking up feedback_email by Message-ID={mid}: {e}")

            if feedback_email:
                # We have linked feedback → find candidate by candidate_id
                try:
                    candidate = get_candidate_by_id(feedback_email.candidate_id)
                    if candidate:
                        logger.info(
                            f"Consent email linked to candidate {candidate.id} via Message-ID "
                            f"({feedback_email.message_id})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Error loading candidate by id {feedback_email.candidate_id}: {e}"
                    )

            # 2) If Message-ID lookup failed, try by email address
            if not candidate:
                candidate = get_candidate_by_email(from_email)

            if not candidate:
                logger.warning(
                    f"Cannot find candidate for consent handling. "
                    f"from_email={from_email}, in_reply_to={in_reply_to}"
                )
                # Still route to HR for manual handling
                return self._route_to_hr(email_data)

            # Update consent in database
            consent_value = classification == "consent_yes"
            if candidate.id is not None:
                update_candidate(candidate.id, consent_for_other_positions=consent_value)

            logger.info(
                f"Updated consent_for_other_positions for candidate {candidate.id} ({from_email}) to {consent_value}"
            )

            # Send acknowledgment (Polish text differs for yes/no consent)
            ack_subject = "Re: " + email_data.get("subject", "Twoja wiadomość")
            if consent_value:
                ack_body = """
Dziękujemy za kontakt.

Zarejestrowaliśmy Twoją zgodę na rozważenie Twojej kandydatury w kontekście innych stanowisk.

Jeśli znajdziemy odpowiednią dla Ciebie ofertę, skontaktujemy się z Tobą niezwłocznie.

W razie chęci zmiany decyzji prosimy o poinformowanie nas, odpowiadając na tę wiadomość.

Z wyrazami szacunku

Dział HR
"""
            else:
                ack_body = """
Dziękujemy za kontakt.

Zarejestrowaliśmy informację, że nie wyrażasz zgody na rozważenie Twojej kandydatury w kontekście innych stanowisk.

W razie chęci zmiany decyzji prosimy o poinformowanie nas, odpowiadając na tę wiadomość.

Z wyrazami szacunku

Dział HR
"""

            # Acknowledgment send failure is intentionally ignored: consent has
            # already been persisted at this point.
            self._send_email(to_email=from_email, subject=ack_subject, body=ack_body)

            # Notify HR about consent change (fuller picture of the situation)
            try:
                hr_subject = f"[HR] Zmiana zgody kandydata na inne rekrutacje – {candidate.first_name} {candidate.last_name}"
                hr_body = f"""
Kandydat: {candidate.first_name} {candidate.last_name}
Email: {candidate.email}
ID kandydata w systemie: {candidate.id}

Nowa wartość zgody na inne rekrutacje: {"TAK" if consent_value else "NIE"}

Oryginalna wiadomość kandydata:
--------------------------------
Temat: {email_data.get('subject', 'Brak tematu')}

{email_data.get('body', 'Brak treści')}
"""
                if self.hr_email:
                    self._send_email(to_email=self.hr_email, subject=hr_subject, body=hr_body)
                logger.info(f"HR notified about consent change for candidate {candidate.id}")
            except Exception as e:
                logger.warning(f"Failed to notify HR about consent change: {e}")

            logger.info(f"Consent handled and acknowledgment sent to {from_email}")
            return True

        except Exception as e:
            logger.error(f"Error handling consent: {str(e)}", exc_info=True)
            return False

    def _get_rag_db(self) -> Optional[QdrantRAG]:
        """Lazily build the RAG database on first use; return the cached instance after."""
        if self.rag_db is not None:
            return self.rag_db

        try:
            # Shared constructor arguments for either connection mode.
            base_kwargs = dict(
                collection_name="recruitment_knowledge_base",
                use_azure_openai=True,
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
                azure_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
                azure_deployment=os.getenv(
                    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-small"
                ),
                azure_api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview"),
            )

            # Prefer Qdrant server over local path (avoids locking issues);
            # the host defaults to the Docker service name.
            server_host = os.getenv("QDRANT_HOST", "qdrant")
            server_port = int(os.getenv("QDRANT_PORT", "6333"))
            local_path = os.getenv("QDRANT_PATH")  # Optional, only if server not available

            if not base_kwargs["azure_api_key"]:
                logger.warning("Azure OpenAI API key not set, RAG unavailable")
            elif server_host:
                self.rag_db = QdrantRAG(
                    qdrant_host=server_host,
                    qdrant_port=server_port,
                    **base_kwargs,
                )
                logger.info("RAG database initialized")
            elif local_path:
                self.rag_db = QdrantRAG(
                    qdrant_path=local_path,
                    **base_kwargs,
                )
                logger.info("RAG database initialized")
            else:
                logger.warning("Neither QDRANT_HOST nor QDRANT_PATH set, RAG unavailable")
                return None
        except Exception as e:
            logger.warning(f"Failed to initialize RAG database: {e}")
        return self.rag_db

    def _handle_general_query(self, email_data: Dict) -> bool:
        """
        Handle general query email - classify and respond using AI or forward to HR.

        Workflow:
        1. QueryClassifierAgent decides: direct_answer, rag_answer, or forward_to_hr
        2. If direct_answer: QueryResponderAgent generates response from basic knowledge
        3. If rag_answer: QueryResponderAgent uses RAG to find relevant context, then generates response
        4. If forward_to_hr: Forward email to HR department

        Returns:
            True when the query was answered or successfully handed to HR;
            the HR fallback is also used for any unexpected exception.
        """
        try:
            from_email = email_data.get("from_email", "")
            email_subject = email_data.get("subject", "")
            email_body = email_data.get("body", "")

            if not self.query_classifier or not self.query_responder:
                # Fallback: forward to HR if agents not initialized
                logger.warning("Query agents not initialized, forwarding to HR")
                return self._route_to_hr(email_data)
            # Redundant at runtime (guarded above); narrows Optional for type checkers.
            assert self.query_classifier is not None and self.query_responder is not None

            # Step 1: Classify query
            logger.info(f"Classifying query from {from_email}")
            classification_result = self.query_classifier.classify_query(
                email_subject, email_body, from_email
            )

            action = classification_result.get("action", "forward_to_hr")
            reasoning = classification_result.get("reasoning", "")
            confidence = classification_result.get("confidence", 0.0)

            # Coerce confidence to float (the model may return it as a string)
            try:
                confidence = float(confidence)
            except (ValueError, TypeError):
                confidence = 0.0

            logger.info(
                f"Query classified as: {action} (confidence: {confidence:.2f}, reasoning: {reasoning})"
            )

            # CRITICAL VALIDATION: Different thresholds for different actions
            # For rag_answer: allow trying even with lower confidence (0.5+), as RAG may find the answer
            # For direct_answer: require higher confidence (0.85+)
            # For forward_to_hr or confidence < 0.5: always forward to HR
            if action == "rag_answer" and confidence < 0.5:
                logger.warning(
                    f"Confidence ({confidence:.2f}) < 0.50 for rag_answer, forwarding to HR instead"
                )
                action = "forward_to_hr"
                reasoning = f"Poziom pewności ({confidence:.2f}) jest zbyt niski dla rag_answer. {reasoning}"
            elif action == "direct_answer" and confidence < 0.7:
                # Only downgrade direct_answer to forward_to_hr on low confidence; leave forward_to_hr unchanged
                logger.warning(
                    f"Confidence ({confidence:.2f}) < 0.70 for direct_answer, forwarding to HR instead"
                )
                action = "forward_to_hr"
                reasoning = f"Poziom pewności ({confidence:.2f}) jest mniejszy niż wymagany (0.70). {reasoning}"

            # Step 2: Handle based on classification
            if action == "forward_to_hr":
                # Forward to HR - don't auto-respond
                logger.info(f"Forwarding query to HR: {reasoning}")
                return self._route_to_hr(email_data)

            elif action == "direct_answer":
                # Generate response from basic knowledge
                # ADDITIONAL VALIDATION: Check confidence again before responding
                # NOTE(review): combined with the 0.70 gate above, the effective
                # direct_answer threshold is 0.85 — confidences in [0.70, 0.85)
                # pass the first gate but are still forwarded to HR here.
                if confidence < 0.85:
                    logger.warning(
                        f"Confidence ({confidence:.2f}) < 0.85 for direct_answer, forwarding to HR"
                    )
                    return self._route_to_hr(email_data)

                logger.info(
                    f"Generating direct answer from basic knowledge (confidence = {confidence:.2f})"
                )
                response = self.query_responder.generate_response(
                    email_subject, email_body, from_email, rag_context=None
                )

                # VALIDATION: If agent returned None, it is not confident – forward to HR
                if response is None:
                    logger.warning("Agent returned None (not confident enough), forwarding to HR")
                    return self._route_to_hr(email_data)

                # Send response to candidate
                reply_subject = (
                    f"Re: {email_subject}" if email_subject else "Odpowiedź na Twoje zapytanie"
                )
                success = self._send_email(
                    to_email=from_email, subject=reply_subject, body=response
                )

                if success:
                    logger.info(f"Direct answer sent to {from_email}")
                    # Also notify HR about the auto-response
                    self._notify_hr_about_auto_response(email_data, response, "direct_answer")

                return success

            elif action == "rag_answer":
                # For rag_answer: allow trying if confidence >= 0.5 (already validated earlier)
                # RAG may find the answer even if classification was uncertain
                logger.info(f"Generating answer using RAG (confidence = {confidence:.2f})")
                rag_db = self._get_rag_db()

                if not rag_db:
                    logger.warning("RAG database not available, forwarding to HR")
                    return self._route_to_hr(email_data)

                # Search RAG for relevant context
                query_text = f"{email_subject} {email_body}".strip()
                rag_results = rag_db.search(query_text, n_results=3)

                logger.info(f"Found {len(rag_results)} relevant documents from RAG")

                # VALIDATION: Check whether RAG found relevant documents
                if not rag_results or len(rag_results) == 0:
                    logger.warning("No relevant documents found in RAG, forwarding to HR")
                    return self._route_to_hr(email_data)

                # Generate response with RAG context
                response = self.query_responder.generate_response(
                    email_subject, email_body, from_email, rag_context=rag_results
                )

                # VALIDATION: If agent returned None, it is not confident – forward to HR
                if response is None:
                    logger.warning("Agent returned None (not confident enough), forwarding to HR")
                    return self._route_to_hr(email_data)

                # VALIDATION: Check quality of RAG response before sending
                if self.rag_validator:
                    logger.info("Validating RAG-generated response before sending")
                    validation_result = self.rag_validator.validate_rag_response(
                        generated_response=response,
                        email_subject=email_subject,
                        email_body=email_body,
                        sender_email=from_email,
                        rag_sources=rag_results,
                    )

                    if not validation_result.is_approved:
                        logger.warning(
                            f"RAG response validation FAILED for {from_email}: {validation_result.reasoning}. "
                            f"Issues: {validation_result.issues_found}, "
                            f"Factual errors: {validation_result.factual_errors}. "
                            f"Forwarding to HR instead."
                        )
                        # Forward to HR with validation details
                        return self._route_to_hr(email_data)
                    else:
                        logger.info(
                            f"RAG response validation PASSED for {from_email}: {validation_result.reasoning}"
                        )
                else:
                    logger.warning(
                        "RAG validator not available, skipping validation (sending response anyway)"
                    )

                # Send response to candidate
                reply_subject = (
                    f"Re: {email_subject}" if email_subject else "Odpowiedź na Twoje zapytanie"
                )
                success = self._send_email(
                    to_email=from_email, subject=reply_subject, body=response
                )

                if success:
                    logger.info(f"RAG-based answer sent to {from_email}")
                    # Also notify HR about the auto-response
                    self._notify_hr_about_auto_response(
                        email_data, response, "rag_answer", rag_results
                    )

                return success

            else:
                # Unknown action - forward to HR
                logger.warning(f"Unknown action: {action}, forwarding to HR")
                return self._route_to_hr(email_data)

        except Exception as e:
            logger.error(f"Error handling general query: {str(e)}", exc_info=True)
            # On error, forward to HR for manual handling
            return self._route_to_hr(email_data)

    def _notify_hr_about_auto_response(
        self,
        email_data: Dict,
        response: str,
        response_type: str,
        rag_context: Optional[list] = None,
    ):
        """Notify HR about auto-generated response."""
        try:
            hr_subject = f"[HR-AUTO] Automatyczna odpowiedź wysłana do {email_data.get('from_email', 'Nieznany')}"
            hr_body = f"""
Automatyczna odpowiedź została wygenerowana i wysłana do kandydata.

TYP ODPOWIEDZI: {response_type.upper()}
Kandydat: {email_data.get('from_email', 'Nieznany')}
Temat oryginalnej wiadomości: {email_data.get('subject', 'Brak tematu')}

---
ORYGINALNA WIADOMOŚĆ KANDYDATA:
---
{email_data.get('body', 'Brak treści')}

---
WYGENEROWANA ODPOWIEDŹ:
---
{response}
"""

            if rag_context:
                hr_body += "\n\n---\nUŻYTE ŹRÓDŁA Z RAG:\n---\n"
                for i, doc in enumerate(rag_context, 1):
                    hr_body += f"\n{i}. Źródło: {doc.get('metadata', {}).get('source', 'N/A')}\n"
                    hr_body += f"   Fragment: {doc.get('document', '')[:200]}...\n"

            hr_body += f"\n\n---\nMessage-ID: {email_data.get('message_id', 'Brak')}\n"

            if self.hr_email:
                self._send_email(to_email=self.hr_email, subject=hr_subject, body=hr_body)
            logger.info(f"HR notified about auto-response ({response_type})")
        except Exception as e:
            logger.warning(f"Failed to notify HR about auto-response: {e}")

    def _determine_ticket_priority_and_deadline(
        self, email_subject: str, email_body: str, from_email: str
    ) -> tuple:
        """
        Determine ticket priority and deadline using AI based on email content.

        Returns:
            tuple: (priority: TicketPriority, deadline_days: int) where deadline_days is between 5-14
        """
        try:
            from database.models import TicketPriority
            import json

            # Note: email_subject, from_email, email_body are user-controlled; consider sanitizing
            # or passing as a separate user message to reduce prompt injection risk in production.
            prompt = f"""
Analyze the following email inquiry and determine the appropriate ticket priority and deadline.

EMAIL:
Subject: {email_subject}
From: {from_email}
Content: {email_body}

TASK:
Determine:
1. Priority: LOW, MEDIUM, HIGH, or URGENT
   - URGENT: Time-sensitive issues, complaints, urgent requests, critical problems
   - HIGH: Important questions, status inquiries, significant concerns
   - MEDIUM: Standard questions, general inquiries, moderate importance
   - LOW: Simple questions, informational requests, low urgency

2. Deadline (in days, between 5-14 days):
   - URGENT: 5-7 days
   - HIGH: 7-10 days
   - MEDIUM: 10-12 days
   - LOW: 12-14 days

Return JSON in format:
{{
    "priority": "LOW" | "MEDIUM" | "HIGH" | "URGENT",
    "deadline_days": <number between 5-14>,
    "reasoning": "Brief explanation of the decision"
}}
"""

            if not self.ai_client or not self.model_name:
                logger.warning("AI client not available, using default priority/deadline")
                from database.models import TicketPriority

                return TicketPriority.MEDIUM, 10

            response = self.ai_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert in prioritizing HR inquiries and determining appropriate response deadlines.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                response_format={"type": "json_object"},
            )

            content = response.choices[0].message.content
            result_text = (content or "").strip()
            result = json.loads(result_text)

            priority_str = result.get("priority", "MEDIUM").upper()
            deadline_days = result.get("deadline_days", 10)

            # Validate and clamp deadline_days to 5-14 range
            try:
                deadline_days = int(deadline_days)
                deadline_days = max(5, min(14, deadline_days))  # Clamp to 5-14
            except (ValueError, TypeError):
                deadline_days = 10  # Default

            # Map priority string to enum
            priority_map = {
                "LOW": TicketPriority.LOW,
                "MEDIUM": TicketPriority.MEDIUM,
                "HIGH": TicketPriority.HIGH,
                "URGENT": TicketPriority.URGENT,
            }
            priority = priority_map.get(priority_str, TicketPriority.MEDIUM)

            reasoning = result.get("reasoning", "No reasoning provided")
            logger.info(
                f"AI determined priority: {priority_str}, deadline: {deadline_days} days. Reasoning: {reasoning}"
            )

            return priority, deadline_days

        except Exception as e:
            logger.warning(
                f"Error determining ticket priority/deadline with AI: {str(e)}. Using defaults."
            )
            from database.models import TicketPriority

            return TicketPriority.MEDIUM, 10  # Default fallback

    def _route_to_hr(self, email_data: Dict) -> bool:
        """Route email to HR department, create ticket, and send email.

        Steps, in order:
          1. Skip if this message was already forwarded to HR (synthetic
             "hr_forward" dedup key in self.processed_emails).
          2. Look up the newest candidate record matching the sender address.
          3. Create an HR ticket with AI-determined priority/deadline, unless
             a ticket already exists for this Message-ID.
          4. Forward the message to HR and send an acknowledgment to the
             original sender.

        Args:
            email_data: Parsed email dict; keys read here: from_email,
                message_id, subject, body, date, uid, in_reply_to.

        Returns:
            True when the email was already handled, or when either the HR
            forward or the sender acknowledgment was sent; False otherwise.
        """
        try:
            from datetime import datetime, timedelta
            from database.models import (
                create_ticket,
                TicketDepartment,
                get_position_by_id,
                get_all_tickets,
                get_all_candidates,
            )

            # Defaults are Polish user-facing placeholders ("Unknown",
            # "No subject", "No content") used in outgoing messages.
            from_email = email_data.get("from_email", "Nieznany")
            message_id = email_data.get("message_id")
            email_subject = email_data.get("subject", "Brak tematu")
            email_body = email_data.get("body", "Brak treści")

            # Check for duplicates - if email was already forwarded to HR (by checking processed_emails)
            # Create unique identifier for HR forwarding
            hr_forward_id = f"hr_forward:{message_id or email_data.get('uid', '')}:{from_email}"
            if hr_forward_id in self.processed_emails:
                logger.warning(
                    f"Email {message_id or 'with UID'} from {from_email} already forwarded to HR, "
                    f"skipping duplicate"
                )
                return True  # Return True to avoid reprocessing

            # Mark as processed BEFORE any side effects so a concurrent/retry
            # pass cannot forward the same message twice.
            self.processed_emails[hr_forward_id] = None
            logger.debug(f"Marking HR forward {hr_forward_id} as processed")

            # Try to find related candidate (get the latest one if multiple exist)
            related_candidate_id = None
            candidate = None
            try:
                # Get all candidates with this email and take the latest one
                all_candidates = get_all_candidates()
                candidates_with_email = [
                    c for c in all_candidates if c.email.lower() == from_email.lower()
                ]
                if candidates_with_email:
                    # "Latest" = newest created_at, ties broken by highest id.
                    latest_candidate = max(
                        candidates_with_email,
                        key=lambda c: (c.created_at.timestamp() if c.created_at else 0, c.id or 0),
                    )
                    related_candidate_id = latest_candidate.id
                    candidate = latest_candidate
                    logger.info(
                        f"Found candidate {related_candidate_id} ({latest_candidate.full_name}) for email {from_email}"
                    )
            except Exception as e:
                # Best-effort lookup: ticket/forwarding still proceed without
                # a candidate link.
                logger.warning(f"Error finding candidate for email {from_email}: {str(e)}")

            # Check if ticket already exists for this email (prevent duplicates)
            existing_ticket = None
            if message_id:
                try:
                    all_tickets = get_all_tickets()
                    existing_ticket = next(
                        (t for t in all_tickets if t.related_email_id == message_id), None
                    )
                    if existing_ticket:
                        logger.info(
                            f"Ticket #{existing_ticket.id} already exists for email {message_id}, skipping duplicate ticket creation"
                        )
                except Exception as e:
                    logger.warning(f"Error checking for existing ticket: {str(e)}")

            # Create ticket only if it doesn't exist
            if not existing_ticket:
                # Determine priority and deadline using AI
                priority, deadline_days = self._determine_ticket_priority_and_deadline(
                    email_subject, email_body, from_email
                )
                deadline = datetime.now() + timedelta(days=deadline_days)

                # Ticket description is Polish user-facing text
                # ("Email from", "Subject", "Message content").
                ticket_description = (
                    f"Email od: {from_email}\n"
                    f"Temat: {email_subject}\n\n"
                    f"Treść wiadomości:\n{email_body}\n\n"
                    f"Message-ID: {message_id or 'Brak'}"
                )

                try:
                    ticket = create_ticket(
                        department=TicketDepartment.HR,
                        priority=priority,
                        description=ticket_description,
                        deadline=deadline,
                        related_candidate_id=related_candidate_id,
                        related_email_id=message_id,
                    )
                    logger.info(
                        f"Created ticket #{ticket.id} for HR inquiry from {from_email} (priority: {priority.value}, deadline: {deadline_days} days)"
                    )
                except Exception as e:
                    # Ticket creation failure is non-fatal; forwarding and
                    # acknowledgment below still run.
                    logger.warning(f"Failed to create ticket for HR: {str(e)}")

            # If ticket exists, it means this email was already processed
            if existing_ticket:
                logger.info(
                    f"Email {message_id or 'with UID'} already processed (ticket #{existing_ticket.id} exists), skipping duplicate processing"
                )
                return True  # Return True to avoid reprocessing

            subject = f"[HR] {email_subject}"

            # Check if candidate exists in the database (for email content)
            candidate_info = ""
            if candidate:
                try:
                    position_info = ""
                    if candidate.position_id:
                        try:
                            position = get_position_by_id(candidate.position_id)
                            if position:
                                position_info = (
                                    f"\nStanowisko: {position.title} ({position.company})"
                                )
                        except Exception as e:
                            logger.warning(f"Error loading position {candidate.position_id}: {e}")

                    # Format candidate information for the HR email body.
                    # Stage/status enums are mapped to Polish display labels;
                    # unknown values fall back to their raw string form.
                    stage_display = {
                        "initial_screening": "Pierwsza selekcja",
                        "hr_interview": "Rozmowa HR",
                        "technical_assessment": "Weryfikacja wiedzy",
                        "final_interview": "Rozmowa końcowa",
                        "offer": "Oferta",
                    }.get(
                        (
                            candidate.stage.value
                            if hasattr(candidate.stage, "value")
                            else str(candidate.stage)
                        ),
                        str(candidate.stage),
                    )

                    status_display = {
                        "in_progress": "W trakcie",
                        "accepted": "Zaakceptowany",
                        "rejected": "Odrzucony",
                    }.get(
                        (
                            candidate.status.value
                            if hasattr(candidate.status, "value")
                            else str(candidate.status)
                        ),
                        str(candidate.status),
                    )

                    # Consent line is only included when the tri-state flag
                    # has been explicitly set (None = not asked yet).
                    consent_info = ""
                    if candidate.consent_for_other_positions is not None:
                        consent_info = f"\nZgoda na inne rekrutacje: {'Tak' if candidate.consent_for_other_positions else 'Nie'}"

                    candidate_info = f"""
---
INFORMACJE O KANDYDACIE (dane z bazy):
---
ID kandydata: {candidate.id}
Imię i nazwisko: {candidate.full_name}
Email: {candidate.email}
Status: {status_display}
Etap rekrutacji: {stage_display}{position_info}{consent_info}
Link do profilu: http://localhost:5000/candidate/{candidate.id} (w środowisku produkcyjnym należy podać właściwy adres URL)
---
"""
                    logger.info(
                        f"Found candidate {candidate.id} ({candidate.full_name}) for email {from_email}"
                    )
                except Exception as e:
                    logger.warning(f"Error formatting candidate info: {str(e)}")
                    candidate_info = ""
            else:
                logger.debug(f"No candidate found in database for email {from_email}")

            # Ensure body is not empty - if email_body is empty, add default message
            # (deliberately re-reads the raw body, overriding the earlier
            # 'Brak treści' default, so the placeholder below is used instead)
            email_body = email_data.get("body", "")
            if not email_body or not email_body.strip():
                email_body = "(Brak treści w oryginalnej wiadomości)"
                logger.warning(f"Empty email body from {from_email}, using default message")

            body = f"""
Email otrzymany od: {from_email}
Data: {email_data.get('date', 'Nieznana')}
Temat: {email_data.get('subject', 'Brak tematu')}
{candidate_info}
---
Treść wiadomości:
---

{email_body}

---
Oryginalny Message-ID: {email_data.get('message_id', 'Brak')}
In-Reply-To: {email_data.get('in_reply_to', 'Brak')}
"""

            # Forward to HR (Reply-To set to the candidate so HR can answer
            # directly); skipped when no HR address is configured.
            success = (
                self._send_email(
                    to_email=self.hr_email, subject=subject, body=body, reply_to=from_email
                )
                if self.hr_email
                else False
            )

            # Send acknowledgment to original sender (always, even if forwarding to HR failed)
            # Ticket was created, so candidate should be notified
            ack_subject = "Re: " + email_subject
            ack_body = """
Dziękujemy za kontakt.

Twoja wiadomość została przekazana do działu HR w celu rozpatrzenia.

Otrzymasz odpowiedź w najkrótszym możliwym terminie.

Z wyrazami szacunku

Dział HR
"""
            ack_success = self._send_email(to_email=from_email, subject=ack_subject, body=ack_body)

            # Log the four forward/acknowledge outcome combinations at
            # appropriate severities.
            if success:
                if ack_success:
                    if candidate_info:
                        logger.info(
                            f"Email routed to HR and acknowledgment sent to {from_email} (candidate ID: {candidate.id if candidate else 'N/A'})"
                        )
                    else:
                        logger.info(f"Email routed to HR and acknowledgment sent to {from_email}")
                else:
                    logger.warning(
                        f"Email routed to HR, but failed to send acknowledgment to {from_email}"
                    )
            else:
                if ack_success:
                    logger.warning(
                        f"Failed to send email to HR for {from_email}, but acknowledgment sent to candidate"
                    )
                else:
                    logger.error(f"Failed to send email to HR and acknowledgment to {from_email}")

            # Return True if either forwarding or acknowledgment succeeded (ticket was created)
            return success or ack_success

        except Exception as e:
            logger.error(f"Error routing email to HR: {str(e)}", exc_info=True)
            return False

    def _send_email(
        self, to_email: str, subject: str, body: str, reply_to: Optional[str] = None
    ) -> bool:
        """Send email via SMTP."""
        try:
            # Validate email content - prevent sending empty emails
            if not body or not body.strip():
                logger.warning(
                    f"Attempted to send empty email to {to_email} with subject: {subject}. Skipping."
                )
                return False

            if not subject or not subject.strip():
                logger.warning(
                    f"Attempted to send email to {to_email} with empty subject. Skipping."
                )
                return False

            msg = MIMEMultipart("alternative")
            msg["Subject"] = subject
            msg["From"] = self.email_username
            msg["To"] = to_email
            if reply_to:
                msg["Reply-To"] = reply_to

            # Add text content
            text_part = MIMEText(body, "plain", "utf-8")
            msg.attach(text_part)

            # Send email via SMTP
            if self.smtp_port == 465:
                # Use SSL for port 465
                with smtplib.SMTP_SSL(self.smtp_host, self.smtp_port) as server:
                    server.login(self.email_username, self.email_password)
                    server.send_message(msg)
            else:
                # Use TLS for port 587
                with smtplib.SMTP(self.smtp_host, self.smtp_port) as server:
                    if self.smtp_use_tls:
                        server.starttls()
                    server.login(self.email_username, self.email_password)
                    server.send_message(msg)

            logger.info(f"Email sent successfully to {to_email}")
            return True

        except Exception as e:
            logger.error(f"Failed to send email to {to_email}: {str(e)}")
            return False

__init__(email_username, email_password, smtp_host, smtp_port, smtp_use_tls=True, iod_email=None, hr_email=None)

Initialize email router.

Parameters:

Name Type Description Default
email_username str

Email username for sending emails

required
email_password str

Email password or app password

required
smtp_host str

SMTP server hostname

required
smtp_port int

SMTP server port

required
smtp_use_tls bool

Whether to use TLS (True for port 587, False for port 465)

True
iod_email Optional[str]

Email address for IOD department

None
hr_email Optional[str]

Email address for HR department

None
Source code in services/email_router.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def __init__(
    self,
    email_username: str,
    email_password: str,
    smtp_host: str,
    smtp_port: int,
    smtp_use_tls: bool = True,
    iod_email: Optional[str] = None,
    hr_email: Optional[str] = None,
):
    """
    Initialize the email router.

    Args:
        email_username: Username of the account used to send mail.
        email_password: Password or app password for that account.
        smtp_host: SMTP server hostname.
        smtp_port: SMTP server port.
        smtp_use_tls: Use STARTTLS (True for port 587, False for port 465).
        iod_email: Destination address for IOD (data-protection) mail.
        hr_email: Destination address for HR mail.
    """
    # SMTP credentials and department routing targets.
    self.email_username = email_username
    self.email_password = email_password
    self.smtp_host = smtp_host
    self.smtp_port = smtp_port
    self.smtp_use_tls = smtp_use_tls
    self.iod_email = iod_email
    self.hr_email = hr_email

    # Deduplication bookkeeping: insertion-ordered dict keyed by
    # Message-ID/UID so the oldest entries can be evicted when the
    # cap below is exceeded.
    self.processed_emails: dict[str, None] = {}
    self.max_processed_emails = 1000  # Maximum number of processed emails to track

    # AI collaborators default to None; each is populated below on a
    # best-effort basis and callers must tolerate them staying None.
    self.ai_client: Any = None
    self.model_name: Optional[str] = None
    self.query_classifier: Optional[QueryClassifierAgent] = None
    self.query_responder: Optional[QueryResponderAgent] = None
    self.rag_validator: Optional[RAGResponseValidatorAgent] = None
    self.rag_db: Optional[QdrantRAG] = None

    # Best effort: client used to pick ticket priority/deadline.
    try:
        from openai import AzureOpenAI

        self.ai_client = AzureOpenAI(
            api_version=settings.azure_openai_api_version,
            azure_endpoint=settings.azure_openai_endpoint,
            api_key=settings.api_key,
        )
        self.model_name = settings.openai_model
        logger.info("AI client initialized for ticket priority/deadline determination")
    except Exception as e:
        logger.warning(
            f"Failed to initialize AI client: {e}. Will use default priority/deadline."
        )
        self.ai_client = None
        self.model_name = None

    # Best effort: agents that classify and answer incoming queries.
    try:
        deployment = settings.azure_openai_gpt_deployment
        self.query_classifier = QueryClassifierAgent(model_name=deployment)
        self.query_responder = QueryResponderAgent(model_name=deployment)
        self.rag_validator = RAGResponseValidatorAgent(model_name=deployment)
        # RAG database is lazy-loaded on first use.
        self.rag_db = None
        logger.info("Query classification agents initialized")
    except Exception as e:
        logger.warning(
            f"Failed to initialize query agents: {e}. Will forward all queries to HR."
        )
        self.query_classifier = None
        self.query_responder = None
        self.rag_validator = None
        self.rag_db = None

route_email(email_data, classification)

Route email based on classification.

Parameters:

Name Type Description Default
email_data Dict

Email dictionary

required
classification str

Email classification ('iod', 'consent_yes', 'consent_no', 'default')

required

Returns:

Type Description
bool

True if routing was successful

Source code in services/email_router.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def route_email(self, email_data: Dict, classification: str) -> bool:
    """
    Route email based on classification.

    Args:
        email_data: Email dictionary
        classification: Email classification ('iod', 'consent_yes', 'consent_no', 'default')

    Returns:
        True if routing was successful (or the email was already processed)
    """
    try:
        # Build a stable dedup identifier: prefer Message-ID, fall back to
        # the IMAP UID, finally a sender/subject/date composite. The result
        # is always non-empty, so no extra emptiness guard is needed.
        uid = str(email_data.get("uid", ""))
        message_id = str(email_data.get("message_id", ""))

        if message_id:
            email_id = message_id
        elif uid:
            email_id = f"uid:{uid}"
        else:
            email_id = f"{email_data.get('from_email', 'unknown')}:{email_data.get('subject', 'no-subject')}:{email_data.get('date', 'no-date')}"

        if email_id in self.processed_emails:
            logger.warning(
                f"Email {email_id} already processed (classification: {classification}, "
                f"from: {email_data.get('from_email', 'unknown')}), skipping duplicate"
            )
            return True  # Return True to avoid reprocessing
        self.processed_emails[email_id] = None
        logger.debug(
            f"Marking email {email_id} as processed (classification: {classification})"
        )
        # Keep only last N processed emails (oldest evicted first)
        if len(self.processed_emails) > self.max_processed_emails:
            keep_count = self.max_processed_emails // 2
            keys = list(self.processed_emails)
            for k in keys[:-keep_count]:
                del self.processed_emails[k]
            logger.debug(
                f"Trimmed processed_emails to {len(self.processed_emails)} entries"
            )

        # Dispatch to the handler matching the classification.
        if classification == "iod":
            return self._route_to_iod(email_data)
        elif classification in ["consent_yes", "consent_no"]:
            return self._handle_consent(email_data, classification)
        else:  # default - general query
            return self._handle_general_query(email_data)
    except Exception as e:
        logger.error(f"Error routing email: {str(e)}", exc_info=True)
        return False

Service for listening to incoming emails via IMAP.

Source code in services/email_listener.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
class EmailListener:
    """Service for listening to incoming emails via IMAP."""

    # Keywords for IOD/RODO (data-protection) classification
    IOD_KEYWORDS = [
        "rodo",
        "iod",
        "dpo",
        "gdpr",
        "dane osobowe",
        "ochrona danych",
        "sprzeciw",
        "zgoda",
        "wycofanie",
        "skarga",
        "uodo",
        "organ nadzorczy",
        "profilowanie",
        "automatyczna decyzja",
        "sztuczna inteligencja",
        "ai",
        "si",
    ]

    # Keywords for consent classification
    CONSENT_KEYWORDS_POSITIVE = [
        "zgoda",
        "zgadzam się",
        "wyrażam zgodę",
        "tak",
        "chcę",
        "zainteresowany",
        "rozważenie",
        "inne oferty",
        "inne stanowiska",
        "inne pozycje",
        "inne rekrutacje",
    ]

    CONSENT_KEYWORDS_NEGATIVE = [
        "nie zgadzam się",
        "odmawiam",
        "nie",
        "nie chcę",
        "nie wyrażam zgody",
        "wycofuję zgodę",
        "nie jestem zainteresowany",
        "nie rozważaj",
    ]

    def __init__(
        self,
        email_username: str,
        email_password: str,
        imap_server: str = "imap.zoho.com",
        imap_port: int = 993,
    ):
        """
        Initialize email listener.

        Args:
            email_username: Email username/address
            email_password: Email password or app password
            imap_server: IMAP server address (default: imap.zoho.com; can be any provider)
            imap_port: IMAP server port (default: 993 for SSL)
        """
        self.email_username = email_username
        self.email_password = email_password
        self.imap_server = imap_server
        self.imap_port = imap_port
        # Lazily populated by connect(); None while disconnected.
        self.mail: Optional[imaplib.IMAP4_SSL] = None

    def connect(self) -> bool:
        """Connect to IMAP server.

        Returns:
            True if the login succeeded, False on any failure.
            Failures are logged, never raised.
        """
        try:
            if not self.email_username or not self.email_password:
                logger.error("Email credentials not provided to EmailListener")
                return False

            # Create IMAP connection with timeout (only this socket, not global default)
            import socket

            self.mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port, timeout=30)
            self.mail.login(self.email_username, self.email_password)
            logger.debug(f"Connected to IMAP server {self.imap_server}: {self.email_username}")
            return True
        except socket.timeout:
            # The local 'socket' import above has executed before any of
            # these exceptions can be raised, so the name is bound here.
            logger.error(
                f"IMAP connection timeout to {self.imap_server}:{self.imap_port}. Check network/firewall."
            )
            return False
        except socket.error as e:
            error_msg = str(e)
            if "EOF" in error_msg or "Connection reset" in error_msg:
                logger.warning(
                    f"IMAP connection error (EOF/reset) to {self.imap_server}:{self.imap_port}. "
                    "This may be temporary. Will retry in next cycle. "
                    "Possible causes: server-side connection limit, firewall, or network issue."
                )
            else:
                logger.error(
                    f"IMAP socket error to {self.imap_server}:{self.imap_port}: {error_msg}"
                )
            return False
        except imaplib.IMAP4.error as e:
            error_msg = str(e)
            if "AUTHENTICATIONFAILED" in error_msg or "Invalid credentials" in error_msg:
                logger.error(
                    f"IMAP authentication failed for {self.email_username} on {self.imap_server}. "
                    "This usually means:\n"
                    "  1. Wrong username or password\n"
                    "  2. For Gmail: You need 'App Password' instead of regular password\n"
                    "  3. For Zoho: Check if password is correct and account is active\n"
                    "  4. IMAP might be disabled in your email account settings"
                )
            else:
                logger.error(f"IMAP protocol error to {self.imap_server}: {error_msg}")
            return False
        except Exception as e:
            logger.error(
                f"Failed to connect to IMAP server {self.imap_server}:{self.imap_port}: {str(e)}"
            )
            return False

    def disconnect(self):
        """Disconnect from the IMAP server (best-effort; never raises)."""
        if self.mail:
            try:
                self.mail.close()
                self.mail.logout()
            except Exception as e:
                logger.warning(f"Error disconnecting from IMAP: {str(e)}")
            finally:
                self.mail = None

    def get_unread_emails(self, folder: str = "INBOX") -> List[Dict]:
        """
        Get unread emails from specified folder.

        Args:
            folder: Email folder to check (default: 'INBOX')

        Returns:
            List of email dictionaries with keys: uid, from_email, from_name,
            subject, body, date, message_id, in_reply_to, raw_message
        """
        if not self.mail:
            if not self.connect():
                return []

        assert self.mail is not None  # narrow type after connect
        try:
            # Select folder
            status, messages = self.mail.select(folder)
            if status != "OK":
                logger.error(
                    f"Failed to select folder {folder} (status={status}, messages={messages})"
                )
                return []

            # Search for unread emails
            status, message_numbers = self.mail.search(None, "UNSEEN")
            if status != "OK":
                logger.error(
                    f"Failed to search for unread emails (status={status}, data={message_numbers})"
                )
                return []

            raw_ids = message_numbers[0]
            if not raw_ids:
                # No unread emails
                return []

            # SEARCH returns a space-separated bytes list of message
            # sequence numbers.
            message_ids = raw_ids.split()

            email_list = []

            for msg_num in message_ids:
                try:
                    # Fetch email
                    status, msg_data = self.mail.fetch(msg_num, "(RFC822)")
                    if status != "OK" or not msg_data or msg_data[0] is None:
                        logger.warning(f"Failed to fetch email {msg_num} (status={status})")
                        continue

                    # Parse email
                    email_body = msg_data[0][1]
                    if not isinstance(email_body, (bytes, bytearray)):
                        continue
                    email_message = email.message_from_bytes(email_body)

                    # Extract email data
                    email_data = self._parse_email(email_message)
                    if email_data:
                        # NOTE: "uid" holds the IMAP sequence number from
                        # SEARCH, not a true UID; mark_as_read() uses the
                        # same sequence-number addressing, so they agree.
                        email_data["uid"] = msg_num.decode()
                        email_list.append(email_data)
                    else:
                        logger.warning(f"Parsed email {msg_num} returned None")

                except Exception as e:
                    logger.warning(f"Error parsing email {msg_num}: {str(e)}", exc_info=True)
                    continue

            logger.info(f"Found {len(email_list)} unread emails in {folder} after parsing")
            return email_list

        except Exception as e:
            logger.error(f"Error getting unread emails: {str(e)}")
            return []

    def _parse_email(self, email_message: Message) -> Optional[Dict]:
        """Parse email message into dictionary (None if parsing fails)."""
        try:
            # Get headers
            subject = self._decode_header(email_message.get("Subject", ""))
            from_email = self._decode_header(email_message.get("From", ""))
            message_id = email_message.get("Message-ID", "")
            in_reply_to = email_message.get("In-Reply-To", "")
            date_str = email_message.get("Date", "")

            # Parse date; fall back to "now" on a missing/malformed header
            try:
                date = parsedate_to_datetime(date_str) if date_str else datetime.now()
            except Exception:
                date = datetime.now()

            # Extract email address from From field
            from_email_clean = self._extract_email_address(from_email)

            # Get email body
            body = self._get_email_body(email_message)

            return {
                "from_email": from_email_clean,
                "from_name": from_email,
                "subject": subject,
                "body": body,
                "date": date,
                "message_id": message_id,
                "in_reply_to": in_reply_to,
                "raw_message": email_message,
            }
        except Exception as e:
            logger.error(f"Error parsing email: {str(e)}")
            return None

    def _decode_header(self, header: str) -> str:
        """Decode email header (handles problematic encodings like 'unknown-8bit')."""
        try:
            decoded_parts = email.header.decode_header(header)
            decoded_string = ""
            for part, encoding in decoded_parts:
                if isinstance(part, bytes):
                    # Try declared encoding first, fall back to utf-8 on errors or unknown encodings
                    if encoding:
                        try:
                            decoded_string += part.decode(encoding, errors="ignore")
                        except (LookupError, UnicodeError):
                            # Unknown codec (e.g. 'unknown-8bit') or decode error - fall back to utf-8
                            decoded_string += part.decode("utf-8", errors="ignore")
                    else:
                        decoded_string += part.decode("utf-8", errors="ignore")
                else:
                    decoded_string += part
            return decoded_string
        except Exception as e:
            logger.warning(f"Error decoding header: {str(e)}")
            return str(header)

    def _extract_email_address(self, email_string: str) -> str:
        """Extract email address from 'Name <email@domain.com>' format."""
        match = re.search(r"[\w\.-]+@[\w\.-]+\.\w+", email_string)
        if match:
            return match.group(0)
        # No address found: return the input unchanged.
        return email_string

    def _get_email_body(self, email_message: Message) -> str:
        """Extract email body text (plain text preferred, HTML as fallback)."""
        body = ""

        if email_message.is_multipart():
            for part in email_message.walk():
                content_type = part.get_content_type()
                content_disposition = str(part.get("Content-Disposition", ""))

                # Skip attachments
                if "attachment" in content_disposition:
                    continue

                # Get text content
                if content_type == "text/plain":
                    try:
                        body_bytes = part.get_payload(decode=True)
                        if isinstance(body_bytes, bytes):
                            charset = part.get_content_charset() or "utf-8"
                            body += body_bytes.decode(charset, errors="ignore")
                    except Exception as e:
                        logger.warning(f"Error decoding email body part: {str(e)}")
                elif content_type == "text/html":
                    # Fallback to HTML if no plain text
                    if not body:
                        try:
                            body_bytes = part.get_payload(decode=True)
                            if isinstance(body_bytes, bytes):
                                charset = part.get_content_charset() or "utf-8"
                                html_body = body_bytes.decode(charset, errors="ignore")
                                # Simple HTML to text conversion (remove tags)
                                body = re.sub(r"<[^>]+>", "", html_body)
                        except Exception as e:
                            logger.warning(f"Error decoding HTML body: {str(e)}")
        else:
            # Not multipart
            try:
                payload_bytes: Any = email_message.get_payload(decode=True)
                if isinstance(payload_bytes, bytes):
                    charset = email_message.get_content_charset() or "utf-8"
                    body = payload_bytes.decode(charset, errors="ignore")
            except Exception as e:
                logger.warning(f"Error decoding email body: {str(e)}")

        return body.strip()

    def mark_as_read(self, uid: str, folder: str = "INBOX") -> bool:
        """Mark email as read (\\Seen flag)."""
        if not self.mail:
            return False
        assert self.mail is not None
        try:
            self.mail.select(folder)
            self.mail.store(uid, "+FLAGS", "\\Seen")
            return True
        except Exception as e:
            logger.error(f"Error marking email as read: {str(e)}")
            return False

    @staticmethod
    def _keyword_in_text(keyword: str, text: str) -> bool:
        """Return True if *keyword* occurs in *text* as a whole word/phrase.

        Plain substring matching misfires on short tokens: 'nie' matches
        inside 'codziennie', 'iod' inside 'period', 'rodo' inside
        'narodowy'. Word-boundary matching avoids these false positives.
        """
        return re.search(rf"(?<!\w){re.escape(keyword.lower())}(?!\w)", text) is not None

    def classify_email(self, email_data: Dict, classifier_agent=None) -> str:
        """
        Classify email into categories using AI agent.

        Args:
            email_data: Email dictionary with 'subject' and 'body' keys
            classifier_agent: Optional EmailClassifierAgent instance (if None, uses simple keyword matching)

        Returns:
            Classification: 'iod', 'consent_yes', 'consent_no', or 'default'
        """
        # If AI classifier is available, use it
        if classifier_agent:
            try:
                classification = classifier_agent.classify_email(
                    from_email=email_data.get("from_email", ""),
                    subject=email_data.get("subject", ""),
                    body=email_data.get("body", ""),
                )
                logger.info(
                    f"Email classified by AI as '{classification.category}' "
                    f"(confidence: {classification.confidence:.2f})"
                )
                return classification.category
            except Exception as e:
                logger.warning(
                    f"AI classification failed, falling back to keyword matching: {str(e)}"
                )

        # Fallback to simple keyword matching
        text = f"{email_data.get('subject', '')} {email_data.get('body', '')}".lower()

        # Check for IOD keywords (at least 1 critical one required)
        critical_iod_keywords = [
            "rodo",
            "iod",
            "dpo",
            "gdpr",
            "dane osobowe",
            "ochrona danych",
            "uodo",
            "organ nadzorczy",
            "profilowanie",
            "automatyczna decyzja",
        ]
        found_iod_keywords = [
            kw for kw in critical_iod_keywords if self._keyword_in_text(kw, text)
        ]

        if found_iod_keywords:
            logger.info(f"Email classified as IOD (keywords: {found_iod_keywords})")
            return "iod"

        # Check NEGATIVE consent keywords BEFORE positive ones: negative
        # phrases like "nie zgadzam się" / "nie jestem zainteresowany"
        # contain positive phrases ("zgadzam się", "zainteresowany") as
        # substrings and would otherwise be misclassified as consent_yes.
        for keyword in self.CONSENT_KEYWORDS_NEGATIVE:
            if self._keyword_in_text(keyword, text):
                logger.info(f"Email classified as consent_no (keyword: {keyword})")
                return "consent_no"

        for keyword in self.CONSENT_KEYWORDS_POSITIVE:
            if self._keyword_in_text(keyword, text):
                logger.info(f"Email classified as consent_yes (keyword: {keyword})")
                return "consent_yes"

        # Default classification
        logger.info("Email classified as default (HR)")
        return "default"

__init__(email_username, email_password, imap_server='imap.zoho.com', imap_port=993)

Initialize email listener.

Parameters:

Name Type Description Default
email_username str

Email username/address

required
email_password str

Email password or app password

required
imap_server str

IMAP server address (default: imap.zoho.com; can be any provider)

'imap.zoho.com'
imap_port int

IMAP server port (default: 993 for SSL)

993
Source code in services/email_listener.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def __init__(
    self,
    email_username: str,
    email_password: str,
    imap_server: str = "imap.zoho.com",
    imap_port: int = 993,
):
    """
    Store IMAP credentials and connection settings.

    Args:
        email_username: Email username/address
        email_password: Email password or app password
        imap_server: IMAP server address (default: imap.zoho.com; can be any provider)
        imap_port: IMAP server port (default: 993 for SSL)
    """
    # Connection settings first, then credentials.
    self.imap_server = imap_server
    self.imap_port = imap_port
    self.email_username = email_username
    self.email_password = email_password
    # No connection is opened here; connect() sets this lazily.
    self.mail: Optional[imaplib.IMAP4_SSL] = None

classify_email(email_data, classifier_agent=None)

Classify email into categories using AI agent.

Parameters:

Name Type Description Default
email_data Dict

Email dictionary with 'subject' and 'body' keys

required
classifier_agent

Optional EmailClassifierAgent instance (if None, uses simple keyword matching)

None

Returns:

Name Type Description
Classification str

'iod', 'consent_yes', 'consent_no', or 'default'

Source code in services/email_listener.py
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def classify_email(self, email_data: Dict, classifier_agent=None) -> str:
    """
    Classify email into categories using AI agent.

    Args:
        email_data: Email dictionary with 'subject' and 'body' keys
        classifier_agent: Optional EmailClassifierAgent instance (if None, uses simple keyword matching)

    Returns:
        Classification: 'iod', 'consent_yes', 'consent_no', or 'default'
    """
    # If AI classifier is available, use it
    if classifier_agent:
        try:
            classification = classifier_agent.classify_email(
                from_email=email_data.get("from_email", ""),
                subject=email_data.get("subject", ""),
                body=email_data.get("body", ""),
            )
            logger.info(
                f"Email classified by AI as '{classification.category}' "
                f"(confidence: {classification.confidence:.2f})"
            )
            return classification.category
        except Exception as e:
            logger.warning(
                f"AI classification failed, falling back to keyword matching: {str(e)}"
            )

    # Fallback to simple keyword matching
    text = f"{email_data.get('subject', '')} {email_data.get('body', '')}".lower()

    def _match(keyword: str) -> bool:
        # Whole-word/phrase match. Plain substring matching misfires on
        # short tokens ('nie' inside 'codziennie', 'iod' inside 'period').
        return re.search(rf"(?<!\w){re.escape(keyword)}(?!\w)", text) is not None

    # Check for IOD keywords (at least 1 critical one required)
    critical_iod_keywords = [
        "rodo",
        "iod",
        "dpo",
        "gdpr",
        "dane osobowe",
        "ochrona danych",
        "uodo",
        "organ nadzorczy",
        "profilowanie",
        "automatyczna decyzja",
    ]
    found_iod_keywords = [kw for kw in critical_iod_keywords if _match(kw)]

    if found_iod_keywords:
        logger.info(f"Email classified as IOD (keywords: {found_iod_keywords})")
        return "iod"

    # Check NEGATIVE consent keywords BEFORE positive ones: "nie zgadzam
    # się" contains "zgadzam się" and "nie jestem zainteresowany" contains
    # "zainteresowany", so the old positive-first order misclassified
    # refusals as consent_yes.
    for keyword in self.CONSENT_KEYWORDS_NEGATIVE:
        if _match(keyword.lower()):
            logger.info(f"Email classified as consent_no (keyword: {keyword})")
            return "consent_no"

    for keyword in self.CONSENT_KEYWORDS_POSITIVE:
        if _match(keyword.lower()):
            logger.info(f"Email classified as consent_yes (keyword: {keyword})")
            return "consent_yes"

    # Default classification
    logger.info("Email classified as default (HR)")
    return "default"

connect()

Connect to IMAP server.

Source code in services/email_listener.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def connect(self) -> bool:
    """Connect to IMAP server.

    Returns:
        True if the login succeeded, False otherwise. All failure modes
        are logged (with targeted diagnostics) rather than raised.
    """
    try:
        if not self.email_username or not self.email_password:
            logger.error("Email credentials not provided to EmailListener")
            return False

        # Create IMAP connection with timeout (only this socket, not global default)
        import socket

        self.mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port, timeout=30)
        self.mail.login(self.email_username, self.email_password)
        logger.debug(f"Connected to IMAP server {self.imap_server}: {self.email_username}")
        return True
    # The local 'socket' import above always runs before IMAP4_SSL can
    # raise, so 'socket' is bound by the time these handlers execute.
    # Handler order matters: socket.timeout is a subclass of socket.error.
    except socket.timeout:
        logger.error(
            f"IMAP connection timeout to {self.imap_server}:{self.imap_port}. Check network/firewall."
        )
        return False
    except socket.error as e:
        error_msg = str(e)
        # EOF/reset errors are usually transient (connection caps, flaky
        # network) and only warrant a warning; the caller retries later.
        if "EOF" in error_msg or "Connection reset" in error_msg:
            logger.warning(
                f"IMAP connection error (EOF/reset) to {self.imap_server}:{self.imap_port}. "
                "This may be temporary. Will retry in next cycle. "
                "Possible causes: server-side connection limit, firewall, or network issue."
            )
        else:
            logger.error(
                f"IMAP socket error to {self.imap_server}:{self.imap_port}: {error_msg}"
            )
        return False
    except imaplib.IMAP4.error as e:
        error_msg = str(e)
        # Auth failures get a checklist of the most common causes.
        if "AUTHENTICATIONFAILED" in error_msg or "Invalid credentials" in error_msg:
            logger.error(
                f"IMAP authentication failed for {self.email_username} on {self.imap_server}. "
                "This usually means:\n"
                "  1. Wrong username or password\n"
                "  2. For Gmail: You need 'App Password' instead of regular password\n"
                "  3. For Zoho: Check if password is correct and account is active\n"
                "  4. IMAP might be disabled in your email account settings"
            )
        else:
            logger.error(f"IMAP protocol error to {self.imap_server}: {error_msg}")
        return False
    except Exception as e:
        # Catch-all so a connect attempt can never crash the caller.
        logger.error(
            f"Failed to connect to IMAP server {self.imap_server}:{self.imap_port}: {str(e)}"
        )
        return False

disconnect()

Disconnect from the IMAP server.

Source code in services/email_listener.py
139
140
141
142
143
144
145
146
147
148
149
def disconnect(self):
    """Close the selected mailbox and log out from the IMAP server.

    Best-effort: any error during teardown is logged as a warning, and
    the cached connection object is dropped either way.
    """
    if not self.mail:
        return
    try:
        self.mail.close()
        self.mail.logout()
    except Exception as e:
        logger.warning(f"Error disconnecting from IMAP: {str(e)}")
    finally:
        self.mail = None

get_unread_emails(folder='INBOX')

Get unread emails from specified folder.

Parameters:

Name Type Description Default
folder str

Email folder to check (default: 'INBOX')

'INBOX'

Returns:

Type Description
List[Dict]

List of email dictionaries with keys: uid, from_email, from_name, subject, body, date, message_id, in_reply_to, raw_message

Source code in services/email_listener.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def get_unread_emails(self, folder: str = "INBOX") -> List[Dict]:
    """
    Get unread emails from specified folder.

    Args:
        folder: Email folder to check (default: 'INBOX')

    Returns:
        List of email dictionaries with keys: uid, from_email, from_name,
        subject, body, date, message_id, in_reply_to, raw_message.
        Returns an empty list on any connection or protocol failure.
    """
    # Lazily (re)connect if we have no live session.
    if not self.mail:
        if not self.connect():
            return []

    assert self.mail is not None  # narrow type after connect
    try:
        # Select folder
        status, messages = self.mail.select(folder)
        if status != "OK":
            logger.error(
                f"Failed to select folder {folder} (status={status}, messages={messages})"
            )
            return []

        # Search for unread emails
        status, message_numbers = self.mail.search(None, "UNSEEN")
        if status != "OK":
            logger.error(
                f"Failed to search for unread emails (status={status}, data={message_numbers})"
            )
            return []

        # SEARCH data is a single bytes item of space-separated message
        # sequence numbers; empty bytes means no unread messages.
        raw_ids = message_numbers[0]
        if not raw_ids:
            # No unread emails
            return []

        message_ids = raw_ids.split()

        email_list = []

        for msg_num in message_ids:
            try:
                # Fetch email (full RFC822 message)
                status, msg_data = self.mail.fetch(msg_num, "(RFC822)")
                if status != "OK" or not msg_data or msg_data[0] is None:
                    logger.warning(f"Failed to fetch email {msg_num} (status={status})")
                    continue

                # Parse email; fetch items can also be bare bytes
                # delimiters, hence the isinstance guard.
                email_body = msg_data[0][1]
                if not isinstance(email_body, (bytes, bytearray)):
                    continue
                email_message = email.message_from_bytes(email_body)

                # Extract email data
                email_data = self._parse_email(email_message)
                if email_data:
                    # NOTE(review): "uid" actually holds the IMAP message
                    # sequence number from SEARCH, not a true UID —
                    # consistent with mark_as_read(), which addresses
                    # messages the same way. Confirm before renaming.
                    email_data["uid"] = msg_num.decode()
                    email_list.append(email_data)
                else:
                    logger.warning(f"Parsed email {msg_num} returned None")

            except Exception as e:
                # One bad message must not abort the whole batch.
                logger.warning(f"Error parsing email {msg_num}: {str(e)}", exc_info=True)
                continue

        logger.info(f"Found {len(email_list)} unread emails in {folder} after parsing")
        return email_list

    except Exception as e:
        logger.error(f"Error getting unread emails: {str(e)}")
        return []

mark_as_read(uid, folder='INBOX')

Mark email as read.

Source code in services/email_listener.py
337
338
339
340
341
342
343
344
345
346
347
348
def mark_as_read(self, uid: str, folder: str = "INBOX") -> bool:
    """Mark the message identified by *uid* as read in *folder*.

    Returns True on success, False when there is no connection or the
    IMAP commands fail (failures are logged).
    """
    # Nothing to do without an open connection.
    if not self.mail:
        return False
    try:
        self.mail.select(folder)
        self.mail.store(uid, "+FLAGS", "\\Seen")
    except Exception as e:
        logger.error(f"Error marking email as read: {str(e)}")
        return False
    else:
        return True

Background service for monitoring and processing incoming emails.

Source code in services/email_monitor.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
class EmailMonitor:
    """Background service for monitoring and processing incoming emails.

    Polls an IMAP INBOX on a fixed interval from a daemon thread. Each new
    message is parsed, classified (AI agent when available, otherwise the
    listener's keyword-based fallback) and handed to an EmailRouter.
    """

    def __init__(
        self,
        email_username: str,
        email_password: str,
        imap_host: str,
        imap_port: int,
        smtp_host: str,
        smtp_port: int,
        smtp_use_tls: bool = True,
        iod_email: Optional[str] = None,
        hr_email: Optional[str] = None,
        check_interval: int = 60,
    ):
        """
        Initialize email monitor.

        Args:
            email_username: Email username
            email_password: Email password or app password
            imap_host: IMAP server hostname
            imap_port: IMAP server port
            smtp_host: SMTP server hostname
            smtp_port: SMTP server port
            smtp_use_tls: Whether to use TLS for SMTP
            iod_email: Email address for IOD department
            hr_email: Email address for HR department
            check_interval: Interval between email checks in seconds (default: 60)
        """
        self.email_username = email_username
        self.email_password = email_password
        self.iod_email = iod_email
        self.hr_email = hr_email
        self.check_interval = check_interval

        self.listener = EmailListener(email_username, email_password, imap_host, imap_port)
        self.router = EmailRouter(
            email_username, email_password, smtp_host, smtp_port, smtp_use_tls, iod_email, hr_email
        )

        # Track last processed message sequence number within this process.
        # This way we react to all NEW messages (ALL),
        # regardless of whether they were already marked as read.
        # NOTE(review): sequence numbers are per-mailbox-session; this assumes
        # messages are not expunged between polls — confirm for the target server.
        self.last_msg_num: Optional[int] = None

        # Initialize AI classifier; a failure here is non-fatal — classify_email
        # falls back to keyword-based classification when classifier is None.
        self.classifier: Optional[EmailClassifierAgent] = None
        try:
            from config import settings

            self.classifier = EmailClassifierAgent(
                model_name=settings.openai_model, api_key=settings.api_key
            )
            logger.info(f"AI email classifier initialized with model: {settings.openai_model}")
        except Exception as e:
            logger.warning(
                f"Failed to initialize AI classifier: {str(e)}. Will use keyword-based classification."
            )
            self.classifier = None

        self.running = False
        self.thread: Optional[threading.Thread] = None

    def start(self):
        """Start email monitoring in background thread.

        No-ops (with a warning) if already running, or if credentials or the
        IOD/HR routing addresses are missing.
        """
        if self.running:
            logger.warning("Email monitor is already running")
            return

        if not self.email_username or not self.email_password:
            logger.warning("Email credentials not configured. Email monitoring disabled.")
            return

        if not self.iod_email or not self.hr_email:
            logger.warning("IOD or HR email not configured. Email monitoring disabled.")
            return

        self.running = True
        # Daemon thread: will not block interpreter shutdown.
        self.thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.thread.start()
        logger.info("Email monitor started")

    def stop(self):
        """Stop email monitoring.

        Clears the running flag (the loop exits at its next iteration) and
        drops the IMAP connection. The thread itself is not joined.
        """
        self.running = False
        if self.listener:
            self.listener.disconnect()
        logger.info("Email monitor stopped")

    def _monitor_loop(self):
        """Main monitoring loop.

        Each cycle: connect, select INBOX, search ALL sequence numbers,
        process every message numbered above last_msg_num, then disconnect
        and sleep for check_interval seconds.
        """
        logger.info(f"Email monitor loop started (check interval: {self.check_interval}s)")

        while self.running:
            try:
                # Connect to email server
                if not self.listener.connect():
                    logger.debug(
                        f"Failed to connect to email server. Retrying in {self.check_interval}s."
                    )
                    time.sleep(self.check_interval)
                    continue

                # Select INBOX (connect() succeeded, so listener.mail is set)
                status, _ = self.listener.mail.select("INBOX")
                if status != "OK":
                    logger.warning("Failed to select INBOX for email monitoring")
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                # Get ALL message sequence numbers
                status, data = self.listener.mail.search(None, "ALL")
                if status != "OK":
                    logger.warning(
                        f"Failed to search ALL messages in INBOX (status={status}, data={data})"
                    )
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                raw_ids = data[0]
                if not raw_ids:
                    # No messages at all
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                all_msg_nums = [int(x) for x in raw_ids.split()]
                if not all_msg_nums:
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                # On first run: set last_msg_num to the latest message,
                # so we don't process the entire history.
                if self.last_msg_num is None:
                    self.last_msg_num = max(all_msg_nums)
                else:
                    # New messages have a number greater than last_msg_num
                    new_msg_nums = [n for n in all_msg_nums if n > self.last_msg_num]

                    if new_msg_nums:
                        # Track successfully processed message numbers
                        successfully_processed = []

                        for msg_num in new_msg_nums:
                            try:
                                # Fetch full message by sequence number
                                status, msg_data = self.listener.mail.fetch(
                                    str(msg_num), "(RFC822)"
                                )
                                if status != "OK" or not msg_data or msg_data[0] is None:
                                    logger.warning(
                                        f"Failed to fetch email seq={msg_num} (status={status})"
                                    )
                                    continue

                                email_body = msg_data[0][1]
                                email_message = email.message_from_bytes(email_body)

                                # Parse using listener helper
                                # NOTE(review): reaches into a private method of
                                # EmailListener — consider a public accessor.
                                email_data = self.listener._parse_email(email_message)
                                if not email_data:
                                    logger.warning(f"Parsed email seq={msg_num} returned None")
                                    continue

                                email_data["uid"] = str(msg_num)

                                # Classify email using AI agent
                                classification = self.listener.classify_email(
                                    email_data, classifier_agent=self.classifier
                                )
                                logger.info(
                                    f"Email seq={msg_num} from {email_data.get('from_email')} "
                                    f"classified as: {classification}"
                                )

                                # Route email
                                success = self.router.route_email(email_data, classification)

                                if success:
                                    logger.info(f"Email seq={msg_num} processed successfully")
                                    # Only track successfully processed messages
                                    successfully_processed.append(msg_num)
                                else:
                                    logger.warning(
                                        f"Failed to route email seq={msg_num} from {email_data.get('from_email')}"
                                    )
                                    # Not tracked as processed. Retried next cycle ONLY if no
                                    # later message succeeded this cycle; otherwise last_msg_num
                                    # advances past it (see below) and it is skipped for good.

                            except Exception as e:
                                logger.error(
                                    f"Error processing email seq={msg_num}: {str(e)}", exc_info=True
                                )
                                # Not tracked as processed — same retry caveat as above.

                        # Advance to highest successfully processed (failed messages are not retried to avoid duplicate processing)
                        if successfully_processed:
                            self.last_msg_num = max(successfully_processed)
                            logger.debug(
                                f"Updated last_msg_num to {self.last_msg_num} ({len(successfully_processed)} messages processed)"
                            )

                # Disconnect
                self.listener.disconnect()

                # Wait before next check
                time.sleep(self.check_interval)

            except Exception as e:
                logger.error(f"Error in email monitor loop: {str(e)}", exc_info=True)
                time.sleep(self.check_interval)

__init__(email_username, email_password, imap_host, imap_port, smtp_host, smtp_port, smtp_use_tls=True, iod_email=None, hr_email=None, check_interval=60)

Initialize email monitor.

Parameters:

Name Type Description Default
email_username str

Email username

required
email_password str

Email password or app password

required
imap_host str

IMAP server hostname

required
imap_port int

IMAP server port

required
smtp_host str

SMTP server hostname

required
smtp_port int

SMTP server port

required
smtp_use_tls bool

Whether to use TLS for SMTP

True
iod_email Optional[str]

Email address for IOD department

None
hr_email Optional[str]

Email address for HR department

None
check_interval int

Interval between email checks in seconds (default: 60)

60
Source code in services/email_monitor.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def __init__(
    self,
    email_username: str,
    email_password: str,
    imap_host: str,
    imap_port: int,
    smtp_host: str,
    smtp_port: int,
    smtp_use_tls: bool = True,
    iod_email: Optional[str] = None,
    hr_email: Optional[str] = None,
    check_interval: int = 60,
):
    """
    Initialize email monitor.

    Builds an EmailListener (IMAP) and EmailRouter (SMTP) pair and tries to
    set up the optional AI classifier; classifier failure is non-fatal.

    Args:
        email_username: Email username
        email_password: Email password or app password
        imap_host: IMAP server hostname
        imap_port: IMAP server port
        smtp_host: SMTP server hostname
        smtp_port: SMTP server port
        smtp_use_tls: Whether to use TLS for SMTP
        iod_email: Email address for IOD department
        hr_email: Email address for HR department
        check_interval: Interval between email checks in seconds (default: 60)
    """
    self.email_username = email_username
    self.email_password = email_password
    self.iod_email = iod_email
    self.hr_email = hr_email
    self.check_interval = check_interval

    self.listener = EmailListener(email_username, email_password, imap_host, imap_port)
    self.router = EmailRouter(
        email_username, email_password, smtp_host, smtp_port, smtp_use_tls, iod_email, hr_email
    )

    # Track last processed message sequence number within this process.
    # This way we react to all NEW messages (ALL),
    # regardless of whether they were already marked as read.
    self.last_msg_num: Optional[int] = None

    # Initialize AI classifier; on any failure we fall back to
    # keyword-based classification (classifier stays None).
    self.classifier: Optional[EmailClassifierAgent] = None
    try:
        from config import settings

        self.classifier = EmailClassifierAgent(
            model_name=settings.openai_model, api_key=settings.api_key
        )
        logger.info(f"AI email classifier initialized with model: {settings.openai_model}")
    except Exception as e:
        logger.warning(
            f"Failed to initialize AI classifier: {str(e)}. Will use keyword-based classification."
        )
        self.classifier = None

    self.running = False
    self.thread: Optional[threading.Thread] = None

start()

Start email monitoring in background thread.

Source code in services/email_monitor.py
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def start(self):
    """Start email monitoring in background thread.

    Refuses to start (with a warning) when already running, or when the
    credentials or the IOD/HR routing addresses are not configured.
    """
    if self.running:
        logger.warning("Email monitor is already running")
        return

    # Guard clauses: all preconditions must hold before spawning the thread.
    if not (self.email_username and self.email_password):
        logger.warning("Email credentials not configured. Email monitoring disabled.")
        return

    if not (self.iod_email and self.hr_email):
        logger.warning("IOD or HR email not configured. Email monitoring disabled.")
        return

    worker = threading.Thread(target=self._monitor_loop, daemon=True)
    self.running = True
    self.thread = worker
    worker.start()
    logger.info("Email monitor started")

stop()

Stop email monitoring.

Source code in services/email_monitor.py
 98
 99
100
101
102
103
def stop(self):
    """Stop email monitoring.

    Clears the running flag so the polling loop exits on its next pass,
    and closes the IMAP connection if a listener exists.
    """
    self.running = False
    listener = self.listener
    if listener:
        listener.disconnect()
    logger.info("Email monitor stopped")

Qdrant RAG service for vector database operations.

Source code in services/qdrant_service.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
class QdrantRAG:
    """Qdrant RAG service for vector database operations.

    Stores documents as vector points (Azure OpenAI embedding + payload with
    the raw text and metadata) and retrieves them by cosine similarity.
    """

    def __init__(
        self,
        collection_name: str = "recruitment_knowledge_base",
        use_azure_openai: bool = False,
        azure_endpoint: Optional[str] = None,
        azure_api_key: Optional[str] = None,
        azure_deployment: Optional[str] = None,
        azure_api_version: Optional[str] = None,
        qdrant_path: Optional[str] = None,
        qdrant_host: Optional[str] = None,
        qdrant_port: Optional[int] = None,
    ):
        """
        Initialize Qdrant RAG service.

        Args:
            collection_name: Collection name
            use_azure_openai: Whether to use Azure OpenAI embeddings
            azure_endpoint: Azure OpenAI endpoint URL
            azure_api_key: Azure OpenAI API key
            azure_deployment: Deployment name (e.g., "text-embedding-3-small")
            azure_api_version: Azure OpenAI API version
            qdrant_path: Path to local Qdrant database (None = in-memory or server)
            qdrant_host: Qdrant server hostname (e.g., "qdrant" or "localhost")
            qdrant_port: Qdrant server port (default: 6333)

        Raises:
            ImportError: Azure OpenAI requested but openai is not installed.
            ValueError: Azure OpenAI credentials missing — the service has no
                local-embedding fallback, so they are effectively required.
            RuntimeError: Local Qdrant path is locked by another process.
        """
        # Initialize Qdrant client
        # Priority: server (host+port) > local path > in-memory
        if qdrant_host:
            # Connect to Qdrant server
            qdrant_port = qdrant_port or 6333
            self.client = QdrantClient(host=qdrant_host, port=qdrant_port)
            logger.info(f"Qdrant server connection: {qdrant_host}:{qdrant_port}")
        elif qdrant_path:
            try:
                self.client = QdrantClient(path=qdrant_path)
                logger.info(f"Qdrant local database: {qdrant_path}")
            except RuntimeError as e:
                # File-based Qdrant allows a single client; give a clearer
                # message for the common lock-contention failure.
                if "already accessed by another instance" in str(e) or "AlreadyLocked" in str(e):
                    error_msg = (
                        f"Qdrant database at {qdrant_path} is already locked by another instance. "
                        "Close other Qdrant clients (e.g., app.py) before accessing. "
                        "Consider using Qdrant server (qdrant_host/qdrant_port) instead."
                    )
                    logger.error(error_msg)
                    raise RuntimeError(error_msg) from e
                else:
                    raise
        else:
            self.client = QdrantClient(":memory:")  # In-memory
            logger.info("Qdrant in-memory database")

        self.collection_name = collection_name

        # Initialize Azure OpenAI if needed
        if use_azure_openai and azure_endpoint and azure_api_key:
            if not AZURE_OPENAI_AVAILABLE:
                raise ImportError("openai is not installed. Run: pip install openai")

            self.azure_client = AzureOpenAI(
                api_version=azure_api_version or "2024-12-01-preview",
                azure_endpoint=azure_endpoint,
                api_key=azure_api_key,
            )
            self.azure_deployment = azure_deployment or "text-embedding-3-small"
            self.use_azure_openai = True
            logger.info(f"Using Azure OpenAI embeddings (deployment: {self.azure_deployment})")
        else:
            # Kept for interface compatibility: this branch always raises,
            # so use_azure_openai=False is not actually a supported mode.
            self.use_azure_openai = False
            raise ValueError("Azure OpenAI credentials must be provided")

        # Create collection if it doesn't exist
        try:
            self.client.get_collection(collection_name)
            logger.info(f"Loaded existing collection: {collection_name}")
        except Exception:
            # Create new collection
            # text-embedding-3-small has 1536 dimensions
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(
                    size=1536, distance=Distance.COSINE  # text-embedding-3-small
                ),
            )
            logger.info(f"Created new collection: {collection_name}")

    def _generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of texts, preserving input order."""
        if not self.use_azure_openai:
            raise ValueError("Azure OpenAI is not configured")

        response = self.azure_client.embeddings.create(
            input=texts, model=self.azure_deployment, timeout=60.0
        )

        # The API may return items out of order; place each by its index.
        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
        for item in response.data:
            embeddings[item.index] = item.embedding

        return embeddings

    def add_documents(
        self,
        documents: List[str],
        ids: Optional[List[Union[str, int, uuid.UUID]]] = None,
        metadatas: Optional[List[Dict]] = None,
    ):
        """Embed documents and upsert them into the collection.

        Args:
            documents: Raw document texts.
            ids: Optional point IDs. Strings that are not valid UUIDs are
                silently replaced with fresh UUIDs (the original string is
                lost; payload["original_id"] records the replacement).
            metadatas: Optional per-document payload extras.
        """
        if ids is None:
            # Generate UUID for each document
            ids = [uuid.uuid4() for _ in range(len(documents))]
        else:
            # Convert string IDs to UUID if needed
            converted_ids: List[Union[str, int, uuid.UUID]] = []
            for id_val in ids:
                if isinstance(id_val, str):
                    try:
                        converted_ids.append(uuid.UUID(id_val))
                    except ValueError:
                        # If not UUID, generate new UUID
                        converted_ids.append(uuid.uuid4())
                elif isinstance(id_val, int):
                    converted_ids.append(id_val)
                else:
                    converted_ids.append(id_val)
            ids = converted_ids

        if metadatas is None:
            # One dict per document — `[{}] * n` would share a single mutable
            # dict across every document.
            metadatas = [{} for _ in documents]

        logger.info(f"Generating embeddings for {len(documents)} documents...")
        embeddings = self._generate_embeddings(documents)
        logger.info(f"Generated {len(embeddings)} embeddings")

        logger.info("Saving to Qdrant...")
        points = [
            PointStruct(
                id=ids[i],
                vector=embeddings[i],
                payload={
                    "document": documents[i],
                    "original_id": str(ids[i]),  # Save original ID in payload
                    **metadatas[i],
                },
            )
            for i in range(len(documents))
        ]

        self.client.upsert(collection_name=self.collection_name, points=points)
        logger.info(f"Added {len(documents)} documents to collection")

    def search(self, query: str, n_results: int = 5) -> List[Dict]:
        """Search for documents similar to `query`.

        Returns a list of dicts with keys: id, document, metadata, distance.
        `distance` is None for results coming from the scroll fallback path.
        """
        logger.debug(f"Generating embedding for query: {query[:50]}...")
        query_embedding = self._generate_embeddings([query])[0]

        logger.debug("Searching in Qdrant...")
        # Use search API (works with Qdrant 1.8.4+)
        try:
            # Try search method first (standard API)
            results = self.client.search(
                collection_name=self.collection_name, query_vector=query_embedding, limit=n_results
            )
        except Exception as e:  # was `(AttributeError, Exception)` — redundant
            # Fallback for older Qdrant versions or different API
            logger.warning(f"Search method failed ({e}), trying alternative API...")
            try:
                # Try scroll with vectors (for manual similarity calculation)
                scroll_results = self.client.scroll(
                    collection_name=self.collection_name,
                    limit=100,  # Get more points to calculate similarity
                    with_payload=True,
                    with_vectors=True,  # Need vectors for similarity calculation
                )
                # Calculate distances manually (simple cosine similarity)
                import numpy as np

                query_vec = np.array(query_embedding)
                scored_results = []
                for point in scroll_results[0]:
                    if point.vector:
                        point_vec = np.array(point.vector)
                        # Cosine similarity
                        similarity = np.dot(query_vec, point_vec) / (
                            np.linalg.norm(query_vec) * np.linalg.norm(point_vec)
                        )
                        scored_results.append((point, similarity))

                # Sort by similarity and take top n
                scored_results.sort(key=lambda x: x[1], reverse=True)
                results = [point for point, score in scored_results[:n_results]]
            except Exception as e2:
                logger.error(f"All search methods failed: {e2}")
                return []

        formatted_results = []
        for point in results:
            # getattr covers both search results (have .score) and scroll
            # points (no .score); the old hasattr/getattr chain was tautological.
            point_id = getattr(point, "id", None)
            point_payload = getattr(point, "payload", {}) or {}
            point_score = getattr(point, "score", None)

            formatted_results.append(
                {
                    "id": str(point_id),  # Convert UUID to string
                    "document": point_payload.get("document", ""),
                    "metadata": {
                        k: v
                        for k, v in point_payload.items()
                        if k not in ["document", "original_id"]
                    },
                    "distance": point_score,
                }
            )

        logger.debug(f"Found {len(formatted_results)} results")
        return formatted_results

    def count(self) -> int:
        """Return number of documents in collection."""
        info = self.client.get_collection(self.collection_name)
        return info.points_count

    def get_all(self) -> List[Dict]:
        """Get all documents from collection (up to 10000)."""
        results = self.client.scroll(
            collection_name=self.collection_name, limit=10000  # Large limit
        )

        formatted_results = []
        for point in results[0]:  # results[0] is list of points
            formatted_results.append(
                {
                    "id": point.id,
                    "document": point.payload.get("document", ""),
                    "metadata": {k: v for k, v in point.payload.items() if k != "document"},
                }
            )

        return formatted_results

__init__(collection_name='recruitment_knowledge_base', use_azure_openai=False, azure_endpoint=None, azure_api_key=None, azure_deployment=None, azure_api_version=None, qdrant_path=None, qdrant_host=None, qdrant_port=None)

Initialize Qdrant RAG service.

Parameters:

Name Type Description Default
collection_name str

Collection name

'recruitment_knowledge_base'
use_azure_openai bool

Whether to use Azure OpenAI embeddings

False
azure_endpoint Optional[str]

Azure OpenAI endpoint URL

None
azure_api_key Optional[str]

Azure OpenAI API key

None
azure_deployment Optional[str]

Deployment name (e.g., "text-embedding-3-small")

None
azure_api_version Optional[str]

Azure OpenAI API version

None
qdrant_path Optional[str]

Path to local Qdrant database (None = in-memory or server)

None
qdrant_host Optional[str]

Qdrant server hostname (e.g., "qdrant" or "localhost")

None
qdrant_port Optional[int]

Qdrant server port (default: 6333)

None
Source code in services/qdrant_service.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def __init__(
    self,
    collection_name: str = "recruitment_knowledge_base",
    use_azure_openai: bool = False,
    azure_endpoint: Optional[str] = None,
    azure_api_key: Optional[str] = None,
    azure_deployment: Optional[str] = None,
    azure_api_version: Optional[str] = None,
    qdrant_path: Optional[str] = None,
    qdrant_host: Optional[str] = None,
    qdrant_port: Optional[int] = None,
):
    """
    Initialize Qdrant RAG service.

    Args:
        collection_name: Collection name
        use_azure_openai: Whether to use Azure OpenAI embeddings
        azure_endpoint: Azure OpenAI endpoint URL
        azure_api_key: Azure OpenAI API key
        azure_deployment: Deployment name (e.g., "text-embedding-3-small")
        azure_api_version: Azure OpenAI API version
        qdrant_path: Path to local Qdrant database (None = in-memory or server)
        qdrant_host: Qdrant server hostname (e.g., "qdrant" or "localhost")
        qdrant_port: Qdrant server port (default: 6333)

    Raises:
        ImportError: Azure OpenAI requested but openai is not installed.
        ValueError: Azure OpenAI credentials missing — there is no
            local-embedding fallback, so they are effectively required.
        RuntimeError: Local Qdrant path is locked by another process.
    """
    # Initialize Qdrant client
    # Priority: server (host+port) > local path > in-memory
    if qdrant_host:
        # Connect to Qdrant server
        qdrant_port = qdrant_port or 6333
        self.client = QdrantClient(host=qdrant_host, port=qdrant_port)
        logger.info(f"Qdrant server connection: {qdrant_host}:{qdrant_port}")
    elif qdrant_path:
        try:
            self.client = QdrantClient(path=qdrant_path)
            logger.info(f"Qdrant local database: {qdrant_path}")
        except RuntimeError as e:
            # File-based Qdrant allows only one client at a time; surface a
            # clearer message for the common lock-contention failure.
            if "already accessed by another instance" in str(e) or "AlreadyLocked" in str(e):
                error_msg = (
                    f"Qdrant database at {qdrant_path} is already locked by another instance. "
                    "Close other Qdrant clients (e.g., app.py) before accessing. "
                    "Consider using Qdrant server (qdrant_host/qdrant_port) instead."
                )
                logger.error(error_msg)
                raise RuntimeError(error_msg) from e
            else:
                raise
    else:
        self.client = QdrantClient(":memory:")  # In-memory
        logger.info("Qdrant in-memory database")

    self.collection_name = collection_name

    # Initialize Azure OpenAI if needed
    if use_azure_openai and azure_endpoint and azure_api_key:
        if not AZURE_OPENAI_AVAILABLE:
            raise ImportError("openai is not installed. Run: pip install openai")

        self.azure_client = AzureOpenAI(
            api_version=azure_api_version or "2024-12-01-preview",
            azure_endpoint=azure_endpoint,
            api_key=azure_api_key,
        )
        self.azure_deployment = azure_deployment or "text-embedding-3-small"
        self.use_azure_openai = True
        logger.info(f"Using Azure OpenAI embeddings (deployment: {self.azure_deployment})")
    else:
        # NOTE(review): this branch always raises, so use_azure_openai=False
        # is not actually a supported mode despite being the default.
        self.use_azure_openai = False
        raise ValueError("Azure OpenAI credentials must be provided")

    # Create collection if it doesn't exist
    try:
        self.client.get_collection(collection_name)
        logger.info(f"Loaded existing collection: {collection_name}")
    except Exception:
        # Create new collection
        # text-embedding-3-small has 1536 dimensions
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=1536, distance=Distance.COSINE  # text-embedding-3-small
            ),
        )
        logger.info(f"Created new collection: {collection_name}")

add_documents(documents, ids=None, metadatas=None)

Add documents to collection.

Source code in services/qdrant_service.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def add_documents(
    self,
    documents: List[str],
    ids: Optional[List[Union[str, int, uuid.UUID]]] = None,
    metadatas: Optional[List[Dict]] = None,
):
    """Embed documents and upsert them into the collection.

    Args:
        documents: Raw document texts.
        ids: Optional point IDs. Strings that are not valid UUIDs are
            silently replaced with fresh UUIDs (the original string is lost;
            payload["original_id"] records the replacement value).
        metadatas: Optional per-document payload extras.
    """
    if ids is None:
        # Generate UUID for each document
        ids = [uuid.uuid4() for _ in range(len(documents))]
    else:
        # Convert string IDs to UUID if needed
        converted_ids: List[Union[str, int, uuid.UUID]] = []
        for id_val in ids:
            if isinstance(id_val, str):
                try:
                    converted_ids.append(uuid.UUID(id_val))
                except ValueError:
                    # If not UUID, generate new UUID
                    converted_ids.append(uuid.uuid4())
            elif isinstance(id_val, int):
                converted_ids.append(id_val)
            else:
                converted_ids.append(id_val)
        ids = converted_ids

    if metadatas is None:
        # One dict per document. The previous `[{}] * len(documents)` made
        # every entry alias a single mutable dict.
        metadatas = [{} for _ in documents]

    logger.info(f"Generating embeddings for {len(documents)} documents...")
    embeddings = self._generate_embeddings(documents)
    logger.info(f"Generated {len(embeddings)} embeddings")

    logger.info("Saving to Qdrant...")
    points = [
        PointStruct(
            id=ids[i],
            vector=embeddings[i],
            payload={
                "document": documents[i],
                "original_id": str(ids[i]),  # Save original ID in payload
                **metadatas[i],
            },
        )
        for i in range(len(documents))
    ]

    self.client.upsert(collection_name=self.collection_name, points=points)
    logger.info(f"Added {len(documents)} documents to collection")

count()

Return number of documents in collection.

Source code in services/qdrant_service.py
245
246
247
248
def count(self) -> int:
    """Return the number of points currently stored in the collection."""
    collection_info = self.client.get_collection(self.collection_name)
    return collection_info.points_count

get_all()

Get all documents from collection.

Source code in services/qdrant_service.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def get_all(self) -> List[Dict]:
    """
    Return every document in the collection.

    Pages through the collection with Qdrant's scroll API, following the
    next-page offset token until it is exhausted. The previous single
    `scroll(limit=10000)` call silently truncated collections larger
    than one page.

    Returns:
        List of dicts with keys "id", "document" and "metadata".
    """
    formatted_results: List[Dict] = []
    offset = None
    while True:
        points, offset = self.client.scroll(
            collection_name=self.collection_name,
            limit=1000,  # page size; looping makes the total unbounded
            offset=offset,
        )
        for point in points:
            formatted_results.append(
                {
                    "id": point.id,
                    "document": point.payload.get("document", ""),
                    "metadata": {k: v for k, v in point.payload.items() if k != "document"},
                }
            )
        # Qdrant returns None when there are no further pages.
        if offset is None:
            break

    return formatted_results

search(query, n_results=5)

Search for similar documents.

Source code in services/qdrant_service.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def search(self, query: str, n_results: int = 5) -> List[Dict]:
    """
    Search the collection for documents similar to `query`.

    Args:
        query: Free-text query; embedded with the same model as the
            stored documents.
        n_results: Maximum number of hits to return.

    Returns:
        List of dicts with keys "id", "document", "metadata" and
        "distance" (similarity score; None when the fallback path
        cannot provide one). Empty list if every search strategy fails.
    """
    logger.debug(f"Generating embedding for query: {query[:50]}...")
    query_embedding = self._generate_embeddings([query])[0]

    logger.debug("Searching in Qdrant...")
    try:
        # Standard search API (works with Qdrant 1.8.4+).
        results = self.client.search(
            collection_name=self.collection_name, query_vector=query_embedding, limit=n_results
        )
    except Exception as e:
        # Fallback for older Qdrant versions or a different client API.
        # (Exception alone suffices; it already covers AttributeError.)
        logger.warning(f"Search method failed ({e}), trying alternative API...")
        try:
            # Scroll points together with their vectors and rank manually.
            # NOTE(review): only the first 100 points are considered here,
            # so fallback results may be incomplete for larger collections.
            scroll_results = self.client.scroll(
                collection_name=self.collection_name,
                limit=100,  # Get more points to calculate similarity
                with_payload=True,
                with_vectors=True,  # Need vectors for similarity calculation
            )
            import numpy as np

            query_vec = np.array(query_embedding)
            scored_results = []
            for point in scroll_results[0]:
                if point.vector:
                    point_vec = np.array(point.vector)
                    # Cosine similarity between the query and stored vector.
                    similarity = np.dot(query_vec, point_vec) / (
                        np.linalg.norm(query_vec) * np.linalg.norm(point_vec)
                    )
                    scored_results.append((point, similarity))

            # Keep the n_results most similar points.
            scored_results.sort(key=lambda item: item[1], reverse=True)
            results = [point for point, _score in scored_results[:n_results]]
        except Exception as e2:
            logger.error(f"All search methods failed: {e2}")
            return []

    formatted_results = []
    for point in results:
        # Works for both search hits (which carry `score`) and scroll
        # records (which do not): missing attributes fall back to defaults.
        point_payload = getattr(point, "payload", None) or {}
        formatted_results.append(
            {
                "id": str(getattr(point, "id", None)),  # Convert UUID to string
                "document": point_payload.get("document", ""),
                "metadata": {
                    k: v
                    for k, v in point_payload.items()
                    if k not in ("document", "original_id")
                },
                "distance": getattr(point, "score", None),
            }
        )

    logger.debug(f"Found {len(formatted_results)} results")
    return formatted_results

Configuration

Bases: BaseSettings

Application settings loaded from environment variables.

Source code in config/settings.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and an optional `.env` file;
    unknown variables are ignored (see `model_config` below).
    """

    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore"
    )

    # LLM provider selection
    # azure  – current default, uses Azure OpenAI
    # openai – optional, uses api.openai.com via OpenAI official client
    llm_provider: str = "azure"

    # Azure OpenAI Configuration (single source of truth for models)
    # This is the default and currently most supported provider.
    azure_openai_api_key: Optional[str] = None
    azure_openai_endpoint: str = "https://openai-agentai-pl.openai.azure.com/"
    azure_openai_api_version: str = "2024-12-01-preview"
    # IMPORTANT: these names MUST match deployment names in Azure
    azure_openai_gpt_deployment: str = "gpt-5-mini"
    azure_openai_vision_deployment: str = "gpt-5-nano"

    # Alias for the current text model – usually points to the Azure
    # deployment; can also be used as a logical model name for
    # other providers (e.g. OpenAI) when LLM_PROVIDER=openai.
    openai_model: str = "gpt-5-nano"
    openai_vision_model: str = "gpt-5-nano"

    # Temperature / timeout configuration shared by all agents
    openai_temperature: float = 1.0
    openai_feedback_temperature: float = 0.7
    openai_timeout: int = 600
    openai_max_retries: int = 2

    # OCR Configuration
    use_ocr: bool = False
    ocr_timeout: int = 600

    # PDF Processing
    max_text_length: int = 15000
    pdf_min_text_threshold: int = 100

    # Logging
    log_level: str = "INFO"
    verbose: bool = False

    # Email/SMTP Configuration
    email_username: Optional[str] = None  # Email username (for Gmail, Zoho, etc.)
    email_password: Optional[str] = None  # Email password or app password
    smtp_host: str = (
        "smtp.zoho.eu"  # Default to Zoho EU, can be changed to smtp.zoho.com or smtp.gmail.com
    )
    smtp_port: int = 587  # 587 for TLS, 465 for SSL
    smtp_use_tls: bool = True  # Use TLS (True for port 587, False for port 465 with SSL)

    # IMAP Configuration (for email monitoring)
    imap_host: str = (
        "imap.zoho.eu"  # Default to Zoho EU, can be changed to imap.zoho.com or imap.gmail.com
    )
    imap_port: int = 993  # 993 for SSL

    # Email routing configuration
    iod_email: Optional[str] = None  # data-protection-officer inbox
    hr_email: Optional[str] = None  # HR inbox
    email_check_interval: int = 60  # seconds
    email_monitor_enabled: bool = False

    # Privacy policy and information clause
    privacy_policy_url: Optional[str] = None  # URL to privacy policy / information clause
    company_website: Optional[str] = None  # Company website URL (optional)

    # Backward compatibility aliases (deprecated, use email_username/email_password)
    @property
    def gmail_username(self) -> Optional[str]:
        """Backward compatibility: returns email_username."""
        return self.email_username

    @property
    def gmail_password(self) -> Optional[str]:
        """Backward compatibility: returns email_password."""
        return self.email_password

    # OpenAI official (api.openai.com) configuration – used when llm_provider="openai"
    openai_api_key: Optional[str] = None
    openai_base_url: Optional[str] = None
    openai_chat_model: Optional[str] = None

    @property
    def api_key(self) -> str:
        """
        Returns the API key for **Azure OpenAI**.

        This property is intentionally Azure-specific and used by parts of the
        code that historically relied on `AZURE_OPENAI_API_KEY`. The new
        OpenAI (api.openai.com) support uses `openai_api_key` directly,
        without going through this property.

        Raises:
            ValueError: if `azure_openai_api_key` is not set.
        """
        if not self.azure_openai_api_key:
            raise ValueError(
                "AZURE_OPENAI_API_KEY not found. "
                "Add it to the .env file or environment variables."
            )
        return self.azure_openai_api_key

    @property
    def is_azure_configured(self) -> bool:
        """Check if Azure OpenAI is configured (both API key and endpoint set)."""
        return bool(self.azure_openai_api_key and self.azure_openai_endpoint)

    def model_post_init(self, __context) -> None:
        """
        Additional initialization after settings are loaded.

        If Azure OpenAI is configured (endpoint + api_key), we set:
        - environment variables expected by the OpenAI client (Azure mode),
        - openai_model to the Azure deployment name (azure_openai_gpt_deployment).
        This way agents still use the single field settings.openai_model,
        but actually refer to the Azure deployment.
        """
        if self.azure_openai_api_key and self.azure_openai_endpoint:
            # Configure Azure mode for the OpenAI library
            os.environ["OPENAI_API_KEY"] = self.azure_openai_api_key
            # Use endpoint from Azure portal (e.g. https://xxx.openai.azure.com)
            os.environ["OPENAI_API_BASE"] = self.azure_openai_endpoint
            os.environ["OPENAI_API_TYPE"] = "azure"
            os.environ["OPENAI_API_VERSION"] = self.azure_openai_api_version

            # If deployment name is defined – use it as the model
            if self.azure_openai_gpt_deployment:
                self.openai_model = self.azure_openai_gpt_deployment
            if self.azure_openai_vision_deployment:
                self.openai_vision_model = self.azure_openai_vision_deployment

api_key property

Returns the API key for Azure OpenAI.

This property is intentionally Azure-specific and used by parts of the code that historically relied on AZURE_OPENAI_API_KEY. The new OpenAI (api.openai.com) support uses openai_api_key directly, without going through this property.

gmail_password property

Backward compatibility: returns email_password.

gmail_username property

Backward compatibility: returns email_username.

is_azure_configured property

Check if Azure OpenAI is configured.

model_post_init(__context)

Additional initialization after settings are loaded.

If Azure OpenAI is configured (endpoint + api_key), this hook sets the environment variables expected by the OpenAI client (Azure mode), and points openai_model at the Azure deployment name (azure_openai_gpt_deployment). This way agents still use the single field settings.openai_model, but actually refer to the Azure deployment.

Source code in config/settings.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def model_post_init(self, __context) -> None:
    """
    Additional initialization after settings are loaded.

    If Azure OpenAI is configured (endpoint + api_key), we set:
    - environment variables expected by the OpenAI client (Azure mode),
    - openai_model to the Azure deployment name (azure_openai_gpt_deployment).
    This way agents still use the single field settings.openai_model,
    but actually refer to the Azure deployment.
    """
    if self.azure_openai_api_key and self.azure_openai_endpoint:
        # Configure Azure mode for the OpenAI library
        os.environ["OPENAI_API_KEY"] = self.azure_openai_api_key
        # Use endpoint from Azure portal (e.g. https://xxx.openai.azure.com)
        os.environ["OPENAI_API_BASE"] = self.azure_openai_endpoint
        os.environ["OPENAI_API_TYPE"] = "azure"
        os.environ["OPENAI_API_VERSION"] = self.azure_openai_api_version

        # If deployment name is defined – use it as the model
        if self.azure_openai_gpt_deployment:
            self.openai_model = self.azure_openai_gpt_deployment
        if self.azure_openai_vision_deployment:
            self.openai_vision_model = self.azure_openai_vision_deployment