Skip to content

Agents & Services

API reference – agents and services

This section is generated automatically from Python docstrings using mkdocstrings.

Agents

Bases: BaseAgent

Agent for generating personalized feedback to candidates.

Source code in agents/feedback_agent.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class FeedbackAgent(BaseAgent):
    """Agent for generating personalized feedback to candidates.

    Calls Azure OpenAI directly (no LangChain) and expects the model to
    return a single JSON object of the form ``{"html_content": "<html...>"}``
    containing the complete feedback email.
    """

    def __init__(
        self,
        model_name: str = settings.azure_openai_gpt_deployment,
        temperature: float = 0.7,
        api_key: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
    ):
        """
        Initialize Feedback Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name to use for generation.
            temperature: Sampling temperature (0.7 favors varied phrasing).
            api_key: Optional explicit API key; otherwise resolved by BaseAgent.
            timeout: Per-request timeout in seconds.
            max_retries: Retry count for transient API errors.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions (we no longer rely on LangChain parsers)
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "html_content": "<!DOCTYPE html>\\n<html>...complete email HTML...</html>"\n'
            "}\n\n"
            "- Do NOT return a JSON schema, OpenAPI schema, or description of fields.\n"
            "- Do NOT wrap the JSON in markdown code fences.\n"
            "- Do NOT include any extra top-level keys besides html_content.\n"
            "- The html_content value must be a valid HTML email as a single string."
        )

        # Store prompt template for later use
        self.prompt_template = FEEDBACK_GENERATION_PROMPT

    def generate_feedback(
        self,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        output_format: FeedbackFormat = FeedbackFormat.HTML,
        candidate_id: Optional[int] = None,
        recruitment_stage: Optional[str] = None,
    ) -> CandidateFeedback:
        """
        Generate personalized feedback for a candidate using Azure OpenAI.

        Args:
            cv_data: Parsed CV data for the candidate.
            hr_feedback: HR evaluation the email is based on.
            job_offer: Optional job offer context for the prompt.
            output_format: Desired output format (HTML by default).
            candidate_id: Optional candidate id used for response tracking.
            recruitment_stage: Optional stage label; defaults to
                "Pierwsza selekcja" when not provided.

        Returns:
            CandidateFeedback holding the generated HTML email.

        Raises:
            ValueError: If the model returns an empty response.
            RuntimeError: If the model output cannot be parsed into feedback.
        """
        # Format all inputs as prompt-ready strings.
        cv_data_str = self._format_cv_data(cv_data)
        # Extraction note is included only for feedback generation.
        hr_feedback_str = self._format_hr_feedback(hr_feedback, include_extraction_note=True)
        job_offer_str = (
            self._format_job_offer(job_offer)
            if job_offer
            else "No job offer information provided"
        )

        candidate_name = cv_data.full_name
        format_str = (
            output_format.value if isinstance(output_format, FeedbackFormat) else str(output_format)
        )
        recruitment_stage_str = recruitment_stage or "Pierwsza selekcja"

        # Build prompt with format instructions and output format
        prompt_text = self.prompt_template.format(
            cv_data=cv_data_str,
            hr_feedback=hr_feedback_str,
            job_offer=job_offer_str,
            candidate_name=candidate_name,
            recruitment_stage=recruitment_stage_str,
            format_instructions=self.format_instructions,
            output_format=format_str,
        )

        logger.info(
            f"Generating feedback for candidate: {candidate_name} (stage: {recruitment_stage_str})"
        )

        # Call LLM via adapter; transport errors propagate to the caller.
        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=4000,
        )

        if not raw_text:
            raise ValueError("Empty response from model")

        # Track model response (with token usage and cost). A tracking failure
        # must not discard an otherwise valid model output, so it is only
        # logged (previously it triggered a redundant fallback parse path).
        try:
            self._save_model_response(
                agent_type="feedback_generator",
                input_data={
                    "cv_data": cv_data_str,
                    "hr_feedback": hr_feedback_str,
                    "job_offer": job_offer_str,
                    "candidate_name": candidate_name,
                    "recruitment_stage": recruitment_stage_str,
                },
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata={"temperature": self.temperature},
                response=response,  # Pass response to extract tokens and costs
            )
        except Exception as tracking_error:
            logger.warning(f"Failed to track model response: {tracking_error}")

        # Parse JSON into CandidateFeedback. We intentionally do NOT re-run
        # the same parser after a parse failure (it would fail again
        # deterministically); instead we wrap the error with context.
        try:
            feedback = self._parse_feedback_from_text(raw_text)
        except Exception as parse_error:
            raise RuntimeError(f"Failed to parse feedback: {parse_error}") from parse_error

        logger.info(f"Feedback generated successfully for {candidate_name}")
        return feedback

    def _parse_feedback_from_text(self, text: str) -> CandidateFeedback:
        """
        Parse CandidateFeedback from raw model text, handling common JSON issues.

        Falls back to treating the whole text as HTML when no JSON object can
        be extracted.

        Raises:
            ValueError: On empty input or a missing/invalid html_content field.
        """
        if not text:
            raise ValueError("Empty response from model")

        # Try to parse JSON
        try:
            data = parse_json_safe(text, fallback_to_extraction=True)
        except ValueError:
            # No JSON at all – treat full text as HTML
            logger.warning("No JSON detected in model output, using raw text as HTML.")
            return CandidateFeedback(html_content=self._wrap_html_if_needed(text))

        # parse_json_safe may yield a non-object (e.g. a JSON array); treat
        # that the same as "no JSON" instead of crashing on .get().
        if not isinstance(data, dict):
            logger.warning("Model output JSON is not an object, using raw text as HTML.")
            return CandidateFeedback(html_content=self._wrap_html_if_needed(text))

        # Validate html_content
        html = data.get("html_content")
        if not html or not isinstance(html, str):
            raise ValueError("html_content field is required and must be a non-empty string")

        return CandidateFeedback(html_content=html)

    @staticmethod
    def _wrap_html_if_needed(content: str) -> str:
        """If content is not full HTML, wrap it in a minimal HTML template."""
        lower = content.lower()
        if "<html" in lower or "<body" in lower:
            return content

        return (
            "<!DOCTYPE html>\n"
            '<html lang="pl">\n'
            "<head>\n"
            '    <meta charset="UTF-8">\n'
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
            "    <title>Odpowiedź na aplikację</title>\n"
            "</head>\n"
            '<body style="font-family: Arial, sans-serif; line-height: 1.6; color: #333; '
            'max-width: 600px; margin: 0 auto; padding: 20px;">\n'
            f"    {content}\n"
            "</body>\n"
            "</html>"
        )

__init__(model_name=settings.azure_openai_gpt_deployment, temperature=0.7, api_key=None, timeout=120, max_retries=2)

Initialize Feedback Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/feedback_agent.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    model_name: str = settings.azure_openai_gpt_deployment,
    temperature: float = 0.7,
    api_key: Optional[str] = None,
    timeout: int = 120,
    max_retries: int = 2,
):
    """
    Initialize Feedback Agent using Azure OpenAI SDK (no LangChain).
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Hand-written output contract; no LangChain parser is involved anymore.
    instruction_lines = [
        "Return ONLY a single JSON object with the following structure:",
        "{",
        '  "html_content": "<!DOCTYPE html>\\n<html>...complete email HTML...</html>"',
        "}",
        "",
        "- Do NOT return a JSON schema, OpenAPI schema, or description of fields.",
        "- Do NOT wrap the JSON in markdown code fences.",
        "- Do NOT include any extra top-level keys besides html_content.",
        "- The html_content value must be a valid HTML email as a single string.",
    ]
    self.format_instructions = "\n".join(instruction_lines)

    # Keep the generation prompt around for later use by generate_feedback().
    self.prompt_template = FEEDBACK_GENERATION_PROMPT

generate_feedback(cv_data, hr_feedback, job_offer=None, output_format=FeedbackFormat.HTML, candidate_id=None, recruitment_stage=None)

Generate personalized feedback for a candidate using Azure OpenAI.

Source code in agents/feedback_agent.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def generate_feedback(
    self,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    output_format: FeedbackFormat = FeedbackFormat.HTML,
    candidate_id: Optional[int] = None,
    recruitment_stage: Optional[str] = None,
) -> CandidateFeedback:
    """
    Generate personalized feedback for a candidate using Azure OpenAI.

    Args:
        cv_data: Parsed CV data for the candidate.
        hr_feedback: HR evaluation the email is based on.
        job_offer: Optional job offer context for the prompt.
        output_format: Desired output format (HTML by default).
        candidate_id: Optional candidate id used for response tracking.
        recruitment_stage: Optional stage label; defaults to
            "Pierwsza selekcja" when not provided.

    Returns:
        CandidateFeedback holding the generated HTML email.

    Raises:
        ValueError: If the model returns an empty response.
        RuntimeError: If the model output cannot be parsed into feedback.
    """
    # Format all inputs as prompt-ready strings.
    cv_data_str = self._format_cv_data(cv_data)
    # Extraction note is included only for feedback generation.
    hr_feedback_str = self._format_hr_feedback(hr_feedback, include_extraction_note=True)
    job_offer_str = (
        self._format_job_offer(job_offer)
        if job_offer
        else "No job offer information provided"
    )

    candidate_name = cv_data.full_name
    format_str = (
        output_format.value if isinstance(output_format, FeedbackFormat) else str(output_format)
    )
    recruitment_stage_str = recruitment_stage or "Pierwsza selekcja"

    # Build prompt with format instructions and output format
    prompt_text = self.prompt_template.format(
        cv_data=cv_data_str,
        hr_feedback=hr_feedback_str,
        job_offer=job_offer_str,
        candidate_name=candidate_name,
        recruitment_stage=recruitment_stage_str,
        format_instructions=self.format_instructions,
        output_format=format_str,
    )

    logger.info(
        f"Generating feedback for candidate: {candidate_name} (stage: {recruitment_stage_str})"
    )

    # Call LLM via adapter; transport errors propagate to the caller.
    raw_text, response = self._chat(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a careful JSON-producing assistant. "
                    "You must follow the format_instructions exactly."
                ),
            },
            {"role": "user", "content": prompt_text},
        ],
        max_completion_tokens=4000,
    )

    if not raw_text:
        raise ValueError("Empty response from model")

    # Track model response (with token usage and cost). A tracking failure
    # must not discard an otherwise valid model output, so it is only
    # logged (previously it triggered a redundant fallback parse path).
    try:
        self._save_model_response(
            agent_type="feedback_generator",
            input_data={
                "cv_data": cv_data_str,
                "hr_feedback": hr_feedback_str,
                "job_offer": job_offer_str,
                "candidate_name": candidate_name,
                "recruitment_stage": recruitment_stage_str,
            },
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata={"temperature": self.temperature},
            response=response,  # Pass response to extract tokens and costs
        )
    except Exception as tracking_error:
        logger.warning(f"Failed to track model response: {tracking_error}")

    # Parse JSON into CandidateFeedback. We intentionally do NOT re-run the
    # same parser after a parse failure (it would fail again
    # deterministically); instead we wrap the error with context.
    try:
        feedback = self._parse_feedback_from_text(raw_text)
    except Exception as parse_error:
        raise RuntimeError(f"Failed to parse feedback: {parse_error}") from parse_error

    logger.info(f"Feedback generated successfully for {candidate_name}")
    return feedback

Bases: BaseAgent

Agent for validating candidate feedback emails.

Source code in agents/validation_agent.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class FeedbackValidatorAgent(BaseAgent):
    """Agent for validating candidate feedback emails.

    Asks the model to return a ValidationResult-shaped JSON object. Any
    failure in the validation pipeline yields a rejection, so the system
    fails safe rather than sending an unvalidated email.
    """

    def __init__(
        self,
        model_name: str = "gpt-4.1-nano",
        temperature: float = 0.0,
        api_key: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
    ):
        """
        Initialize Feedback Validator Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name for validation.
            temperature: Sampling temperature (0.0 for deterministic checks).
            api_key: Optional explicit API key; otherwise resolved by BaseAgent.
            timeout: Per-request timeout in seconds.
            max_retries: Retry count for transient API errors.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions describing ValidationResult JSON schema
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "status": "approved" | "rejected",\n'
            '  "is_approved": true | false,\n'
            '  "reasoning": "short explanation",\n'
            '  "issues_found": ["issue 1", "issue 2"],\n'
            '  "ethical_concerns": ["concern 1", "concern 2"],\n'
            '  "factual_errors": ["error 1", "error 2"],\n'
            '  "suggestions": ["suggestion 1", "suggestion 2"]\n'
            "}\n\n"
            "- Do NOT return a JSON schema or description.\n"
            "- Do NOT wrap the JSON in markdown code fences.\n"
            "- All list fields must be valid JSON arrays of strings."
        )

        # Store prompt template
        self.prompt_template = VALIDATION_PROMPT

    def validate_feedback(
        self,
        html_content: str,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        candidate_id: Optional[int] = None,
        validation_number: Optional[int] = None,
    ) -> ValidationResult:
        """
        Validate feedback email using Azure OpenAI (no LangChain).

        Never raises: if validation itself fails for any reason, a REJECTED
        ValidationResult is returned so the email is not sent unchecked.

        Args:
            html_content: The candidate feedback email HTML to validate.
            cv_data: Parsed CV data used as factual reference.
            hr_feedback: HR evaluation used as factual reference.
            job_offer: Optional job offer context.
            candidate_id: Optional candidate id for response tracking.
            validation_number: Optional attempt counter stored in metadata.
        """
        logger.info(f"Validating feedback email for: {cv_data.full_name}")

        # Format data for prompt
        cv_data_str = self._format_cv_data(cv_data)
        hr_feedback_str = self._format_hr_feedback(hr_feedback)
        job_offer_str = (
            self._format_job_offer(job_offer)
            if job_offer
            else "No job offer information provided"
        )

        # Build prompt with format instructions
        prompt_text = self.prompt_template.format(
            html_content=html_content,
            cv_data=cv_data_str,
            hr_feedback=hr_feedback_str,
            job_offer=job_offer_str,
            format_instructions=self.format_instructions,
        )

        # Run validation via LLM adapter (the "validating..." line was
        # previously logged twice; it is now logged only once above).
        try:
            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a careful JSON-producing validation assistant. "
                            "You must follow the format_instructions exactly."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
                max_completion_tokens=2000,
            )

            if not raw_text:
                raise ValueError("Empty response from model")

            # Track model response (with token usage and cost)
            metadata = {"temperature": self.temperature}
            if validation_number is not None:
                metadata["validation_number"] = validation_number

            self._save_model_response(
                agent_type="validator",
                input_data={
                    "html_content": html_content,
                    "cv_data": cv_data_str,
                    "hr_feedback": hr_feedback_str,
                    "job_offer": job_offer_str,
                },
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata=metadata,
                response=response,  # Pass response to extract tokens and costs
            )

            validation_result = self._parse_validation_from_text(raw_text)
            logger.info(
                f"Validation completed for {cv_data.full_name}: {validation_result.status.value}"
            )
            return validation_result
        except Exception as e:
            logger.error(f"Failed to validate feedback: {str(e)}", exc_info=True)

            # On validation failure, reject by default for safety
            return ValidationResult(
                status=ValidationStatus.REJECTED,
                is_approved=False,
                reasoning=f"Validation process failed: {str(e)}. Email rejected for safety.",
                issues_found=["Validation process error"],
                ethical_concerns=[],
                factual_errors=[],
                suggestions=["Please review the validation process and try again."],
            )

    def _parse_validation_from_text(self, text: str) -> ValidationResult:
        """
        Parse ValidationResult from raw model text, handling common JSON issues.

        Raises:
            ValueError: On empty input or when the output is not a JSON object
                (the caller converts this into a safe rejection).
        """
        if not text:
            raise ValueError("Empty response from model")

        # Parse JSON with fallback extraction
        data = parse_json_safe(text, fallback_to_extraction=True)
        # parse_json_safe may yield a bare array/scalar; reject explicitly
        # instead of crashing with AttributeError on .get().
        if not isinstance(data, dict):
            raise ValueError("Model output is not a JSON object")

        # Map to ValidationResult, with fail-safe defaults (rejection)
        status_str = data.get("status", "rejected")
        status = (
            ValidationStatus(status_str)
            if status_str in ("approved", "rejected")
            else ValidationStatus.REJECTED
        )
        is_approved = bool(data.get("is_approved", False))
        reasoning = data.get("reasoning") or "No reasoning provided."

        def ensure_str_list(value):
            # Normalize None / scalar / list into a list of strings.
            if isinstance(value, list):
                return [str(v) for v in value]
            if not value:
                return []
            return [str(value)]

        return ValidationResult(
            status=status,
            is_approved=is_approved,
            reasoning=str(reasoning),
            issues_found=ensure_str_list(data.get("issues_found") or []),
            ethical_concerns=ensure_str_list(data.get("ethical_concerns") or []),
            factual_errors=ensure_str_list(data.get("factual_errors") or []),
            suggestions=ensure_str_list(data.get("suggestions") or []),
        )

__init__(model_name='gpt-4.1-nano', temperature=0.0, api_key=None, timeout=120, max_retries=2)

Initialize Feedback Validator Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/validation_agent.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def __init__(
    self,
    model_name: str = "gpt-4.1-nano",
    temperature: float = 0.0,
    api_key: Optional[str] = None,
    timeout: int = 120,
    max_retries: int = 2,
):
    """
    Initialize Feedback Validator Agent using Azure OpenAI SDK (no LangChain).
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Hand-written contract describing the expected ValidationResult JSON.
    schema_lines = [
        "Return ONLY a single JSON object with the following structure:",
        "{",
        '  "status": "approved" | "rejected",',
        '  "is_approved": true | false,',
        '  "reasoning": "short explanation",',
        '  "issues_found": ["issue 1", "issue 2"],',
        '  "ethical_concerns": ["concern 1", "concern 2"],',
        '  "factual_errors": ["error 1", "error 2"],',
        '  "suggestions": ["suggestion 1", "suggestion 2"]',
        "}",
        "",
        "- Do NOT return a JSON schema or description.",
        "- Do NOT wrap the JSON in markdown code fences.",
        "- All list fields must be valid JSON arrays of strings.",
    ]
    self.format_instructions = "\n".join(schema_lines)

    # Keep the validation prompt around for validate_feedback().
    self.prompt_template = VALIDATION_PROMPT

validate_feedback(html_content, cv_data, hr_feedback, job_offer=None, candidate_id=None, validation_number=None)

Validate feedback email using Azure OpenAI (no LangChain).

Source code in agents/validation_agent.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def validate_feedback(
    self,
    html_content: str,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    candidate_id: Optional[int] = None,
    validation_number: Optional[int] = None,
) -> ValidationResult:
    """
    Validate feedback email using Azure OpenAI (no LangChain).

    Never raises: if validation itself fails for any reason, a REJECTED
    ValidationResult is returned so the email is not sent unchecked.

    Args:
        html_content: The candidate feedback email HTML to validate.
        cv_data: Parsed CV data used as factual reference.
        hr_feedback: HR evaluation used as factual reference.
        job_offer: Optional job offer context.
        candidate_id: Optional candidate id for response tracking.
        validation_number: Optional attempt counter stored in metadata.
    """
    logger.info(f"Validating feedback email for: {cv_data.full_name}")

    # Format data for prompt
    cv_data_str = self._format_cv_data(cv_data)
    hr_feedback_str = self._format_hr_feedback(hr_feedback)
    job_offer_str = (
        self._format_job_offer(job_offer)
        if job_offer
        else "No job offer information provided"
    )

    # Build prompt with format instructions
    prompt_text = self.prompt_template.format(
        html_content=html_content,
        cv_data=cv_data_str,
        hr_feedback=hr_feedback_str,
        job_offer=job_offer_str,
        format_instructions=self.format_instructions,
    )

    # Run validation via LLM adapter (the "validating..." line was
    # previously logged twice; it is now logged only once above).
    try:
        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing validation assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=2000,
        )

        if not raw_text:
            raise ValueError("Empty response from model")

        # Track model response (with token usage and cost)
        metadata = {"temperature": self.temperature}
        if validation_number is not None:
            metadata["validation_number"] = validation_number

        self._save_model_response(
            agent_type="validator",
            input_data={
                "html_content": html_content,
                "cv_data": cv_data_str,
                "hr_feedback": hr_feedback_str,
                "job_offer": job_offer_str,
            },
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata=metadata,
            response=response,  # Pass response to extract tokens and costs
        )

        validation_result = self._parse_validation_from_text(raw_text)
        logger.info(
            f"Validation completed for {cv_data.full_name}: {validation_result.status.value}"
        )
        return validation_result
    except Exception as e:
        logger.error(f"Failed to validate feedback: {str(e)}", exc_info=True)

        # On validation failure, reject by default for safety
        return ValidationResult(
            status=ValidationStatus.REJECTED,
            is_approved=False,
            reasoning=f"Validation process failed: {str(e)}. Email rejected for safety.",
            issues_found=["Validation process error"],
            ethical_concerns=[],
            factual_errors=[],
            suggestions=["Please review the validation process and try again."],
        )

Bases: BaseAgent

Agent for correcting candidate feedback emails based on validation feedback.

Source code in agents/correction_agent.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class FeedbackCorrectionAgent(BaseAgent):
    """Agent for correcting candidate feedback emails based on validation feedback.

    Takes an HTML email that failed validation together with the validator's
    findings, asks the model for a corrected version, and parses the response
    into a CorrectedFeedback object with defensive fallbacks for malformed JSON.
    """

    def __init__(
        self,
        model_name: str = "gpt-4.1-nano",
        temperature: float = 0.3,
        api_key: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
    ):
        """
        Initialize Feedback Correction Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name to use for corrections.
            temperature: Sampling temperature for the correction model.
            api_key: Optional API key override; BaseAgent supplies defaults.
            timeout: Request timeout in seconds.
            max_retries: SDK-level retry count for transient failures.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions describing CorrectedFeedback schema.
        # Appended verbatim to the prompt instead of using a parser library.
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "html_content": "<!DOCTYPE html>\\n<html>...corrected email HTML...</html>",\n'
            '  "corrections_made": ["correction 1", "correction 2"],\n'
            '  "explanation": "short explanation of changes"\n'
            "}\n\n"
            "- corrections_made must be a list of strings, each describing one change.\n"
            "- Do NOT return a JSON schema or description.\n"
            "- Do NOT wrap the JSON in markdown code fences (no ```json).\n"
            '- CRITICAL for valid JSON: Inside the "html_content" string use escaped newlines only (\\n). '
            "Do NOT put literal line breaks inside the JSON string—that breaks parsing. "
            'Example: use "...Cześć,\\n\\nDziękujemy..." not "...Cześć,\n\nDziękujemy...".'
        )

        # Store prompt template
        self.prompt_template = CORRECTION_PROMPT

    def correct_feedback(
        self,
        original_html: str,
        validation_result: ValidationResult,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        candidate_id: Optional[int] = None,
        correction_number: Optional[int] = None,
    ) -> CorrectedFeedback:
        """
        Correct feedback email based on validation feedback using Azure OpenAI.

        Args:
            original_html: The HTML email that failed validation.
            validation_result: Validator output (reasoning, issues, concerns, errors).
            cv_data: Parsed candidate CV used to ground the correction.
            hr_feedback: HR evaluation of the candidate.
            job_offer: Optional job offer context; a placeholder string is used if absent.
            candidate_id: Optional candidate id used when tracking the model response.
            correction_number: Optional ordinal of this correction attempt (metadata only).

        Returns:
            CorrectedFeedback with the corrected HTML and the list of changes made.

        Raises:
            Exception: If the model call fails, returns an empty response,
                or the response cannot be parsed.
        """
        logger.info(f"Correcting feedback email for: {cv_data.full_name}")

        # Format data for prompt
        cv_data_str = self._format_cv_data(cv_data)
        hr_feedback_str = self._format_hr_feedback(hr_feedback)

        if job_offer:
            job_offer_str = self._format_job_offer(job_offer)
        else:
            job_offer_str = "No job offer information provided"

        # Format validation feedback as bullet lists (or "None" placeholders)
        issues_str = (
            "\n".join([f"- {issue}" for issue in validation_result.issues_found])
            if validation_result.issues_found
            else "None"
        )
        ethical_str = (
            "\n".join([f"- {concern}" for concern in validation_result.ethical_concerns])
            if validation_result.ethical_concerns
            else "None"
        )
        factual_str = (
            "\n".join([f"- {error}" for error in validation_result.factual_errors])
            if validation_result.factual_errors
            else "None"
        )

        # Build prompt with format instructions
        prompt_text = self.prompt_template.format(
            original_html=original_html,
            validation_reasoning=validation_result.reasoning,
            issues_found=issues_str,
            ethical_concerns=ethical_str,
            factual_errors=factual_str,
            cv_data=cv_data_str,
            hr_feedback=hr_feedback_str,
            job_offer=job_offer_str,
            format_instructions=self.format_instructions,
        )

        # Run correction (the start of this call was already logged above;
        # the previous duplicate log line here has been removed)
        try:
            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a careful JSON-producing correction assistant. "
                            "You must follow the format_instructions exactly."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
                max_completion_tokens=3000,
            )

            if not raw_text:
                raise ValueError("Empty response from model")

            # Track model response (with token usage and cost)
            metadata = {"temperature": self.temperature}
            if correction_number is not None:
                metadata["correction_number"] = correction_number

            self._save_model_response(
                agent_type="corrector",
                input_data={
                    "original_html": original_html,
                    "validation_reasoning": validation_result.reasoning,
                    "issues_found": issues_str,
                    "ethical_concerns": ethical_str,
                    "factual_errors": factual_str,
                    "cv_data": cv_data_str,
                    "hr_feedback": hr_feedback_str,
                    "job_offer": job_offer_str,
                },
                output_data=raw_text,
                candidate_id=candidate_id,
                response=response,  # Pass response to extract tokens and costs
                metadata=metadata,
            )

            corrected_feedback = self._parse_correction_from_text(raw_text)
            logger.info(
                f"Correction completed for {cv_data.full_name}. "
                f"Corrections made: {len(corrected_feedback.corrections_made)}"
            )
            return corrected_feedback
        except Exception as e:
            error_msg = f"Failed to correct feedback: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise Exception(error_msg) from e

    def _parse_correction_from_text(self, text: str) -> CorrectedFeedback:
        """
        Parse CorrectedFeedback from raw model text, handling common JSON issues.

        Fallback order: no JSON at all -> wrap raw text as HTML; JSON that is
        not an object -> wrap raw text as HTML; object that fails Pydantic
        validation on html_content -> wrap the html_content value; anything
        else re-raises as ValueError.
        """
        if not text:
            raise ValueError("Empty response from model")

        original_text = text

        # Try to parse JSON
        try:
            data = parse_json_safe(text, fallback_to_extraction=True)
        except ValueError:
            # No JSON at all – treat as plain HTML correction without metadata
            logger.warning("No JSON detected in correction output, using raw text as HTML.")
            return CorrectedFeedback(
                html_content=self._wrap_html_if_needed(original_text),
                corrections_made=["Used raw model output as HTML (no structured JSON)"],
                explanation="Model did not return valid JSON; raw output was wrapped as HTML.",
            )

        # Guard: valid JSON that is not an object (e.g. a bare list or string)
        # would crash the .get() calls below — fall back like the no-JSON case.
        if not isinstance(data, dict):
            logger.warning("Correction output JSON is not an object; using raw text as HTML.")
            return CorrectedFeedback(
                html_content=self._wrap_html_if_needed(original_text),
                corrections_made=["Used raw model output as HTML (no structured JSON)"],
                explanation="Model did not return a JSON object; raw output was wrapped as HTML.",
            )

        # Validate and coerce via Pydantic (ensures types + html_content is valid HTML)
        try:
            return CorrectedFeedback.model_validate(data)
        except PydanticValidationError as e:
            # If html_content failed (e.g. plain text without tags), wrap and retry
            html_raw = data.get("html_content") if isinstance(data.get("html_content"), str) else ""
            if html_raw and ("html_content" in str(e).lower() or "markup" in str(e).lower()):
                wrapped = self._wrap_html_if_needed(html_raw)
                logger.info(
                    "html_content did not pass HTML validation (e.g. plain text); wrapping in template."
                )
                raw_corrections = data.get("corrections_made")
                corrections = (
                    [str(c) for c in raw_corrections]
                    if isinstance(raw_corrections, list)
                    else [str(raw_corrections)] if raw_corrections else []
                )
                return CorrectedFeedback(
                    html_content=wrapped,
                    corrections_made=corrections,
                    explanation=str(data.get("explanation") or ""),
                )
            raise ValueError(f"CorrectedFeedback validation failed: {e}") from e

    @staticmethod
    def _wrap_html_if_needed(content: str) -> str:
        """If content is not full HTML, wrap it in a minimal HTML template."""
        lower = content.lower()
        # Already a full document (or at least has a body) — pass through untouched
        if "<html" in lower or "<body" in lower:
            return content

        return (
            "<!DOCTYPE html>\n"
            '<html lang="pl">\n'
            "<head>\n"
            '    <meta charset="UTF-8">\n'
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
            "    <title>Odpowiedź na aplikację</title>\n"
            "</head>\n"
            '<body style="font-family: Arial, sans-serif; line-height: 1.6; color: #333; '
            'max-width: 600px; margin: 0 auto; padding: 20px;">\n'
            f"    {content}\n"
            "</body>\n"
            "</html>"
        )

__init__(model_name='gpt-4.1-nano', temperature=0.3, api_key=None, timeout=120, max_retries=2)

Initialize Feedback Correction Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/correction_agent.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    model_name: str = "gpt-4.1-nano",
    temperature: float = 0.3,
    api_key: Optional[str] = None,
    timeout: int = 120,
    max_retries: int = 2,
):
    """
    Initialize Feedback Correction Agent using Azure OpenAI SDK (no LangChain).

    Args:
        model_name: Azure OpenAI deployment name to use for corrections.
        temperature: Sampling temperature for the correction model.
        api_key: Optional API key override; BaseAgent supplies defaults.
        timeout: Request timeout in seconds.
        max_retries: SDK-level retry count for transient failures.
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Static format instructions describing CorrectedFeedback schema
    # (appended verbatim to the prompt in place of a parser library)
    self.format_instructions = (
        "Return ONLY a single JSON object with the following structure:\n"
        "{\n"
        '  "html_content": "<!DOCTYPE html>\\n<html>...corrected email HTML...</html>",\n'
        '  "corrections_made": ["correction 1", "correction 2"],\n'
        '  "explanation": "short explanation of changes"\n'
        "}\n\n"
        "- corrections_made must be a list of strings, each describing one change.\n"
        "- Do NOT return a JSON schema or description.\n"
        "- Do NOT wrap the JSON in markdown code fences (no ```json).\n"
        '- CRITICAL for valid JSON: Inside the "html_content" string use escaped newlines only (\\n). '
        "Do NOT put literal line breaks inside the JSON string—that breaks parsing. "
        'Example: use "...Cześć,\\n\\nDziękujemy..." not "...Cześć,\n\nDziękujemy...".'
    )

    # Store prompt template
    self.prompt_template = CORRECTION_PROMPT

correct_feedback(original_html, validation_result, cv_data, hr_feedback, job_offer=None, candidate_id=None, correction_number=None)

Correct feedback email based on validation feedback using Azure OpenAI.

Source code in agents/correction_agent.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def correct_feedback(
    self,
    original_html: str,
    validation_result: ValidationResult,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    candidate_id: Optional[int] = None,
    correction_number: Optional[int] = None,
) -> CorrectedFeedback:
    """
    Correct feedback email based on validation feedback using Azure OpenAI.

    Args:
        original_html: The HTML email that failed validation.
        validation_result: Validator output (reasoning, issues, concerns, errors).
        cv_data: Parsed candidate CV used to ground the correction.
        hr_feedback: HR evaluation of the candidate.
        job_offer: Optional job offer context; a placeholder string is used if absent.
        candidate_id: Optional candidate id used when tracking the model response.
        correction_number: Optional ordinal of this correction attempt (metadata only).

    Returns:
        CorrectedFeedback with the corrected HTML and the list of changes made.

    Raises:
        Exception: If the model call fails, returns an empty response,
            or the response cannot be parsed.
    """
    logger.info(f"Correcting feedback email for: {cv_data.full_name}")

    # Format data for prompt
    cv_data_str = self._format_cv_data(cv_data)
    hr_feedback_str = self._format_hr_feedback(hr_feedback)

    if job_offer:
        job_offer_str = self._format_job_offer(job_offer)
    else:
        job_offer_str = "No job offer information provided"

    # Format validation feedback as bullet lists (or "None" placeholders)
    issues_str = (
        "\n".join([f"- {issue}" for issue in validation_result.issues_found])
        if validation_result.issues_found
        else "None"
    )
    ethical_str = (
        "\n".join([f"- {concern}" for concern in validation_result.ethical_concerns])
        if validation_result.ethical_concerns
        else "None"
    )
    factual_str = (
        "\n".join([f"- {error}" for error in validation_result.factual_errors])
        if validation_result.factual_errors
        else "None"
    )

    # Build prompt with format instructions
    prompt_text = self.prompt_template.format(
        original_html=original_html,
        validation_reasoning=validation_result.reasoning,
        issues_found=issues_str,
        ethical_concerns=ethical_str,
        factual_errors=factual_str,
        cv_data=cv_data_str,
        hr_feedback=hr_feedback_str,
        job_offer=job_offer_str,
        format_instructions=self.format_instructions,
    )

    # Run correction
    try:
        # NOTE(review): duplicate of the log line at the top of this method
        logger.info(f"Correcting feedback email for: {cv_data.full_name}")

        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing correction assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=3000,
        )

        if not raw_text:
            raise ValueError("Empty response from model")

        # Track model response (with token usage and cost)
        metadata = {"temperature": self.temperature}
        if correction_number is not None:
            metadata["correction_number"] = correction_number

        self._save_model_response(
            agent_type="corrector",
            input_data={
                "original_html": original_html,
                "validation_reasoning": validation_result.reasoning,
                "issues_found": issues_str,
                "ethical_concerns": ethical_str,
                "factual_errors": factual_str,
                "cv_data": cv_data_str,
                "hr_feedback": hr_feedback_str,
                "job_offer": job_offer_str,
            },
            output_data=raw_text,
            candidate_id=candidate_id,
            response=response,  # Pass response to extract tokens and costs
            metadata=metadata,
        )

        corrected_feedback = self._parse_correction_from_text(raw_text)
        logger.info(
            f"Correction completed for {cv_data.full_name}. "
            f"Corrections made: {len(corrected_feedback.corrections_made)}"
        )
        return corrected_feedback
    except Exception as e:
        error_msg = f"Failed to correct feedback: {str(e)}"
        logger.error(error_msg, exc_info=True)
        raise Exception(error_msg) from e

Bases: BaseAgent

Agent for parsing CV information from PDF files.

Source code in agents/cv_parser_agent.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
class CVParserAgent(BaseAgent):
    """Agent for parsing CV information from PDF files.

    Extracts raw text from a PDF (OCR, when enabled, is handled by the PDF
    reader — the LLM vision model is no longer used here), asks an Azure
    OpenAI model to return structured JSON, and coerces the model's
    loosely-shaped output into the CVData schema.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        temperature: Optional[float] = None,
        api_key: Optional[str] = None,
        vision_model_name: Optional[str] = None,
        use_ocr: bool = True,
        timeout: int = 240,
        max_retries: int = 2,
    ):
        """
        Initialize CV Parser Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Azure OpenAI deployment name; defaults to settings.openai_model.
            temperature: Sampling temperature; defaults to settings.openai_temperature,
                then clamped to a minimum of 1.0 (see below).
            api_key: Optional API key override; BaseAgent supplies defaults.
            vision_model_name: Kept for logging only; no vision model is created.
            use_ocr: Whether the PDF reader should apply OCR during text extraction.
            timeout: Request timeout in seconds.
            max_retries: SDK-level retry count for transient failures.
        """
        # Store model names and config for logging
        model_name_to_use = model_name or settings.openai_model
        temperature_to_use = temperature if temperature is not None else settings.openai_temperature

        # Ensure temperature is at least 1.0 for Azure (some deployments reject 0.0)
        # NOTE(review): this also overrides any caller-requested value below 1.0
        if temperature_to_use is None or temperature_to_use < 1.0:
            temperature_to_use = 1.0

        super().__init__(model_name_to_use, temperature_to_use, api_key, timeout, max_retries)

        # OCR: we no longer use an LLM-based vision model here; rely on pdf_reader defaults
        self.use_ocr = use_ocr
        self.vision_model = None
        self.vision_model_name = vision_model_name

        # We don't use PydanticOutputParser anymore; parsing is done manually via JSON + _transform_llm_response.

    def _transform_llm_response(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Transform LLM response to match Pydantic model structure.
        Handles common field name mismatches.

        Args:
            data: Raw JSON object returned by the model (shape varies by run).

        Returns:
            A dict whose keys match the CVData schema, ready for CVData(**...).
        """
        transformed = {}

        # Handle nested personal_information
        # NOTE(review): assumes personal_information is a dict — TODO confirm upstream JSON shape
        if "personal_information" in data:
            personal = data["personal_information"]
            transformed["full_name"] = personal.get("full_name", "")
            transformed["email"] = personal.get("email") or personal.get("email_address")
            transformed["phone"] = personal.get("phone") or personal.get("phone_number")
            transformed["location"] = personal.get("location")
            transformed["linkedin"] = personal.get("linkedin")
            transformed["github"] = personal.get("github")
            transformed["portfolio"] = personal.get("portfolio") or personal.get(
                "portfolio_website"
            )
        else:
            transformed["full_name"] = data.get("full_name", "")
            transformed["email"] = data.get("email")
            transformed["phone"] = data.get("phone")
            transformed["location"] = data.get("location")
            transformed["linkedin"] = data.get("linkedin")
            transformed["github"] = data.get("github")
            transformed["portfolio"] = data.get("portfolio")

        # Handle summary
        transformed["summary"] = data.get("summary") or data.get("professional_summary")

        # Transform education (accepts either institution/institution_name, degree/degree_obtained)
        education_list = data.get("education", [])
        transformed["education"] = []
        for edu in education_list:
            transformed["education"].append(
                {
                    "institution": edu.get("institution") or edu.get("institution_name", ""),
                    "degree": edu.get("degree") or edu.get("degree_obtained", ""),
                    "field_of_study": edu.get("field_of_study"),
                    "start_date": edu.get("start_date"),
                    "end_date": edu.get("end_date"),
                    "gpa": edu.get("gpa"),
                    "honors": edu.get("honors"),
                }
            )

        # Transform experience (accepts experience/work_experience key variants)
        experience_list = data.get("experience", []) or data.get("work_experience", [])
        transformed["experience"] = []
        for exp in experience_list:
            # achievements may arrive as a string, a list, or something else entirely
            achievements = exp.get("achievements") or exp.get("key_achievements")
            if isinstance(achievements, str):
                achievements = [achievements] if achievements else []
            elif not isinstance(achievements, list):
                achievements = []

            transformed["experience"].append(
                {
                    "company": exp.get("company") or exp.get("company_name", ""),
                    "position": exp.get("position") or exp.get("job_title", ""),
                    "start_date": exp.get("start_date"),
                    "end_date": exp.get("end_date"),
                    "description": exp.get("description") or exp.get("job_description"),
                    "achievements": achievements,
                }
            )

        # Transform skills - handle both list and dict formats
        skills_data = data.get("skills", [])
        transformed["skills"] = []

        if isinstance(skills_data, dict):
            # Handle dict format: {"technical_skills": [...], "language_skills": [...], "soft_skills": [...]}
            for skill_name in skills_data.get("technical_skills", []):
                transformed["skills"].append(
                    {"name": skill_name, "category": "Technical", "proficiency": None}
                )
            for skill_name in skills_data.get("language_skills", []):
                transformed["skills"].append(
                    {"name": skill_name, "category": "Language", "proficiency": None}
                )
            for skill_name in skills_data.get("soft_skills", []):
                transformed["skills"].append(
                    {"name": skill_name, "category": "Soft", "proficiency": None}
                )
        elif isinstance(skills_data, list):
            # Handle list format (entries may be plain strings or dicts)
            for skill in skills_data:
                if isinstance(skill, str):
                    transformed["skills"].append(
                        {"name": skill, "category": None, "proficiency": None}
                    )
                elif isinstance(skill, dict):
                    transformed["skills"].append(
                        {
                            "name": skill.get("name", ""),
                            "category": skill.get("category"),
                            "proficiency": skill.get("proficiency"),
                        }
                    )

        # Transform certifications (accepts name/certification_name etc. variants)
        certs_list = data.get("certifications", [])
        transformed["certifications"] = []
        for cert in certs_list:
            transformed["certifications"].append(
                {
                    "name": cert.get("name") or cert.get("certification_name", ""),
                    "issuer": cert.get("issuer") or cert.get("issuing_organization"),
                    "date": cert.get("date") or cert.get("date_obtained"),
                    "expiry_date": cert.get("expiry_date"),
                }
            )

        # Transform languages
        languages_list = data.get("languages", [])
        transformed["languages"] = []
        for lang in languages_list:
            transformed["languages"].append(
                {"language": lang.get("language", ""), "proficiency": lang.get("proficiency", "")}
            )

        # Transform additional_info - convert dict to string if needed
        additional_info = data.get("additional_info") or data.get("additional_information")
        if additional_info:
            if isinstance(additional_info, dict):
                # Convert dict to formatted string (one labelled line per section)
                parts = []
                if additional_info.get("hobbies"):
                    hobbies = additional_info["hobbies"]
                    if isinstance(hobbies, list):
                        parts.append(f"Hobbies: {', '.join(hobbies)}")
                    else:
                        parts.append(f"Hobbies: {hobbies}")

                if additional_info.get("projects"):
                    projects = additional_info["projects"]
                    if isinstance(projects, list):
                        parts.append(f"Projects: {'; '.join(projects)}")
                    else:
                        parts.append(f"Projects: {projects}")

                if additional_info.get("awards"):
                    awards = additional_info["awards"]
                    if isinstance(awards, list):
                        parts.append(f"Awards: {', '.join(awards)}")
                    else:
                        parts.append(f"Awards: {awards}")

                if additional_info.get("other_activities"):
                    activities = additional_info["other_activities"]
                    if isinstance(activities, list):
                        parts.append(f"Other Activities: {', '.join(activities)}")
                    else:
                        parts.append(f"Other Activities: {activities}")

                transformed["additional_info"] = "\n".join(parts) if parts else None
            elif isinstance(additional_info, str):
                transformed["additional_info"] = additional_info
            else:
                transformed["additional_info"] = str(additional_info) if additional_info else None
        else:
            transformed["additional_info"] = None

        return transformed

    def parse_cv_from_pdf(
        self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
    ) -> CVData:
        """
        Parse CV from PDF file and return structured data.

        When use_ocr is enabled, OCR is delegated to the PDF reader
        (self.vision_model is always None here; the LLM-based vision model
        is no longer used for OCR).

        Args:
            pdf_path: Path to the PDF file
            verbose: If True, print progress messages
            candidate_id: Optional candidate id used when tracking the model response

        Returns:
            CVData object with parsed information

        Raises:
            Exception: If the LLM call fails, returns an empty response,
                or its output cannot be parsed into CVData.
        """
        import time

        start_time = time.time()

        logger.info("=" * 80)
        logger.info("CV PARSING PROCESS STARTED")
        logger.info("=" * 80)
        logger.info(f"PDF file: {pdf_path}")
        logger.info(f"OCR enabled: {self.use_ocr}")
        logger.info(f"Vision model: {self.vision_model_name if self.vision_model_name else 'N/A'}")
        logger.info(f"Text model: {self.model_name}")

        if verbose:
            print("\n" + "=" * 80)
            print("STEP 1: EXTRACTING TEXT FROM PDF")
            print("=" * 80)
            print("  📄 Extracting text from PDF...")

        logger.info("Step 1: Starting text extraction from PDF...")
        extraction_start = time.time()

        # Extract text from PDF using vision model OCR if available
        # (self.vision_model is None, so pdf_reader's own OCR path is used)
        cv_text = extract_text_from_pdf(
            pdf_path,
            vision_model=self.vision_model if self.use_ocr else None,
            use_ocr=self.use_ocr,
            verbose=verbose,
        )

        extraction_time = time.time() - extraction_start
        logger.info(
            f"Step 1 completed: Extracted {len(cv_text)} characters in {extraction_time:.2f}s"
        )

        if verbose:
            print(f"  ✅ Extracted {len(cv_text)} characters of text ({extraction_time:.2f}s)")
            print("\n" + "=" * 80)
            print("STEP 2: PARSING STRUCTURED DATA WITH LLM")
            print("=" * 80)
            print("  🤖 Parsing structured data...")

        # Limit text length to avoid token limits
        # gpt-5 models may have issues with very long prompts, so use smaller limit
        max_text_length = 10000 if "gpt-5" in self.model_name.lower() else 15000

        if len(cv_text) > max_text_length:
            logger.warning(
                f"Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters "
                f"(model: {self.model_name})"
            )
            if verbose:
                print(
                    f"  ⚠️ Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters..."
                )
            cv_text = cv_text[:max_text_length] + "\n\n[... text was truncated due to length ...]"

        # Estimate prompt length (CV text + format instructions + prompt template)
        # Format instructions are typically ~500-1000 chars, prompt template ~500-800 chars
        estimated_prompt_length = len(cv_text) + 1500
        logger.info(
            f"Step 2: Preparing to send CV text ({len(cv_text)} chars) to LLM for parsing. "
            f"Estimated total prompt length: ~{estimated_prompt_length} chars"
        )

        if estimated_prompt_length > 20000 and "gpt-5" in self.model_name.lower():
            logger.warning(
                f"Warning: Prompt may be too long for {self.model_name}. "
                f"Consider using gpt-4o-mini or gpt-4.1-nano for better reliability with long CVs."
            )

        # Run LLM via Azure OpenAI
        try:
            if verbose:
                print("    ⏳ Sending request to LLM (this may take 30-120 seconds)...")
                print("    💡 Tip: LLM is analyzing the CV and extracting structured information")

            logger.info("Step 2: Sending request to LLM for structured parsing...")
            parsing_start = time.time()

            # Build prompt text
            prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an expert CV parser. "
                            "You must return valid JSON matching the CVData schema."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
            )

            if not raw_text or not str(raw_text).strip():
                raise Exception(
                    f"Model returned empty response for CV parsing. "
                    f"Check Azure deployment '{self.model_name}' and API availability."
                )
            parsing_time = time.time() - parsing_start
            logger.info(f"Step 2 completed: LLM parsing successful in {parsing_time:.2f}s")

            # Track model response (with token usage and cost);
            # input is truncated to keep stored payloads small
            self._save_model_response(
                agent_type="cv_parser",
                input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata={"temperature": self.temperature, "parsing_time": parsing_time},
                response=response,  # Pass response to extract tokens and costs
            )

            # Parse raw_text into CVData
            parsed_data = self._parse_cv_from_text_raw(raw_text)

            if verbose:
                print(f"    ✅ Received response from LLM ({parsing_time:.2f}s)")

            total_time = time.time() - start_time
            logger.info(f"CV parsing completed successfully in {total_time:.2f}s total")
            logger.info("=" * 80)

            return parsed_data
        except Exception as e:
            error_type = type(e).__name__
            error_msg = str(e)
            logger.error(f"Failed to parse CV: {error_type}: {error_msg}", exc_info=True)
            raise Exception(f"Failed to parse CV: {error_msg}") from e

    def parse_cv_from_text(self, cv_text: str, candidate_id: Optional[int] = None) -> CVData:
        """
        Parse CV from text content.

        Unlike parse_cv_from_pdf, this does no length limiting or progress
        output — the text is sent to the model as-is.

        Args:
            cv_text: CV text content
            candidate_id: Optional candidate id used when tracking the model response

        Returns:
            CVData object with parsed information

        Raises:
            Exception: If the LLM call fails or its output cannot be parsed.
        """
        # Run LLM via adapter
        try:
            # Build prompt text
            prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

            raw_text, response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an expert CV parser. "
                            "You must return valid JSON matching the CVData schema."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
            )
            if not raw_text:
                raise ValueError("Empty response from CV parser model")

            self._save_model_response(
                agent_type="cv_parser",
                input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
                output_data=raw_text,
                candidate_id=candidate_id,
                metadata={"temperature": self.temperature},
                response=response,
            )

            return self._parse_cv_from_text_raw(raw_text)
        except Exception as e:
            raise Exception(f"Failed to parse CV text: {str(e)}") from e

    def _parse_cv_from_text_raw(self, text: str) -> CVData:
        """
        Parse CVData from raw model text, using JSON + _transform_llm_response.

        NOTE(review): unlike the correction agent, this uses plain json.loads
        (after stripping code fences) with no extraction fallback — confirm
        that is intentional.
        """
        if not text:
            raise ValueError("Empty response from CV parser model")

        # Strip code fences
        cleaned_text = strip_code_fences(text)

        # Try to parse JSON and transform
        try:
            data = json.loads(cleaned_text)
            transformed_data = self._transform_llm_response(data)
            return CVData(**transformed_data)
        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse CV data from model output: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Failed to transform CV data: {str(e)}") from e

__init__(model_name=None, temperature=None, api_key=None, vision_model_name=None, use_ocr=True, timeout=240, max_retries=2)

Initialize CV Parser Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/cv_parser_agent.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    api_key: Optional[str] = None,
    vision_model_name: Optional[str] = None,
    use_ocr: bool = True,
    timeout: int = 240,
    max_retries: int = 2,
):
    """
    Initialize CV Parser Agent using Azure OpenAI SDK (no LangChain).
    """
    # Fall back to configured defaults when the caller supplies nothing
    chosen_model = model_name if model_name else settings.openai_model
    chosen_temperature = (
        settings.openai_temperature if temperature is None else temperature
    )

    # Azure quirk: clamp temperature up to 1.0 (some deployments reject lower values)
    if chosen_temperature is None or chosen_temperature < 1.0:
        chosen_temperature = 1.0

    super().__init__(chosen_model, chosen_temperature, api_key, timeout, max_retries)

    # No LLM-based vision model is created here; OCR relies on pdf_reader defaults
    self.use_ocr = use_ocr
    self.vision_model = None
    self.vision_model_name = vision_model_name

parse_cv_from_pdf(pdf_path, verbose=False, candidate_id=None)

Parse CV from PDF file and return structured data. Uses vision model as OCR if enabled.

Parameters:

Name Type Description Default
pdf_path str

Path to the PDF file

required
verbose bool

If True, print progress messages

False

Returns:

Type Description
CVData

CVData object with parsed information

Source code in agents/cv_parser_agent.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
def parse_cv_from_pdf(
    self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
) -> CVData:
    """
    Parse CV from PDF file and return structured data.
    Uses vision model as OCR if enabled.

    Args:
        pdf_path: Path to the PDF file
        verbose: If True, print progress messages
        candidate_id: Optional candidate ID used when persisting the model response

    Returns:
        CVData object with parsed information

    Raises:
        Exception: if text extraction or LLM parsing fails
    """
    import time

    start_time = time.time()

    logger.info("=" * 80)
    logger.info("CV PARSING PROCESS STARTED")
    logger.info("=" * 80)
    logger.info(f"PDF file: {pdf_path}")
    logger.info(f"OCR enabled: {self.use_ocr}")
    logger.info(f"Vision model: {self.vision_model_name if self.vision_model_name else 'N/A'}")
    logger.info(f"Text model: {self.model_name}")

    if verbose:
        print("\n" + "=" * 80)
        print("STEP 1: EXTRACTING TEXT FROM PDF")
        print("=" * 80)
        print("  📄 Extracting text from PDF...")

    logger.info("Step 1: Starting text extraction from PDF...")
    extraction_start = time.time()

    # Extract text from PDF using vision model OCR if available
    # NOTE(review): self.vision_model is set to None in __init__, so the OCR
    # path effectively falls back to pdf_reader defaults — confirm intended.
    cv_text = extract_text_from_pdf(
        pdf_path,
        vision_model=self.vision_model if self.use_ocr else None,
        use_ocr=self.use_ocr,
        verbose=verbose,
    )

    extraction_time = time.time() - extraction_start
    logger.info(
        f"Step 1 completed: Extracted {len(cv_text)} characters in {extraction_time:.2f}s"
    )

    if verbose:
        print(f"  ✅ Extracted {len(cv_text)} characters of text ({extraction_time:.2f}s)")
        print("\n" + "=" * 80)
        print("STEP 2: PARSING STRUCTURED DATA WITH LLM")
        print("=" * 80)
        print("  🤖 Parsing structured data...")

    # Limit text length to avoid token limits
    # gpt-5 models may have issues with very long prompts, so use smaller limit
    max_text_length = 10000 if "gpt-5" in self.model_name.lower() else 15000

    if len(cv_text) > max_text_length:
        logger.warning(
            f"Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters "
            f"(model: {self.model_name})"
        )
        if verbose:
            print(
                f"  ⚠️ Text is very long ({len(cv_text)} characters), truncating to {max_text_length} characters..."
            )
        cv_text = cv_text[:max_text_length] + "\n\n[... text was truncated due to length ...]"

    # Estimate prompt length (CV text + format instructions + prompt template)
    # Format instructions are typically ~500-1000 chars, prompt template ~500-800 chars
    estimated_prompt_length = len(cv_text) + 1500
    logger.info(
        f"Step 2: Preparing to send CV text ({len(cv_text)} chars) to LLM for parsing. "
        f"Estimated total prompt length: ~{estimated_prompt_length} chars"
    )

    if estimated_prompt_length > 20000 and "gpt-5" in self.model_name.lower():
        logger.warning(
            f"Warning: Prompt may be too long for {self.model_name}. "
            f"Consider using gpt-4o-mini or gpt-4.1-nano for better reliability with long CVs."
        )

    # Run LLM via Azure OpenAI
    try:
        if verbose:
            print("    ⏳ Sending request to LLM (this may take 30-120 seconds)...")
            print("    💡 Tip: LLM is analyzing the CV and extracting structured information")

        logger.info("Step 2: Sending request to LLM for structured parsing...")
        parsing_start = time.time()

        # Build prompt text
        prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert CV parser. "
                        "You must return valid JSON matching the CVData schema."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
        )

        # Whitespace-only output counts as empty (model/deployment misconfiguration)
        if not raw_text or not str(raw_text).strip():
            raise Exception(
                f"Model returned empty response for CV parsing. "
                f"Check Azure deployment '{self.model_name}' and API availability."
            )
        parsing_time = time.time() - parsing_start
        logger.info(f"Step 2 completed: LLM parsing successful in {parsing_time:.2f}s")

        # Track model response (with token usage and cost)
        self._save_model_response(
            agent_type="cv_parser",
            input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata={"temperature": self.temperature, "parsing_time": parsing_time},
            response=response,  # Pass response to extract tokens and costs
        )

        # Parse raw_text into CVData
        parsed_data = self._parse_cv_from_text_raw(raw_text)

        if verbose:
            print(f"    ✅ Received response from LLM ({parsing_time:.2f}s)")

        total_time = time.time() - start_time
        logger.info(f"CV parsing completed successfully in {total_time:.2f}s total")
        logger.info("=" * 80)

        return parsed_data
    except Exception as e:
        error_type = type(e).__name__
        error_msg = str(e)
        logger.error(f"Failed to parse CV: {error_type}: {error_msg}", exc_info=True)
        raise Exception(f"Failed to parse CV: {error_msg}") from e

parse_cv_from_text(cv_text, candidate_id=None)

Parse CV from text content.

Parameters:

Name Type Description Default
cv_text str

CV text content

required

Returns:

Type Description
CVData

CVData object with parsed information

Source code in agents/cv_parser_agent.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
def parse_cv_from_text(self, cv_text: str, candidate_id: Optional[int] = None) -> CVData:
    """
    Parse CV from text content.

    Args:
        cv_text: CV text content
        candidate_id: Optional candidate ID used when persisting the model response

    Returns:
        CVData object with parsed information

    Raises:
        Exception: if the LLM call fails or the response cannot be parsed
    """
    # Run LLM via adapter
    try:
        # Build prompt text
        prompt_text = CV_PARSING_PROMPT.format(cv_text=cv_text)

        raw_text, response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert CV parser. "
                        "You must return valid JSON matching the CVData schema."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
        )
        # Treat whitespace-only output as empty — consistent with the check
        # used by parse_cv_from_pdf for the same failure mode.
        if not raw_text or not str(raw_text).strip():
            raise ValueError("Empty response from CV parser model")

        # Track model response (token usage and cost accounting)
        self._save_model_response(
            agent_type="cv_parser",
            input_data={"cv_text": cv_text[:1000] + "..." if len(cv_text) > 1000 else cv_text},
            output_data=raw_text,
            candidate_id=candidate_id,
            metadata={"temperature": self.temperature},
            response=response,
        )

        return self._parse_cv_from_text_raw(raw_text)
    except Exception as e:
        raise Exception(f"Failed to parse CV text: {str(e)}") from e

Bases: BaseAgent

AI agent for classifying incoming emails.

Source code in agents/email_classifier_agent.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
class EmailClassifierAgent(BaseAgent):
    """AI agent for classifying incoming emails.

    Categories produced: 'iod' (data-protection/GDPR requests),
    'consent_yes' / 'consent_no' (consent for other recruitments),
    and 'default' (everything else, routed to HR).
    """

    # Critical IOD keywords - at least a few of these must be present for IOD classification
    CRITICAL_IOD_KEYWORDS = [
        "rodo",
        "iod",
        "dpo",
        "gdpr",
        "dane osobowe",
        "ochrona danych",
        "uodo",
        "organ nadzorczy",
        "profilowanie",
        "automatyczna decyzja",
    ]

    def __init__(
        self,
        model_name: str = settings.azure_openai_gpt_deployment,
        temperature: float = 0.1,  # Low temperature for consistent classification
        api_key: Optional[str] = None,
        timeout: int = 30,
        max_retries: int = 2,
    ):
        """
        Initialize Email Classifier Agent using Azure OpenAI SDK (no LangChain).
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions instead of LangChain parser
        # (spliced into the prompt via the {format_instructions} placeholder)
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "category": "iod" | "consent_yes" | "consent_no" | "default",\n'
            '  "confidence": 0.0-1.0,\n'
            '  "reasoning": "short explanation",\n'
            '  "keywords_found": ["keyword1", "keyword2"]\n'
            "}\n\n"
            "- Do NOT return a JSON schema.\n"
            "- Do NOT wrap the JSON in markdown code fences."
        )

        # Build prompt
        self.prompt_template = self._create_prompt_template()

    def _create_prompt_template(self) -> str:
        """Create prompt template for email classification.

        Placeholders filled at classification time: {from_email}, {subject},
        {body}, {format_instructions}. Double braces are literal JSON examples.
        """
        return """You are an email classification system for a recruitment department.

Your task is to classify incoming emails from candidates into one of the following categories:

1. **IOD** - Emails related to data protection, GDPR, RODO, privacy rights, or requests to IOD/DPO
   - Keywords that indicate IOD: RODO, IOD, DPO, GDPR, dane osobowe, ochrona danych, sprzeciw, zgoda (in context of data), wycofanie zgody, skarga, UODO, Organ Nadzorczy, profilowanie, automatyczna decyzja, sztuczna inteligencja (in context of data processing), AI (in context of data processing)
   - Examples: "Chcę wycofać zgodę na przetwarzanie danych", "Składam sprzeciw wobec profilowania", "Mam pytanie dotyczące RODO"

2. **consent_yes** - Emails expressing consent to be considered for other positions
   - Keywords: zgoda, zgadzam się, wyrażam zgodę, wyrażam zgodę na udział, wyrażam zgodę na udział w innych rekrutacjach, wyrażam zgodę na udział w innych, wyrażam zgodę na udział w rekrutacjach, tak, chcę, zainteresowany, rozważenie, inne oferty, inne stanowiska, inne pozycje, inne rekrutacje, udział w innych rekrutacjach, udział w innych, udział w rekrutacjach, mogę brać udział, mogę uczestniczyć, jestem zainteresowany innymi ofertami
   - Examples:
     * "Wyrażam zgodę na udział w innych rekrutacjach"
     * "Wyrażam zgodę na udział w innych"
     * "Zgadzam się na rozważenie mojej kandydatury w kontekście innych stanowisk"
     * "Chcę być brany pod uwagę przy innych ofertach"
     * "Tak, wyrażam zgodę"
     * "Jestem zainteresowany innymi ofertami"
     * "Mogę brać udział w innych rekrutacjach"

3. **consent_no** - Emails refusing consent to be considered for other positions
   - Keywords: nie zgadzam się, nie wyrażam zgody, nie wyrażam zgody na udział, nie wyrażam zgody na udział w innych rekrutacjach, odmawiam, nie, nie chcę, wycofuję zgodę, nie jestem zainteresowany, nie rozważaj, nie chcę brać udziału, nie chcę uczestniczyć, nie jestem zainteresowany innymi ofertami, nie wyrażam zgody na udział w innych, nie wyrażam zgody na udział w rekrutacjach
   - Examples:
     * "Nie wyrażam zgody na udział w innych rekrutacjach"
     * "Nie wyrażam zgody na udział w innych"
     * "Nie wyrażam zgody"
     * "Nie wyrażam zgody na rozważenie w innych rekrutacjach"
     * "Wycofuję zgodę na inne oferty"
     * "Nie jestem zainteresowany innymi ofertami"
     * "Nie chcę brać udziału w innych rekrutacjach"

4. **default** - All other emails that should go to HR department
   - Regular questions, follow-ups, general inquiries, etc.

CRITICAL RULES:
- For IOD classification: The email MUST contain at least 2-3 of the critical IOD keywords listed above
- If an email mentions "zgoda" but in context of consent for other positions (not data protection), classify as consent_yes/consent_no, NOT IOD
- If an email mentions "AI" or "sztuczna inteligencja" but in context of job requirements or skills, classify as default, NOT IOD
- Be precise - only classify as IOD if it's clearly about data protection/privacy rights
- For consent classification, look for explicit statements about being considered for other positions
- IMPORTANT: If email contains phrases like "wyrażam zgodę na udział w innych rekrutacjach" or "wyrażam zgodę na udział w innych" → classify as consent_yes
- IMPORTANT: If email contains phrases like "nie wyrażam zgody na udział w innych rekrutacjach" or "nie wyrażam zgody na udział w innych" → classify as consent_no
- Pay attention to Polish language variations: "udział w innych rekrutacjach", "udział w innych", "udział w rekrutacjach" all mean consent for other positions

Email to classify:
From: {from_email}
Subject: {subject}
Body: {body}

Provide your classification in the following JSON format:
{format_instructions}

IMPORTANT: Return ACTUAL DATA VALUES, not a schema description.
Example of correct output:
{{
  "category": "iod",
  "confidence": 0.95,
  "reasoning": "Email contains multiple IOD keywords: RODO, dane osobowe, and requests information about data processing",
  "keywords_found": ["rodo", "dane osobowe", "przetwarzanie danych"]
}}

DO NOT return:
{{
  "description": "Email classification model",
  "properties": {{...}},
  "required": [...]
}}
"""

    def classify_email(self, from_email: str, subject: str, body: str) -> EmailClassification:
        """
        Classify email using AI.

        Args:
            from_email: Sender's email address
            subject: Email subject
            body: Email body text

        Returns:
            EmailClassification object (falls back to 'default' on any error)
        """
        try:
            # Format prompt
            prompt = self.prompt_template.format(
                from_email=from_email,
                subject=subject,
                body=body[:5000],  # Limit body length to avoid token limits
                format_instructions=self.format_instructions,
            )

            # Get classification via LLM adapter
            result_text, _ = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an email classification assistant. "
                            "You must respond with JSON exactly as described in the format_instructions."
                        ),
                    },
                    {"role": "user", "content": prompt},
                ],
                max_completion_tokens=500,
            )

            # Parse response
            classification = self._parse_classification_from_text(result_text or "")

            # Validate IOD classification - must have at least ONE critical keyword
            # (guards against the model hallucinating an IOD label)
            if classification.category == "iod":
                body_lower = body.lower()
                subject_lower = subject.lower()
                text_lower = f"{subject_lower} {body_lower}"

                found_keywords = [
                    keyword
                    for keyword in self.CRITICAL_IOD_KEYWORDS
                    if keyword.lower() in text_lower
                ]

                # Require at least ONE strong IOD keyword in the actual email text
                if len(found_keywords) < 1:
                    logger.warning(
                        "IOD classification rejected - no critical IOD keywords found in email text. "
                        "Reclassifying as 'default'."
                    )
                    # Reclassify as default
                    classification.category = "default"
                    classification.reasoning = (
                        "Originally classified as IOD by the model, but no critical IOD keywords were found "
                        "in the email subject/body. Reclassified as default (HR)."
                    )
                    classification.confidence = 0.7  # moderate confidence for the rule-based override

            logger.info(
                f"Email classified as '{classification.category}' "
                f"(confidence: {classification.confidence:.2f}, keywords: {classification.keywords_found})"
            )

            return classification

        except Exception as e:
            logger.error(f"Error classifying email: {str(e)}", exc_info=True)
            # Fallback to default classification
            return EmailClassification(
                category="default",
                confidence=0.5,
                reasoning=f"Classification failed: {str(e)}. Defaulting to HR routing.",
                keywords_found=[],
            )

    def _parse_classification_from_text(self, text: str) -> EmailClassification:
        """
        Parse EmailClassification from raw model text, handling common JSON issues.

        Raises:
            ValueError: if the model returned an empty response.
        """
        if not text:
            raise ValueError("Empty response from model")

        # Parse JSON with fallback extraction
        data = parse_json_safe(text, fallback_to_extraction=True)

        return EmailClassification(**data)

__init__(model_name=settings.azure_openai_gpt_deployment, temperature=0.1, api_key=None, timeout=30, max_retries=2)

Initialize Email Classifier Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/email_classifier_agent.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def __init__(
    self,
    model_name: str = settings.azure_openai_gpt_deployment,
    temperature: float = 0.1,  # Low temperature for consistent classification
    api_key: Optional[str] = None,
    timeout: int = 30,
    max_retries: int = 2,
):
    """
    Initialize Email Classifier Agent using Azure OpenAI SDK (no LangChain).
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Plain-text output contract shown to the model (replaces a LangChain parser)
    self.format_instructions = "\n".join(
        [
            "Return ONLY a single JSON object with the following structure:",
            "{",
            '  "category": "iod" | "consent_yes" | "consent_no" | "default",',
            '  "confidence": 0.0-1.0,',
            '  "reasoning": "short explanation",',
            '  "keywords_found": ["keyword1", "keyword2"]',
            "}",
            "",
            "- Do NOT return a JSON schema.",
            "- Do NOT wrap the JSON in markdown code fences.",
        ]
    )

    # The prompt is static, so build it once up front
    self.prompt_template = self._create_prompt_template()

classify_email(from_email, subject, body)

Classify email using AI.

Parameters:

Name Type Description Default
from_email str

Sender's email address

required
subject str

Email subject

required
body str

Email body text

required

Returns:

Type Description
EmailClassification

EmailClassification object

Source code in agents/email_classifier_agent.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def classify_email(self, from_email: str, subject: str, body: str) -> EmailClassification:
    """
    Classify an incoming email with the LLM.

    Args:
        from_email: Sender's email address
        subject: Email subject
        body: Email body text

    Returns:
        EmailClassification object (falls back to 'default' on any error)
    """
    try:
        # Fill in the prompt template; cap body size to stay within token limits
        filled_prompt = self.prompt_template.format(
            from_email=from_email,
            subject=subject,
            body=body[:5000],
            format_instructions=self.format_instructions,
        )

        # Ask the LLM for a JSON classification
        raw_reply, _ = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an email classification assistant. "
                        "You must respond with JSON exactly as described in the format_instructions."
                    ),
                },
                {"role": "user", "content": filled_prompt},
            ],
            max_completion_tokens=500,
        )

        result = self._parse_classification_from_text(raw_reply or "")

        # Guard against model hallucination: an 'iod' label must be backed by
        # at least one critical keyword actually present in the email text.
        if result.category == "iod":
            haystack = f"{subject.lower()} {body.lower()}"
            hits = [kw for kw in self.CRITICAL_IOD_KEYWORDS if kw.lower() in haystack]

            if not hits:
                logger.warning(
                    "IOD classification rejected - no critical IOD keywords found in email text. "
                    "Reclassifying as 'default'."
                )
                # Downgrade to the HR default with moderate confidence
                result.category = "default"
                result.reasoning = (
                    "Originally classified as IOD by the model, but no critical IOD keywords were found "
                    "in the email subject/body. Reclassified as default (HR)."
                )
                result.confidence = 0.7

        logger.info(
            f"Email classified as '{result.category}' "
            f"(confidence: {result.confidence:.2f}, keywords: {result.keywords_found})"
        )

        return result

    except Exception as e:
        logger.error(f"Error classifying email: {str(e)}", exc_info=True)
        # Any failure degrades gracefully to HR routing
        return EmailClassification(
            category="default",
            confidence=0.5,
            reasoning=f"Classification failed: {str(e)}. Defaulting to HR routing.",
            keywords_found=[],
        )

Bases: BaseAgent

Agent that classifies email inquiries and decides how to respond.

Can decide:

- "direct_answer" — can answer based on basic knowledge
- "rag_answer" — must use RAG from the vector database
- "forward_to_hr" — forward to HR (specific, sensitive, or human-intervention questions)

Source code in agents/query_classifier_agent.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
class QueryClassifierAgent(BaseAgent):
    """
    Agent that classifies email inquiries and decides how to respond.

    Can decide:
    - "direct_answer" - can answer based on basic knowledge
    - "rag_answer" - must use RAG from the vector database
    - "forward_to_hr" - forward to HR (specific, sensitive, or human-intervention questions)
    """

    def __init__(self, model_name: Optional[str] = None, temperature: float = 0.3):
        """
        Initialize the classifier.

        Args:
            model_name: Model/deployment name; falls back to the configured
                Azure OpenAI GPT deployment when not provided.
            temperature: Sampling temperature (kept low for stable classification).
        """
        from config import settings

        model_name = model_name or settings.azure_openai_gpt_deployment
        super().__init__(model_name=model_name, temperature=temperature)

        # Basic knowledge for the agent (can answer without RAG)
        self.basic_knowledge = """
PODSTAWOWA WIEDZA O REKRUTACJI:

1. Proces rekrutacji:
- Pierwsza selekcja (screening) - weryfikacja CV
- Rozmowa HR - ocena kompetencji miękkich
- Ocena techniczna - dla stanowisk technicznych
- Weryfikacja wiedzy
- Rozmowa finalna

2. Komunikacja:
- Odpowiedzi w ciągu 5 dni roboczych
- Profesjonalne i empatyczne komunikaty
- Konstruktywny feedback

3. Zgoda na inne rekrutacje:
- Dobrowolna i można ją wycofać
- Informujemy o nowych ofertach jeśli zgoda wyrażona

4. Feedback:
- Zawsze konstruktywny
- Zawiera mocne strony i obszary do rozwoju
- Motywujący i wspierający

5. Decyzje:
- Akceptacja - przejście do kolejnego etapu
- Odrzucenie - generowanie feedbacku i wysłanie emaila
"""

        # RAG knowledge base description – so the agent knows when to use it
        self.rag_knowledge_description = """
DODATKOWA BAZA WIEDZY (RAG – vektorowa baza dokumentów):

Ta baza zawiera przede wszystkim TREŚCI FORMALNE I POLITYKI firmy, w szczególności:
- rodo_ai_act.txt – fragmenty dokumentów dotyczących RODO, ochrony danych osobowych,
  wykorzystania AI w rekrutacji, podstawy prawne, obowiązki informacyjne itp.
- polityka_rekrutacji.txt – wewnętrzna polityka rekrutacyjna firmy: zasady procesu,
  standardy komunikacji z kandydatami, przechowywania danych, okresy retencji itp.
- informacje_o_firmie.txt – ogólne informacje o firmie, misja, wartości, opis działalności.

RAG jest szczególnie przydatny gdy:
- pytanie dotyczy RODO, ochrony danych, AI Act, podstaw prawnych i formalnych obowiązków,
- pytanie dotyczy wewnętrznych procedur lub polityki rekrutacyjnej,
- kandydat pyta o „jak firma przetwarza dane”, „jak długo przechowujecie CV”, „jak działa AI w rekrutacji”.

Jeśli pytanie DOTYCZY powyższych obszarów, preferuj użycie \"rag_answer\".
Jeśli po użyciu RAG nadal nie ma wystarczających, jednoznacznych informacji – wtedy przekaż sprawę do HR (forward_to_hr).
"""

    def classify_query(self, email_subject: str, email_body: str, sender_email: str) -> Dict:
        """
        Classify the inquiry and decide how to respond.

        Returns:
            Dict with keys:
            - action: "direct_answer" | "rag_answer" | "forward_to_hr"
            - reasoning: decision justification
            - confidence: confidence level (0.0-1.0)
            - suggested_response: response suggestion (if action="direct_answer")
        """
        prompt = self._create_classification_prompt(email_subject, email_body, sender_email)

        try:
            result_text, _ = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert in classifying email inquiries in the recruitment process. You analyze inquiries and decide on the best way to respond.",
                    },
                    {"role": "user", "content": prompt},
                ],
                # response_format is only understood by some providers; pass through
                response_format={"type": "json_object"},
            )

            result_text = (result_text or "").strip()
            # Robustness: some providers ignore response_format and wrap the
            # JSON in markdown code fences; strip them before parsing.
            if result_text.startswith("```"):
                result_text = result_text.strip("`")
                if result_text.startswith("json"):
                    result_text = result_text[4:]
                result_text = result_text.strip()
            result = json.loads(result_text)

            # Validate result: any unknown action is routed to HR for safety.
            if result.get("action") not in ["direct_answer", "rag_answer", "forward_to_hr"]:
                result["action"] = "forward_to_hr"
                result["reasoning"] = "Invalid classification - forwarded to HR for safety"
                result["confidence"] = 0.0

            # CRITICAL VALIDATION: Different thresholds for different actions
            # For rag_answer: allow trying even with lower confidence (0.5+), as RAG may find the answer
            # For direct_answer: require higher confidence (0.7+)
            confidence = result.get("confidence", 0.0)
            try:
                confidence = float(confidence)
            except (ValueError, TypeError):
                confidence = 0.0
            # BUG FIX: always write back the normalized float so downstream
            # consumers never see a raw string/None confidence from the model.
            result["confidence"] = confidence

            action = result.get("action", "forward_to_hr")
            if action == "rag_answer" and confidence < 0.5:
                # For rag_answer: threshold 0.5 (lower, as RAG may find the answer)
                result["action"] = "forward_to_hr"
                result["reasoning"] = (
                    f"Confidence level ({confidence}) is too low for rag_answer (< 0.5). Forwarded to HR for safety."
                )
            elif action == "direct_answer" and confidence < 0.7:
                # For direct_answer: threshold 0.7.
                # BUG FIX: the original condition (action != "rag_answer") also
                # matched "forward_to_hr" and clobbered the model's specific
                # reasoning with a generic low-confidence message; only demote
                # direct_answer here.
                result["action"] = "forward_to_hr"
                result["reasoning"] = (
                    f"Confidence level ({confidence}) is below required (0.7). Forwarded to HR for safety."
                )

            return result

        except Exception as e:
            # On error, safer to forward to HR
            return {
                "action": "forward_to_hr",
                "reasoning": f"Error during classification: {str(e)}",
                "confidence": 0.0,
            }

    def _create_classification_prompt(
        self, email_subject: str, email_body: str, sender_email: str
    ) -> str:
        """Create prompt for query classification."""
        return f"""
You are analyzing an email inquiry from a candidate in the recruitment process.

AGENT'S BASIC KNOWLEDGE:
{self.basic_knowledge}

RAG KNOWLEDGE BASE (VECTOR DOCUMENTS AVAILABLE FOR YOU):
{self.rag_knowledge_description}

EMAIL:
Subject: {email_subject}
From: {sender_email}
Content: {email_body}

TASK:
Decide how to best respond to this inquiry. You have 3 options:

⚠️ CRITICAL RULES (BALANCE BETWEEN SAFETY AND USEFULNESS):
- Jeśli możesz odpowiedzieć na podstawie PODSTAWOWEJ WIEDZY lub dokumentów RAG z wysoką pewnością (confidence blisko 1.0),
  wybierz odpowiednio "direct_answer" lub "rag_answer".
- Jeśli po przeanalizowaniu treści nadal masz poważne wątpliwości lub temat dotyczy indywidualnej sytuacji kandydata,
  przekaż sprawę do HR (forward_to_hr).

1. "direct_answer" - You can answer based on basic knowledge (recruitment process, general information, standard procedures)
   - ⚠️ Use when: you are highly confident (confidence is high, np. >= 0.7)
   - ⚠️ Use ONLY when: the question concerns standard procedures that are clearly defined in basic knowledge
   - Examples: "What are the recruitment stages?", "How can I express consent for other recruitments?"
   - ❌ DO NOT use for: questions about details that may vary, questions requiring interpretation

2. "rag_answer" - You must use RAG from vector database (detailed information from company documents)
   - ⚠️ Prefer this option when: the question touches GDPR/RODO, AI Act, data protection, internal recruitment policy,
     or other topics that are TYPICZNIE opisane w dokumentach (regulaminy, polityki, oficjalne zasady).
   - ⚠️ Use when: the question requires detailed knowledge from documents and you reasonably expect the documents to contain the answer.
   - Examples:
     * "Jak dokładnie przetwarzacie moje dane w procesie rekrutacji?"
     * "Jak długo przechowujecie CV?"
     * "Jakie są wymagania RODO w kontekście rekrutacji?"
     * "Jak używacie AI w procesie rekrutacji i jakie są zasady?"
   - Jeśli po skorzystaniu z RAG odpowiedź nadal nie jest wystarczająco jednoznaczna lub pełna – wtedy lepiej wybrać forward_to_hr.

3. "forward_to_hr" - Forward to HR (ALWAYS when you are not 100% certain)
   - ⚠️ Use ALWAYS when:
     * You have serious doubts and confidence is low (np. < 0.7)
     * The question concerns a specific candidate application (status, decision, details)
     * The question is sensitive or requires access to candidate data
     * The question should not be handled by AI
     * RAG documents do not contain sufficiently clear / reliable answer
     * The question requires interpretation or subjective assessment
   - Examples: "What is the status of my application?", "Why was I rejected?", "I want to change data in my CV", "What are the details of AI Act?" (if you are not certain that documents contain the answer)

RETURN JSON in format:
{{
    "action": "direct_answer" | "rag_answer" | "forward_to_hr",
    "reasoning": "Detailed justification of the decision",
    "confidence": 0.0-1.0,
    "suggested_response": "Response suggestion (only if action='direct_answer', otherwise null)"
}}
"""

classify_query(email_subject, email_body, sender_email)

Classify the inquiry and decide how to respond.

Returns:

Type Description
Dict

Dict with the keys:

  • action: "direct_answer" | "rag_answer" | "forward_to_hr"
  • reasoning: decision justification
  • confidence: confidence level (0.0-1.0)
  • suggested_response: response suggestion (if action="direct_answer")
Source code in agents/query_classifier_agent.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def classify_query(self, email_subject: str, email_body: str, sender_email: str) -> Dict:
    """
    Classify the inquiry and decide how to respond.

    Returns:
        Dict with keys:
        - action: "direct_answer" | "rag_answer" | "forward_to_hr"
        - reasoning: decision justification
        - confidence: confidence level (0.0-1.0)
        - suggested_response: response suggestion (if action="direct_answer")
    """
    classification_prompt = self._create_classification_prompt(
        email_subject, email_body, sender_email
    )

    try:
        raw_reply, _ = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert in classifying email inquiries in the recruitment process. You analyze inquiries and decide on the best way to respond.",
                },
                {"role": "user", "content": classification_prompt},
            ],
            # response_format is only understood by some providers; pass through
            response_format={"type": "json_object"},
        )

        result = json.loads((raw_reply or "").strip())

        # Any unrecognized action is routed to HR as the safe default.
        if result.get("action") not in ("direct_answer", "rag_answer", "forward_to_hr"):
            result["action"] = "forward_to_hr"
            result["reasoning"] = "Invalid classification - forwarded to HR for safety"
            result["confidence"] = 0.0

        # Normalize confidence to a float; anything unparsable counts as 0.0.
        try:
            confidence = float(result.get("confidence", 0.0))
        except (ValueError, TypeError):
            confidence = 0.0

        # Per-action confidence thresholds: rag_answer may proceed at >= 0.5
        # (RAG might still locate the answer); everything else needs >= 0.7.
        action = result.get("action", "forward_to_hr")
        too_uncertain_for_rag = action == "rag_answer" and confidence < 0.5
        too_uncertain_otherwise = action != "rag_answer" and confidence < 0.7

        if too_uncertain_for_rag:
            result["action"] = "forward_to_hr"
            result["reasoning"] = (
                f"Confidence level ({confidence}) is too low for rag_answer (< 0.5). Forwarded to HR for safety."
            )
            result["confidence"] = confidence
        elif too_uncertain_otherwise:
            result["action"] = "forward_to_hr"
            result["reasoning"] = (
                f"Confidence level ({confidence}) is below required (0.7). Forwarded to HR for safety."
            )
            result["confidence"] = confidence

        return result

    except Exception as e:
        # Any failure means we forward to HR instead of guessing.
        return {
            "action": "forward_to_hr",
            "reasoning": f"Error during classification: {str(e)}",
            "confidence": 0.0,
        }

Bases: BaseAgent

Agent that generates responses to email inquiries. Can use basic knowledge or RAG from the vector database.

Source code in agents/query_responder_agent.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class QueryResponderAgent(BaseAgent):
    """
    Agent that generates responses to email inquiries.
    Can use basic knowledge or RAG from the vector database.
    """

    def __init__(self, model_name: Optional[str] = None, temperature: float = 0.7):
        """
        Initialize the responder agent.

        Args:
            model_name: Model name; falls back to the configured OpenAI model.
            temperature: Sampling temperature (higher for more natural replies).
        """
        from config import settings

        model_name = model_name or settings.openai_model
        super().__init__(model_name=model_name, temperature=temperature)

        # Basic knowledge (in Polish) injected into every response prompt.
        self.basic_knowledge = """
PODSTAWOWA WIEDZA O REKRUTACJI:

1. Proces rekrutacji:
- Pierwsza selekcja (screening) - weryfikacja CV i podstawowych wymagań
- Rozmowa HR - ocena kompetencji miękkich, motywacji, dopasowania kulturowego
- Ocena techniczna - testy, zadania praktyczne (dla stanowisk technicznych)
- Weryfikacja wiedzy - sprawdzenie kompetencji merytorycznych
- Rozmowa finalna - spotkanie z przełożonym, negocjacje warunków

2. Komunikacja z kandydatami:
- Odpowiedzi na aplikacje w ciągu 5 dni roboczych
- Wszystkie komunikaty są profesjonalne, przyjazne i empatyczne
- Informacja zwrotna zawsze zawiera konstruktywne uwagi
- Unikamy słów "odrzucenie", "odmowa" - używamy łagodniejszych sformułowań

3. Zgoda na inne rekrutacje:
- Kandydaci mogą wyrazić zgodę na rozważenie ich kandydatury w innych rekrutacjach
- Zgoda jest dobrowolna i można ją wycofać w każdej chwili
- Jeśli kandydat wyraził zgodę, informujemy go o nowych, odpowiednich ofertach

4. Feedback:
- Zawsze konstruktywny i wspierający
- Zawiera mocne strony kandydata
- Wskazuje obszary do rozwoju w sposób empatyczny
- Zachęca do dalszego rozwoju zawodowego

5. Podpis w emailach:
- "Z wyrazami szacunku\n\nDział HR"
"""

    def generate_response(
        self,
        email_subject: str,
        email_body: str,
        sender_email: str,
        rag_context: Optional[List[Dict]] = None,
    ) -> Optional[str]:
        """
        Generate a response to an email inquiry.

        Args:
            email_subject: Email subject
            email_body: Email body content
            sender_email: Sender email address
            rag_context: RAG context (list of documents from the vector database)

        Returns:
            Generated response or None if the agent is not confident (then forward to HR)
        """
        prompt = self._create_response_prompt(email_subject, email_body, sender_email, rag_context)

        try:
            response_text, _ = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an HR department assistant. You respond to candidate inquiries in a professional, friendly, and helpful manner. You MUST always write responses in POLISH (Polish language). Always end your response with the signature 'Z wyrazami szacunku\n\nDział HR'. If you are not certain of the answer, return the special value: 'FORWARD_TO_HR'.",
                    },
                    {"role": "user", "content": prompt},
                ],
            )

            response_text = (response_text or "").strip()

            # Forward-to-HR signal anywhere in the reply means "not confident".
            # (The substring check subsumes the previous exact-equality check.)
            if "FORWARD_TO_HR" in response_text.upper():
                logger.info("Agent returned FORWARD_TO_HR signal - not confident enough to answer")
                return None

            # Check if response contains uncertainty phrases (in Polish);
            # any hit means the model hedged and we should escalate instead.
            uncertainty_phrases = [
                "nie posiadamy szczegółowych informacji",
                "chociaż nie posiadamy",
                "nie mamy dokładnych informacji",
                "nie jesteśmy w stanie",
                "nie możemy udzielić",
                "przekazaliśmy do działu hr",
                "przekazaliśmy je do działu hr",
                "przekazaliśmy do hr",
                "przekazaliśmy je do hr",
                "dziękujemy za pańskie zapytanie. przekazaliśmy",
                "przekazaliśmy je do działu",
                "skontaktuje się z państwem w najkrótszym możliwym terminie",  # Typical phrase when forwarding to HR
            ]

            response_lower = response_text.lower()
            found_phrases = [p for p in uncertainty_phrases if p in response_lower]

            if found_phrases:
                logger.warning(
                    f"Response contains uncertainty phrases - agent not confident enough. Phrases found: {found_phrases}"
                )
                return None

            # Add privacy policy link to the end of response (after signature)
            response_text = self._add_privacy_link(response_text)

            return response_text

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return None

    def _create_response_prompt(
        self,
        email_subject: str,
        email_body: str,
        sender_email: str,
        rag_context: Optional[List[Dict]] = None,
    ) -> str:
        """Create prompt for generating response, optionally embedding RAG documents."""
        context_section_english = ""
        if rag_context:
            context_section_english = "\n\nADDITIONAL CONTEXT FROM KNOWLEDGE BASE:\n"
            for i, doc in enumerate(rag_context, 1):
                context_section_english += f"\n--- Document {i} ---\n"
                context_section_english += (
                    f"Source: {doc.get('metadata', {}).get('source', 'N/A')}\n"
                )
                context_section_english += f"Content: {doc.get('document', '')}\n"

        return f"""
You are an HR assistant responding to candidate inquiries in the recruitment process.

BASIC KNOWLEDGE:
{self.basic_knowledge}
{context_section_english}

EMAIL:
Subject: {email_subject}
From: {sender_email}
Content: {email_body}

TASK:
Generate a professional, friendly, and helpful response to this inquiry.

DECISION RULES:
1. If RAG context is provided and contains relevant information → Answer based on RAG context (you can be confident)
2. If question can be answered from basic knowledge → Answer from basic knowledge
3. If question requires specific candidate data or personal information → Return "FORWARD_TO_HR"
4. If question requires interpretation or subjective assessment → Return "FORWARD_TO_HR"
5. If RAG context is empty or irrelevant → Return "FORWARD_TO_HR"

REQUIREMENTS:
1. If RAG context is provided, use it to answer the question - you can be confident if RAG found relevant documents
2. The answer must be factually accurate based on basic knowledge and RAG context (if available)
3. Use professional but friendly tone
4. Be empathetic and supportive
5. ❌ NEVER answer in style: "Although we do not have detailed information..." - this means you should forward to HR
6. ❌ NEVER answer in style: "we do not have detailed information" - this indicates lack of certainty
7. ❌ NEVER answer if question requires specific candidate data or personal information
8. ⚠️ CRITICAL: If you cannot answer based on available context, return ONLY: "FORWARD_TO_HR" (without any other text, no Polish text, just these exact words)
9. ⚠️ CRITICAL: DO NOT generate a response saying "we forwarded to HR" or "we will forward to HR" - if you cannot answer, return ONLY "FORWARD_TO_HR"
10. ⚠️ CRITICAL: DO NOT write "Dziękujemy za Pańskie zapytanie. Przekazaliśmy je do działu HR..." - if you cannot answer, return ONLY "FORWARD_TO_HR"
11. Always end the response with: "Z wyrazami szacunku\n\nDział HR" (ONLY if you are answering, not if returning FORWARD_TO_HR)
12. ⚠️ CRITICAL: The response MUST be written in POLISH (Polish language) - this is mandatory (ONLY if you are answering, not if returning FORWARD_TO_HR)
13. If the question concerns a specific candidate application, suggest direct contact with HR

LANGUAGE REQUIREMENT (ONLY if answering, not if returning FORWARD_TO_HR):
- You MUST write the ENTIRE response in POLISH (Polish language)
- Use natural, conversational Polish
- Professional but friendly tone
- All content must be in Polish, including greetings, explanations, and closing
- The response should sound natural and human-like in Polish

REMEMBER:
- If you have RAG context with relevant information → Answer in Polish (you can be confident)
- If you can answer from basic knowledge → Answer in Polish
- If you cannot answer based on available context → Return ONLY "FORWARD_TO_HR" (no Polish text, no explanation)

ODPOWIEDŹ:
"""

    def _add_privacy_link(self, response_text: str) -> str:
        """Append the privacy-policy note right after the standard HR signature.

        If the signature is absent, the text is returned unchanged.
        """
        from config import settings

        if "Z wyrazami szacunku" not in response_text:
            return response_text

        # Prefer the dedicated privacy-policy URL, fall back to the company
        # website, then to the placeholder example URL.
        url = settings.privacy_policy_url or settings.company_website
        if url:
            privacy_text = f"\n\nInformacje o przetwarzaniu danych osobowych, w tym wykorzystaniu narzędzi AI znajdziesz na naszej stronie internetowe: {url}"
        else:
            privacy_text = '\n\nInformacje o przetwarzaniu danych osobowych, w tym wykorzystaniu narzędzi AI znajdziesz na naszej stronie internetowe: "https://www.example.com/privacy".'

        # Insert privacy text after signature
        return response_text.replace(
            "Z wyrazami szacunku\n\nDział HR", f"Z wyrazami szacunku\n\nDział HR{privacy_text}"
        )

generate_response(email_subject, email_body, sender_email, rag_context=None)

Generate a response to an email inquiry.

Parameters:

Name Type Description Default

email_subject (str) – Email subject. Required.
email_body (str) – Email body content. Required.
sender_email (str) – Sender email address. Required.
rag_context (Optional[List[Dict]]) – RAG context (list of documents from the vector database). Default: None.

Returns:

Type Description
Optional[str]

Generated response or None if the agent is not confident (then forward to HR)

Source code in agents/query_responder_agent.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def generate_response(
    self,
    email_subject: str,
    email_body: str,
    sender_email: str,
    rag_context: Optional[List[Dict]] = None,
) -> Optional[str]:
    """
    Generate a response to an email inquiry.

    Args:
        email_subject: Email subject
        email_body: Email body content
        sender_email: Sender email address
        rag_context: RAG context (list of documents from the vector database)

    Returns:
        Generated response or None if the agent is not confident (then forward to HR)
    """
    prompt = self._create_response_prompt(email_subject, email_body, sender_email, rag_context)

    system_instructions = "You are an HR department assistant. You respond to candidate inquiries in a professional, friendly, and helpful manner. You MUST always write responses in POLISH (Polish language). Always end your response with the signature 'Z wyrazami szacunku\n\nDział HR'. If you are not certain of the answer, return the special value: 'FORWARD_TO_HR'."

    try:
        reply, _ = self._chat(
            messages=[
                {"role": "system", "content": system_instructions},
                {"role": "user", "content": prompt},
            ],
        )

        reply = (reply or "").strip()

        # The escalation token anywhere in the reply means the model declined
        # to answer; hand the thread over to a human.
        if "FORWARD_TO_HR" in reply.upper():
            logger.info("Agent returned FORWARD_TO_HR signal - not confident enough to answer")
            return None

        # Polish hedging phrases that signal the model is guessing rather
        # than answering; any match forces escalation to HR.
        uncertainty_phrases = [
            "nie posiadamy szczegółowych informacji",
            "chociaż nie posiadamy",
            "nie mamy dokładnych informacji",
            "nie jesteśmy w stanie",
            "nie możemy udzielić",
            "przekazaliśmy do działu hr",
            "przekazaliśmy je do działu hr",
            "przekazaliśmy do hr",
            "przekazaliśmy je do hr",
            "dziękujemy za pańskie zapytanie. przekazaliśmy",
            "przekazaliśmy je do działu",
            "skontaktuje się z państwem w najkrótszym możliwym terminie",  # Typical phrase when forwarding to HR
        ]

        lowered = reply.lower()
        if any(phrase in lowered for phrase in uncertainty_phrases):
            logger.warning(
                f"Response contains uncertainty phrases - agent not confident enough. Phrases found: {[p for p in uncertainty_phrases if p in lowered]}"
            )
            return None

        # Confident answer: append the privacy-policy note after the signature.
        return self._add_privacy_link(reply)

    except Exception as e:
        logger.error(f"Error generating response: {str(e)}")
        return None

Bases: BaseAgent

Agent for validating RAG-generated responses to candidate inquiries.

Source code in agents/rag_response_validator_agent.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
class RAGResponseValidatorAgent(BaseAgent):
    """Agent for validating RAG-generated responses to candidate inquiries.

    Asks an LLM to judge a generated reply against the original email and the
    RAG source documents, expecting a strict JSON verdict that is parsed into
    a ValidationResult. Any failure anywhere in the pipeline yields a
    rejected verdict, so unvalidated content is never approved by accident.
    """

    def __init__(
        self,
        model_name: str = "gpt-4o",
        temperature: float = 0.0,
        api_key: Optional[str] = None,
        timeout: int = 60,
        max_retries: int = 2,
    ):
        """
        Initialize RAG Response Validator Agent using Azure OpenAI SDK (no LangChain).

        Args:
            model_name: Model/deployment name forwarded to BaseAgent.
            temperature: Sampling temperature; 0.0 keeps verdicts deterministic.
            api_key: Optional API key override.
            timeout: Request timeout in seconds.
            max_retries: Retry count for failed LLM calls.
        """
        super().__init__(model_name, temperature, api_key, timeout, max_retries)

        # Static format instructions describing ValidationResult JSON schema
        self.format_instructions = (
            "Return ONLY a single JSON object with the following structure:\n"
            "{\n"
            '  "status": "approved" | "rejected",\n'
            '  "is_approved": true | false,\n'
            '  "reasoning": "short explanation",\n'
            '  "issues_found": ["issue 1", "issue 2"],\n'
            '  "ethical_concerns": ["concern 1", "concern 2"],\n'
            '  "factual_errors": ["error 1", "error 2"],\n'
            '  "suggestions": ["suggestion 1", "suggestion 2"]\n'
            "}\n\n"
            "- Do NOT return a JSON schema or description.\n"
            "- Do NOT wrap the JSON in markdown code fences.\n"
            "- All list fields must be valid JSON arrays of strings."
        )

        # Store prompt template
        self.prompt_template = RAG_RESPONSE_VALIDATION_PROMPT

    def validate_rag_response(
        self,
        generated_response: str,
        email_subject: str,
        email_body: str,
        sender_email: str,
        rag_sources: List[Dict],
        validation_number: Optional[int] = None,
    ) -> ValidationResult:
        """
        Validate RAG-generated response using Azure OpenAI (no LangChain).

        Args:
            generated_response: The AI-generated response to validate
            email_subject: Original email subject
            email_body: Original email body
            sender_email: Sender's email address
            rag_sources: List of RAG source documents used to generate the response
            validation_number: Optional validation number for tracking

        Returns:
            ValidationResult object; a rejected result is returned on any
            processing error (fail-closed behavior).
        """
        logger.info(f"Validating RAG response for inquiry from {sender_email}")

        # Format RAG sources for prompt
        rag_sources_str = self._format_rag_sources(rag_sources)

        # Build prompt with format instructions
        prompt_text = self.prompt_template.format(
            generated_response=generated_response,
            email_subject=email_subject,
            email_body=email_body,
            sender_email=sender_email,
            rag_sources=rag_sources_str,
            format_instructions=self.format_instructions,
        )

        # Run validation via LLM adapter
        try:
            # Fix: removed a duplicated logger.info call that repeated the
            # "Validating RAG response..." message already logged above,
            # which produced two identical log lines per validation.
            raw_text, _response = self._chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a careful JSON-producing validation assistant. "
                            "You must follow the format_instructions exactly."
                        ),
                    },
                    {"role": "user", "content": prompt_text},
                ],
                max_completion_tokens=2000,
            )

            if not raw_text:
                raise ValueError("Empty response from RAG validator model")

            # Track model response (optional - can be extended to save to database)
            # For now, just log it
            logger.debug(f"Validation response: {raw_text[:500]}...")

            validation_result = self._parse_validation_from_text(raw_text)
            logger.info(
                f"Validation completed for {sender_email}: {validation_result.status.value} "
                f"(is_approved: {validation_result.is_approved})"
            )
            return validation_result
        except Exception as e:
            error_msg = f"Failed to validate RAG response: {str(e)}"
            logger.error(error_msg, exc_info=True)

            # On validation failure, reject by default for safety
            return ValidationResult(
                status=ValidationStatus.REJECTED,
                is_approved=False,
                reasoning=f"Validation process failed: {str(e)}. Response rejected for safety.",
                issues_found=["Validation process error"],
                ethical_concerns=[],
                factual_errors=[],
                suggestions=["Please review the validation process and try again."],
            )

    def _format_rag_sources(self, rag_sources: List[Dict]) -> str:
        """Format RAG sources into a numbered plain-text listing for the prompt.

        Args:
            rag_sources: Items with optional "metadata" (source name, score)
                and "document" (content) keys.

        Returns:
            Human-readable text block, or a placeholder when no sources exist.
        """
        if not rag_sources:
            return "No RAG sources provided."

        formatted = []
        for i, source in enumerate(rag_sources, 1):
            metadata = source.get("metadata", {})
            document = source.get("document", "")
            source_name = metadata.get("source", "Unknown source")
            score = metadata.get("score", None)

            source_text = f"--- Source {i}: {source_name} ---\n"
            # Robustness fix: only format the score when it is numeric, so a
            # malformed metadata entry (e.g. score stored as a string) cannot
            # crash the whole validation call with a format error.
            if isinstance(score, (int, float)):
                source_text += f"Relevance score: {score:.4f}\n"
            source_text += f"Content:\n{document}\n"
            formatted.append(source_text)

        return "\n".join(formatted)

    def _parse_validation_from_text(self, text: str) -> ValidationResult:
        """
        Parse ValidationResult from raw model text, handling common JSON issues.

        Args:
            text: Raw model output expected to contain one JSON object.

        Returns:
            ValidationResult with missing fields defaulted to safe values
            (status defaults to rejected, is_approved to False).

        Raises:
            ValueError: If the model returned no text at all.
        """
        if not text:
            raise ValueError("Empty response from model")

        # Parse JSON with fallback extraction
        data = parse_json_safe(text, fallback_to_extraction=True)

        # Coerce any scalar/None value into a list of strings.
        def ensure_str_list(value):
            if isinstance(value, list):
                return [str(v) for v in value]
            if not value:
                return []
            return [str(value)]

        # Map to ValidationResult, with sensible defaults. Note that "status"
        # and "is_approved" come from the model independently and may disagree;
        # downstream code reads is_approved.
        status_str = data.get("status", "rejected")
        status = (
            ValidationStatus(status_str)
            if status_str in ("approved", "rejected")
            else ValidationStatus.REJECTED
        )
        is_approved = bool(data.get("is_approved", False))
        reasoning = data.get("reasoning") or "No reasoning provided."

        return ValidationResult(
            status=status,
            is_approved=is_approved,
            reasoning=str(reasoning),
            issues_found=ensure_str_list(data.get("issues_found")),
            ethical_concerns=ensure_str_list(data.get("ethical_concerns")),
            factual_errors=ensure_str_list(data.get("factual_errors")),
            suggestions=ensure_str_list(data.get("suggestions")),
        )

__init__(model_name='gpt-4o', temperature=0.0, api_key=None, timeout=60, max_retries=2)

Initialize RAG Response Validator Agent using Azure OpenAI SDK (no LangChain).

Source code in agents/rag_response_validator_agent.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    model_name: str = "gpt-4o",
    temperature: float = 0.0,
    api_key: Optional[str] = None,
    timeout: int = 60,
    max_retries: int = 2,
):
    """Set up the validator agent on top of the Azure OpenAI base agent.

    Builds the static JSON-format instructions the model must follow and
    stores the validation prompt template for later formatting.
    """
    super().__init__(model_name, temperature, api_key, timeout, max_retries)

    # Assemble the static format instructions (the ValidationResult JSON
    # shape) line by line; the joined result matches the original literal.
    instruction_lines = [
        "Return ONLY a single JSON object with the following structure:",
        "{",
        '  "status": "approved" | "rejected",',
        '  "is_approved": true | false,',
        '  "reasoning": "short explanation",',
        '  "issues_found": ["issue 1", "issue 2"],',
        '  "ethical_concerns": ["concern 1", "concern 2"],',
        '  "factual_errors": ["error 1", "error 2"],',
        '  "suggestions": ["suggestion 1", "suggestion 2"]',
        "}",
        "",
        "- Do NOT return a JSON schema or description.",
        "- Do NOT wrap the JSON in markdown code fences.",
        "- All list fields must be valid JSON arrays of strings.",
    ]
    self.format_instructions = "\n".join(instruction_lines)

    # Keep the prompt template on the instance for validate_rag_response.
    self.prompt_template = RAG_RESPONSE_VALIDATION_PROMPT

validate_rag_response(generated_response, email_subject, email_body, sender_email, rag_sources, validation_number=None)

Validate RAG-generated response using Azure OpenAI (no LangChain).

Parameters:

Name Type Description Default
generated_response str

The AI-generated response to validate

required
email_subject str

Original email subject

required
email_body str

Original email body

required
sender_email str

Sender's email address

required
rag_sources List[Dict]

List of RAG source documents used to generate the response

required
validation_number Optional[int]

Optional validation number for tracking

None

Returns:

Type Description
ValidationResult

ValidationResult object

Source code in agents/rag_response_validator_agent.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def validate_rag_response(
    self,
    generated_response: str,
    email_subject: str,
    email_body: str,
    sender_email: str,
    rag_sources: List[Dict],
    validation_number: Optional[int] = None,
) -> ValidationResult:
    """
    Validate RAG-generated response using Azure OpenAI (no LangChain).

    Args:
        generated_response: The AI-generated response to validate
        email_subject: Original email subject
        email_body: Original email body
        sender_email: Sender's email address
        rag_sources: List of RAG source documents used to generate the response
        validation_number: Optional validation number for tracking

    Returns:
        ValidationResult object; a rejected result is returned on any
        processing error (fail-closed behavior).
    """
    logger.info(f"Validating RAG response for inquiry from {sender_email}")

    # Format RAG sources for prompt
    rag_sources_str = self._format_rag_sources(rag_sources)

    # Build prompt with format instructions
    prompt_text = self.prompt_template.format(
        generated_response=generated_response,
        email_subject=email_subject,
        email_body=email_body,
        sender_email=sender_email,
        rag_sources=rag_sources_str,
        format_instructions=self.format_instructions,
    )

    # Run validation via LLM adapter
    try:
        # Fix: removed a duplicated logger.info call that repeated the
        # "Validating RAG response..." message already logged above,
        # which produced two identical log lines per validation.
        raw_text, _response = self._chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful JSON-producing validation assistant. "
                        "You must follow the format_instructions exactly."
                    ),
                },
                {"role": "user", "content": prompt_text},
            ],
            max_completion_tokens=2000,
        )

        if not raw_text:
            raise ValueError("Empty response from RAG validator model")

        # Track model response (optional - can be extended to save to database)
        # For now, just log it
        logger.debug(f"Validation response: {raw_text[:500]}...")

        validation_result = self._parse_validation_from_text(raw_text)
        logger.info(
            f"Validation completed for {sender_email}: {validation_result.status.value} "
            f"(is_approved: {validation_result.is_approved})"
        )
        return validation_result
    except Exception as e:
        error_msg = f"Failed to validate RAG response: {str(e)}"
        logger.error(error_msg, exc_info=True)

        # On validation failure, reject by default for safety
        return ValidationResult(
            status=ValidationStatus.REJECTED,
            is_approved=False,
            reasoning=f"Validation process failed: {str(e)}. Response rejected for safety.",
            issues_found=["Validation process error"],
            ethical_concerns=[],
            factual_errors=[],
            suggestions=["Please review the validation process and try again."],
        )

Services

Service for generating candidate feedback.

Source code in services/feedback_service.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
class FeedbackService:
    """Service for generating candidate feedback.

    Orchestrates a FeedbackAgent that produces the feedback and, optionally,
    a validator/corrector agent pair that review and repair it in a bounded
    validate -> correct loop. Timing and success metrics are recorded via
    the module-level ``metrics_service`` when it is available.
    """

    def __init__(
        self,
        feedback_agent: FeedbackAgent,
        validator_agent: Optional[FeedbackValidatorAgent] = None,
        correction_agent: Optional[FeedbackCorrectionAgent] = None,
        max_validation_iterations: int = 3,
    ):
        """
        Initialize feedback service.

        Args:
            feedback_agent: Initialized FeedbackAgent instance
            validator_agent: Optional FeedbackValidatorAgent instance (if None, validation is skipped)
            correction_agent: Optional FeedbackCorrectionAgent instance (if None, correction is skipped)
            max_validation_iterations: Maximum number of validation-correction cycles (default: 3)
        """
        self.agent = feedback_agent
        self.validator = validator_agent
        self.corrector = correction_agent
        self.max_iterations = max_validation_iterations
        # Outcome of the most recent validation run; updated by
        # generate_feedback so callers can inspect it afterwards.
        self.validation_failed = False
        self.validation_error_info: Optional[Dict[str, Any]] = None
        logger.info("FeedbackService initialized")

    def generate_feedback(
        self,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        output_format: FeedbackFormat = FeedbackFormat.HTML,
        save_to_file: bool = False,
        output_dir: Optional[Path] = None,
        enable_validation: bool = True,
        candidate_id: Optional[int] = None,
        recruitment_stage: Optional[str] = None,
    ) -> tuple[CandidateFeedback, bool, Optional[Dict[str, Any]]]:
        """
        Generate personalized feedback for candidate with optional validation and correction.

        Args:
            cv_data: Parsed CV data
            hr_feedback: HR evaluation feedback
            job_offer: Optional job offer with requirements
            output_format: Feedback format (default: HTML)
            save_to_file: Whether to save feedback to file
            output_dir: Directory to save feedback file (default: current directory)
            enable_validation: Whether to enable validation and correction (default: True)
            candidate_id: Optional candidate ID recorded in metrics metadata and
                forwarded to the generation/validation agents
            recruitment_stage: Optional recruitment stage label recorded in
                metrics metadata and forwarded to the generation agent

        Returns:
            Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])
            - is_validated: True if feedback was validated successfully, False if validation failed
            - error_info: Dict with error details if validation failed, None otherwise

        Raises:
            LLMError: If feedback generation fails
        """
        logger.info(f"Generating feedback for: {cv_data.full_name} (format: {output_format.value})")

        # Start timing for overall feedback generation
        generation_start_time = time.time()

        try:
            # Step 1: Generate initial feedback (with timing)
            feedback_start_time = time.time()
            feedback = self.agent.generate_feedback(
                cv_data,
                hr_feedback,
                job_offer,
                output_format=output_format,
                candidate_id=candidate_id,
                recruitment_stage=recruitment_stage,
            )
            feedback_duration = time.time() - feedback_start_time

            # Record timing metric for feedback generation
            if metrics_service:
                metrics_service.record_timing(
                    metric_type="feedback_generation",
                    operation_name="generate_feedback",
                    duration_seconds=feedback_duration,
                    metadata={"candidate_id": candidate_id, "recruitment_stage": recruitment_stage},
                )

            logger.info(
                f"Successfully generated initial feedback for: {cv_data.full_name} (took {feedback_duration:.2f}s)"
            )

            # Step 2: Validate and correct if validation is enabled
            # (requires BOTH validator and corrector; otherwise the initial
            # feedback is treated as validated)
            is_validated = True
            validation_error_info = None
            if enable_validation and self.validator and self.corrector:
                feedback, is_validated, validation_error_info = self._validate_and_correct(
                    feedback, cv_data, hr_feedback, job_offer, candidate_id=candidate_id
                )
                # Store validation status for potential error saving
                self.validation_failed = not is_validated
                self.validation_error_info = validation_error_info

            # Calculate total generation time
            total_duration = time.time() - generation_start_time

            # Record total timing metric
            if metrics_service:
                metrics_service.record_timing(
                    metric_type="feedback_generation",
                    operation_name="total_feedback_generation",
                    duration_seconds=total_duration,
                    metadata={
                        "candidate_id": candidate_id,
                        "recruitment_stage": recruitment_stage,
                        "is_validated": is_validated,
                        "validation_enabled": enable_validation,
                    },
                )

                # Record success metric
                metrics_service.record_success(
                    metric_type="feedback_generation",
                    operation_name="generate_feedback",
                    success=is_validated,
                    metadata={"candidate_id": candidate_id},
                )

            logger.info(
                f"Feedback generation completed for: {cv_data.full_name} (total: {total_duration:.2f}s, validated: {is_validated})"
            )

            if save_to_file and feedback.html_content:
                html_path = self._save_feedback_html(feedback, cv_data, output_dir)
                logger.info(f"HTML feedback saved to: {html_path}")

            return feedback, is_validated, validation_error_info

        except Exception as e:
            error_msg = f"Failed to generate feedback: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise LLMError(error_msg) from e

    def _validate_and_correct(
        self,
        feedback: CandidateFeedback,
        cv_data: CVData,
        hr_feedback: HRFeedback,
        job_offer: Optional[JobOffer] = None,
        candidate_id: Optional[int] = None,
    ) -> tuple[CandidateFeedback, bool, Optional[Dict[str, Any]]]:
        """
        Validate feedback and correct if needed.

        Each loop iteration performs one validation and, if the feedback is
        rejected and iterations remain, one correction pass. The loop exits
        early with success as soon as a validation approves the feedback.

        Args:
            feedback: Initial CandidateFeedback object
            cv_data: Parsed CV data
            hr_feedback: HR evaluation feedback
            job_offer: Optional job offer information
            candidate_id: Optional candidate ID

        Returns:
            Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])
            - is_validated: True if feedback was approved, False if validation failed after max iterations
            - error_info: Dict with error details if validation failed, None otherwise
        """
        assert self.validator is not None and self.corrector is not None  # caller ensures both set
        current_feedback = feedback
        iteration = 0
        all_validation_results = []
        validation_start_time = time.time()

        while iteration < self.max_iterations:
            iteration += 1
            logger.info(
                f"Validation iteration {iteration}/{self.max_iterations} for {cv_data.full_name}"
            )

            # Validate current feedback (with timing)
            validation_iter_start = time.time()
            validation_result = self.validator.validate_feedback(
                current_feedback.html_content,
                cv_data,
                hr_feedback,
                job_offer,
                candidate_id=candidate_id,
                validation_number=iteration,
            )
            validation_iter_duration = time.time() - validation_iter_start

            # Record timing metric for validation
            if metrics_service:
                metrics_service.record_timing(
                    metric_type="validation",
                    operation_name="validate_feedback",
                    duration_seconds=validation_iter_duration,
                    metadata={
                        "candidate_id": candidate_id,
                        "iteration": iteration,
                        "is_approved": validation_result.is_approved,
                    },
                )

            # Store validation result
            all_validation_results.append(
                {
                    "iteration": iteration,
                    "is_approved": validation_result.is_approved,
                    "status": validation_result.status.value,
                    "reasoning": validation_result.reasoning,
                    "issues_found": validation_result.issues_found,
                    "ethical_concerns": validation_result.ethical_concerns,
                    "factual_errors": validation_result.factual_errors,
                    "suggestions": validation_result.suggestions,
                }
            )

            if validation_result.is_approved:
                logger.info(
                    f"Feedback approved after {iteration} iteration(s) for {cv_data.full_name}"
                )
                return current_feedback, True, None

            # Feedback was rejected, need to correct
            logger.warning(
                f"Feedback rejected in iteration {iteration} for {cv_data.full_name}. "
                f"Reason: {validation_result.reasoning}"
            )

            if iteration >= self.max_iterations:
                logger.error(
                    f"Maximum validation iterations ({self.max_iterations}) reached for {cv_data.full_name}. "
                    f"Validation failed - feedback will not be sent."
                )
                # Return error info with summary
                # (corrections are counted via the "correction_number" key
                # added to a validation entry after a successful correction)
                total_validations = len(all_validation_results)
                total_corrections = sum(
                    1 for r in all_validation_results if "correction_number" in r
                )
                error_info = {
                    "validation_results": all_validation_results,
                    "final_feedback_html": current_feedback.html_content,
                    "max_iterations_reached": True,
                    "total_validations": total_validations,
                    "total_corrections": total_corrections,
                    "last_validation_number": total_validations,
                    "last_correction_number": total_corrections,
                }
                return current_feedback, False, error_info

            # Correct the feedback (with timing)
            try:
                correction_start = time.time()
                corrected = self.corrector.correct_feedback(
                    current_feedback.html_content,
                    validation_result,
                    cv_data,
                    hr_feedback,
                    job_offer,
                    candidate_id=candidate_id,
                    correction_number=iteration,
                )
                correction_duration = time.time() - correction_start

                # Record timing metric for correction
                if metrics_service:
                    metrics_service.record_timing(
                        metric_type="validation",
                        operation_name="correct_feedback",
                        duration_seconds=correction_duration,
                        metadata={"candidate_id": candidate_id, "iteration": iteration},
                    )

                # Update current feedback with corrected version
                current_feedback = CandidateFeedback(html_content=corrected.html_content)

                # Add correction number to the last validation result
                if all_validation_results:
                    all_validation_results[-1]["correction_number"] = iteration
                    all_validation_results[-1]["corrections_made"] = corrected.corrections_made

                logger.info(
                    f"Feedback corrected for {cv_data.full_name} (correction #{iteration}). "
                    f"Corrections made: {', '.join(corrected.corrections_made)}"
                )
            except Exception as e:
                logger.error(f"Failed to correct feedback in iteration {iteration}: {str(e)}")
                # If correction fails, return error info
                total_validations = len(all_validation_results)
                total_corrections = sum(
                    1 for r in all_validation_results if "correction_number" in r
                )
                error_info = {
                    "validation_results": all_validation_results,
                    "final_feedback_html": current_feedback.html_content,
                    "correction_failed": True,
                    "correction_error": str(e),
                    "total_validations": total_validations,
                    "total_corrections": total_corrections,
                    "last_validation_number": total_validations,
                    "failed_correction_number": iteration,
                }
                return current_feedback, False, error_info

        # Should not reach here, but return current feedback as fallback
        total_validation_duration = time.time() - validation_start_time

        # Record total validation timing
        if metrics_service:
            metrics_service.record_timing(
                metric_type="validation",
                operation_name="total_validation_cycle",
                duration_seconds=total_validation_duration,
                metadata={"candidate_id": candidate_id, "iterations": len(all_validation_results)},
            )

        total_validations = len(all_validation_results)
        total_corrections = sum(1 for r in all_validation_results if "correction_number" in r)
        return (
            current_feedback,
            False,
            {
                "validation_results": all_validation_results,
                "total_validations": total_validations,
                "total_corrections": total_corrections,
            },
        )

    # _save_feedback method removed - we only use HTML format now

    def _save_feedback_html(
        self, feedback: CandidateFeedback, cv_data: CVData, output_dir: Optional[Path] = None
    ) -> Path:
        """Save feedback to HTML file.

        The filename is derived from the candidate's full name with spaces
        replaced by underscores.

        Args:
            feedback: Feedback whose HTML rendering should be written.
            cv_data: Parsed CV data (used for the filename).
            output_dir: Target directory; defaults to the current working
                directory.

        Returns:
            Path to the written HTML file.
        """
        if output_dir is None:
            # NOTE(review): the cwd branch does not call mkdir (the explicit
            # directory branch does) — presumably cwd always exists; confirm.
            output_dir = Path.cwd()
        else:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        filename = f"feedback_{cv_data.full_name.replace(' ', '_')}.html"
        output_path = output_dir / filename

        html_content = format_feedback_as_html(feedback)

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(html_content)

        return output_path

    def get_feedback_html(
        self,
        feedback: CandidateFeedback,
        consent_for_other_positions: Optional[bool] = None,
    ) -> str:
        """
        Get HTML formatted version of feedback.

        Args:
            feedback: CandidateFeedback object
            consent_for_other_positions: Optional boolean indicating if candidate consented to other positions
                (None is treated the same as False)

        Returns:
            HTML formatted string
        """
        return format_feedback_as_html(
            feedback,
            consent_for_other_positions=(
                consent_for_other_positions if consent_for_other_positions is not None else False
            ),
        )

__init__(feedback_agent, validator_agent=None, correction_agent=None, max_validation_iterations=3)

Initialize feedback service.

Parameters:

Name Type Description Default
feedback_agent FeedbackAgent

Initialized FeedbackAgent instance

required
validator_agent Optional[FeedbackValidatorAgent]

Optional FeedbackValidatorAgent instance (if None, validation is skipped)

None
correction_agent Optional[FeedbackCorrectionAgent]

Optional FeedbackCorrectionAgent instance (if None, correction is skipped)

None
max_validation_iterations int

Maximum number of validation-correction cycles (default: 3)

3
Source code in services/feedback_service.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def __init__(
    self,
    feedback_agent: FeedbackAgent,
    validator_agent: Optional[FeedbackValidatorAgent] = None,
    correction_agent: Optional[FeedbackCorrectionAgent] = None,
    max_validation_iterations: int = 3,
):
    """
    Build the feedback service around its collaborating agents.

    Args:
        feedback_agent: Initialized FeedbackAgent instance
        validator_agent: Optional FeedbackValidatorAgent instance (if None, validation is skipped)
        correction_agent: Optional FeedbackCorrectionAgent instance (if None, correction is skipped)
        max_validation_iterations: Maximum number of validation-correction cycles (default: 3)
    """
    # Outcome of the most recent validation run; updated by generate_feedback.
    self.validation_failed = False
    self.validation_error_info: Optional[Dict[str, Any]] = None
    # Collaborators: validator/corrector may be None, which disables that step.
    self.agent = feedback_agent
    self.validator = validator_agent
    self.corrector = correction_agent
    self.max_iterations = max_validation_iterations
    logger.info("FeedbackService initialized")

generate_feedback(cv_data, hr_feedback, job_offer=None, output_format=FeedbackFormat.HTML, save_to_file=False, output_dir=None, enable_validation=True, candidate_id=None, recruitment_stage=None)

Generate personalized feedback for candidate with optional validation and correction.

Parameters:

Name Type Description Default
cv_data CVData

Parsed CV data

required
hr_feedback HRFeedback

HR evaluation feedback

required
job_offer Optional[JobOffer]

Optional job offer with requirements

None
output_format FeedbackFormat

Feedback format (default: HTML)

HTML
save_to_file bool

Whether to save feedback to file

False
output_dir Optional[Path]

Directory to save feedback file (default: current directory)

None
enable_validation bool

Whether to enable validation and correction (default: True)

True
candidate_id Optional[int]

Optional candidate identifier attached to the agent call and recorded metrics

None
recruitment_stage Optional[str]

Optional recruitment stage label attached to the agent call and recorded metrics

None

Returns:

Type Description
CandidateFeedback

Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])

bool
  • is_validated: True if feedback was validated successfully, False if validation failed
Optional[Dict[str, Any]]
  • error_info: Dict with error details if validation failed, None otherwise

Raises:

Type Description
LLMError

If feedback generation fails

Source code in services/feedback_service.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def generate_feedback(
    self,
    cv_data: CVData,
    hr_feedback: HRFeedback,
    job_offer: Optional[JobOffer] = None,
    output_format: FeedbackFormat = FeedbackFormat.HTML,
    save_to_file: bool = False,
    output_dir: Optional[Path] = None,
    enable_validation: bool = True,
    candidate_id: Optional[int] = None,
    recruitment_stage: Optional[str] = None,
) -> tuple[CandidateFeedback, bool, Optional[Dict[str, Any]]]:
    """
    Generate personalized feedback for candidate with optional validation and correction.

    Args:
        cv_data: Parsed CV data
        hr_feedback: HR evaluation feedback
        job_offer: Optional job offer with requirements
        output_format: Feedback format (default: HTML)
        save_to_file: Whether to save feedback to file
        output_dir: Directory to save feedback file (default: current directory)
        enable_validation: Whether to enable validation and correction (default: True)
        candidate_id: Optional candidate identifier; forwarded to the agent and
            attached to recorded metrics metadata
        recruitment_stage: Optional recruitment stage label; forwarded to the
            agent and attached to recorded metrics metadata

    Returns:
        Tuple of (CandidateFeedback, is_validated: bool, error_info: Optional[Dict])
        - is_validated: True if feedback was validated successfully, False if validation failed
        - error_info: Dict with error details if validation failed, None otherwise

    Raises:
        LLMError: If feedback generation fails (any exception raised inside this
            method is wrapped into LLMError)
    """
    logger.info(f"Generating feedback for: {cv_data.full_name} (format: {output_format.value})")

    # Start timing for overall feedback generation
    generation_start_time = time.time()

    try:
        # Step 1: Generate initial feedback (with timing)
        feedback_start_time = time.time()
        feedback = self.agent.generate_feedback(
            cv_data,
            hr_feedback,
            job_offer,
            output_format=output_format,
            candidate_id=candidate_id,
            recruitment_stage=recruitment_stage,
        )
        feedback_duration = time.time() - feedback_start_time

        # Record timing metric for feedback generation
        # (metrics_service may be falsy/None, in which case metrics are skipped)
        if metrics_service:
            metrics_service.record_timing(
                metric_type="feedback_generation",
                operation_name="generate_feedback",
                duration_seconds=feedback_duration,
                metadata={"candidate_id": candidate_id, "recruitment_stage": recruitment_stage},
            )

        logger.info(
            f"Successfully generated initial feedback for: {cv_data.full_name} (took {feedback_duration:.2f}s)"
        )

        # Step 2: Validate and correct if validation is enabled
        # Note: validation runs only when BOTH a validator and a corrector
        # are configured on this service.
        is_validated = True
        validation_error_info = None
        if enable_validation and self.validator and self.corrector:
            feedback, is_validated, validation_error_info = self._validate_and_correct(
                feedback, cv_data, hr_feedback, job_offer, candidate_id=candidate_id
            )
            # Store validation status for potential error saving
            self.validation_failed = not is_validated
            self.validation_error_info = validation_error_info

        # Calculate total generation time
        total_duration = time.time() - generation_start_time

        # Record total timing metric
        if metrics_service:
            metrics_service.record_timing(
                metric_type="feedback_generation",
                operation_name="total_feedback_generation",
                duration_seconds=total_duration,
                metadata={
                    "candidate_id": candidate_id,
                    "recruitment_stage": recruitment_stage,
                    "is_validated": is_validated,
                    "validation_enabled": enable_validation,
                },
            )

            # Record success metric
            metrics_service.record_success(
                metric_type="feedback_generation",
                operation_name="generate_feedback",
                success=is_validated,
                metadata={"candidate_id": candidate_id},
            )

        logger.info(
            f"Feedback generation completed for: {cv_data.full_name} (total: {total_duration:.2f}s, validated: {is_validated})"
        )

        # Persist the HTML only when it was actually produced.
        if save_to_file and feedback.html_content:
            html_path = self._save_feedback_html(feedback, cv_data, output_dir)
            logger.info(f"HTML feedback saved to: {html_path}")

        return feedback, is_validated, validation_error_info

    except Exception as e:
        # Wrap every failure into the domain-level LLMError for callers.
        error_msg = f"Failed to generate feedback: {str(e)}"
        logger.error(error_msg, exc_info=True)
        raise LLMError(error_msg) from e

get_feedback_html(feedback, consent_for_other_positions=None)

Get HTML formatted version of feedback.

Parameters:

Name Type Description Default
feedback CandidateFeedback

CandidateFeedback object

required
consent_for_other_positions Optional[bool]

Optional boolean indicating if candidate consented to other positions

None

Returns:

Type Description
str

HTML formatted string

Source code in services/feedback_service.py
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
def get_feedback_html(
    self,
    feedback: CandidateFeedback,
    consent_for_other_positions: Optional[bool] = None,
) -> str:
    """
    Render the given feedback object as an HTML string.

    Args:
        feedback: CandidateFeedback object to render
        consent_for_other_positions: Whether the candidate agreed to be
            considered for other positions; None is treated as False

    Returns:
        HTML formatted string
    """
    # Normalize the tri-state flag: an explicit True/False is forwarded
    # unchanged, a missing value (None) falls back to False.
    consent = (
        False
        if consent_for_other_positions is None
        else consent_for_other_positions
    )
    return format_feedback_as_html(feedback, consent_for_other_positions=consent)

Service for processing CV documents.

Source code in services/cv_service.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class CVService:
    """Service for processing CV documents."""

    def __init__(self, parser_agent: CVParserAgent):
        """
        Initialize CV service.

        Args:
            parser_agent: Initialized CVParserAgent instance
        """
        self.parser = parser_agent
        logger.info("CVService initialized")

    def process_cv_from_pdf(
        self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
    ) -> CVData:
        """
        Process CV from PDF file with proper error handling.

        Args:
            pdf_path: Path to PDF file
            verbose: Enable verbose logging
            candidate_id: Optional candidate identifier forwarded to the parser agent

        Returns:
            Parsed CVData object

        Raises:
            PDFReadError: If PDF cannot be read or the path is not a .pdf file
            ValidationError: If CV data is invalid
            LLMError: If LLM processing fails
        """
        pdf_path_obj = Path(pdf_path)

        # Validate file exists
        if not pdf_path_obj.exists():
            error_msg = f"PDF file not found: {pdf_path}"
            logger.error(error_msg)
            raise PDFReadError(error_msg)

        # Idiomatic inequality check (was `not ... == ".pdf"`); same behavior.
        if pdf_path_obj.suffix.lower() != ".pdf":
            error_msg = f"File is not a PDF: {pdf_path}"
            logger.error(error_msg)
            raise PDFReadError(error_msg)

        logger.info(f"Processing CV from PDF: {pdf_path}")
        logger.info(f"File size: {pdf_path_obj.stat().st_size / 1024:.2f} KB")
        logger.info(f"Verbose mode: {verbose}")

        try:
            logger.info("Starting CV parsing process...")
            cv_data = self.parser.parse_cv_from_pdf(
                str(pdf_path_obj), verbose=verbose, candidate_id=candidate_id
            )
            logger.info(f"Successfully parsed CV for: {cv_data.full_name}")
            logger.info(
                f"Extracted data: {len(cv_data.education)} education entries, {len(cv_data.experience)} experience entries, {len(cv_data.skills)} skills"
            )
            return cv_data

        except Exception as e:
            error_msg = f"Failed to process CV: {str(e)}"
            logger.error(error_msg, exc_info=True)

            # NOTE(review): best-effort classification by message text is
            # fragile — consider catching concrete exception types instead.
            if "PDF" in str(e) or "pdf" in str(e):
                raise PDFReadError(error_msg) from e
            elif "validation" in str(e).lower() or "pydantic" in str(e).lower():
                raise ValidationError(error_msg) from e
            else:
                raise LLMError(error_msg) from e

__init__(parser_agent)

Initialize CV service.

Parameters:

Name Type Description Default
parser_agent CVParserAgent

Initialized CVParserAgent instance

required
Source code in services/cv_service.py
15
16
17
18
19
20
21
22
23
def __init__(self, parser_agent: CVParserAgent):
    """
    Initialize CV service.

    Args:
        parser_agent: Initialized CVParserAgent instance
    """
    self.parser = parser_agent
    logger.info("CVService initialized")

process_cv_from_pdf(pdf_path, verbose=False, candidate_id=None)

Process CV from PDF file with proper error handling.

Parameters:

Name Type Description Default
pdf_path str

Path to PDF file

required
verbose bool

Enable verbose logging

False
candidate_id Optional[int]

Optional candidate identifier forwarded to the parser agent

None

Returns:

Type Description
CVData

Parsed CVData object

Raises:

Type Description
PDFReadError

If PDF cannot be read

ValidationError

If CV data is invalid

LLMError

If LLM processing fails

Source code in services/cv_service.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def process_cv_from_pdf(
    self, pdf_path: str, verbose: bool = False, candidate_id: Optional[int] = None
) -> CVData:
    """
    Process CV from PDF file with proper error handling.

    Args:
        pdf_path: Path to PDF file
        verbose: Enable verbose logging
        candidate_id: Optional candidate identifier forwarded to the parser agent

    Returns:
        Parsed CVData object

    Raises:
        PDFReadError: If PDF cannot be read or the path is not a .pdf file
        ValidationError: If CV data is invalid
        LLMError: If LLM processing fails
    """
    pdf_path_obj = Path(pdf_path)

    # Validate file exists
    if not pdf_path_obj.exists():
        error_msg = f"PDF file not found: {pdf_path}"
        logger.error(error_msg)
        raise PDFReadError(error_msg)

    # Idiomatic inequality check (was `not ... == ".pdf"`); same behavior.
    if pdf_path_obj.suffix.lower() != ".pdf":
        error_msg = f"File is not a PDF: {pdf_path}"
        logger.error(error_msg)
        raise PDFReadError(error_msg)

    logger.info(f"Processing CV from PDF: {pdf_path}")
    logger.info(f"File size: {pdf_path_obj.stat().st_size / 1024:.2f} KB")
    logger.info(f"Verbose mode: {verbose}")

    try:
        logger.info("Starting CV parsing process...")
        cv_data = self.parser.parse_cv_from_pdf(
            str(pdf_path_obj), verbose=verbose, candidate_id=candidate_id
        )
        logger.info(f"Successfully parsed CV for: {cv_data.full_name}")
        logger.info(
            f"Extracted data: {len(cv_data.education)} education entries, {len(cv_data.experience)} experience entries, {len(cv_data.skills)} skills"
        )
        return cv_data

    except Exception as e:
        error_msg = f"Failed to process CV: {str(e)}"
        logger.error(error_msg, exc_info=True)

        # NOTE(review): best-effort classification by message text is
        # fragile — consider catching concrete exception types instead.
        if "PDF" in str(e) or "pdf" in str(e):
            raise PDFReadError(error_msg) from e
        elif "validation" in str(e).lower() or "pydantic" in str(e).lower():
            raise ValidationError(error_msg) from e
        else:
            raise LLMError(error_msg) from e

Service for routing classified emails to appropriate departments.

Source code in services/email_router.py
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
class EmailRouter:
    """Service for routing classified emails to appropriate departments."""

    def __init__(
        self,
        email_username: str,
        email_password: str,
        smtp_host: str,
        smtp_port: int,
        smtp_use_tls: bool = True,
        iod_email: Optional[str] = None,
        hr_email: Optional[str] = None,
    ):
        """
        Initialize email router.

        All AI collaborators (Azure OpenAI client, query classifier/responder,
        RAG validator) are initialized best-effort: any failure is logged and
        the corresponding attribute is left as None, which downstream methods
        treat as "forward to HR / use defaults" rather than raising.

        Args:
            email_username: Email username for sending emails
            email_password: Email password or app password
            smtp_host: SMTP server hostname
            smtp_port: SMTP server port
            smtp_use_tls: Whether to use TLS (True for port 587, False for port 465)
            iod_email: Email address for IOD department
            hr_email: Email address for HR department
        """
        self.email_username = email_username
        self.email_password = email_password
        self.smtp_host = smtp_host
        self.smtp_port = smtp_port
        self.smtp_use_tls = smtp_use_tls
        self.iod_email = iod_email
        self.hr_email = hr_email
        # Track processed emails to prevent duplicates (using Message-ID or UID)
        # Use insertion-ordered dict to preserve recency when trimming
        # (dicts keep insertion order since Python 3.7, so oldest keys come first).
        self.processed_emails: dict[str, None] = {}
        self.max_processed_emails = 1000  # Maximum number of processed emails to track

        # Initialize AI client for ticket priority/deadline determination.
        # Pre-declare all optional collaborators so attributes always exist,
        # even when the try-blocks below fail.
        self.ai_client: Any = None
        self.model_name: Optional[str] = None
        self.query_classifier: Optional[QueryClassifierAgent] = None
        self.query_responder: Optional[QueryResponderAgent] = None
        self.rag_validator: Optional[RAGResponseValidatorAgent] = None
        self.rag_db: Optional[QdrantRAG] = None
        try:
            # Imported lazily so the router still works when the SDK is absent.
            from openai import AzureOpenAI

            self.ai_client = AzureOpenAI(
                api_version=settings.azure_openai_api_version,
                azure_endpoint=settings.azure_openai_endpoint,
                api_key=settings.api_key,
            )
            self.model_name = settings.openai_model
            logger.info("AI client initialized for ticket priority/deadline determination")
        except Exception as e:
            logger.warning(
                f"Failed to initialize AI client: {e}. Will use default priority/deadline."
            )
            self.ai_client = None
            self.model_name = None

        # Initialize AI agents for query handling; on any failure all three are
        # reset together so _handle_general_query falls back to HR forwarding.
        try:
            self.query_classifier = QueryClassifierAgent(
                model_name=settings.azure_openai_gpt_deployment
            )
            self.query_responder = QueryResponderAgent(
                model_name=settings.azure_openai_gpt_deployment
            )
            self.rag_validator = RAGResponseValidatorAgent(
                model_name=settings.azure_openai_gpt_deployment
            )
            # Initialize RAG (will be lazy-loaded when needed)
            self.rag_db = None
            logger.info("Query classification agents initialized")
        except Exception as e:
            logger.warning(
                f"Failed to initialize query agents: {e}. Will forward all queries to HR."
            )
            self.query_classifier = None
            self.query_responder = None
            self.rag_validator = None
            self.rag_db = None

    def route_email(self, email_data: Dict, classification: str) -> bool:
        """
        Route email based on classification.

        Deduplicates by Message-ID (preferred), UID, or a composite of
        sender/subject/date, then dispatches to the matching handler.
        Note: an email is marked processed *before* routing, so a failed
        routing attempt is not retried on the next poll (by design, to
        avoid duplicate tickets/acknowledgments).

        Args:
            email_data: Email dictionary
            classification: Email classification ('iod', 'consent_yes', 'consent_no', 'default')

        Returns:
            True if routing was successful (or the email was a duplicate).
        """
        try:
            # Normalize identifiers to strings; str() of missing keys yields "".
            uid = str(email_data.get("uid", ""))
            message_id = str(email_data.get("message_id", ""))

            # Prefer Message-ID, fall back to UID, then a composite identifier
            # (the composite is always non-empty thanks to the defaults).
            if message_id:
                email_id = message_id
            elif uid:
                email_id = f"uid:{uid}"
            else:
                email_id = (
                    f"{email_data.get('from_email', 'unknown')}:"
                    f"{email_data.get('subject', 'no-subject')}:"
                    f"{email_data.get('date', 'no-date')}"
                )

            if email_id in self.processed_emails:
                logger.warning(
                    f"Email {email_id} already processed (classification: {classification}, "
                    f"from: {email_data.get('from_email', 'unknown')}), skipping duplicate"
                )
                return True  # Return True to avoid reprocessing
            self.processed_emails[email_id] = None
            logger.debug(
                f"Marking email {email_id} as processed (classification: {classification})"
            )
            # Keep only the newest half once the cap is exceeded; dict preserves
            # insertion order, so the slice drops the oldest entries first.
            if len(self.processed_emails) > self.max_processed_emails:
                keep_count = self.max_processed_emails // 2
                for stale_key in list(self.processed_emails)[:-keep_count]:
                    del self.processed_emails[stale_key]
                logger.debug(
                    f"Trimmed processed_emails to {len(self.processed_emails)} entries"
                )

            # Dispatch to the classification-specific handler.
            if classification == "iod":
                return self._route_to_iod(email_data)
            if classification in ("consent_yes", "consent_no"):
                return self._handle_consent(email_data, classification)
            # default - general query
            return self._handle_general_query(email_data)
        except Exception as e:
            logger.error(f"Error routing email: {str(e)}", exc_info=True)
            return False

    def _route_to_iod(self, email_data: Dict) -> bool:
        """Route email to IOD department, create ticket, and send acknowledgment to sender.

        Workflow:
          1. Find the most recent candidate matching the sender's address.
          2. Create a HIGH-priority IOD ticket with a 7-day deadline, unless a
             ticket already exists for this Message-ID (duplicate guard).
          3. Forward the email to the IOD mailbox (if configured) and send a
             Polish acknowledgment back to the sender.

        Returns:
            True if the email was already processed, or if either the forward
            to IOD or the sender acknowledgment succeeded; False on error.
        """
        try:
            # Imported locally to avoid import cycles with the database layer.
            from datetime import datetime, timedelta
            from database.models import (
                create_ticket,
                TicketDepartment,
                TicketPriority,
            )

            # Create ticket for IOD (defaults are Polish placeholder strings).
            from_email = email_data.get("from_email", "Nieznany")
            email_subject = email_data.get("subject", "Brak tematu")
            email_body = email_data.get("body", "Brak treści")

            # Try to find related candidate (get the latest one if multiple exist)
            related_candidate_id: Optional[int] = None
            try:
                from database.models import get_all_candidates

                # Get all candidates with this email and take the latest one (most recent created_at or highest ID)
                all_candidates = get_all_candidates()
                candidates_with_email = [
                    c for c in all_candidates if c.email.lower() == from_email.lower()
                ]
                if candidates_with_email:
                    # Sort by created_at DESC (most recent first), then by ID DESC as fallback
                    # Use a tuple for sorting: (created_at timestamp, id) - both descending
                    latest_candidate = max(
                        candidates_with_email,
                        key=lambda c: (c.created_at.timestamp() if c.created_at else 0, c.id or 0),
                    )
                    related_candidate_id = latest_candidate.id
                    logger.info(
                        f"Found candidate {related_candidate_id} ({latest_candidate.full_name}) for email {from_email} (selected latest from {len(candidates_with_email)} candidates)"
                    )
            except Exception as e:
                # Candidate linkage is best-effort; the ticket is created regardless.
                logger.warning(f"Error finding candidate for email {from_email}: {str(e)}")
                pass

            # Check if ticket already exists for this email (prevent duplicates)
            message_id = email_data.get("message_id")
            existing_ticket = None
            if message_id:
                try:
                    from database.models import get_all_tickets

                    all_tickets = get_all_tickets()
                    # Check if ticket with same related_email_id already exists
                    # NOTE: linear scan over all tickets — fine for small ticket
                    # volumes; a DB-side lookup would scale better.
                    existing_ticket = next(
                        (t for t in all_tickets if t.related_email_id == message_id), None
                    )
                    if existing_ticket:
                        logger.info(
                            f"Ticket #{existing_ticket.id} already exists for email {message_id}, skipping duplicate ticket creation"
                        )
                except Exception as e:
                    logger.warning(f"Error checking for existing ticket: {str(e)}")

            # Create ticket only if it doesn't exist
            if not existing_ticket:
                # Fixed HIGH priority / 7-day deadline for IOD incidents
                # (the AI-based _determine_ticket_priority_and_deadline is not
                # used on this path).
                deadline = datetime.now() + timedelta(days=7)
                ticket_description = (
                    f"Email od: {from_email}\n"
                    f"Temat: {email_subject}\n\n"
                    f"Treść wiadomości:\n{email_body}\n\n"
                    f"Message-ID: {message_id or 'Brak'}"
                )

                try:
                    ticket = create_ticket(
                        department=TicketDepartment.IOD,
                        priority=TicketPriority.HIGH,
                        description=ticket_description,
                        deadline=deadline,
                        related_candidate_id=related_candidate_id,
                        related_email_id=message_id,
                    )
                    logger.info(f"Created ticket #{ticket.id} for IOD incident from {from_email}")
                except Exception as e:
                    # Ticket creation failure is non-fatal: the email is still
                    # forwarded and the sender still gets an acknowledgment.
                    logger.warning(f"Failed to create ticket for IOD: {str(e)}")

            # Check if acknowledgment was already sent (by checking if ticket exists)
            # If ticket exists, it means this email was already processed
            if existing_ticket:
                logger.info(
                    f"Email {message_id or 'with UID'} already processed (ticket #{existing_ticket.id} exists), skipping duplicate processing"
                )
                return True  # Return True to avoid reprocessing

            # Forward email to IOD
            subject = f"[IOD] {email_subject}"
            # Ensure body is not empty - if email_body is empty, add default message
            if not email_body or not email_body.strip():
                email_body = "(Brak treści w oryginalnej wiadomości)"
                logger.warning(f"Empty email body from {from_email}, using default message")

            body = f"""
Email otrzymany od: {from_email}
Data: {email_data.get('date', 'Nieznana')}
Temat: {email_subject}

---
Treść wiadomości:
---

{email_body}

---
Oryginalny Message-ID: {email_data.get('message_id', 'Brak')}
In-Reply-To: {email_data.get('in_reply_to', 'Brak')}
"""

            # Forwarding is skipped (success=False) when no IOD mailbox is configured.
            success = (
                self._send_email(
                    to_email=self.iod_email, subject=subject, body=body, reply_to=from_email
                )
                if self.iod_email
                else False
            )

            # Send acknowledgment to original sender (always, even if forwarding to IOD failed)
            # Ticket was created, so candidate should be notified
            ack_subject = "Re: " + email_subject
            ack_body = """
Dziękujemy za kontakt.

Twoja wiadomość została przekazana do działu Inspektora Ochrony Danych (IOD) w celu rozpatrzenia.

Otrzymasz odpowiedź w najkrótszym możliwym terminie.

Z wyrazami szacunku

Dział HR
"""
            ack_success = self._send_email(to_email=from_email, subject=ack_subject, body=ack_body)

            # Log all four forward/ack outcome combinations explicitly.
            if success:
                if ack_success:
                    logger.info(f"Email routed to IOD and acknowledgment sent to {from_email}")
                else:
                    logger.warning(
                        f"Email routed to IOD, but failed to send acknowledgment to {from_email}"
                    )
            else:
                if ack_success:
                    logger.warning(
                        f"Failed to send email to IOD for {from_email}, but acknowledgment sent to candidate"
                    )
                else:
                    logger.error(f"Failed to send email to IOD and acknowledgment to {from_email}")

            # Return True if either forwarding or acknowledgment succeeded (ticket was created)
            return success or ack_success

        except Exception as e:
            logger.error(f"Error routing email to IOD: {str(e)}", exc_info=True)
            return False

    def _handle_consent(self, email_data: Dict, classification: str) -> bool:
        """Handle consent email and update database.

        Resolution order for the candidate:
          1. By Message-ID of the reply (In-Reply-To / Message-ID matched
             against previously sent feedback emails).
          2. By sender email address.
          3. If neither matches, the email is forwarded to HR for manual handling.

        On success the candidate's consent_for_other_positions flag is updated
        (True for 'consent_yes', False otherwise), the candidate receives a
        Polish acknowledgment, and HR is notified of the change.

        Args:
            email_data: Parsed email dictionary.
            classification: Either 'consent_yes' or 'consent_no'.

        Returns:
            True if consent was recorded (or the email was handed to HR
            successfully); False on error or when no sender address exists.
        """
        try:
            from_email = email_data.get("from_email", "")
            if not from_email:
                logger.warning("Cannot handle consent - no email address")
                return False

            # 1) Try to link candidate by Message-ID (reply to our feedback)
            candidate = None
            in_reply_to = email_data.get("in_reply_to") or email_data.get("message_id")
            feedback_email = None

            if in_reply_to:
                # Normalize Message-ID (keep as stored in DB, usually with <>)
                possible_ids = {in_reply_to.strip()}
                # Sometimes the client strips angle brackets – try without them too
                if in_reply_to.startswith("<") and in_reply_to.endswith(">"):
                    possible_ids.add(in_reply_to[1:-1].strip())

                for mid in possible_ids:
                    try:
                        feedback_email = get_feedback_email_by_message_id(mid)
                        if feedback_email:
                            break
                    except Exception as e:
                        logger.warning(f"Error looking up feedback_email by Message-ID={mid}: {e}")

            if feedback_email:
                # We have linked feedback → find candidate by candidate_id
                try:
                    candidate = get_candidate_by_id(feedback_email.candidate_id)
                    if candidate:
                        logger.info(
                            f"Consent email linked to candidate {candidate.id} via Message-ID "
                            f"({feedback_email.message_id})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Error loading candidate by id {feedback_email.candidate_id}: {e}"
                    )

            # 2) If Message-ID lookup failed, try by email address
            if not candidate:
                candidate = get_candidate_by_email(from_email)

            if not candidate:
                logger.warning(
                    f"Cannot find candidate for consent handling. "
                    f"from_email={from_email}, in_reply_to={in_reply_to}"
                )
                # Still route to HR for manual handling
                return self._route_to_hr(email_data)

            # Update consent in database
            consent_value = classification == "consent_yes"
            if candidate.id is not None:
                update_candidate(candidate.id, consent_for_other_positions=consent_value)

            logger.info(
                f"Updated consent_for_other_positions for candidate {candidate.id} ({from_email}) to {consent_value}"
            )

            # Send acknowledgment (Polish text differs for yes/no consent)
            ack_subject = "Re: " + email_data.get("subject", "Twoja wiadomość")
            if consent_value:
                ack_body = """
Dziękujemy za kontakt.

Zarejestrowaliśmy Twoją zgodę na rozważenie Twojej kandydatury w kontekście innych stanowisk.

Jeśli znajdziemy odpowiednią dla Ciebie ofertę, skontaktujemy się z Tobą niezwłocznie.

W razie chęci zmiany decyzji prosimy o poinformowanie nas, odpowiadając na tę wiadomość.

Z wyrazami szacunku

Dział HR
"""
            else:
                ack_body = """
Dziękujemy za kontakt.

Zarejestrowaliśmy informację, że nie wyrażasz zgody na rozważenie Twojej kandydatury w kontekście innych stanowisk.

W razie chęci zmiany decyzji prosimy o poinformowanie nas, odpowiadając na tę wiadomość.

Z wyrazami szacunku

Dział HR
"""

            # Acknowledgment send failure is intentionally ignored: consent has
            # already been persisted at this point.
            self._send_email(to_email=from_email, subject=ack_subject, body=ack_body)

            # Notify HR about consent change (fuller picture of the situation)
            try:
                hr_subject = f"[HR] Zmiana zgody kandydata na inne rekrutacje – {candidate.first_name} {candidate.last_name}"
                hr_body = f"""
Kandydat: {candidate.first_name} {candidate.last_name}
Email: {candidate.email}
ID kandydata w systemie: {candidate.id}

Nowa wartość zgody na inne rekrutacje: {"TAK" if consent_value else "NIE"}

Oryginalna wiadomość kandydata:
--------------------------------
Temat: {email_data.get('subject', 'Brak tematu')}

{email_data.get('body', 'Brak treści')}
"""
                if self.hr_email:
                    self._send_email(to_email=self.hr_email, subject=hr_subject, body=hr_body)
                logger.info(f"HR notified about consent change for candidate {candidate.id}")
            except Exception as e:
                logger.warning(f"Failed to notify HR about consent change: {e}")

            logger.info(f"Consent handled and acknowledgment sent to {from_email}")
            return True

        except Exception as e:
            logger.error(f"Error handling consent: {str(e)}", exc_info=True)
            return False

    def _get_rag_db(self) -> Optional[QdrantRAG]:
        """Lazily build the RAG database on first use; return the cached instance after."""
        if self.rag_db is not None:
            return self.rag_db

        try:
            # Shared constructor arguments for either connection mode.
            base_kwargs = dict(
                collection_name="recruitment_knowledge_base",
                use_azure_openai=True,
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
                azure_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
                azure_deployment=os.getenv(
                    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-small"
                ),
                azure_api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview"),
            )

            # Prefer Qdrant server over local path (avoids locking issues);
            # the host defaults to the Docker service name.
            server_host = os.getenv("QDRANT_HOST", "qdrant")
            server_port = int(os.getenv("QDRANT_PORT", "6333"))
            local_path = os.getenv("QDRANT_PATH")  # Optional, only if server not available

            if not base_kwargs["azure_api_key"]:
                logger.warning("Azure OpenAI API key not set, RAG unavailable")
            elif server_host:
                self.rag_db = QdrantRAG(
                    qdrant_host=server_host,
                    qdrant_port=server_port,
                    **base_kwargs,
                )
                logger.info("RAG database initialized")
            elif local_path:
                self.rag_db = QdrantRAG(
                    qdrant_path=local_path,
                    **base_kwargs,
                )
                logger.info("RAG database initialized")
            else:
                logger.warning("Neither QDRANT_HOST nor QDRANT_PATH set, RAG unavailable")
                return None
        except Exception as e:
            logger.warning(f"Failed to initialize RAG database: {e}")
        return self.rag_db

    def _handle_general_query(self, email_data: Dict) -> bool:
        """
        Handle general query email - classify and respond using AI or forward to HR.

        Workflow:
        1. QueryClassifierAgent decides: direct_answer, rag_answer, or forward_to_hr
        2. If direct_answer: QueryResponderAgent generates response from basic knowledge
        3. If rag_answer: QueryResponderAgent uses RAG to find relevant context, then generates response
        4. If forward_to_hr: Forward email to HR department

        Returns:
            True when the query was answered or successfully handed to HR;
            the HR fallback is also used for any unexpected exception.
        """
        try:
            from_email = email_data.get("from_email", "")
            email_subject = email_data.get("subject", "")
            email_body = email_data.get("body", "")

            if not self.query_classifier or not self.query_responder:
                # Fallback: forward to HR if agents not initialized
                logger.warning("Query agents not initialized, forwarding to HR")
                return self._route_to_hr(email_data)
            # Redundant at runtime (guarded above); narrows Optional for type checkers.
            assert self.query_classifier is not None and self.query_responder is not None

            # Step 1: Classify query
            logger.info(f"Classifying query from {from_email}")
            classification_result = self.query_classifier.classify_query(
                email_subject, email_body, from_email
            )

            action = classification_result.get("action", "forward_to_hr")
            reasoning = classification_result.get("reasoning", "")
            confidence = classification_result.get("confidence", 0.0)

            # Coerce confidence to float (the model may return it as a string)
            try:
                confidence = float(confidence)
            except (ValueError, TypeError):
                confidence = 0.0

            logger.info(
                f"Query classified as: {action} (confidence: {confidence:.2f}, reasoning: {reasoning})"
            )

            # CRITICAL VALIDATION: Different thresholds for different actions
            # For rag_answer: allow trying even with lower confidence (0.5+), as RAG may find the answer
            # For direct_answer: require higher confidence (0.85+)
            # For forward_to_hr or confidence < 0.5: always forward to HR
            if action == "rag_answer" and confidence < 0.5:
                logger.warning(
                    f"Confidence ({confidence:.2f}) < 0.50 for rag_answer, forwarding to HR instead"
                )
                action = "forward_to_hr"
                reasoning = f"Poziom pewności ({confidence:.2f}) jest zbyt niski dla rag_answer. {reasoning}"
            elif action == "direct_answer" and confidence < 0.7:
                # Only downgrade direct_answer to forward_to_hr on low confidence; leave forward_to_hr unchanged
                logger.warning(
                    f"Confidence ({confidence:.2f}) < 0.70 for direct_answer, forwarding to HR instead"
                )
                action = "forward_to_hr"
                reasoning = f"Poziom pewności ({confidence:.2f}) jest mniejszy niż wymagany (0.70). {reasoning}"

            # Step 2: Handle based on classification
            if action == "forward_to_hr":
                # Forward to HR - don't auto-respond
                logger.info(f"Forwarding query to HR: {reasoning}")
                return self._route_to_hr(email_data)

            elif action == "direct_answer":
                # Generate response from basic knowledge
                # ADDITIONAL VALIDATION: Check confidence again before responding
                # NOTE(review): combined with the 0.70 gate above, the effective
                # direct_answer threshold is 0.85 — confidences in [0.70, 0.85)
                # pass the first gate but are still forwarded to HR here.
                if confidence < 0.85:
                    logger.warning(
                        f"Confidence ({confidence:.2f}) < 0.85 for direct_answer, forwarding to HR"
                    )
                    return self._route_to_hr(email_data)

                logger.info(
                    f"Generating direct answer from basic knowledge (confidence = {confidence:.2f})"
                )
                response = self.query_responder.generate_response(
                    email_subject, email_body, from_email, rag_context=None
                )

                # VALIDATION: If agent returned None, it is not confident – forward to HR
                if response is None:
                    logger.warning("Agent returned None (not confident enough), forwarding to HR")
                    return self._route_to_hr(email_data)

                # Send response to candidate
                reply_subject = (
                    f"Re: {email_subject}" if email_subject else "Odpowiedź na Twoje zapytanie"
                )
                success = self._send_email(
                    to_email=from_email, subject=reply_subject, body=response
                )

                if success:
                    logger.info(f"Direct answer sent to {from_email}")
                    # Also notify HR about the auto-response
                    self._notify_hr_about_auto_response(email_data, response, "direct_answer")

                return success

            elif action == "rag_answer":
                # For rag_answer: allow trying if confidence >= 0.5 (already validated earlier)
                # RAG may find the answer even if classification was uncertain
                logger.info(f"Generating answer using RAG (confidence = {confidence:.2f})")
                rag_db = self._get_rag_db()

                if not rag_db:
                    logger.warning("RAG database not available, forwarding to HR")
                    return self._route_to_hr(email_data)

                # Search RAG for relevant context
                query_text = f"{email_subject} {email_body}".strip()
                rag_results = rag_db.search(query_text, n_results=3)

                logger.info(f"Found {len(rag_results)} relevant documents from RAG")

                # VALIDATION: Check whether RAG found relevant documents
                if not rag_results or len(rag_results) == 0:
                    logger.warning("No relevant documents found in RAG, forwarding to HR")
                    return self._route_to_hr(email_data)

                # Generate response with RAG context
                response = self.query_responder.generate_response(
                    email_subject, email_body, from_email, rag_context=rag_results
                )

                # VALIDATION: If agent returned None, it is not confident – forward to HR
                if response is None:
                    logger.warning("Agent returned None (not confident enough), forwarding to HR")
                    return self._route_to_hr(email_data)

                # VALIDATION: Check quality of RAG response before sending
                if self.rag_validator:
                    logger.info("Validating RAG-generated response before sending")
                    validation_result = self.rag_validator.validate_rag_response(
                        generated_response=response,
                        email_subject=email_subject,
                        email_body=email_body,
                        sender_email=from_email,
                        rag_sources=rag_results,
                    )

                    if not validation_result.is_approved:
                        logger.warning(
                            f"RAG response validation FAILED for {from_email}: {validation_result.reasoning}. "
                            f"Issues: {validation_result.issues_found}, "
                            f"Factual errors: {validation_result.factual_errors}. "
                            f"Forwarding to HR instead."
                        )
                        # Forward to HR with validation details
                        return self._route_to_hr(email_data)
                    else:
                        logger.info(
                            f"RAG response validation PASSED for {from_email}: {validation_result.reasoning}"
                        )
                else:
                    logger.warning(
                        "RAG validator not available, skipping validation (sending response anyway)"
                    )

                # Send response to candidate
                reply_subject = (
                    f"Re: {email_subject}" if email_subject else "Odpowiedź na Twoje zapytanie"
                )
                success = self._send_email(
                    to_email=from_email, subject=reply_subject, body=response
                )

                if success:
                    logger.info(f"RAG-based answer sent to {from_email}")
                    # Also notify HR about the auto-response
                    self._notify_hr_about_auto_response(
                        email_data, response, "rag_answer", rag_results
                    )

                return success

            else:
                # Unknown action - forward to HR
                logger.warning(f"Unknown action: {action}, forwarding to HR")
                return self._route_to_hr(email_data)

        except Exception as e:
            logger.error(f"Error handling general query: {str(e)}", exc_info=True)
            # On error, forward to HR for manual handling
            return self._route_to_hr(email_data)

    def _notify_hr_about_auto_response(
        self,
        email_data: Dict,
        response: str,
        response_type: str,
        rag_context: Optional[list] = None,
    ):
        """Notify HR about auto-generated response."""
        try:
            hr_subject = f"[HR-AUTO] Automatyczna odpowiedź wysłana do {email_data.get('from_email', 'Nieznany')}"
            hr_body = f"""
Automatyczna odpowiedź została wygenerowana i wysłana do kandydata.

TYP ODPOWIEDZI: {response_type.upper()}
Kandydat: {email_data.get('from_email', 'Nieznany')}
Temat oryginalnej wiadomości: {email_data.get('subject', 'Brak tematu')}

---
ORYGINALNA WIADOMOŚĆ KANDYDATA:
---
{email_data.get('body', 'Brak treści')}

---
WYGENEROWANA ODPOWIEDŹ:
---
{response}
"""

            if rag_context:
                hr_body += "\n\n---\nUŻYTE ŹRÓDŁA Z RAG:\n---\n"
                for i, doc in enumerate(rag_context, 1):
                    hr_body += f"\n{i}. Źródło: {doc.get('metadata', {}).get('source', 'N/A')}\n"
                    hr_body += f"   Fragment: {doc.get('document', '')[:200]}...\n"

            hr_body += f"\n\n---\nMessage-ID: {email_data.get('message_id', 'Brak')}\n"

            if self.hr_email:
                self._send_email(to_email=self.hr_email, subject=hr_subject, body=hr_body)
            logger.info(f"HR notified about auto-response ({response_type})")
        except Exception as e:
            logger.warning(f"Failed to notify HR about auto-response: {e}")

    def _determine_ticket_priority_and_deadline(
        self, email_subject: str, email_body: str, from_email: str
    ) -> tuple:
        """
        Determine ticket priority and deadline using AI based on email content.

        Returns:
            tuple: (priority: TicketPriority, deadline_days: int) where deadline_days is between 5-14
        """
        try:
            from database.models import TicketPriority
            import json

            # Note: email_subject, from_email, email_body are user-controlled; consider sanitizing
            # or passing as a separate user message to reduce prompt injection risk in production.
            prompt = f"""
Analyze the following email inquiry and determine the appropriate ticket priority and deadline.

EMAIL:
Subject: {email_subject}
From: {from_email}
Content: {email_body}

TASK:
Determine:
1. Priority: LOW, MEDIUM, HIGH, or URGENT
   - URGENT: Time-sensitive issues, complaints, urgent requests, critical problems
   - HIGH: Important questions, status inquiries, significant concerns
   - MEDIUM: Standard questions, general inquiries, moderate importance
   - LOW: Simple questions, informational requests, low urgency

2. Deadline (in days, between 5-14 days):
   - URGENT: 5-7 days
   - HIGH: 7-10 days
   - MEDIUM: 10-12 days
   - LOW: 12-14 days

Return JSON in format:
{{
    "priority": "LOW" | "MEDIUM" | "HIGH" | "URGENT",
    "deadline_days": <number between 5-14>,
    "reasoning": "Brief explanation of the decision"
}}
"""

            if not self.ai_client or not self.model_name:
                logger.warning("AI client not available, using default priority/deadline")
                from database.models import TicketPriority

                return TicketPriority.MEDIUM, 10

            response = self.ai_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert in prioritizing HR inquiries and determining appropriate response deadlines.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                response_format={"type": "json_object"},
            )

            content = response.choices[0].message.content
            result_text = (content or "").strip()
            result = json.loads(result_text)

            priority_str = result.get("priority", "MEDIUM").upper()
            deadline_days = result.get("deadline_days", 10)

            # Validate and clamp deadline_days to 5-14 range
            try:
                deadline_days = int(deadline_days)
                deadline_days = max(5, min(14, deadline_days))  # Clamp to 5-14
            except (ValueError, TypeError):
                deadline_days = 10  # Default

            # Map priority string to enum
            priority_map = {
                "LOW": TicketPriority.LOW,
                "MEDIUM": TicketPriority.MEDIUM,
                "HIGH": TicketPriority.HIGH,
                "URGENT": TicketPriority.URGENT,
            }
            priority = priority_map.get(priority_str, TicketPriority.MEDIUM)

            reasoning = result.get("reasoning", "No reasoning provided")
            logger.info(
                f"AI determined priority: {priority_str}, deadline: {deadline_days} days. Reasoning: {reasoning}"
            )

            return priority, deadline_days

        except Exception as e:
            logger.warning(
                f"Error determining ticket priority/deadline with AI: {str(e)}. Using defaults."
            )
            from database.models import TicketPriority

            return TicketPriority.MEDIUM, 10  # Default fallback

    def _route_to_hr(self, email_data: Dict) -> bool:
        """Route email to HR department, create ticket, and send email.

        Steps, in order:
          1. Skip if this message was already forwarded to HR (synthetic
             "hr_forward" dedup key in self.processed_emails).
          2. Look up the newest candidate record matching the sender address.
          3. Create an HR ticket with AI-determined priority/deadline, unless
             a ticket already exists for this Message-ID.
          4. Forward the message to HR and send an acknowledgment to the
             original sender.

        Args:
            email_data: Parsed email dict; keys read here: from_email,
                message_id, subject, body, date, uid, in_reply_to.

        Returns:
            True when the email was already handled, or when either the HR
            forward or the sender acknowledgment was sent; False otherwise.
        """
        try:
            from datetime import datetime, timedelta
            from database.models import (
                create_ticket,
                TicketDepartment,
                get_position_by_id,
                get_all_tickets,
                get_all_candidates,
            )

            # Defaults are Polish user-facing placeholders ("Unknown",
            # "No subject", "No content") used in outgoing messages.
            from_email = email_data.get("from_email", "Nieznany")
            message_id = email_data.get("message_id")
            email_subject = email_data.get("subject", "Brak tematu")
            email_body = email_data.get("body", "Brak treści")

            # Check for duplicates - if email was already forwarded to HR (by checking processed_emails)
            # Create unique identifier for HR forwarding
            hr_forward_id = f"hr_forward:{message_id or email_data.get('uid', '')}:{from_email}"
            if hr_forward_id in self.processed_emails:
                logger.warning(
                    f"Email {message_id or 'with UID'} from {from_email} already forwarded to HR, "
                    f"skipping duplicate"
                )
                return True  # Return True to avoid reprocessing

            # Mark as processed BEFORE any side effects so a concurrent/retry
            # pass cannot forward the same message twice.
            self.processed_emails[hr_forward_id] = None
            logger.debug(f"Marking HR forward {hr_forward_id} as processed")

            # Try to find related candidate (get the latest one if multiple exist)
            related_candidate_id = None
            candidate = None
            try:
                # Get all candidates with this email and take the latest one
                all_candidates = get_all_candidates()
                candidates_with_email = [
                    c for c in all_candidates if c.email.lower() == from_email.lower()
                ]
                if candidates_with_email:
                    # "Latest" = newest created_at, ties broken by highest id.
                    latest_candidate = max(
                        candidates_with_email,
                        key=lambda c: (c.created_at.timestamp() if c.created_at else 0, c.id or 0),
                    )
                    related_candidate_id = latest_candidate.id
                    candidate = latest_candidate
                    logger.info(
                        f"Found candidate {related_candidate_id} ({latest_candidate.full_name}) for email {from_email}"
                    )
            except Exception as e:
                # Best-effort lookup: ticket/forwarding still proceed without
                # a candidate link.
                logger.warning(f"Error finding candidate for email {from_email}: {str(e)}")

            # Check if ticket already exists for this email (prevent duplicates)
            existing_ticket = None
            if message_id:
                try:
                    all_tickets = get_all_tickets()
                    existing_ticket = next(
                        (t for t in all_tickets if t.related_email_id == message_id), None
                    )
                    if existing_ticket:
                        logger.info(
                            f"Ticket #{existing_ticket.id} already exists for email {message_id}, skipping duplicate ticket creation"
                        )
                except Exception as e:
                    logger.warning(f"Error checking for existing ticket: {str(e)}")

            # Create ticket only if it doesn't exist
            if not existing_ticket:
                # Determine priority and deadline using AI
                priority, deadline_days = self._determine_ticket_priority_and_deadline(
                    email_subject, email_body, from_email
                )
                deadline = datetime.now() + timedelta(days=deadline_days)

                # Ticket description is Polish user-facing text
                # ("Email from", "Subject", "Message content").
                ticket_description = (
                    f"Email od: {from_email}\n"
                    f"Temat: {email_subject}\n\n"
                    f"Treść wiadomości:\n{email_body}\n\n"
                    f"Message-ID: {message_id or 'Brak'}"
                )

                try:
                    ticket = create_ticket(
                        department=TicketDepartment.HR,
                        priority=priority,
                        description=ticket_description,
                        deadline=deadline,
                        related_candidate_id=related_candidate_id,
                        related_email_id=message_id,
                    )
                    logger.info(
                        f"Created ticket #{ticket.id} for HR inquiry from {from_email} (priority: {priority.value}, deadline: {deadline_days} days)"
                    )
                except Exception as e:
                    # Ticket creation failure is non-fatal; forwarding and
                    # acknowledgment below still run.
                    logger.warning(f"Failed to create ticket for HR: {str(e)}")

            # If ticket exists, it means this email was already processed
            if existing_ticket:
                logger.info(
                    f"Email {message_id or 'with UID'} already processed (ticket #{existing_ticket.id} exists), skipping duplicate processing"
                )
                return True  # Return True to avoid reprocessing

            subject = f"[HR] {email_subject}"

            # Check if candidate exists in the database (for email content)
            candidate_info = ""
            if candidate:
                try:
                    position_info = ""
                    if candidate.position_id:
                        try:
                            position = get_position_by_id(candidate.position_id)
                            if position:
                                position_info = (
                                    f"\nStanowisko: {position.title} ({position.company})"
                                )
                        except Exception as e:
                            logger.warning(f"Error loading position {candidate.position_id}: {e}")

                    # Format candidate information for the HR email body.
                    # Stage/status enums are mapped to Polish display labels;
                    # unknown values fall back to their raw string form.
                    stage_display = {
                        "initial_screening": "Pierwsza selekcja",
                        "hr_interview": "Rozmowa HR",
                        "technical_assessment": "Weryfikacja wiedzy",
                        "final_interview": "Rozmowa końcowa",
                        "offer": "Oferta",
                    }.get(
                        (
                            candidate.stage.value
                            if hasattr(candidate.stage, "value")
                            else str(candidate.stage)
                        ),
                        str(candidate.stage),
                    )

                    status_display = {
                        "in_progress": "W trakcie",
                        "accepted": "Zaakceptowany",
                        "rejected": "Odrzucony",
                    }.get(
                        (
                            candidate.status.value
                            if hasattr(candidate.status, "value")
                            else str(candidate.status)
                        ),
                        str(candidate.status),
                    )

                    # Consent line is only included when the tri-state flag
                    # has been explicitly set (None = not asked yet).
                    consent_info = ""
                    if candidate.consent_for_other_positions is not None:
                        consent_info = f"\nZgoda na inne rekrutacje: {'Tak' if candidate.consent_for_other_positions else 'Nie'}"

                    candidate_info = f"""
---
INFORMACJE O KANDYDACIE (dane z bazy):
---
ID kandydata: {candidate.id}
Imię i nazwisko: {candidate.full_name}
Email: {candidate.email}
Status: {status_display}
Etap rekrutacji: {stage_display}{position_info}{consent_info}
Link do profilu: http://localhost:5000/candidate/{candidate.id} (w środowisku produkcyjnym należy podać właściwy adres URL)
---
"""
                    logger.info(
                        f"Found candidate {candidate.id} ({candidate.full_name}) for email {from_email}"
                    )
                except Exception as e:
                    logger.warning(f"Error formatting candidate info: {str(e)}")
                    candidate_info = ""
            else:
                logger.debug(f"No candidate found in database for email {from_email}")

            # Ensure body is not empty - if email_body is empty, add default message
            # (deliberately re-reads the raw body, overriding the earlier
            # 'Brak treści' default, so the placeholder below is used instead)
            email_body = email_data.get("body", "")
            if not email_body or not email_body.strip():
                email_body = "(Brak treści w oryginalnej wiadomości)"
                logger.warning(f"Empty email body from {from_email}, using default message")

            body = f"""
Email otrzymany od: {from_email}
Data: {email_data.get('date', 'Nieznana')}
Temat: {email_data.get('subject', 'Brak tematu')}
{candidate_info}
---
Treść wiadomości:
---

{email_body}

---
Oryginalny Message-ID: {email_data.get('message_id', 'Brak')}
In-Reply-To: {email_data.get('in_reply_to', 'Brak')}
"""

            # Forward to HR (Reply-To set to the candidate so HR can answer
            # directly); skipped when no HR address is configured.
            success = (
                self._send_email(
                    to_email=self.hr_email, subject=subject, body=body, reply_to=from_email
                )
                if self.hr_email
                else False
            )

            # Send acknowledgment to original sender (always, even if forwarding to HR failed)
            # Ticket was created, so candidate should be notified
            ack_subject = "Re: " + email_subject
            ack_body = """
Dziękujemy za kontakt.

Twoja wiadomość została przekazana do działu HR w celu rozpatrzenia.

Otrzymasz odpowiedź w najkrótszym możliwym terminie.

Z wyrazami szacunku

Dział HR
"""
            ack_success = self._send_email(to_email=from_email, subject=ack_subject, body=ack_body)

            # Log the four forward/acknowledge outcome combinations at
            # appropriate severities.
            if success:
                if ack_success:
                    if candidate_info:
                        logger.info(
                            f"Email routed to HR and acknowledgment sent to {from_email} (candidate ID: {candidate.id if candidate else 'N/A'})"
                        )
                    else:
                        logger.info(f"Email routed to HR and acknowledgment sent to {from_email}")
                else:
                    logger.warning(
                        f"Email routed to HR, but failed to send acknowledgment to {from_email}"
                    )
            else:
                if ack_success:
                    logger.warning(
                        f"Failed to send email to HR for {from_email}, but acknowledgment sent to candidate"
                    )
                else:
                    logger.error(f"Failed to send email to HR and acknowledgment to {from_email}")

            # Return True if either forwarding or acknowledgment succeeded (ticket was created)
            return success or ack_success

        except Exception as e:
            logger.error(f"Error routing email to HR: {str(e)}", exc_info=True)
            return False

    def _send_email(
        self, to_email: str, subject: str, body: str, reply_to: Optional[str] = None
    ) -> bool:
        """Send email via SMTP."""
        try:
            # Validate email content - prevent sending empty emails
            if not body or not body.strip():
                logger.warning(
                    f"Attempted to send empty email to {to_email} with subject: {subject}. Skipping."
                )
                return False

            if not subject or not subject.strip():
                logger.warning(
                    f"Attempted to send email to {to_email} with empty subject. Skipping."
                )
                return False

            msg = MIMEMultipart("alternative")
            msg["Subject"] = subject
            msg["From"] = self.email_username
            msg["To"] = to_email
            if reply_to:
                msg["Reply-To"] = reply_to

            # Add text content
            text_part = MIMEText(body, "plain", "utf-8")
            msg.attach(text_part)

            # Send email via SMTP
            if self.smtp_port == 465:
                # Use SSL for port 465
                with smtplib.SMTP_SSL(self.smtp_host, self.smtp_port) as server:
                    server.login(self.email_username, self.email_password)
                    server.send_message(msg)
            else:
                # Use TLS for port 587
                with smtplib.SMTP(self.smtp_host, self.smtp_port) as server:
                    if self.smtp_use_tls:
                        server.starttls()
                    server.login(self.email_username, self.email_password)
                    server.send_message(msg)

            logger.info(f"Email sent successfully to {to_email}")
            return True

        except Exception as e:
            logger.error(f"Failed to send email to {to_email}: {str(e)}")
            return False

__init__(email_username, email_password, smtp_host, smtp_port, smtp_use_tls=True, iod_email=None, hr_email=None)

Initialize email router.

Parameters:

Name Type Description Default
email_username str

Email username for sending emails

required
email_password str

Email password or app password

required
smtp_host str

SMTP server hostname

required
smtp_port int

SMTP server port

required
smtp_use_tls bool

Whether to use TLS (True for port 587, False for port 465)

True
iod_email Optional[str]

Email address for IOD department

None
hr_email Optional[str]

Email address for HR department

None
Source code in services/email_router.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def __init__(
    self,
    email_username: str,
    email_password: str,
    smtp_host: str,
    smtp_port: int,
    smtp_use_tls: bool = True,
    iod_email: Optional[str] = None,
    hr_email: Optional[str] = None,
):
    """
    Initialize the email router.

    Args:
        email_username: Username of the account used to send mail.
        email_password: Password or app password for that account.
        smtp_host: SMTP server hostname.
        smtp_port: SMTP server port.
        smtp_use_tls: Use STARTTLS (True for port 587, False for port 465).
        iod_email: Destination address for IOD (data-protection) mail.
        hr_email: Destination address for HR mail.
    """
    # SMTP credentials and department routing targets.
    self.email_username = email_username
    self.email_password = email_password
    self.smtp_host = smtp_host
    self.smtp_port = smtp_port
    self.smtp_use_tls = smtp_use_tls
    self.iod_email = iod_email
    self.hr_email = hr_email

    # Deduplication bookkeeping: insertion-ordered dict keyed by
    # Message-ID/UID so the oldest entries can be evicted when the
    # cap below is exceeded.
    self.processed_emails: dict[str, None] = {}
    self.max_processed_emails = 1000  # Maximum number of processed emails to track

    # AI collaborators default to None; each is populated below on a
    # best-effort basis and callers must tolerate them staying None.
    self.ai_client: Any = None
    self.model_name: Optional[str] = None
    self.query_classifier: Optional[QueryClassifierAgent] = None
    self.query_responder: Optional[QueryResponderAgent] = None
    self.rag_validator: Optional[RAGResponseValidatorAgent] = None
    self.rag_db: Optional[QdrantRAG] = None

    # Best effort: client used to pick ticket priority/deadline.
    try:
        from openai import AzureOpenAI

        self.ai_client = AzureOpenAI(
            api_version=settings.azure_openai_api_version,
            azure_endpoint=settings.azure_openai_endpoint,
            api_key=settings.api_key,
        )
        self.model_name = settings.openai_model
        logger.info("AI client initialized for ticket priority/deadline determination")
    except Exception as e:
        logger.warning(
            f"Failed to initialize AI client: {e}. Will use default priority/deadline."
        )
        self.ai_client = None
        self.model_name = None

    # Best effort: agents that classify and answer incoming queries.
    try:
        deployment = settings.azure_openai_gpt_deployment
        self.query_classifier = QueryClassifierAgent(model_name=deployment)
        self.query_responder = QueryResponderAgent(model_name=deployment)
        self.rag_validator = RAGResponseValidatorAgent(model_name=deployment)
        # RAG database is lazy-loaded on first use.
        self.rag_db = None
        logger.info("Query classification agents initialized")
    except Exception as e:
        logger.warning(
            f"Failed to initialize query agents: {e}. Will forward all queries to HR."
        )
        self.query_classifier = None
        self.query_responder = None
        self.rag_validator = None
        self.rag_db = None

route_email(email_data, classification)

Route email based on classification.

Parameters:

Name Type Description Default
email_data Dict

Email dictionary

required
classification str

Email classification ('iod', 'consent_yes', 'consent_no', 'default')

required

Returns:

Type Description
bool

True if routing was successful

Source code in services/email_router.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def route_email(self, email_data: Dict, classification: str) -> bool:
    """
    Route email based on classification.

    Args:
        email_data: Email dictionary
        classification: Email classification ('iod', 'consent_yes', 'consent_no', 'default')

    Returns:
        True if routing was successful (or the email was already processed)
    """
    try:
        # Build a stable dedup identifier: prefer Message-ID, fall back to
        # the IMAP UID, finally a sender/subject/date composite. The result
        # is always non-empty, so no extra emptiness guard is needed.
        uid = str(email_data.get("uid", ""))
        message_id = str(email_data.get("message_id", ""))

        if message_id:
            email_id = message_id
        elif uid:
            email_id = f"uid:{uid}"
        else:
            email_id = f"{email_data.get('from_email', 'unknown')}:{email_data.get('subject', 'no-subject')}:{email_data.get('date', 'no-date')}"

        if email_id in self.processed_emails:
            logger.warning(
                f"Email {email_id} already processed (classification: {classification}, "
                f"from: {email_data.get('from_email', 'unknown')}), skipping duplicate"
            )
            return True  # Return True to avoid reprocessing
        self.processed_emails[email_id] = None
        logger.debug(
            f"Marking email {email_id} as processed (classification: {classification})"
        )
        # Keep only last N processed emails (oldest evicted first)
        if len(self.processed_emails) > self.max_processed_emails:
            keep_count = self.max_processed_emails // 2
            keys = list(self.processed_emails)
            for k in keys[:-keep_count]:
                del self.processed_emails[k]
            logger.debug(
                f"Trimmed processed_emails to {len(self.processed_emails)} entries"
            )

        # Dispatch to the handler matching the classification.
        if classification == "iod":
            return self._route_to_iod(email_data)
        elif classification in ["consent_yes", "consent_no"]:
            return self._handle_consent(email_data, classification)
        else:  # default - general query
            return self._handle_general_query(email_data)
    except Exception as e:
        logger.error(f"Error routing email: {str(e)}", exc_info=True)
        return False

Service for listening to incoming emails via IMAP.

Source code in services/email_listener.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
class EmailListener:
    """Service for listening to incoming emails via IMAP."""

    # Keywords for IOD/RODO (data-protection) classification
    IOD_KEYWORDS = [
        "rodo",
        "iod",
        "dpo",
        "gdpr",
        "dane osobowe",
        "ochrona danych",
        "sprzeciw",
        "zgoda",
        "wycofanie",
        "skarga",
        "uodo",
        "organ nadzorczy",
        "profilowanie",
        "automatyczna decyzja",
        "sztuczna inteligencja",
        "ai",
        "si",
    ]

    # Keywords for consent classification
    CONSENT_KEYWORDS_POSITIVE = [
        "zgoda",
        "zgadzam się",
        "wyrażam zgodę",
        "tak",
        "chcę",
        "zainteresowany",
        "rozważenie",
        "inne oferty",
        "inne stanowiska",
        "inne pozycje",
        "inne rekrutacje",
    ]

    CONSENT_KEYWORDS_NEGATIVE = [
        "nie zgadzam się",
        "odmawiam",
        "nie",
        "nie chcę",
        "nie wyrażam zgody",
        "wycofuję zgodę",
        "nie jestem zainteresowany",
        "nie rozważaj",
    ]

    def __init__(
        self,
        email_username: str,
        email_password: str,
        imap_server: str = "imap.zoho.com",
        imap_port: int = 993,
    ):
        """
        Initialize email listener.

        Args:
            email_username: Email username/address
            email_password: Email password or app password
            imap_server: IMAP server address (default: imap.zoho.com; can be any provider)
            imap_port: IMAP server port (default: 993 for SSL)
        """
        self.email_username = email_username
        self.email_password = email_password
        self.imap_server = imap_server
        self.imap_port = imap_port
        # Lazily populated by connect(); None while disconnected.
        self.mail: Optional[imaplib.IMAP4_SSL] = None

    def connect(self) -> bool:
        """Connect to IMAP server.

        Returns:
            True if the login succeeded, False on any failure.
            Failures are logged, never raised.
        """
        try:
            if not self.email_username or not self.email_password:
                logger.error("Email credentials not provided to EmailListener")
                return False

            # Create IMAP connection with timeout (only this socket, not global default)
            import socket

            self.mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port, timeout=30)
            self.mail.login(self.email_username, self.email_password)
            logger.debug(f"Connected to IMAP server {self.imap_server}: {self.email_username}")
            return True
        except socket.timeout:
            # The local 'socket' import above has executed before any of
            # these exceptions can be raised, so the name is bound here.
            logger.error(
                f"IMAP connection timeout to {self.imap_server}:{self.imap_port}. Check network/firewall."
            )
            return False
        except socket.error as e:
            error_msg = str(e)
            if "EOF" in error_msg or "Connection reset" in error_msg:
                logger.warning(
                    f"IMAP connection error (EOF/reset) to {self.imap_server}:{self.imap_port}. "
                    "This may be temporary. Will retry in next cycle. "
                    "Possible causes: server-side connection limit, firewall, or network issue."
                )
            else:
                logger.error(
                    f"IMAP socket error to {self.imap_server}:{self.imap_port}: {error_msg}"
                )
            return False
        except imaplib.IMAP4.error as e:
            error_msg = str(e)
            if "AUTHENTICATIONFAILED" in error_msg or "Invalid credentials" in error_msg:
                logger.error(
                    f"IMAP authentication failed for {self.email_username} on {self.imap_server}. "
                    "This usually means:\n"
                    "  1. Wrong username or password\n"
                    "  2. For Gmail: You need 'App Password' instead of regular password\n"
                    "  3. For Zoho: Check if password is correct and account is active\n"
                    "  4. IMAP might be disabled in your email account settings"
                )
            else:
                logger.error(f"IMAP protocol error to {self.imap_server}: {error_msg}")
            return False
        except Exception as e:
            logger.error(
                f"Failed to connect to IMAP server {self.imap_server}:{self.imap_port}: {str(e)}"
            )
            return False

    def disconnect(self):
        """Disconnect from the IMAP server (best-effort; never raises)."""
        if self.mail:
            try:
                self.mail.close()
                self.mail.logout()
            except Exception as e:
                logger.warning(f"Error disconnecting from IMAP: {str(e)}")
            finally:
                self.mail = None

    def get_unread_emails(self, folder: str = "INBOX") -> List[Dict]:
        """
        Get unread emails from specified folder.

        Args:
            folder: Email folder to check (default: 'INBOX')

        Returns:
            List of email dictionaries with keys: uid, from_email, from_name,
            subject, body, date, message_id, in_reply_to, raw_message
        """
        if not self.mail:
            if not self.connect():
                return []

        assert self.mail is not None  # narrow type after connect
        try:
            # Select folder
            status, messages = self.mail.select(folder)
            if status != "OK":
                logger.error(
                    f"Failed to select folder {folder} (status={status}, messages={messages})"
                )
                return []

            # Search for unread emails
            status, message_numbers = self.mail.search(None, "UNSEEN")
            if status != "OK":
                logger.error(
                    f"Failed to search for unread emails (status={status}, data={message_numbers})"
                )
                return []

            raw_ids = message_numbers[0]
            if not raw_ids:
                # No unread emails
                return []

            # SEARCH returns a space-separated bytes list of message
            # sequence numbers.
            message_ids = raw_ids.split()

            email_list = []

            for msg_num in message_ids:
                try:
                    # Fetch email
                    status, msg_data = self.mail.fetch(msg_num, "(RFC822)")
                    if status != "OK" or not msg_data or msg_data[0] is None:
                        logger.warning(f"Failed to fetch email {msg_num} (status={status})")
                        continue

                    # Parse email
                    email_body = msg_data[0][1]
                    if not isinstance(email_body, (bytes, bytearray)):
                        continue
                    email_message = email.message_from_bytes(email_body)

                    # Extract email data
                    email_data = self._parse_email(email_message)
                    if email_data:
                        # NOTE: "uid" holds the IMAP sequence number from
                        # SEARCH, not a true UID; mark_as_read() uses the
                        # same sequence-number addressing, so they agree.
                        email_data["uid"] = msg_num.decode()
                        email_list.append(email_data)
                    else:
                        logger.warning(f"Parsed email {msg_num} returned None")

                except Exception as e:
                    logger.warning(f"Error parsing email {msg_num}: {str(e)}", exc_info=True)
                    continue

            logger.info(f"Found {len(email_list)} unread emails in {folder} after parsing")
            return email_list

        except Exception as e:
            logger.error(f"Error getting unread emails: {str(e)}")
            return []

    def _parse_email(self, email_message: Message) -> Optional[Dict]:
        """Parse email message into dictionary (None if parsing fails)."""
        try:
            # Get headers
            subject = self._decode_header(email_message.get("Subject", ""))
            from_email = self._decode_header(email_message.get("From", ""))
            message_id = email_message.get("Message-ID", "")
            in_reply_to = email_message.get("In-Reply-To", "")
            date_str = email_message.get("Date", "")

            # Parse date; fall back to "now" on a missing/malformed header
            try:
                date = parsedate_to_datetime(date_str) if date_str else datetime.now()
            except Exception:
                date = datetime.now()

            # Extract email address from From field
            from_email_clean = self._extract_email_address(from_email)

            # Get email body
            body = self._get_email_body(email_message)

            return {
                "from_email": from_email_clean,
                "from_name": from_email,
                "subject": subject,
                "body": body,
                "date": date,
                "message_id": message_id,
                "in_reply_to": in_reply_to,
                "raw_message": email_message,
            }
        except Exception as e:
            logger.error(f"Error parsing email: {str(e)}")
            return None

    def _decode_header(self, header: str) -> str:
        """Decode email header (handles problematic encodings like 'unknown-8bit')."""
        try:
            decoded_parts = email.header.decode_header(header)
            decoded_string = ""
            for part, encoding in decoded_parts:
                if isinstance(part, bytes):
                    # Try declared encoding first, fall back to utf-8 on errors or unknown encodings
                    if encoding:
                        try:
                            decoded_string += part.decode(encoding, errors="ignore")
                        except (LookupError, UnicodeError):
                            # Unknown codec (e.g. 'unknown-8bit') or decode error - fall back to utf-8
                            decoded_string += part.decode("utf-8", errors="ignore")
                    else:
                        decoded_string += part.decode("utf-8", errors="ignore")
                else:
                    decoded_string += part
            return decoded_string
        except Exception as e:
            logger.warning(f"Error decoding header: {str(e)}")
            return str(header)

    def _extract_email_address(self, email_string: str) -> str:
        """Extract email address from 'Name <email@domain.com>' format."""
        match = re.search(r"[\w\.-]+@[\w\.-]+\.\w+", email_string)
        if match:
            return match.group(0)
        # No address found: return the input unchanged.
        return email_string

    def _get_email_body(self, email_message: Message) -> str:
        """Extract email body text (plain text preferred, HTML as fallback)."""
        body = ""

        if email_message.is_multipart():
            for part in email_message.walk():
                content_type = part.get_content_type()
                content_disposition = str(part.get("Content-Disposition", ""))

                # Skip attachments
                if "attachment" in content_disposition:
                    continue

                # Get text content
                if content_type == "text/plain":
                    try:
                        body_bytes = part.get_payload(decode=True)
                        if isinstance(body_bytes, bytes):
                            charset = part.get_content_charset() or "utf-8"
                            body += body_bytes.decode(charset, errors="ignore")
                    except Exception as e:
                        logger.warning(f"Error decoding email body part: {str(e)}")
                elif content_type == "text/html":
                    # Fallback to HTML if no plain text
                    if not body:
                        try:
                            body_bytes = part.get_payload(decode=True)
                            if isinstance(body_bytes, bytes):
                                charset = part.get_content_charset() or "utf-8"
                                html_body = body_bytes.decode(charset, errors="ignore")
                                # Simple HTML to text conversion (remove tags)
                                body = re.sub(r"<[^>]+>", "", html_body)
                        except Exception as e:
                            logger.warning(f"Error decoding HTML body: {str(e)}")
        else:
            # Not multipart
            try:
                payload_bytes: Any = email_message.get_payload(decode=True)
                if isinstance(payload_bytes, bytes):
                    charset = email_message.get_content_charset() or "utf-8"
                    body = payload_bytes.decode(charset, errors="ignore")
            except Exception as e:
                logger.warning(f"Error decoding email body: {str(e)}")

        return body.strip()

    def mark_as_read(self, uid: str, folder: str = "INBOX") -> bool:
        """Mark email as read (\\Seen flag)."""
        if not self.mail:
            return False
        assert self.mail is not None
        try:
            self.mail.select(folder)
            self.mail.store(uid, "+FLAGS", "\\Seen")
            return True
        except Exception as e:
            logger.error(f"Error marking email as read: {str(e)}")
            return False

    @staticmethod
    def _keyword_in_text(keyword: str, text: str) -> bool:
        """Return True if *keyword* occurs in *text* as a whole word/phrase.

        Plain substring matching misfires on short tokens: 'nie' matches
        inside 'codziennie', 'iod' inside 'period', 'rodo' inside
        'narodowy'. Word-boundary matching avoids these false positives.
        """
        return re.search(rf"(?<!\w){re.escape(keyword.lower())}(?!\w)", text) is not None

    def classify_email(self, email_data: Dict, classifier_agent=None) -> str:
        """
        Classify email into categories using AI agent.

        Args:
            email_data: Email dictionary with 'subject' and 'body' keys
            classifier_agent: Optional EmailClassifierAgent instance (if None, uses simple keyword matching)

        Returns:
            Classification: 'iod', 'consent_yes', 'consent_no', or 'default'
        """
        # If AI classifier is available, use it
        if classifier_agent:
            try:
                classification = classifier_agent.classify_email(
                    from_email=email_data.get("from_email", ""),
                    subject=email_data.get("subject", ""),
                    body=email_data.get("body", ""),
                )
                logger.info(
                    f"Email classified by AI as '{classification.category}' "
                    f"(confidence: {classification.confidence:.2f})"
                )
                return classification.category
            except Exception as e:
                logger.warning(
                    f"AI classification failed, falling back to keyword matching: {str(e)}"
                )

        # Fallback to simple keyword matching
        text = f"{email_data.get('subject', '')} {email_data.get('body', '')}".lower()

        # Check for IOD keywords (at least 1 critical one required)
        critical_iod_keywords = [
            "rodo",
            "iod",
            "dpo",
            "gdpr",
            "dane osobowe",
            "ochrona danych",
            "uodo",
            "organ nadzorczy",
            "profilowanie",
            "automatyczna decyzja",
        ]
        found_iod_keywords = [
            kw for kw in critical_iod_keywords if self._keyword_in_text(kw, text)
        ]

        if found_iod_keywords:
            logger.info(f"Email classified as IOD (keywords: {found_iod_keywords})")
            return "iod"

        # Check NEGATIVE consent keywords BEFORE positive ones: negative
        # phrases like "nie zgadzam się" / "nie jestem zainteresowany"
        # contain positive phrases ("zgadzam się", "zainteresowany") as
        # substrings and would otherwise be misclassified as consent_yes.
        for keyword in self.CONSENT_KEYWORDS_NEGATIVE:
            if self._keyword_in_text(keyword, text):
                logger.info(f"Email classified as consent_no (keyword: {keyword})")
                return "consent_no"

        for keyword in self.CONSENT_KEYWORDS_POSITIVE:
            if self._keyword_in_text(keyword, text):
                logger.info(f"Email classified as consent_yes (keyword: {keyword})")
                return "consent_yes"

        # Default classification
        logger.info("Email classified as default (HR)")
        return "default"

__init__(email_username, email_password, imap_server='imap.zoho.com', imap_port=993)

Initialize email listener.

Parameters:

Name Type Description Default
email_username str

Email username/address

required
email_password str

Email password or app password

required
imap_server str

IMAP server address (default: imap.zoho.com; can be any provider)

'imap.zoho.com'
imap_port int

IMAP server port (default: 993 for SSL)

993
Source code in services/email_listener.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def __init__(
    self,
    email_username: str,
    email_password: str,
    imap_server: str = "imap.zoho.com",
    imap_port: int = 993,
):
    """
    Store IMAP credentials and connection settings.

    Args:
        email_username: Email username/address
        email_password: Email password or app password
        imap_server: IMAP server address (default: imap.zoho.com; can be any provider)
        imap_port: IMAP server port (default: 993 for SSL)
    """
    # Connection settings first, then credentials.
    self.imap_server = imap_server
    self.imap_port = imap_port
    self.email_username = email_username
    self.email_password = email_password
    # No connection is opened here; connect() sets this lazily.
    self.mail: Optional[imaplib.IMAP4_SSL] = None

classify_email(email_data, classifier_agent=None)

Classify email into categories using AI agent.

Parameters:

Name Type Description Default
email_data Dict

Email dictionary with 'subject' and 'body' keys

required
classifier_agent

Optional EmailClassifierAgent instance (if None, uses simple keyword matching)

None

Returns:

Name Type Description
Classification str

'iod', 'consent_yes', 'consent_no', or 'default'

Source code in services/email_listener.py
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def classify_email(self, email_data: Dict, classifier_agent=None) -> str:
    """
    Classify email into categories using AI agent.

    Args:
        email_data: Email dictionary with 'subject' and 'body' keys
        classifier_agent: Optional EmailClassifierAgent instance (if None, uses simple keyword matching)

    Returns:
        Classification: 'iod', 'consent_yes', 'consent_no', or 'default'
    """
    # If AI classifier is available, use it
    if classifier_agent:
        try:
            classification = classifier_agent.classify_email(
                from_email=email_data.get("from_email", ""),
                subject=email_data.get("subject", ""),
                body=email_data.get("body", ""),
            )
            logger.info(
                f"Email classified by AI as '{classification.category}' "
                f"(confidence: {classification.confidence:.2f})"
            )
            return classification.category
        except Exception as e:
            logger.warning(
                f"AI classification failed, falling back to keyword matching: {str(e)}"
            )

    # Fallback to simple keyword matching
    text = f"{email_data.get('subject', '')} {email_data.get('body', '')}".lower()

    def _match(keyword: str) -> bool:
        # Whole-word/phrase match. Plain substring matching misfires on
        # short tokens ('nie' inside 'codziennie', 'iod' inside 'period').
        return re.search(rf"(?<!\w){re.escape(keyword)}(?!\w)", text) is not None

    # Check for IOD keywords (at least 1 critical one required)
    critical_iod_keywords = [
        "rodo",
        "iod",
        "dpo",
        "gdpr",
        "dane osobowe",
        "ochrona danych",
        "uodo",
        "organ nadzorczy",
        "profilowanie",
        "automatyczna decyzja",
    ]
    found_iod_keywords = [kw for kw in critical_iod_keywords if _match(kw)]

    if found_iod_keywords:
        logger.info(f"Email classified as IOD (keywords: {found_iod_keywords})")
        return "iod"

    # Check NEGATIVE consent keywords BEFORE positive ones: "nie zgadzam
    # się" contains "zgadzam się" and "nie jestem zainteresowany" contains
    # "zainteresowany", so the old positive-first order misclassified
    # refusals as consent_yes.
    for keyword in self.CONSENT_KEYWORDS_NEGATIVE:
        if _match(keyword.lower()):
            logger.info(f"Email classified as consent_no (keyword: {keyword})")
            return "consent_no"

    for keyword in self.CONSENT_KEYWORDS_POSITIVE:
        if _match(keyword.lower()):
            logger.info(f"Email classified as consent_yes (keyword: {keyword})")
            return "consent_yes"

    # Default classification
    logger.info("Email classified as default (HR)")
    return "default"

connect()

Connect to IMAP server.

Source code in services/email_listener.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def connect(self) -> bool:
    """Connect to IMAP server.

    Returns:
        True if the login succeeded, False otherwise. All failure modes
        are logged (with targeted diagnostics) rather than raised.
    """
    try:
        if not self.email_username or not self.email_password:
            logger.error("Email credentials not provided to EmailListener")
            return False

        # Create IMAP connection with timeout (only this socket, not global default)
        import socket

        self.mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port, timeout=30)
        self.mail.login(self.email_username, self.email_password)
        logger.debug(f"Connected to IMAP server {self.imap_server}: {self.email_username}")
        return True
    # The local 'socket' import above always runs before IMAP4_SSL can
    # raise, so 'socket' is bound by the time these handlers execute.
    # Handler order matters: socket.timeout is a subclass of socket.error.
    except socket.timeout:
        logger.error(
            f"IMAP connection timeout to {self.imap_server}:{self.imap_port}. Check network/firewall."
        )
        return False
    except socket.error as e:
        error_msg = str(e)
        # EOF/reset errors are usually transient (connection caps, flaky
        # network) and only warrant a warning; the caller retries later.
        if "EOF" in error_msg or "Connection reset" in error_msg:
            logger.warning(
                f"IMAP connection error (EOF/reset) to {self.imap_server}:{self.imap_port}. "
                "This may be temporary. Will retry in next cycle. "
                "Possible causes: server-side connection limit, firewall, or network issue."
            )
        else:
            logger.error(
                f"IMAP socket error to {self.imap_server}:{self.imap_port}: {error_msg}"
            )
        return False
    except imaplib.IMAP4.error as e:
        error_msg = str(e)
        # Auth failures get a checklist of the most common causes.
        if "AUTHENTICATIONFAILED" in error_msg or "Invalid credentials" in error_msg:
            logger.error(
                f"IMAP authentication failed for {self.email_username} on {self.imap_server}. "
                "This usually means:\n"
                "  1. Wrong username or password\n"
                "  2. For Gmail: You need 'App Password' instead of regular password\n"
                "  3. For Zoho: Check if password is correct and account is active\n"
                "  4. IMAP might be disabled in your email account settings"
            )
        else:
            logger.error(f"IMAP protocol error to {self.imap_server}: {error_msg}")
        return False
    except Exception as e:
        # Catch-all so a connect attempt can never crash the caller.
        logger.error(
            f"Failed to connect to IMAP server {self.imap_server}:{self.imap_port}: {str(e)}"
        )
        return False

disconnect()

Disconnect from the IMAP server.

Source code in services/email_listener.py
139
140
141
142
143
144
145
146
147
148
149
def disconnect(self):
    """Close the selected mailbox and log out from the IMAP server.

    Best-effort: any error during teardown is logged as a warning, and
    the cached connection object is dropped either way.
    """
    if not self.mail:
        return
    try:
        self.mail.close()
        self.mail.logout()
    except Exception as e:
        logger.warning(f"Error disconnecting from IMAP: {str(e)}")
    finally:
        self.mail = None

get_unread_emails(folder='INBOX')

Get unread emails from specified folder.

Parameters:

Name Type Description Default
folder str

Email folder to check (default: 'INBOX')

'INBOX'

Returns:

Type Description
List[Dict]

List of email dictionaries with keys: uid, from_email, from_name, subject, body, date, message_id, in_reply_to, raw_message

Source code in services/email_listener.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def get_unread_emails(self, folder: str = "INBOX") -> List[Dict]:
    """
    Get unread emails from specified folder.

    Args:
        folder: Email folder to check (default: 'INBOX')

    Returns:
        List of email dictionaries with keys: uid, from_email, from_name,
        subject, body, date, message_id, in_reply_to, raw_message.
        Returns an empty list on any connection or protocol failure.
    """
    # Lazily (re)connect if we have no live session.
    if not self.mail:
        if not self.connect():
            return []

    assert self.mail is not None  # narrow type after connect
    try:
        # Select folder
        status, messages = self.mail.select(folder)
        if status != "OK":
            logger.error(
                f"Failed to select folder {folder} (status={status}, messages={messages})"
            )
            return []

        # Search for unread emails
        status, message_numbers = self.mail.search(None, "UNSEEN")
        if status != "OK":
            logger.error(
                f"Failed to search for unread emails (status={status}, data={message_numbers})"
            )
            return []

        # SEARCH data is a single bytes item of space-separated message
        # sequence numbers; empty bytes means no unread messages.
        raw_ids = message_numbers[0]
        if not raw_ids:
            # No unread emails
            return []

        message_ids = raw_ids.split()

        email_list = []

        for msg_num in message_ids:
            try:
                # Fetch email (full RFC822 message)
                status, msg_data = self.mail.fetch(msg_num, "(RFC822)")
                if status != "OK" or not msg_data or msg_data[0] is None:
                    logger.warning(f"Failed to fetch email {msg_num} (status={status})")
                    continue

                # Parse email; fetch items can also be bare bytes
                # delimiters, hence the isinstance guard.
                email_body = msg_data[0][1]
                if not isinstance(email_body, (bytes, bytearray)):
                    continue
                email_message = email.message_from_bytes(email_body)

                # Extract email data
                email_data = self._parse_email(email_message)
                if email_data:
                    # NOTE(review): "uid" actually holds the IMAP message
                    # sequence number from SEARCH, not a true UID —
                    # consistent with mark_as_read(), which addresses
                    # messages the same way. Confirm before renaming.
                    email_data["uid"] = msg_num.decode()
                    email_list.append(email_data)
                else:
                    logger.warning(f"Parsed email {msg_num} returned None")

            except Exception as e:
                # One bad message must not abort the whole batch.
                logger.warning(f"Error parsing email {msg_num}: {str(e)}", exc_info=True)
                continue

        logger.info(f"Found {len(email_list)} unread emails in {folder} after parsing")
        return email_list

    except Exception as e:
        logger.error(f"Error getting unread emails: {str(e)}")
        return []

mark_as_read(uid, folder='INBOX')

Mark email as read.

Source code in services/email_listener.py
337
338
339
340
341
342
343
344
345
346
347
348
def mark_as_read(self, uid: str, folder: str = "INBOX") -> bool:
    """Mark the message identified by *uid* as read in *folder*.

    Returns True on success, False when there is no connection or the
    IMAP commands fail (failures are logged).
    """
    # Nothing to do without an open connection.
    if not self.mail:
        return False
    try:
        self.mail.select(folder)
        self.mail.store(uid, "+FLAGS", "\\Seen")
    except Exception as e:
        logger.error(f"Error marking email as read: {str(e)}")
        return False
    else:
        return True

Background service for monitoring and processing incoming emails.

Source code in services/email_monitor.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
class EmailMonitor:
    """Background service for monitoring and processing incoming emails.

    Polls an IMAP INBOX on a fixed interval from a daemon thread. Each new
    message is parsed, classified (AI agent when available, otherwise the
    listener's keyword-based fallback) and handed to an EmailRouter.
    """

    def __init__(
        self,
        email_username: str,
        email_password: str,
        imap_host: str,
        imap_port: int,
        smtp_host: str,
        smtp_port: int,
        smtp_use_tls: bool = True,
        iod_email: Optional[str] = None,
        hr_email: Optional[str] = None,
        check_interval: int = 60,
    ):
        """
        Initialize email monitor.

        Args:
            email_username: Email username
            email_password: Email password or app password
            imap_host: IMAP server hostname
            imap_port: IMAP server port
            smtp_host: SMTP server hostname
            smtp_port: SMTP server port
            smtp_use_tls: Whether to use TLS for SMTP
            iod_email: Email address for IOD department
            hr_email: Email address for HR department
            check_interval: Interval between email checks in seconds (default: 60)
        """
        self.email_username = email_username
        self.email_password = email_password
        self.iod_email = iod_email
        self.hr_email = hr_email
        self.check_interval = check_interval

        self.listener = EmailListener(email_username, email_password, imap_host, imap_port)
        self.router = EmailRouter(
            email_username, email_password, smtp_host, smtp_port, smtp_use_tls, iod_email, hr_email
        )

        # Track last processed message sequence number within this process.
        # This way we react to all NEW messages (ALL),
        # regardless of whether they were already marked as read.
        # NOTE(review): sequence numbers are per-mailbox-session; this assumes
        # messages are not expunged between polls — confirm for the target server.
        self.last_msg_num: Optional[int] = None

        # Initialize AI classifier; a failure here is non-fatal — classify_email
        # falls back to keyword-based classification when classifier is None.
        self.classifier: Optional[EmailClassifierAgent] = None
        try:
            from config import settings

            self.classifier = EmailClassifierAgent(
                model_name=settings.openai_model, api_key=settings.api_key
            )
            logger.info(f"AI email classifier initialized with model: {settings.openai_model}")
        except Exception as e:
            logger.warning(
                f"Failed to initialize AI classifier: {str(e)}. Will use keyword-based classification."
            )
            self.classifier = None

        self.running = False
        self.thread: Optional[threading.Thread] = None

    def start(self):
        """Start email monitoring in background thread.

        No-ops (with a warning) if already running, or if credentials or the
        IOD/HR routing addresses are missing.
        """
        if self.running:
            logger.warning("Email monitor is already running")
            return

        if not self.email_username or not self.email_password:
            logger.warning("Email credentials not configured. Email monitoring disabled.")
            return

        if not self.iod_email or not self.hr_email:
            logger.warning("IOD or HR email not configured. Email monitoring disabled.")
            return

        self.running = True
        # Daemon thread: will not block interpreter shutdown.
        self.thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.thread.start()
        logger.info("Email monitor started")

    def stop(self):
        """Stop email monitoring.

        Clears the running flag (the loop exits at its next iteration) and
        drops the IMAP connection. The thread itself is not joined.
        """
        self.running = False
        if self.listener:
            self.listener.disconnect()
        logger.info("Email monitor stopped")

    def _monitor_loop(self):
        """Main monitoring loop.

        Each cycle: connect, select INBOX, search ALL sequence numbers,
        process every message numbered above last_msg_num, then disconnect
        and sleep for check_interval seconds.
        """
        logger.info(f"Email monitor loop started (check interval: {self.check_interval}s)")

        while self.running:
            try:
                # Connect to email server
                if not self.listener.connect():
                    logger.debug(
                        f"Failed to connect to email server. Retrying in {self.check_interval}s."
                    )
                    time.sleep(self.check_interval)
                    continue

                # Select INBOX (connect() succeeded, so listener.mail is set)
                status, _ = self.listener.mail.select("INBOX")
                if status != "OK":
                    logger.warning("Failed to select INBOX for email monitoring")
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                # Get ALL message sequence numbers
                status, data = self.listener.mail.search(None, "ALL")
                if status != "OK":
                    logger.warning(
                        f"Failed to search ALL messages in INBOX (status={status}, data={data})"
                    )
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                raw_ids = data[0]
                if not raw_ids:
                    # No messages at all
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                all_msg_nums = [int(x) for x in raw_ids.split()]
                if not all_msg_nums:
                    self.listener.disconnect()
                    time.sleep(self.check_interval)
                    continue

                # On first run: set last_msg_num to the latest message,
                # so we don't process the entire history.
                if self.last_msg_num is None:
                    self.last_msg_num = max(all_msg_nums)
                else:
                    # New messages have a number greater than last_msg_num
                    new_msg_nums = [n for n in all_msg_nums if n > self.last_msg_num]

                    if new_msg_nums:
                        # Track successfully processed message numbers
                        successfully_processed = []

                        for msg_num in new_msg_nums:
                            try:
                                # Fetch full message by sequence number
                                status, msg_data = self.listener.mail.fetch(
                                    str(msg_num), "(RFC822)"
                                )
                                if status != "OK" or not msg_data or msg_data[0] is None:
                                    logger.warning(
                                        f"Failed to fetch email seq={msg_num} (status={status})"
                                    )
                                    continue

                                email_body = msg_data[0][1]
                                email_message = email.message_from_bytes(email_body)

                                # Parse using listener helper
                                # NOTE(review): reaches into a private method of
                                # EmailListener — consider a public accessor.
                                email_data = self.listener._parse_email(email_message)
                                if not email_data:
                                    logger.warning(f"Parsed email seq={msg_num} returned None")
                                    continue

                                email_data["uid"] = str(msg_num)

                                # Classify email using AI agent
                                classification = self.listener.classify_email(
                                    email_data, classifier_agent=self.classifier
                                )
                                logger.info(
                                    f"Email seq={msg_num} from {email_data.get('from_email')} "
                                    f"classified as: {classification}"
                                )

                                # Route email
                                success = self.router.route_email(email_data, classification)

                                if success:
                                    logger.info(f"Email seq={msg_num} processed successfully")
                                    # Only track successfully processed messages
                                    successfully_processed.append(msg_num)
                                else:
                                    logger.warning(
                                        f"Failed to route email seq={msg_num} from {email_data.get('from_email')}"
                                    )
                                    # Not tracked as processed. Retried next cycle ONLY if no
                                    # later message succeeded this cycle; otherwise last_msg_num
                                    # advances past it (see below) and it is skipped for good.

                            except Exception as e:
                                logger.error(
                                    f"Error processing email seq={msg_num}: {str(e)}", exc_info=True
                                )
                                # Not tracked as processed — same retry caveat as above.

                        # Advance to highest successfully processed (failed messages are not retried to avoid duplicate processing)
                        if successfully_processed:
                            self.last_msg_num = max(successfully_processed)
                            logger.debug(
                                f"Updated last_msg_num to {self.last_msg_num} ({len(successfully_processed)} messages processed)"
                            )

                # Disconnect
                self.listener.disconnect()

                # Wait before next check
                time.sleep(self.check_interval)

            except Exception as e:
                logger.error(f"Error in email monitor loop: {str(e)}", exc_info=True)
                time.sleep(self.check_interval)

__init__(email_username, email_password, imap_host, imap_port, smtp_host, smtp_port, smtp_use_tls=True, iod_email=None, hr_email=None, check_interval=60)

Initialize email monitor.

Parameters:

Name Type Description Default
email_username str

Email username

required
email_password str

Email password or app password

required
imap_host str

IMAP server hostname

required
imap_port int

IMAP server port

required
smtp_host str

SMTP server hostname

required
smtp_port int

SMTP server port

required
smtp_use_tls bool

Whether to use TLS for SMTP

True
iod_email Optional[str]

Email address for IOD department

None
hr_email Optional[str]

Email address for HR department

None
check_interval int

Interval between email checks in seconds (default: 60)

60
Source code in services/email_monitor.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def __init__(
    self,
    email_username: str,
    email_password: str,
    imap_host: str,
    imap_port: int,
    smtp_host: str,
    smtp_port: int,
    smtp_use_tls: bool = True,
    iod_email: Optional[str] = None,
    hr_email: Optional[str] = None,
    check_interval: int = 60,
):
    """
    Initialize email monitor.

    Builds an EmailListener (IMAP) and EmailRouter (SMTP) pair and tries to
    set up the optional AI classifier; classifier failure is non-fatal.

    Args:
        email_username: Email username
        email_password: Email password or app password
        imap_host: IMAP server hostname
        imap_port: IMAP server port
        smtp_host: SMTP server hostname
        smtp_port: SMTP server port
        smtp_use_tls: Whether to use TLS for SMTP
        iod_email: Email address for IOD department
        hr_email: Email address for HR department
        check_interval: Interval between email checks in seconds (default: 60)
    """
    self.email_username = email_username
    self.email_password = email_password
    self.iod_email = iod_email
    self.hr_email = hr_email
    self.check_interval = check_interval

    self.listener = EmailListener(email_username, email_password, imap_host, imap_port)
    self.router = EmailRouter(
        email_username, email_password, smtp_host, smtp_port, smtp_use_tls, iod_email, hr_email
    )

    # Track last processed message sequence number within this process.
    # This way we react to all NEW messages (ALL),
    # regardless of whether they were already marked as read.
    self.last_msg_num: Optional[int] = None

    # Initialize AI classifier; on any failure we fall back to
    # keyword-based classification (classifier stays None).
    self.classifier: Optional[EmailClassifierAgent] = None
    try:
        from config import settings

        self.classifier = EmailClassifierAgent(
            model_name=settings.openai_model, api_key=settings.api_key
        )
        logger.info(f"AI email classifier initialized with model: {settings.openai_model}")
    except Exception as e:
        logger.warning(
            f"Failed to initialize AI classifier: {str(e)}. Will use keyword-based classification."
        )
        self.classifier = None

    self.running = False
    self.thread: Optional[threading.Thread] = None

start()

Start email monitoring in background thread.

Source code in services/email_monitor.py
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def start(self):
    """Start email monitoring in background thread.

    Refuses to start (with a warning) when already running, or when the
    credentials or the IOD/HR routing addresses are not configured.
    """
    if self.running:
        logger.warning("Email monitor is already running")
        return

    # Guard clauses: all preconditions must hold before spawning the thread.
    if not (self.email_username and self.email_password):
        logger.warning("Email credentials not configured. Email monitoring disabled.")
        return

    if not (self.iod_email and self.hr_email):
        logger.warning("IOD or HR email not configured. Email monitoring disabled.")
        return

    worker = threading.Thread(target=self._monitor_loop, daemon=True)
    self.running = True
    self.thread = worker
    worker.start()
    logger.info("Email monitor started")

stop()

Stop email monitoring.

Source code in services/email_monitor.py
 98
 99
100
101
102
103
def stop(self):
    """Stop email monitoring.

    Clears the running flag so the polling loop exits on its next pass,
    and closes the IMAP connection if a listener exists.
    """
    self.running = False
    listener = self.listener
    if listener:
        listener.disconnect()
    logger.info("Email monitor stopped")

Qdrant RAG service for vector database operations.

Source code in services/qdrant_service.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
class QdrantRAG:
    """Qdrant RAG service for vector database operations.

    Stores documents as vector points (Azure OpenAI embedding + payload with
    the raw text and metadata) and retrieves them by cosine similarity.
    """

    def __init__(
        self,
        collection_name: str = "recruitment_knowledge_base",
        use_azure_openai: bool = False,
        azure_endpoint: Optional[str] = None,
        azure_api_key: Optional[str] = None,
        azure_deployment: Optional[str] = None,
        azure_api_version: Optional[str] = None,
        qdrant_path: Optional[str] = None,
        qdrant_host: Optional[str] = None,
        qdrant_port: Optional[int] = None,
    ):
        """
        Initialize Qdrant RAG service.

        Args:
            collection_name: Collection name
            use_azure_openai: Whether to use Azure OpenAI embeddings
            azure_endpoint: Azure OpenAI endpoint URL
            azure_api_key: Azure OpenAI API key
            azure_deployment: Deployment name (e.g., "text-embedding-3-small")
            azure_api_version: Azure OpenAI API version
            qdrant_path: Path to local Qdrant database (None = in-memory or server)
            qdrant_host: Qdrant server hostname (e.g., "qdrant" or "localhost")
            qdrant_port: Qdrant server port (default: 6333)

        Raises:
            ImportError: Azure OpenAI requested but openai is not installed.
            ValueError: Azure OpenAI credentials missing — the service has no
                local-embedding fallback, so they are effectively required.
            RuntimeError: Local Qdrant path is locked by another process.
        """
        # Initialize Qdrant client
        # Priority: server (host+port) > local path > in-memory
        if qdrant_host:
            # Connect to Qdrant server
            qdrant_port = qdrant_port or 6333
            self.client = QdrantClient(host=qdrant_host, port=qdrant_port)
            logger.info(f"Qdrant server connection: {qdrant_host}:{qdrant_port}")
        elif qdrant_path:
            try:
                self.client = QdrantClient(path=qdrant_path)
                logger.info(f"Qdrant local database: {qdrant_path}")
            except RuntimeError as e:
                # File-based Qdrant allows a single client; give a clearer
                # message for the common lock-contention failure.
                if "already accessed by another instance" in str(e) or "AlreadyLocked" in str(e):
                    error_msg = (
                        f"Qdrant database at {qdrant_path} is already locked by another instance. "
                        "Close other Qdrant clients (e.g., app.py) before accessing. "
                        "Consider using Qdrant server (qdrant_host/qdrant_port) instead."
                    )
                    logger.error(error_msg)
                    raise RuntimeError(error_msg) from e
                else:
                    raise
        else:
            self.client = QdrantClient(":memory:")  # In-memory
            logger.info("Qdrant in-memory database")

        self.collection_name = collection_name

        # Initialize Azure OpenAI if needed
        if use_azure_openai and azure_endpoint and azure_api_key:
            if not AZURE_OPENAI_AVAILABLE:
                raise ImportError("openai is not installed. Run: pip install openai")

            self.azure_client = AzureOpenAI(
                api_version=azure_api_version or "2024-12-01-preview",
                azure_endpoint=azure_endpoint,
                api_key=azure_api_key,
            )
            self.azure_deployment = azure_deployment or "text-embedding-3-small"
            self.use_azure_openai = True
            logger.info(f"Using Azure OpenAI embeddings (deployment: {self.azure_deployment})")
        else:
            # Kept for interface compatibility: this branch always raises,
            # so use_azure_openai=False is not actually a supported mode.
            self.use_azure_openai = False
            raise ValueError("Azure OpenAI credentials must be provided")

        # Create collection if it doesn't exist
        try:
            self.client.get_collection(collection_name)
            logger.info(f"Loaded existing collection: {collection_name}")
        except Exception:
            # Create new collection
            # text-embedding-3-small has 1536 dimensions
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(
                    size=1536, distance=Distance.COSINE  # text-embedding-3-small
                ),
            )
            logger.info(f"Created new collection: {collection_name}")

    def _generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of texts, preserving input order."""
        if not self.use_azure_openai:
            raise ValueError("Azure OpenAI is not configured")

        response = self.azure_client.embeddings.create(
            input=texts, model=self.azure_deployment, timeout=60.0
        )

        # The API may return items out of order; place each by its index.
        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
        for item in response.data:
            embeddings[item.index] = item.embedding

        return embeddings

    def add_documents(
        self,
        documents: List[str],
        ids: Optional[List[Union[str, int, uuid.UUID]]] = None,
        metadatas: Optional[List[Dict]] = None,
    ):
        """Embed documents and upsert them into the collection.

        Args:
            documents: Raw document texts.
            ids: Optional point IDs. Strings that are not valid UUIDs are
                silently replaced with fresh UUIDs (the original string is
                lost; payload["original_id"] records the replacement).
            metadatas: Optional per-document payload extras.
        """
        if ids is None:
            # Generate UUID for each document
            ids = [uuid.uuid4() for _ in range(len(documents))]
        else:
            # Convert string IDs to UUID if needed
            converted_ids: List[Union[str, int, uuid.UUID]] = []
            for id_val in ids:
                if isinstance(id_val, str):
                    try:
                        converted_ids.append(uuid.UUID(id_val))
                    except ValueError:
                        # If not UUID, generate new UUID
                        converted_ids.append(uuid.uuid4())
                elif isinstance(id_val, int):
                    converted_ids.append(id_val)
                else:
                    converted_ids.append(id_val)
            ids = converted_ids

        if metadatas is None:
            # One dict per document — `[{}] * n` would share a single mutable
            # dict across every document.
            metadatas = [{} for _ in documents]

        logger.info(f"Generating embeddings for {len(documents)} documents...")
        embeddings = self._generate_embeddings(documents)
        logger.info(f"Generated {len(embeddings)} embeddings")

        logger.info("Saving to Qdrant...")
        points = [
            PointStruct(
                id=ids[i],
                vector=embeddings[i],
                payload={
                    "document": documents[i],
                    "original_id": str(ids[i]),  # Save original ID in payload
                    **metadatas[i],
                },
            )
            for i in range(len(documents))
        ]

        self.client.upsert(collection_name=self.collection_name, points=points)
        logger.info(f"Added {len(documents)} documents to collection")

    def search(self, query: str, n_results: int = 5) -> List[Dict]:
        """Search for documents similar to `query`.

        Returns a list of dicts with keys: id, document, metadata, distance.
        `distance` is None for results coming from the scroll fallback path.
        """
        logger.debug(f"Generating embedding for query: {query[:50]}...")
        query_embedding = self._generate_embeddings([query])[0]

        logger.debug("Searching in Qdrant...")
        # Use search API (works with Qdrant 1.8.4+)
        try:
            # Try search method first (standard API)
            results = self.client.search(
                collection_name=self.collection_name, query_vector=query_embedding, limit=n_results
            )
        except Exception as e:  # was `(AttributeError, Exception)` — redundant
            # Fallback for older Qdrant versions or different API
            logger.warning(f"Search method failed ({e}), trying alternative API...")
            try:
                # Try scroll with vectors (for manual similarity calculation)
                scroll_results = self.client.scroll(
                    collection_name=self.collection_name,
                    limit=100,  # Get more points to calculate similarity
                    with_payload=True,
                    with_vectors=True,  # Need vectors for similarity calculation
                )
                # Calculate distances manually (simple cosine similarity)
                import numpy as np

                query_vec = np.array(query_embedding)
                scored_results = []
                for point in scroll_results[0]:
                    if point.vector:
                        point_vec = np.array(point.vector)
                        # Cosine similarity
                        similarity = np.dot(query_vec, point_vec) / (
                            np.linalg.norm(query_vec) * np.linalg.norm(point_vec)
                        )
                        scored_results.append((point, similarity))

                # Sort by similarity and take top n
                scored_results.sort(key=lambda x: x[1], reverse=True)
                results = [point for point, score in scored_results[:n_results]]
            except Exception as e2:
                logger.error(f"All search methods failed: {e2}")
                return []

        formatted_results = []
        for point in results:
            # getattr covers both search results (have .score) and scroll
            # points (no .score); the old hasattr/getattr chain was tautological.
            point_id = getattr(point, "id", None)
            point_payload = getattr(point, "payload", {}) or {}
            point_score = getattr(point, "score", None)

            formatted_results.append(
                {
                    "id": str(point_id),  # Convert UUID to string
                    "document": point_payload.get("document", ""),
                    "metadata": {
                        k: v
                        for k, v in point_payload.items()
                        if k not in ["document", "original_id"]
                    },
                    "distance": point_score,
                }
            )

        logger.debug(f"Found {len(formatted_results)} results")
        return formatted_results

    def count(self) -> int:
        """Return number of documents in collection."""
        info = self.client.get_collection(self.collection_name)
        return info.points_count

    def get_all(self) -> List[Dict]:
        """Get all documents from collection (up to 10000)."""
        results = self.client.scroll(
            collection_name=self.collection_name, limit=10000  # Large limit
        )

        formatted_results = []
        for point in results[0]:  # results[0] is list of points
            formatted_results.append(
                {
                    "id": point.id,
                    "document": point.payload.get("document", ""),
                    "metadata": {k: v for k, v in point.payload.items() if k != "document"},
                }
            )

        return formatted_results

__init__(collection_name='recruitment_knowledge_base', use_azure_openai=False, azure_endpoint=None, azure_api_key=None, azure_deployment=None, azure_api_version=None, qdrant_path=None, qdrant_host=None, qdrant_port=None)

Initialize Qdrant RAG service.

Parameters:

Name Type Description Default
collection_name str

Collection name

'recruitment_knowledge_base'
use_azure_openai bool

Whether to use Azure OpenAI embeddings

False
azure_endpoint Optional[str]

Azure OpenAI endpoint URL

None
azure_api_key Optional[str]

Azure OpenAI API key

None
azure_deployment Optional[str]

Deployment name (e.g., "text-embedding-3-small")

None
azure_api_version Optional[str]

Azure OpenAI API version

None
qdrant_path Optional[str]

Path to local Qdrant database (None = in-memory or server)

None
qdrant_host Optional[str]

Qdrant server hostname (e.g., "qdrant" or "localhost")

None
qdrant_port Optional[int]

Qdrant server port (default: 6333)

None
Source code in services/qdrant_service.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def __init__(
    self,
    collection_name: str = "recruitment_knowledge_base",
    use_azure_openai: bool = False,
    azure_endpoint: Optional[str] = None,
    azure_api_key: Optional[str] = None,
    azure_deployment: Optional[str] = None,
    azure_api_version: Optional[str] = None,
    qdrant_path: Optional[str] = None,
    qdrant_host: Optional[str] = None,
    qdrant_port: Optional[int] = None,
):
    """
    Initialize Qdrant RAG service.

    Args:
        collection_name: Collection name
        use_azure_openai: Whether to use Azure OpenAI embeddings
        azure_endpoint: Azure OpenAI endpoint URL
        azure_api_key: Azure OpenAI API key
        azure_deployment: Deployment name (e.g., "text-embedding-3-small")
        azure_api_version: Azure OpenAI API version
        qdrant_path: Path to local Qdrant database (None = in-memory or server)
        qdrant_host: Qdrant server hostname (e.g., "qdrant" or "localhost")
        qdrant_port: Qdrant server port (default: 6333)

    Raises:
        ImportError: Azure OpenAI requested but openai is not installed.
        ValueError: Azure OpenAI credentials missing — there is no
            local-embedding fallback, so they are effectively required.
        RuntimeError: Local Qdrant path is locked by another process.
    """
    # Initialize Qdrant client
    # Priority: server (host+port) > local path > in-memory
    if qdrant_host:
        # Connect to Qdrant server
        qdrant_port = qdrant_port or 6333
        self.client = QdrantClient(host=qdrant_host, port=qdrant_port)
        logger.info(f"Qdrant server connection: {qdrant_host}:{qdrant_port}")
    elif qdrant_path:
        try:
            self.client = QdrantClient(path=qdrant_path)
            logger.info(f"Qdrant local database: {qdrant_path}")
        except RuntimeError as e:
            # File-based Qdrant allows only one client at a time; surface a
            # clearer message for the common lock-contention failure.
            if "already accessed by another instance" in str(e) or "AlreadyLocked" in str(e):
                error_msg = (
                    f"Qdrant database at {qdrant_path} is already locked by another instance. "
                    "Close other Qdrant clients (e.g., app.py) before accessing. "
                    "Consider using Qdrant server (qdrant_host/qdrant_port) instead."
                )
                logger.error(error_msg)
                raise RuntimeError(error_msg) from e
            else:
                raise
    else:
        self.client = QdrantClient(":memory:")  # In-memory
        logger.info("Qdrant in-memory database")

    self.collection_name = collection_name

    # Initialize Azure OpenAI if needed
    if use_azure_openai and azure_endpoint and azure_api_key:
        if not AZURE_OPENAI_AVAILABLE:
            raise ImportError("openai is not installed. Run: pip install openai")

        self.azure_client = AzureOpenAI(
            api_version=azure_api_version or "2024-12-01-preview",
            azure_endpoint=azure_endpoint,
            api_key=azure_api_key,
        )
        self.azure_deployment = azure_deployment or "text-embedding-3-small"
        self.use_azure_openai = True
        logger.info(f"Using Azure OpenAI embeddings (deployment: {self.azure_deployment})")
    else:
        # NOTE(review): this branch always raises, so use_azure_openai=False
        # is not actually a supported mode despite being the default.
        self.use_azure_openai = False
        raise ValueError("Azure OpenAI credentials must be provided")

    # Create collection if it doesn't exist
    try:
        self.client.get_collection(collection_name)
        logger.info(f"Loaded existing collection: {collection_name}")
    except Exception:
        # Create new collection
        # text-embedding-3-small has 1536 dimensions
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=1536, distance=Distance.COSINE  # text-embedding-3-small
            ),
        )
        logger.info(f"Created new collection: {collection_name}")

add_documents(documents, ids=None, metadatas=None)

Add documents to collection.

Source code in services/qdrant_service.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def add_documents(
    self,
    documents: List[str],
    ids: Optional[List[Union[str, int, uuid.UUID]]] = None,
    metadatas: Optional[List[Dict]] = None,
):
    """Embed documents and upsert them into the collection.

    Args:
        documents: Raw document texts.
        ids: Optional point IDs. Strings that are not valid UUIDs are
            silently replaced with fresh UUIDs (the original string is lost;
            payload["original_id"] records the replacement value).
        metadatas: Optional per-document payload extras.
    """
    if ids is None:
        # Generate UUID for each document
        ids = [uuid.uuid4() for _ in range(len(documents))]
    else:
        # Convert string IDs to UUID if needed
        converted_ids: List[Union[str, int, uuid.UUID]] = []
        for id_val in ids:
            if isinstance(id_val, str):
                try:
                    converted_ids.append(uuid.UUID(id_val))
                except ValueError:
                    # If not UUID, generate new UUID
                    converted_ids.append(uuid.uuid4())
            elif isinstance(id_val, int):
                converted_ids.append(id_val)
            else:
                converted_ids.append(id_val)
        ids = converted_ids

    if metadatas is None:
        # One dict per document. The previous `[{}] * len(documents)` made
        # every entry alias a single mutable dict.
        metadatas = [{} for _ in documents]

    logger.info(f"Generating embeddings for {len(documents)} documents...")
    embeddings = self._generate_embeddings(documents)
    logger.info(f"Generated {len(embeddings)} embeddings")

    logger.info("Saving to Qdrant...")
    points = [
        PointStruct(
            id=ids[i],
            vector=embeddings[i],
            payload={
                "document": documents[i],
                "original_id": str(ids[i]),  # Save original ID in payload
                **metadatas[i],
            },
        )
        for i in range(len(documents))
    ]

    self.client.upsert(collection_name=self.collection_name, points=points)
    logger.info(f"Added {len(documents)} documents to collection")

count()

Return number of documents in collection.

Source code in services/qdrant_service.py
245
246
247
248
def count(self) -> int:
    """Return the number of points currently stored in the collection."""
    collection_info = self.client.get_collection(self.collection_name)
    return collection_info.points_count

get_all()

Get all documents from collection.

Source code in services/qdrant_service.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def get_all(self) -> List[Dict]:
    """
    Return every document in the collection.

    Pages through the collection with Qdrant's scroll API, following the
    next-page offset token until it is exhausted. The previous single
    `scroll(limit=10000)` call silently truncated collections larger
    than one page.

    Returns:
        List of dicts with keys "id", "document" and "metadata".
    """
    formatted_results: List[Dict] = []
    offset = None
    while True:
        points, offset = self.client.scroll(
            collection_name=self.collection_name,
            limit=1000,  # page size; looping makes the total unbounded
            offset=offset,
        )
        for point in points:
            formatted_results.append(
                {
                    "id": point.id,
                    "document": point.payload.get("document", ""),
                    "metadata": {k: v for k, v in point.payload.items() if k != "document"},
                }
            )
        # Qdrant returns None when there are no further pages.
        if offset is None:
            break

    return formatted_results

search(query, n_results=5)

Search for similar documents.

Source code in services/qdrant_service.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def search(self, query: str, n_results: int = 5) -> List[Dict]:
    """
    Search the collection for documents similar to `query`.

    Args:
        query: Free-text query; embedded with the same model as the
            stored documents.
        n_results: Maximum number of hits to return.

    Returns:
        List of dicts with keys "id", "document", "metadata" and
        "distance" (similarity score; None when the fallback path
        cannot provide one). Empty list if every search strategy fails.
    """
    logger.debug(f"Generating embedding for query: {query[:50]}...")
    query_embedding = self._generate_embeddings([query])[0]

    logger.debug("Searching in Qdrant...")
    try:
        # Standard search API (works with Qdrant 1.8.4+).
        results = self.client.search(
            collection_name=self.collection_name, query_vector=query_embedding, limit=n_results
        )
    except Exception as e:
        # Fallback for older Qdrant versions or a different client API.
        # (Exception alone suffices; it already covers AttributeError.)
        logger.warning(f"Search method failed ({e}), trying alternative API...")
        try:
            # Scroll points together with their vectors and rank manually.
            # NOTE(review): only the first 100 points are considered here,
            # so fallback results may be incomplete for larger collections.
            scroll_results = self.client.scroll(
                collection_name=self.collection_name,
                limit=100,  # Get more points to calculate similarity
                with_payload=True,
                with_vectors=True,  # Need vectors for similarity calculation
            )
            import numpy as np

            query_vec = np.array(query_embedding)
            scored_results = []
            for point in scroll_results[0]:
                if point.vector:
                    point_vec = np.array(point.vector)
                    # Cosine similarity between the query and stored vector.
                    similarity = np.dot(query_vec, point_vec) / (
                        np.linalg.norm(query_vec) * np.linalg.norm(point_vec)
                    )
                    scored_results.append((point, similarity))

            # Keep the n_results most similar points.
            scored_results.sort(key=lambda item: item[1], reverse=True)
            results = [point for point, _score in scored_results[:n_results]]
        except Exception as e2:
            logger.error(f"All search methods failed: {e2}")
            return []

    formatted_results = []
    for point in results:
        # Works for both search hits (which carry `score`) and scroll
        # records (which do not): missing attributes fall back to defaults.
        point_payload = getattr(point, "payload", None) or {}
        formatted_results.append(
            {
                "id": str(getattr(point, "id", None)),  # Convert UUID to string
                "document": point_payload.get("document", ""),
                "metadata": {
                    k: v
                    for k, v in point_payload.items()
                    if k not in ("document", "original_id")
                },
                "distance": getattr(point, "score", None),
            }
        )

    logger.debug(f"Found {len(formatted_results)} results")
    return formatted_results

Configuration

Bases: BaseSettings

Application settings loaded from environment variables.

Source code in config/settings.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and an optional `.env` file;
    unknown variables are ignored (see `model_config` below).
    """

    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore"
    )

    # LLM provider selection
    # azure  – current default, uses Azure OpenAI
    # openai – optional, uses api.openai.com via OpenAI official client
    llm_provider: str = "azure"

    # Azure OpenAI Configuration (single source of truth for models)
    # This is the default and currently most supported provider.
    azure_openai_api_key: Optional[str] = None
    azure_openai_endpoint: str = "https://openai-agentai-pl.openai.azure.com/"
    azure_openai_api_version: str = "2024-12-01-preview"
    # IMPORTANT: these names MUST match deployment names in Azure
    azure_openai_gpt_deployment: str = "gpt-5-mini"
    azure_openai_vision_deployment: str = "gpt-5-nano"

    # Alias for the current text model – usually points to the Azure
    # deployment; can also be used as a logical model name for
    # other providers (e.g. OpenAI) when LLM_PROVIDER=openai.
    openai_model: str = "gpt-5-nano"
    openai_vision_model: str = "gpt-5-nano"

    # Temperature / timeout configuration shared by all agents
    openai_temperature: float = 1.0
    openai_feedback_temperature: float = 0.7
    openai_timeout: int = 600
    openai_max_retries: int = 2

    # OCR Configuration
    use_ocr: bool = False
    ocr_timeout: int = 600

    # PDF Processing
    max_text_length: int = 15000
    pdf_min_text_threshold: int = 100

    # Logging
    log_level: str = "INFO"
    verbose: bool = False

    # Email/SMTP Configuration
    email_username: Optional[str] = None  # Email username (for Gmail, Zoho, etc.)
    email_password: Optional[str] = None  # Email password or app password
    smtp_host: str = (
        "smtp.zoho.eu"  # Default to Zoho EU, can be changed to smtp.zoho.com or smtp.gmail.com
    )
    smtp_port: int = 587  # 587 for TLS, 465 for SSL
    smtp_use_tls: bool = True  # Use TLS (True for port 587, False for port 465 with SSL)

    # IMAP Configuration (for email monitoring)
    imap_host: str = (
        "imap.zoho.eu"  # Default to Zoho EU, can be changed to imap.zoho.com or imap.gmail.com
    )
    imap_port: int = 993  # 993 for SSL

    # Email routing configuration
    iod_email: Optional[str] = None  # data-protection-officer inbox
    hr_email: Optional[str] = None  # HR inbox
    email_check_interval: int = 60  # seconds
    email_monitor_enabled: bool = False

    # Privacy policy and information clause
    privacy_policy_url: Optional[str] = None  # URL to privacy policy / information clause
    company_website: Optional[str] = None  # Company website URL (optional)

    # Backward compatibility aliases (deprecated, use email_username/email_password)
    @property
    def gmail_username(self) -> Optional[str]:
        """Backward compatibility: returns email_username."""
        return self.email_username

    @property
    def gmail_password(self) -> Optional[str]:
        """Backward compatibility: returns email_password."""
        return self.email_password

    # OpenAI official (api.openai.com) configuration – used when llm_provider="openai"
    openai_api_key: Optional[str] = None
    openai_base_url: Optional[str] = None
    openai_chat_model: Optional[str] = None

    @property
    def api_key(self) -> str:
        """
        Returns the API key for **Azure OpenAI**.

        This property is intentionally Azure-specific and used by parts of the
        code that historically relied on `AZURE_OPENAI_API_KEY`. The new
        OpenAI (api.openai.com) support uses `openai_api_key` directly,
        without going through this property.

        Raises:
            ValueError: if `azure_openai_api_key` is not set.
        """
        if not self.azure_openai_api_key:
            raise ValueError(
                "AZURE_OPENAI_API_KEY not found. "
                "Add it to the .env file or environment variables."
            )
        return self.azure_openai_api_key

    @property
    def is_azure_configured(self) -> bool:
        """Check if Azure OpenAI is configured (both API key and endpoint set)."""
        return bool(self.azure_openai_api_key and self.azure_openai_endpoint)

    def model_post_init(self, __context) -> None:
        """
        Additional initialization after settings are loaded.

        If Azure OpenAI is configured (endpoint + api_key), we set:
        - environment variables expected by the OpenAI client (Azure mode),
        - openai_model to the Azure deployment name (azure_openai_gpt_deployment).
        This way agents still use the single field settings.openai_model,
        but actually refer to the Azure deployment.
        """
        if self.azure_openai_api_key and self.azure_openai_endpoint:
            # Configure Azure mode for the OpenAI library
            os.environ["OPENAI_API_KEY"] = self.azure_openai_api_key
            # Use endpoint from Azure portal (e.g. https://xxx.openai.azure.com)
            os.environ["OPENAI_API_BASE"] = self.azure_openai_endpoint
            os.environ["OPENAI_API_TYPE"] = "azure"
            os.environ["OPENAI_API_VERSION"] = self.azure_openai_api_version

            # If deployment name is defined – use it as the model
            if self.azure_openai_gpt_deployment:
                self.openai_model = self.azure_openai_gpt_deployment
            if self.azure_openai_vision_deployment:
                self.openai_vision_model = self.azure_openai_vision_deployment

api_key property

Returns the API key for Azure OpenAI.

This property is intentionally Azure-specific and used by parts of the code that historically relied on AZURE_OPENAI_API_KEY. The new OpenAI (api.openai.com) support uses openai_api_key directly, without going through this property.

gmail_password property

Backward compatibility: returns email_password.

gmail_username property

Backward compatibility: returns email_username.

is_azure_configured property

Check if Azure OpenAI is configured.

model_post_init(__context)

Additional initialization after settings are loaded.

If Azure OpenAI is configured (endpoint + api_key), this hook sets the environment variables expected by the OpenAI client (Azure mode), and points openai_model at the Azure deployment name (azure_openai_gpt_deployment). This way agents still use the single field settings.openai_model, but actually refer to the Azure deployment.

Source code in config/settings.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def model_post_init(self, __context) -> None:
    """
    Additional initialization after settings are loaded.

    If Azure OpenAI is configured (endpoint + api_key), we set:
    - environment variables expected by the OpenAI client (Azure mode),
    - openai_model to the Azure deployment name (azure_openai_gpt_deployment).
    This way agents still use the single field settings.openai_model,
    but actually refer to the Azure deployment.
    """
    if self.azure_openai_api_key and self.azure_openai_endpoint:
        # Configure Azure mode for the OpenAI library
        os.environ["OPENAI_API_KEY"] = self.azure_openai_api_key
        # Use endpoint from Azure portal (e.g. https://xxx.openai.azure.com)
        os.environ["OPENAI_API_BASE"] = self.azure_openai_endpoint
        os.environ["OPENAI_API_TYPE"] = "azure"
        os.environ["OPENAI_API_VERSION"] = self.azure_openai_api_version

        # If deployment name is defined – use it as the model
        if self.azure_openai_gpt_deployment:
            self.openai_model = self.azure_openai_gpt_deployment
        if self.azure_openai_vision_deployment:
            self.openai_vision_model = self.azure_openai_vision_deployment