diff --git a/worker/tasks/boss_recruit.py b/worker/tasks/boss_recruit.py index 1a3adde..b3c785e 100644 --- a/worker/tasks/boss_recruit.py +++ b/worker/tasks/boss_recruit.py @@ -13,7 +13,7 @@ import json import random import re import time -from typing import Any, Callable, Coroutine, Dict, List +from typing import Any, Callable, Coroutine, Dict, List, Optional from common.protocol import TaskType from worker.bit_browser import BitBrowserAPI @@ -170,7 +170,14 @@ class BossRecruitHandler(BaseTaskHandler): self.ensure_not_cancelled(cancel_event) action_state = self._ask_and_exchange_wechat_like_script(tab) - contacts = self._extract_contacts(messages) + panel_texts = self._collect_chat_panel_texts(tab) + contacts = self._extract_contacts(messages, extra_texts=panel_texts) + contact_written = bool(contacts["wechat"] or contacts["phone"]) + if has_contact_keyword and not contact_written: + self.logger.warning( + "[%s] 历史消息含联系方式关键词,但未提取到有效联系方式,疑似识别失败", + name, + ) collected.append( { "name": name, @@ -178,6 +185,7 @@ class BossRecruitHandler(BaseTaskHandler): "job_id": friend_job_id, "wechat": contacts["wechat"], "phone": contacts["phone"], + "contact_written": contact_written, "has_contact_keyword": has_contact_keyword, **action_state, } @@ -189,6 +197,10 @@ class BossRecruitHandler(BaseTaskHandler): err_msg = f"处理第 {i} 个会话出错: {e}" self.logger.error(err_msg) errors.append(err_msg) + finally: + if i < total: + self.ensure_not_cancelled(cancel_event) + self._sleep_between_sessions() self.ensure_not_cancelled(cancel_event) return {"details": collected, "errors": errors} @@ -244,6 +256,11 @@ class BossRecruitHandler(BaseTaskHandler): return messages return [] + @staticmethod + def _sleep_between_sessions() -> None: + """会话间随机停顿,降低频繁切换带来的风控风险。""" + time.sleep(random.uniform(1.8, 4.2)) + def _ask_and_exchange_wechat_like_script(self, tab) -> dict: state = { "asked_wechat": False, @@ -485,17 +502,40 @@ class BossRecruitHandler(BaseTaskHandler): return texts + def _collect_chat_panel_texts(self, tab, max_items: int = 80) -> list[str]: + """从当前聊天面板读取可见消息文本,补充接口历史消息。""" + texts: list[str] = [] + try: + items = tab.eles("css:.message-item", timeout=1) + except Exception: + return texts + + if not items: + return texts + + for e in items[-max_items:]: + try: + text = (e.text or "").strip() + except Exception: + text = "" + if text: + texts.append(text) + return texts + def _has_contact_keyword(self, messages: list) -> bool: for text in self._history_texts(messages): if any(k in text for k in CONTACT_KEYWORDS): return True return False - def _extract_contacts(self, messages: list) -> dict: + def _extract_contacts(self, messages: list, extra_texts: Optional[list[str]] = None) -> dict: wechats: list[str] = [] phones: list[str] = [] + all_texts = self._history_texts(messages) + if extra_texts: + all_texts.extend([str(t).strip() for t in extra_texts if str(t).strip()]) - for text in self._history_texts(messages): + for text in all_texts: wechats.extend(self._extract_wechat(text)) phones.extend(self._extract_phone(text)) @@ -518,7 +558,6 @@ class BossRecruitHandler(BaseTaskHandler): r"微信[::\s]*([a-zA-Z0-9_\-]{6,20})", r"wx[::\s]*([a-zA-Z0-9_\-]{6,20})", r"wechat[::\s]*([a-zA-Z0-9_\-]{6,20})", - r"([a-zA-Z][a-zA-Z0-9_\-]{5,19})", ] for pattern in patterns: