This commit is contained in:
ddrwode
2026-03-03 13:51:15 +08:00
parent a66cbc7424
commit 6780f184c2

View File

@@ -13,7 +13,7 @@ import json
import random
import re
import time
from typing import Any, Callable, Coroutine, Dict, List
from typing import Any, Callable, Coroutine, Dict, List, Optional
from common.protocol import TaskType
from worker.bit_browser import BitBrowserAPI
@@ -170,7 +170,14 @@ class BossRecruitHandler(BaseTaskHandler):
self.ensure_not_cancelled(cancel_event)
action_state = self._ask_and_exchange_wechat_like_script(tab)
contacts = self._extract_contacts(messages)
panel_texts = self._collect_chat_panel_texts(tab)
contacts = self._extract_contacts(messages, extra_texts=panel_texts)
contact_written = bool(contacts["wechat"] or contacts["phone"])
if has_contact_keyword and not contact_written:
self.logger.warning(
"[%s] 历史消息含联系方式关键词,但未提取到有效联系方式,疑似识别失败",
name,
)
collected.append(
{
"name": name,
@@ -178,6 +185,7 @@ class BossRecruitHandler(BaseTaskHandler):
"job_id": friend_job_id,
"wechat": contacts["wechat"],
"phone": contacts["phone"],
"contact_written": contact_written,
"has_contact_keyword": has_contact_keyword,
**action_state,
}
@@ -189,6 +197,10 @@ class BossRecruitHandler(BaseTaskHandler):
err_msg = f"处理第 {i} 个会话出错: {e}"
self.logger.error(err_msg)
errors.append(err_msg)
finally:
if i < total:
self.ensure_not_cancelled(cancel_event)
self._sleep_between_sessions()
self.ensure_not_cancelled(cancel_event)
return {"details": collected, "errors": errors}
@@ -244,6 +256,11 @@ class BossRecruitHandler(BaseTaskHandler):
return messages
return []
@staticmethod
def _sleep_between_sessions() -> None:
    """Pause for a random interval between two sessions.

    The delay is drawn uniformly from 1.8 to 4.2 seconds so that switching
    between sessions is less frequent, lowering the risk-control exposure
    that rapid switching brings.
    """
    pause_seconds = random.uniform(1.8, 4.2)
    time.sleep(pause_seconds)
def _ask_and_exchange_wechat_like_script(self, tab) -> dict:
state = {
"asked_wechat": False,
@@ -485,17 +502,40 @@ class BossRecruitHandler(BaseTaskHandler):
return texts
def _collect_chat_panel_texts(self, tab, max_items: int = 80) -> list[str]:
"""从当前聊天面板读取可见消息文本,补充接口历史消息。"""
texts: list[str] = []
try:
items = tab.eles("css:.message-item", timeout=1)
except Exception:
return texts
if not items:
return texts
for e in items[-max_items:]:
try:
text = (e.text or "").strip()
except Exception:
text = ""
if text:
texts.append(text)
return texts
def _has_contact_keyword(self, messages: list) -> bool:
    """Report whether any history text contains a contact keyword.

    Args:
        messages: Message records passed through to ``self._history_texts``.

    Returns:
        True as soon as one text produced by ``self._history_texts`` contains
        any entry of ``CONTACT_KEYWORDS`` as a substring, otherwise False.
    """
    return any(
        keyword in history_text
        for history_text in self._history_texts(messages)
        for keyword in CONTACT_KEYWORDS
    )
def _extract_contacts(self, messages: list) -> dict:
def _extract_contacts(self, messages: list, extra_texts: Optional[list[str]] = None) -> dict:
wechats: list[str] = []
phones: list[str] = []
all_texts = self._history_texts(messages)
if extra_texts:
all_texts.extend([str(t).strip() for t in extra_texts if str(t).strip()])
for text in self._history_texts(messages):
for text in all_texts:
wechats.extend(self._extract_wechat(text))
phones.extend(self._extract_phone(text))
@@ -518,7 +558,6 @@ class BossRecruitHandler(BaseTaskHandler):
r"微信[:\s]*([a-zA-Z0-9_\-]{6,20})",
r"wx[:\s]*([a-zA-Z0-9_\-]{6,20})",
r"wechat[:\s]*([a-zA-Z0-9_\-]{6,20})",
r"([a-zA-Z][a-zA-Z0-9_\-]{5,19})",
]
for pattern in patterns: