Files
boss_dp/worker/tasks/boss_recruit.py
2026-02-12 16:27:43 +08:00

336 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
BOSS 直聘招聘任务处理器。
复用并重构原 boss_drission.py 的核心流程。
"""
from __future__ import annotations
import asyncio
import random
import re
import time
from typing import Any, Callable, Coroutine, Dict, List
from common.protocol import TaskType
from worker.tasks.base import BaseTaskHandler
from worker.bit_browser import BitBrowserAPI
from worker.browser_control import (
connect_browser,
find_element,
find_elements,
human_click,
human_delay,
safe_click,
)
# ─── Constants ───
# Chat index page of BOSS Zhipin; _run_sync navigates the connected tab to this URL.
CHAT_INDEX_URL = "https://www.zhipin.com/web/chat/index"
# NOTE(review): not referenced anywhere in the visible part of this file —
# presumably a generic per-action pause; confirm it is still needed.
ACTION_DELAY = 1.5
# Base pause between two conversations; _recruit_flow derives the lower/upper
# bounds it passes to human_delay() from this value (presumably seconds — confirm
# against worker.browser_control.human_delay).
BETWEEN_CHAT_DELAY = 2.5
class BossRecruitHandler(BaseTaskHandler):
    """Recruiting automation task for the BOSS Zhipin chat page.

    The handler opens a BitBrowser window, attaches to it, loads the chat
    index page and then walks the conversation list: for every conversation it
    sends a greeting (unless one was already sent), asks for the candidate's
    WeChat ID (unless already asked) and collects any WeChat ID found in the
    candidate's replies.
    """

    task_type = TaskType.BOSS_RECRUIT.value

    # Fixed fragment of the greeting built in ``_recruit_flow``. It is used to
    # recognise a greeting this handler sent earlier; keep it a substring of
    # the greeting text.
    _GREETING_MARKER = "我们正在招"

    # Text hints used to classify a message as sent by the recruiter when the
    # DOM-based role detection fails. Every entry must be non-empty: an empty
    # string is a substring of every message and would label everything "boss".
    _BOSS_TEXT_HINTS = (_GREETING_MARKER, "岗位", "微信号", "方便留", "加您")

    # Label words that must never be reported as a WeChat ID themselves.
    _WECHAT_LABELS = frozenset({"wechat", "weixin"})

    # Labelled patterns come first so their matches take priority; the last
    # pattern is a loose fallback for a bare ID-looking token. The separator
    # class accepts both the ASCII and the full-width colon.
    _WECHAT_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r"微信号[:：\s]*([a-zA-Z0-9_\-]{6,20})",
            r"微信[:：\s]*([a-zA-Z0-9_\-]{6,20})",
            r"wx[:：\s]*([a-zA-Z0-9_\-]{6,20})",
            r"wechat[:：\s]*([a-zA-Z0-9_\-]{6,20})",
            r"([a-zA-Z][a-zA-Z0-9_\-]{5,19})",
        )
    )

    async def execute(
        self,
        task_id: str,
        params: Dict[str, Any],
        progress_cb: Callable[[str, str], Coroutine],
    ) -> Any:
        """Run the BOSS recruiting flow.

        Args:
            task_id: Identifier of the task; forwarded to ``progress_cb``.
            params: Task parameters:
                - ``job_title`` (str): title of the position being recruited
                  for (default ``"相关岗位"``).
                - ``max_greet`` (int): maximum number of conversations to
                  process (default 5).
                - ``account_name`` (str): BitBrowser window name used to open
                  the browser.
                - ``account_id`` (str): BitBrowser window ID; optional, takes
                  precedence over ``account_name``.
                - ``bit_api_base`` (str): BitBrowser API base URL; optional.
            progress_cb: Async callback invoked as ``progress_cb(task_id, message)``.

        Returns:
            The summary dict produced by ``_run_sync``.
        """
        job_title = params.get("job_title", "相关岗位")
        max_greet = params.get("max_greet", 5)
        account_name = params.get("account_name", "")
        account_id = params.get("account_id", "")
        bit_api_base = params.get("bit_api_base", "http://127.0.0.1:54345")

        await progress_cb(task_id, "正在打开比特浏览器...")

        # The browser automation is synchronous (the original comment notes
        # DrissionPage is a sync library), so run it in the default thread
        # pool. We are inside a coroutine, so get_running_loop() is the
        # correct way to obtain the loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            self._run_sync,
            task_id,
            job_title,
            max_greet,
            account_name,
            account_id,
            bit_api_base,
            progress_cb,
        )

    def _run_sync(
        self,
        task_id: str,
        job_title: str,
        max_greet: int,
        account_name: str,
        account_id: str,
        bit_api_base: str,
        progress_cb: Callable,
    ) -> dict:
        """Run the browser automation synchronously (called in a worker thread).

        ``task_id`` and ``progress_cb`` are accepted for interface
        compatibility but are not used here.

        Returns:
            A dict with ``job_title``, ``total_processed`` (number of
            conversations recorded), ``wechat_collected`` (records with a
            non-empty WeChat ID) and ``details`` (the per-candidate records).
        """
        # 1. Open the BitBrowser window; empty strings are passed as None.
        bit_api = BitBrowserAPI(bit_api_base)
        addr, port = bit_api.get_browser_for_drission(
            browser_id=account_id or None,
            name=account_name or None,
        )
        self.logger.info("已打开浏览器, CDP: %s (port=%d)", addr, port)

        # 2. Attach to the running browser and take its latest tab.
        browser = connect_browser(port=port)
        tab = browser.latest_tab

        # 3. Load the chat index page.
        tab.get(CHAT_INDEX_URL)
        tab.wait.load_start()
        human_delay(2.5, 4.0)

        # 4. Run the recruiting flow and summarise the outcome.
        collected = self._recruit_flow(tab, job_title, max_greet)
        return {
            "job_title": job_title,
            "total_processed": len(collected),
            "wechat_collected": sum(1 for c in collected if c.get("wechat")),
            "details": collected,
        }

    def _recruit_flow(self, tab, job_title: str, max_greet: int) -> List[dict]:
        """Walk the conversation list: greet, ask for WeChat, collect results.

        At most ``max_greet`` conversations (counted from the top of the list)
        are processed. A failure in one conversation is logged and does not
        stop the remaining ones.

        Returns:
            One dict per processed conversation with keys ``name``, ``job``
            and ``wechat`` (empty string when no ID was found).
        """
        greeting = (
            f"您好，{self._GREETING_MARKER}【{job_title}】，"
            "看到您的经历比较匹配，方便简单聊聊吗？"
        )
        ask_wechat = "后续沟通会更及时，您方便留一下您的微信号吗？我这边加您。"
        collected: List[dict] = []

        items = self._get_conversation_items(tab)
        if not items:
            self.logger.warning("未找到会话列表元素")
            return collected

        # Clamp at 0 so a negative max_greet cannot produce a negative count.
        total = max(0, min(len(items), max_greet))
        self.logger.info("会话数约 %d，本次处理前 %d", len(items), total)

        for i in range(total):
            try:
                human_delay(max(1.8, BETWEEN_CHAT_DELAY - 0.7), BETWEEN_CHAT_DELAY + 1.0)

                # Re-query the list on every iteration: elements obtained
                # before the previous click are not reused.
                items = self._get_conversation_items(tab)
                if i >= len(items):
                    break
                item = items[i]

                name = self._get_candidate_name(item, i)

                # If the click failed, the chat that is still open (if any)
                # belongs to a previous candidate; do not message it or record
                # its data under this candidate's name.
                if not self._click_conversation(tab, item):
                    self.logger.warning("[%s] 点击会话失败，跳过", name)
                    continue
                human_delay(1.2, 2.2)

                inp = self._wait_for_input(tab)
                if not inp:
                    self.logger.info("[%s] 未进入聊天，跳过", name)
                    continue

                ctx = self._analyze_context(self._get_chat_messages(tab), job_title)

                # Greeting.
                if not ctx["already_greeting"]:
                    if self._send_message(tab, inp, greeting):
                        self.logger.info("[%s] 已发送招呼", name)
                    human_delay(1.5, 2.8)
                else:
                    self.logger.info("[%s] 已有招呼记录，跳过", name)

                # Ask for the WeChat ID.
                if not ctx["already_asked_wechat"]:
                    if self._send_message(tab, inp, ask_wechat):
                        self.logger.info("[%s] 已询问微信号", name)
                    human_delay(1.5, 2.8)

                # Re-read the chat and collect the first WeChat ID, if any.
                human_delay(1.0, 2.0)
                ctx = self._analyze_context(self._get_chat_messages(tab), job_title)
                wechats = ctx["wechats"]
                collected.append({
                    "name": name,
                    "job": job_title,
                    "wechat": wechats[0] if wechats else "",
                })
            except Exception as e:
                # Best effort: one broken conversation must not abort the run.
                self.logger.error("处理第 %d 个会话出错: %s", i + 1, e)

        return collected

    # ─── Helpers ───

    def _get_conversation_items(self, tab) -> list:
        """Return the conversation list elements, trying selectors in order."""
        selectors = [
            "css:div.chat-container div.geek-item",
            "css:div[role='listitem'] div.geek-item",
            "css:.geek-item-wrap .geek-item",
            "css:div.geek-item",
            "css:div[role='listitem']",
        ]
        return find_elements(tab, selectors, timeout=3)

    def _get_candidate_name(self, item, index: int) -> str:
        """Return a display name for a conversation item.

        Tries the ``.geek-name`` child first, then the first 20 characters of
        the item's own text, and finally a positional placeholder.
        """
        try:
            name_el = item.ele("css:.geek-name", timeout=1)
            name = (name_el.text or "").strip() if name_el else ""
            if name:
                return name
        except Exception:
            # Best effort: fall through to the next strategy.
            pass
        try:
            text = (item.text or "").strip()
            if text:
                return text[:20]
        except Exception:
            pass
        return f"候选人{index + 1}"

    def _click_conversation(self, tab, item) -> bool:
        """Click a conversation item, falling back to its ``listitem`` parent.

        Returns:
            True when one of the clicks reported success, otherwise False.
        """
        if safe_click(tab, item):
            return True
        try:
            parent = item.parent()
            if parent and parent.attr("role") == "listitem":
                return safe_click(tab, parent)
        except Exception:
            pass
        return False

    def _wait_for_input(self, tab, retries: int = 6):
        """Poll for the chat editor input; return it, or None after ``retries`` attempts."""
        selectors = [
            "css:#boss-chat-editor-input",
            "css:.boss-chat-editor-input",
        ]
        for _ in range(retries):
            inp = find_element(tab, selectors, timeout=1)
            if inp:
                return inp
            human_delay(0.5, 0.9)
        return None

    def _send_message(self, tab, inp, message: str) -> bool:
        """Type ``message`` into ``inp`` and send it.

        The send button is tried first; if it cannot be clicked, a newline is
        typed into the input as a fallback.

        Returns:
            True when the message was submitted by either route (delivery is
            not verified), False when any step raised.
        """
        try:
            human_click(tab, inp)
            human_delay(0.25, 0.55)
            try:
                inp.clear()
            except Exception:
                # Clearing is optional; continue with whatever is in the box.
                pass
            human_delay(0.15, 0.4)
            inp.input(message)
            human_delay(0.4, 0.9)
            if self._click_send_button(tab):
                return True
            inp.input("\n")
            human_delay(0.35, 0.7)
            return True
        except Exception as e:
            self.logger.warning("发送消息失败: %s", e)
            return False

    def _click_send_button(self, tab) -> bool:
        """Click the send button; fall back to clicking it through JavaScript.

        Returns:
            True when a click succeeded or a script returned ``true``.
        """
        human_delay(0.2, 0.45)
        btn = find_element(tab, [
            "css:.conversation-editor .submit-content .submit.active",
            "css:.conversation-editor div.submit.active",
            "css:div.submit-content div.submit.active",
            "css:div.submit.active",
            "css:.submit-content .submit",
            "css:div.submit",
            "text:发送",
        ], timeout=1)
        if btn and safe_click(tab, btn):
            return True

        # JavaScript fallback.
        scripts = [
            "var el = document.querySelector('.conversation-editor .submit.active') || document.querySelector('.conversation-editor .submit'); if(el){ el.click(); return true; } return false;",
            "var els = document.querySelectorAll('div[class*=\"submit\"]'); for(var i=0;i<els.length;i++){ if(els[i].textContent.trim()==='发送'){ els[i].click(); return true; } } return false;",
        ]
        for script in scripts:
            try:
                if tab.run_js(script) is True:
                    return True
            except Exception:
                continue
        return False

    def _get_chat_messages(self, tab) -> List[dict]:
        """Read up to the last 50 chat messages of the open conversation.

        Returns:
            A list of ``{"role": "boss" | "friend", "text": str}`` dicts in
            page order. Empty messages and those containing "沟通的职位" are
            skipped. On an unexpected error the messages gathered so far are
            returned.
        """
        result: List[dict] = []
        try:
            elements = tab.eles("css:.message-item", timeout=2)
            if not elements:
                return result
            for el in elements[-50:]:
                text = (el.text or "").strip()
                if not text or "沟通的职位" in text:
                    continue
                role = "friend"
                try:
                    if el.ele("css:.item-boss", timeout=0):
                        role = "boss"
                    elif el.ele("css:.item-friend", timeout=0):
                        role = "friend"
                    else:
                        cls = (el.attr("class") or "") + " "
                        if "item-boss" in cls or ("boss" in cls and "friend" not in cls):
                            role = "boss"
                except Exception:
                    # DOM probing failed; classify by text that only the
                    # recruiter-side messages are expected to contain.
                    if any(hint in text for hint in self._BOSS_TEXT_HINTS):
                        role = "boss"
                result.append({"role": role, "text": text})
        except Exception as e:
            self.logger.warning("读取聊天记录失败: %s", e)
        return result

    def _analyze_context(self, messages: list, job_title: str) -> dict:
        """Summarise what has already happened in a conversation.

        Args:
            messages: Dicts with ``role`` and ``text`` keys, as returned by
                ``_get_chat_messages``.
            job_title: Position title used in the greeting.

        Returns:
            A dict with:
                - ``already_greeting``: a recruiter message contains the
                  greeting marker or the (non-empty) job title.
                - ``already_asked_wechat``: a recruiter message mentions "微信".
                - ``wechats``: up to three distinct WeChat IDs extracted from
                  the candidate's messages, in order of appearance.
        """
        boss_texts = [m["text"] for m in messages if m.get("role") == "boss"]
        friend_texts = [m["text"] for m in messages if m.get("role") == "friend"]
        full_boss = " ".join(boss_texts)

        wechats: List[str] = []
        for text in friend_texts:
            wechats.extend(self._extract_wechat(text))
        wechats = list(dict.fromkeys(wechats))[:3]

        # Never test an empty needle: "" is contained in every string and
        # would mark every conversation as already greeted.
        already_greeting = self._GREETING_MARKER in full_boss or (
            bool(job_title) and job_title in full_boss
        )
        return {
            "already_greeting": already_greeting,
            # "微信号" contains "微信", so one check covers both.
            "already_asked_wechat": "微信" in full_boss,
            "wechats": wechats,
        }

    @staticmethod
    def _extract_wechat(text: str) -> list:
        """Extract up to three WeChat-ID candidates from ``text``.

        Labelled forms ("微信号", "微信", "wx", "wechat", optionally followed
        by an ASCII/full-width colon or whitespace) are matched first, then any
        bare token of 6-20 ID characters starting with a letter. The bare-token
        fallback is a loose heuristic and can pick up ordinary words of six or
        more letters. Label words themselves are never returned.

        Returns:
            Distinct candidates in match order; empty list for blank input.
        """
        if not text or not text.strip():
            return []
        found: List[str] = []
        for pattern in BossRecruitHandler._WECHAT_PATTERNS:
            for match in pattern.finditer(text):
                candidate = match.group(1)
                if candidate.lower() in BossRecruitHandler._WECHAT_LABELS:
                    continue
                if candidate not in found:
                    found.append(candidate)
        return found[:3]