336 lines
12 KiB
Python
336 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
BOSS 直聘招聘任务处理器。
|
||
复用并重构原 boss_drission.py 的核心流程。
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import random
|
||
import re
|
||
import time
|
||
from typing import Any, Callable, Coroutine, Dict, List
|
||
|
||
from common.protocol import TaskType
|
||
from worker.tasks.base import BaseTaskHandler
|
||
from worker.bit_browser import BitBrowserAPI
|
||
from worker.browser_control import (
|
||
connect_browser,
|
||
find_element,
|
||
find_elements,
|
||
human_click,
|
||
human_delay,
|
||
safe_click,
|
||
)
|
||
|
||
# ─── 常量 ───
|
||
# URL of the BOSS Zhipin chat index page; loaded once before conversations are processed.
CHAT_INDEX_URL = "https://www.zhipin.com/web/chat/index"
|
||
# NOTE(review): not referenced anywhere in the visible part of this file.
ACTION_DELAY = 1.5
|
||
# Baseline from which the randomized human_delay() bounds between conversations are derived.
BETWEEN_CHAT_DELAY = 2.5
|
||
|
||
|
||
class BossRecruitHandler(BaseTaskHandler):
    """Recruiting automation task for the BOSS Zhipin chat page.

    The handler opens a BitBrowser window, attaches to it, loads the chat
    index page and walks the conversation list. For each conversation it
    sends a greeting and a request for the candidate's WeChat ID (unless the
    chat history suggests that was already done) and records any WeChat ID
    found in the candidate's messages.
    """

    task_type = TaskType.BOSS_RECRUIT.value

    # Fallbacks used by ``execute`` when a parameter is missing or unusable.
    _DEFAULT_JOB_TITLE = "相关岗位"
    _DEFAULT_MAX_GREET = 5

    # Patterns where the ID follows an explicit label. The ASCII labels
    # ("wx", "wechat") require at least one separator so that IDs which merely
    # start with those letters (e.g. "wxid_...") are not cut in half. The
    # trailing look-ahead rejects tokens longer than 20 characters instead of
    # silently truncating them.
    _WECHAT_LABELLED_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r"微信号[::\s]*([a-zA-Z0-9_\-]{6,20})(?![a-zA-Z0-9_\-])",
            r"微信[::\s]*([a-zA-Z0-9_\-]{6,20})(?![a-zA-Z0-9_\-])",
            r"(?<![a-zA-Z0-9_\-])wx[::\s]+([a-zA-Z0-9_\-]{6,20})(?![a-zA-Z0-9_\-])",
            r"(?<![a-zA-Z0-9_\-])wechat[::\s]+([a-zA-Z0-9_\-]{6,20})(?![a-zA-Z0-9_\-])",
        )
    )
    # Fallback: any stand-alone token that looks like an ID (letter first,
    # 6-20 characters in total).
    _WECHAT_BARE_PATTERN = re.compile(
        r"(?<![a-zA-Z0-9_\-])([a-zA-Z][a-zA-Z0-9_\-]{5,19})(?![a-zA-Z0-9_\-])"
    )
    # Label words that the bare pattern would otherwise report as IDs.
    _WECHAT_LABEL_WORDS = frozenset({"wechat", "weixin"})

    async def execute(
        self,
        task_id: str,
        params: Dict[str, Any],
        progress_cb: Callable[[str, str], Coroutine],
    ) -> Any:
        """Run the BOSS recruiting flow.

        Args:
            task_id: Identifier passed back to ``progress_cb``.
            params: Task parameters:
                - job_title (str): name of the position being recruited for;
                  a missing or empty value falls back to a generic title.
                - max_greet (int): maximum number of conversations to
                  process (default 5); invalid values fall back to 5 and
                  negative values are clamped to 0.
                - account_name (str): BitBrowser window name.
                - account_id (str): BitBrowser window ID (optional; the
                  original parameter notes say it takes priority over the
                  name).
                - bit_api_base (str): BitBrowser API base URL (optional).
            progress_cb: Coroutine function awaited with
                ``(task_id, message)`` to report progress.

        Returns:
            The summary dict produced by ``_run_sync``.
        """
        job_title = params.get("job_title") or self._DEFAULT_JOB_TITLE
        max_greet = self._coerce_max_greet(
            params.get("max_greet", self._DEFAULT_MAX_GREET),
            self._DEFAULT_MAX_GREET,
        )
        account_name = params.get("account_name", "")
        account_id = params.get("account_id", "")
        bit_api_base = params.get("bit_api_base", "http://127.0.0.1:54345")

        await progress_cb(task_id, "正在打开比特浏览器...")

        # The browser automation is synchronous (DrissionPage), so it runs in
        # the default thread-pool executor to keep the event loop responsive.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            self._run_sync,
            task_id, job_title, max_greet, account_name, account_id, bit_api_base, progress_cb,
        )

    @staticmethod
    def _coerce_max_greet(value: Any, default: int) -> int:
        """Convert *value* to a non-negative int, or return *default* if it cannot be converted."""
        try:
            return max(0, int(value))
        except (TypeError, ValueError):
            return default

    def _run_sync(
        self,
        task_id: str,
        job_title: str,
        max_greet: int,
        account_name: str,
        account_id: str,
        bit_api_base: str,
        progress_cb: Callable,
    ) -> dict:
        """Run the browser automation synchronously (called from a worker thread).

        ``task_id`` and ``progress_cb`` are accepted for interface
        compatibility but are not used here: ``progress_cb`` is a coroutine
        function and cannot simply be called from this thread.

        Returns:
            A dict with ``job_title``, ``total_processed``,
            ``wechat_collected`` and ``details`` (the per-conversation
            records from ``_recruit_flow``).
        """
        # 1. Open the BitBrowser window; empty strings are passed as None.
        bit_api = BitBrowserAPI(bit_api_base)
        addr, port = bit_api.get_browser_for_drission(
            browser_id=account_id or None,
            name=account_name or None,
        )
        self.logger.info("已打开浏览器, CDP: %s (port=%d)", addr, port)

        # 2. Attach to the running browser.
        browser = connect_browser(port=port)
        tab = browser.latest_tab

        # 3. Load the chat index page.
        tab.get(CHAT_INDEX_URL)
        # NOTE(review): tab.get() may already block until the page has loaded;
        # confirm that this additional wait is needed.
        tab.wait.load_start()
        human_delay(2.5, 4.0)

        # 4. Walk the conversation list.
        collected = self._recruit_flow(tab, job_title, max_greet)

        return {
            "job_title": job_title,
            "total_processed": len(collected),
            "wechat_collected": sum(1 for c in collected if c.get("wechat")),
            "details": collected,
        }

    def _recruit_flow(self, tab, job_title: str, max_greet: int) -> List[dict]:
        """Greet candidates, ask for their WeChat ID and collect the results.

        Processes at most ``max_greet`` conversations, addressed by their
        position in the conversation list. A failure in one conversation is
        logged and does not stop the remaining ones.

        Returns:
            One ``{"name", "job", "wechat"}`` dict per conversation that was
            successfully opened; ``wechat`` is ``""`` when none was found.
        """
        greeting = f"您好,我们正在招【{job_title}】,看到您的经历比较匹配,方便简单聊聊吗?"
        ask_wechat = "后续沟通会更及时,您方便留一下您的微信号吗?我这边加您。"
        collected: List[dict] = []

        items = self._get_conversation_items(tab)
        if not items:
            self.logger.warning("未找到会话列表元素")
            return collected

        total = max(0, min(len(items), max_greet))
        self.logger.info("会话数约 %d,本次处理前 %d 个", len(items), total)

        for i in range(total):
            try:
                human_delay(max(1.8, BETWEEN_CHAT_DELAY - 0.7), BETWEEN_CHAT_DELAY + 1.0)

                # Re-query on every pass: element handles from an earlier
                # pass may be stale once the page has re-rendered.
                # NOTE(review): if the site reorders the list after a message
                # is sent, positional indexing may revisit or skip entries.
                items = self._get_conversation_items(tab)
                if i >= len(items):
                    break
                record = self._process_conversation(
                    tab, items[i], i, job_title, greeting, ask_wechat
                )
            except Exception as e:
                self.logger.error("处理第 %d 个会话出错: %s", i + 1, e)
                continue

            if record is not None:
                collected.append(record)

        return collected

    def _process_conversation(
        self,
        tab,
        item,
        index: int,
        job_title: str,
        greeting: str,
        ask_wechat: str,
    ) -> dict | None:
        """Handle one conversation; return its record, or None if it could not be opened."""
        name = self._get_candidate_name(item, index)

        # Without a successful click the previously opened chat would still be
        # showing, and the messages below would go to the wrong candidate.
        # Assumes safe_click reports success with a truthy value, as its other
        # call sites in this class do.
        if not self._click_conversation(tab, item):
            self.logger.info("[%s] 无法点击会话,跳过", name)
            return None
        human_delay(1.2, 2.2)

        inp = self._wait_for_input(tab)
        if not inp:
            self.logger.info("[%s] 未进入聊天,跳过", name)
            return None

        ctx = self._analyze_context(self._get_chat_messages(tab), job_title)

        # Greeting
        if ctx["already_greeting"]:
            self.logger.info("[%s] 已有招呼记录,跳过", name)
        elif self._send_message(tab, inp, greeting):
            self.logger.info("[%s] 已发送招呼", name)
            human_delay(1.5, 2.8)

        # WeChat request
        if not ctx["already_asked_wechat"] and self._send_message(tab, inp, ask_wechat):
            self.logger.info("[%s] 已询问微信号", name)
            human_delay(1.5, 2.8)

        # Re-read the chat so that a WeChat ID already present is picked up.
        human_delay(1.0, 2.0)
        ctx = self._analyze_context(self._get_chat_messages(tab), job_title)
        wechats = ctx["wechats"]

        return {
            "name": name,
            "job": job_title,
            "wechat": wechats[0] if wechats else "",
        }

    # ─── Helpers ───

    def _get_conversation_items(self, tab) -> list:
        """Return the conversation-list elements, or an empty list if none are found."""
        selectors = [
            "css:div.chat-container div.geek-item",
            "css:div[role='listitem'] div.geek-item",
            "css:.geek-item-wrap .geek-item",
            "css:div.geek-item",
            "css:div[role='listitem']",
        ]
        # Normalise a falsy result so callers can always use len().
        return find_elements(tab, selectors, timeout=3) or []

    def _get_candidate_name(self, item, index: int) -> str:
        """Return a display name for *item*.

        Tries the ``.geek-name`` child first, then the first 20 characters of
        the item's own text, and finally a positional placeholder. Lookup
        errors are deliberately ignored because the name is only used for
        logging and for the result record.
        """
        try:
            name_el = item.ele("css:.geek-name", timeout=1)
            if name_el and name_el.text:
                return name_el.text.strip()
        except Exception:
            pass
        try:
            if item.text:
                return item.text.strip()[:20]
        except Exception:
            pass
        return f"候选人{index + 1}"

    def _click_conversation(self, tab, item) -> bool:
        """Click *item*, falling back to its ``role="listitem"`` parent; return True on success."""
        if safe_click(tab, item):
            return True
        try:
            parent = item.parent()
            if parent and parent.attr("role") == "listitem":
                return safe_click(tab, parent)
        except Exception:
            pass
        return False

    def _wait_for_input(self, tab, retries: int = 6):
        """Poll for the chat input box up to *retries* times; return it, or None if it never appears."""
        for _ in range(retries):
            inp = find_element(tab, [
                "css:#boss-chat-editor-input",
                "css:.boss-chat-editor-input",
            ], timeout=1)
            if inp:
                return inp
            human_delay(0.5, 0.9)
        return None

    def _send_message(self, tab, inp, message: str) -> bool:
        """Type *message* into *inp* and submit it.

        Submission is attempted with the send button first; if that fails, a
        newline is typed into the input instead.

        Returns:
            True when no error was raised while typing and submitting, False
            otherwise (the error is logged).
        """
        try:
            human_click(tab, inp)
            human_delay(0.25, 0.55)
            try:
                inp.clear()
            except Exception:
                # Clearing is best-effort; the message is typed regardless.
                pass
            human_delay(0.15, 0.4)
            inp.input(message)
            human_delay(0.4, 0.9)
            if self._click_send_button(tab):
                return True
            inp.input("\n")
            human_delay(0.35, 0.7)
            return True
        except Exception as exc:
            self.logger.warning("发送消息失败: %s", exc)
            return False

    def _click_send_button(self, tab) -> bool:
        """Click the send button, first through element lookup and then through JavaScript."""
        human_delay(0.2, 0.45)
        btn = find_element(tab, [
            "css:.conversation-editor .submit-content .submit.active",
            "css:.conversation-editor div.submit.active",
            "css:div.submit-content div.submit.active",
            "css:div.submit.active",
            "css:.submit-content .submit",
            "css:div.submit",
            "text:发送",
        ], timeout=1)
        if btn and safe_click(tab, btn):
            return True

        # JavaScript fallback: each script returns true only if it clicked something.
        scripts = [
            "var el = document.querySelector('.conversation-editor .submit.active') || document.querySelector('.conversation-editor .submit'); if(el){ el.click(); return true; } return false;",
            "var els = document.querySelectorAll('div[class*=\"submit\"]'); for(var i=0;i<els.length;i++){ if(els[i].textContent.trim()==='发送'){ els[i].click(); return true; } } return false;",
        ]
        for script in scripts:
            try:
                if tab.run_js(script) is True:
                    return True
            except Exception:
                continue
        return False

    def _get_chat_messages(self, tab) -> List[dict]:
        """Return the last (up to 50) chat messages as ``{"role", "text"}`` dicts.

        ``role`` is ``"boss"`` or ``"friend"``. Empty messages and the
        position banner (text containing "沟通的职位") are skipped. If reading
        fails part-way, the messages gathered so far are returned and the
        error is logged.
        """
        result: List[dict] = []
        try:
            items = tab.eles("css:.message-item", timeout=2)
            if not items:
                return result
            for element in items[-50:]:
                text = (element.text or "").strip()
                if not text or "沟通的职位" in text:
                    continue
                result.append({
                    "role": self._detect_message_role(element, text),
                    "text": text,
                })
        except Exception as exc:
            self.logger.warning("读取聊天记录失败: %s", exc)
        return result

    @staticmethod
    def _detect_message_role(element, text: str) -> str:
        """Classify a message element as ``"boss"`` or ``"friend"`` (the default)."""
        try:
            if element.ele("css:.item-boss", timeout=0):
                return "boss"
            if element.ele("css:.item-friend", timeout=0):
                return "friend"
            css_classes = element.attr("class") or ""
            if "item-boss" in css_classes or (
                "boss" in css_classes and "friend" not in css_classes
            ):
                return "boss"
        except Exception:
            # DOM lookup failed: fall back to keywords typical of our own messages.
            if any(k in text for k in ("招", "岗位", "微信号", "方便留", "加您")):
                return "boss"
        return "friend"

    def _analyze_context(self, messages: list, job_title: str) -> dict:
        """Summarise a chat history.

        Returns:
            A dict with:
            - ``already_greeting``: the boss side already mentioned the job
              title or the character "招";
            - ``already_asked_wechat``: the boss side already mentioned "微信";
            - ``wechats``: up to three distinct WeChat IDs found in the
              candidate's messages, in order of appearance.
        """
        boss_texts = [m["text"] for m in messages if m.get("role") == "boss"]
        friend_texts = [m["text"] for m in messages if m.get("role") == "friend"]
        full_boss = " ".join(boss_texts)

        wechats: list = []
        for text in friend_texts:
            wechats.extend(self._extract_wechat(text))
        wechats = list(dict.fromkeys(wechats))[:3]

        # An empty title is a substring of every string, so it must not count
        # as evidence that a greeting was already sent.
        title_mentioned = bool(job_title) and job_title in full_boss

        return {
            "already_greeting": title_mentioned or "招" in full_boss,
            "already_asked_wechat": "微信" in full_boss,
            "wechats": wechats,
        }

    @staticmethod
    def _extract_wechat(text: str) -> list:
        """Extract up to three candidate WeChat IDs from *text*.

        IDs that follow an explicit label ("微信号", "微信", "wx", "wechat") are
        listed first, followed by stand-alone tokens that look like an ID.
        Duplicates are removed, and the label words themselves are never
        reported as IDs.
        """
        if not text or not text.strip():
            return []

        found: list = []
        for pattern in BossRecruitHandler._WECHAT_LABELLED_PATTERNS:
            for match in pattern.finditer(text):
                candidate = match.group(1)
                if candidate not in found:
                    found.append(candidate)

        for match in BossRecruitHandler._WECHAT_BARE_PATTERN.finditer(text):
            candidate = match.group(1)
            if candidate.lower() in BossRecruitHandler._WECHAT_LABEL_WORDS:
                continue
            if candidate not in found:
                found.append(candidate)

        return found[:3]
|