# -*- coding: utf-8 -*- """ BOSS 直聘招聘任务处理器。 招聘流程与本地脚本 boss_dp/自动化.py 的 main 保持一致: 1) 监听并获取 friendList 2) 逐个点开会话并监听历史消息 3) 若历史消息中未出现“手机号/微信号”,则发送询问并执行“换微信” """ from __future__ import annotations import asyncio import json import random import re import time from datetime import datetime, timedelta from typing import Any, Callable, Coroutine, Dict, List, Optional from common.protocol import TaskType from worker.bit_browser import BitBrowserAPI from worker.browser_control import connect_browser from worker.tasks.base import BaseTaskHandler, TaskCancelledError CHAT_INDEX_URL = "https://www.zhipin.com/web/chat/index" FRIEND_LIST_API = "wapi/zprelation/friend/getBossFriendListV2" HISTORY_API = "wapi/zpchat/boss/historyMsg" ASK_WECHAT_TEXT = "后续沟通会更及时,您方便留一下您的微信号吗?我这边加您。" CONTACT_KEYWORDS = ("手机号", "微信号") EXCHANGE_CONFIRM_XPATH = ( "x://span[contains(text(),'确定与对方交换微信吗?')]/../div[@class='btn-box']/" "span[contains(@class,'boss-btn-primary')]" ) EXCHANGE_CONFIRM_XPATH_ASCII = ( "x://span[contains(text(),'确定与对方交换微信吗?')]/../div[@class='btn-box']/" "span[contains(@class,'boss-btn-primary')]" ) class BossReplyHandler(BaseTaskHandler): """BOSS 直聘招聘自动化任务。""" task_type = TaskType.BOSS_REPLY.value async def execute( self, task_id: str, params: Dict[str, Any], progress_cb: Callable[[str, str], Coroutine], ) -> Any: """ 执行 BOSS 招聘流程。 params: - job_title: str 招聘岗位名称(用于结果展示) - account_name: str 比特浏览器窗口名(用于打开浏览器) - account_id: str 比特浏览器窗口 ID(可选,优先级高于 name) - bit_api_base: str 比特浏览器 API 地址(可选) """ job_title = params.get("job_title", "相关岗位") account_name = params.get("account_name", "") account_id = params.get("account_id", "") bit_api_base = params.get("bit_api_base", "http://127.0.0.1:54345") cancel_event = params.get("_cancel_event") self.ensure_not_cancelled(cancel_event) await progress_cb(task_id, "正在打开比特浏览器...") result = await asyncio.get_event_loop().run_in_executor( None, self._run_sync, task_id, job_title, account_name, account_id, bit_api_base, progress_cb, cancel_event, ) return result def _run_sync( self, task_id: str, job_title: str, account_name: str, account_id: str, bit_api_base: str, progress_cb: Callable, cancel_event, ) -> dict: """同步执行浏览器自动化(在线程池中运行)。""" _ = (task_id, progress_cb) self.ensure_not_cancelled(cancel_event) bit_api = BitBrowserAPI(bit_api_base) addr, port = bit_api.get_browser_for_drission( browser_id=account_id or None, name=account_name or None, ) self.logger.info("已打开浏览器, CDP: %s (port=%d)", addr, port) browser = connect_browser(port=port) tab = browser.latest_tab flow_result = self._recruit_flow_like_script(tab, job_title, cancel_event) collected = flow_result["details"] errors = flow_result["errors"] wechat_set = {str(c.get("wechat", "")).strip() for c in collected if str(c.get("wechat", "")).strip()} phone_set = {str(c.get("phone", "")).strip() for c in collected if str(c.get("phone", "")).strip()} has_errors = bool(errors) result = { "job_title": job_title, "total_processed": len(collected), "wechat_collected": len(wechat_set), "phone_collected": len(phone_set), "details": collected, "error_count": len(errors), "errors": errors[:20], "success": not has_errors, } if has_errors: result["error"] = f"招聘流程出现 {len(errors)} 处错误" return result def _recruit_flow_like_script(self, tab, job_title: str, cancel_event) -> dict: """按 boss_dp/自动化.py 的 main 流程执行。""" collected: List[dict] = [] errors: List[str] = [] self.ensure_not_cancelled(cancel_event) try: friend_list = self._open_chat_and_fetch_friend_list(tab) except Exception as e: err = f"获取 friendList 失败: {e}" self.logger.error(err) return {"details": collected, "errors": [err]} if not friend_list: return {"details": collected, "errors": ["未拿到 friendList"]} # 应用筛选条件 friend_list = self._apply_filters(friend_list) total = len(friend_list) self.logger.info("friendList 筛选后=%d,本次处理=%d", len(friend_list), total) for i, friend in enumerate(friend_list[:total], start=1): try: self.ensure_not_cancelled(cancel_event) name = str(friend.get("name", "")).strip() or f"候选人{i}" friend_job_name = str(friend.get("jobName", "")).strip() friend_job_id = str(friend.get("jobId", "")).strip() tab.listen.start(HISTORY_API) if not self._open_friend_chat_like_script(tab, name): errors.append(f"[{name}] 未找到联系人或点击失败") continue messages = self._wait_history_messages(tab) self.ensure_not_cancelled(cancel_event) # 过滤掉自己发送的消息 filtered_messages = self._filter_my_messages(messages) has_contact_keyword = self._has_contact_keyword(filtered_messages) contacts = self._extract_contacts(filtered_messages) contact_written = bool(contacts.get("wechat") or contacts.get("phone")) action_state = { "asked_wechat": False, "send_success": False, "exchange_clicked": False, "exchange_confirmed": False, } if not has_contact_keyword: self.ensure_not_cancelled(cancel_event) action_state = self._ask_and_exchange_wechat_like_script(tab) # 先保存联系人记录(如果有的话) temp_contact_id = None if contacts.get("wechat") or contacts.get("phone"): temp_contact_id = self._save_contact_record(name, friend_job_name, contacts, action_state) # 发送后等待对方回复,进行复聊管理 if action_state["send_success"]: self.ensure_not_cancelled(cancel_event) reply_result = self._handle_follow_up_chat(tab, name, friend_job_name, temp_contact_id) action_state.update(reply_result) # 如果复聊中提取到了新的联系方式,更新联系人记录 if reply_result.get("extracted_contact_from_reply"): panel_texts = self._collect_chat_panel_texts(tab) new_contacts = self._extract_contacts(filtered_messages, extra_texts=panel_texts) if new_contacts.get("wechat") or new_contacts.get("phone"): contacts.update(new_contacts) contact_written = True panel_texts = self._collect_chat_panel_texts(tab) contacts = self._extract_contacts(filtered_messages, extra_texts=panel_texts) contact_written = bool(contacts["wechat"] or contacts["phone"]) if has_contact_keyword and not contact_written: self.logger.warning( "[%s] 历史消息含联系方式关键词,但未提取到有效联系方式,疑似识别失败", name, ) # 保存联系人记录到数据库,获取contact_id用于复聊 contact_id = None if contact_written: contact_id = self._save_contact_record(name, friend_job_name, contacts, action_state) collected.append( { "name": name, "job": friend_job_name or job_title, "job_id": friend_job_id, "wechat": contacts["wechat"], "phone": contacts["phone"], "contact_written": contact_written, "has_contact_keyword": has_contact_keyword, **action_state, } ) except TaskCancelledError: self.logger.info("任务执行中收到取消信号,提前结束") break except Exception as e: err_msg = f"处理第 {i} 个会话出错: {e}" self.logger.error(err_msg) errors.append(err_msg) finally: if i < total: self.ensure_not_cancelled(cancel_event) self._sleep_between_sessions() self.ensure_not_cancelled(cancel_event) return {"details": collected, "errors": errors} def _open_chat_and_fetch_friend_list(self, tab) -> list: tab.listen.start(FRIEND_LIST_API) tab.get(CHAT_INDEX_URL) packet = tab.listen.wait() body = self._packet_body(packet) zp_data = body.get("zpData", {}) if isinstance(body, dict) else {} friend_list = zp_data.get("friendList", []) if isinstance(zp_data, dict) else [] if isinstance(friend_list, list): return friend_list return [] @staticmethod def _xpath_literal(value: str) -> str: if "'" not in value: return f"'{value}'" if '"' not in value: return f'"{value}"' parts = value.split("'") return "concat(" + ", \"'\", ".join(f"'{part}'" for part in parts) + ")" def _open_friend_chat_like_script(self, tab, name: str) -> bool: name_selector = f"x://span[text()={self._xpath_literal(name)}]" ele = tab.ele(name_selector, timeout=2) if not ele: return False try: ele.run_js("this.scrollIntoView({block: 'center', behavior: 'auto'})") except Exception: pass time.sleep(0.8) try: clickable = tab.ele(name_selector, timeout=2) if not clickable: return False clickable.click(by_js=True) return True except Exception: return False def _wait_history_messages(self, tab) -> list: packet = tab.listen.wait() body = self._packet_body(packet) zp_data = body.get("zpData", {}) if isinstance(body, dict) else {} messages = zp_data.get("messages", []) if isinstance(zp_data, dict) else [] if isinstance(messages, list): return messages return [] @staticmethod def _sleep_between_sessions() -> None: """会话间随机停顿,降低频繁切换带来的风控风险。""" time.sleep(random.uniform(1.8, 4.2)) def _ask_and_exchange_wechat_like_script(self, tab) -> dict: state = { "asked_wechat": False, "send_success": False, "exchange_clicked": False, "exchange_confirmed": False, } input_box = tab.ele('x://*[@id="boss-chat-editor-input"]', timeout=2) if not input_box: return state try: input_box.click(by_js=True) except Exception: pass try: input_box.clear() except Exception: pass input_box.input(ASK_WECHAT_TEXT) state["asked_wechat"] = True time.sleep(random.randint(1, 3) + random.random()) state["send_success"] = self._send_with_confirm( tab, input_box=input_box, message=ASK_WECHAT_TEXT, ) time.sleep(random.randint(1, 5) + random.random()) state["exchange_clicked"] = self._click_change_wechat_like_script(tab) if state["exchange_clicked"]: time.sleep(random.randint(1, 2) + random.random()) state["exchange_confirmed"] = self._click_exchange_confirm_like_script(tab) return state def _click_send_like_script(self, tab, input_box=None) -> bool: selectors = ( 'x://div[text()="发送"]', 'x://span[text()="发送"]', 'x://button[normalize-space(.)="发送"]', 'x://*[@role="button" and normalize-space(.)="发送"]', ) for _ in range(3): for selector in selectors: try: btn = tab.ele(selector, timeout=1) if not btn: continue try: btn.click(by_js=True) except Exception: btn.click() time.sleep(0.25) return True except Exception: continue time.sleep(0.25) if input_box: try: input_box.input("\n") time.sleep(0.25) return True except Exception: pass return False def _send_with_confirm(self, tab, input_box, message: str, max_attempts: int = 2) -> bool: """发送后检查末条消息,避免因确认误判导致重复补发。""" msg = (message or "").strip() if not msg: return False for _ in range(max_attempts): clicked = self._click_send_like_script(tab, input_box=input_box) if not clicked: continue if self._confirm_last_sent_message(tab, msg): return True # 已点击发送但未在列表中确认时,不直接补发,先判断输入框是否已清空。 # 若已清空,通常表示消息已发出,只是 UI 刷新慢或识别未命中,避免重复发送。 if not self._editor_has_text(input_box, msg): return True time.sleep(0.35) sent = self._confirm_last_sent_message(tab, msg) if sent: return True self._clear_editor(input_box) return False @staticmethod def _normalize_text(text: str) -> str: return re.sub(r"\s+", "", text or "") def _editor_has_text(self, input_box, expected: str = "") -> bool: """判断输入框是否仍残留指定文本。""" expected_norm = self._normalize_text(expected) current = "" try: current = input_box.run_js("return (this.innerText || this.textContent || this.value || '').trim();") except Exception: try: current = input_box.text except Exception: current = "" current_norm = self._normalize_text(str(current)) if not current_norm: return False if not expected_norm: return True return expected_norm in current_norm def _clear_editor(self, input_box) -> None: """清空聊天输入框,避免残留预输入内容。""" try: input_box.click(by_js=True) except Exception: pass try: input_box.clear() except Exception: pass try: input_box.run_js( "if (this.isContentEditable) { this.innerHTML=''; this.textContent=''; }" ) except Exception: pass def _confirm_last_sent_message(self, tab, message: str) -> bool: """确认当前聊天窗口末条是否为刚发送内容。""" msg = (message or "").strip() if not msg: return False target = re.sub(r"\s+", "", msg) try: for _ in range(6): items = tab.eles("css:.message-item", timeout=1) if not items: time.sleep(0.2) continue for e in reversed(items[-6:]): text = (e.text or "").strip() if not text: continue normalized = re.sub(r"\s+", "", text) is_boss = False try: if e.ele("css:.item-boss", timeout=0): is_boss = True except Exception: pass if is_boss and target in normalized: return True time.sleep(0.2) except Exception: return False return False def _click_change_wechat_like_script(self, tab) -> bool: try: btn = tab.ele('x://span[text()="换微信"]', timeout=1) if not btn: return False btn.click() return True except Exception: return False def _click_exchange_confirm_like_script(self, tab) -> bool: try: confirm = tab.ele(EXCHANGE_CONFIRM_XPATH, timeout=2) if not confirm: confirm = tab.ele(EXCHANGE_CONFIRM_XPATH_ASCII, timeout=2) if not confirm: return False confirm.click(by_js=True) return True except Exception: return False @staticmethod def _packet_body(packet) -> dict: if not packet: return {} response = getattr(packet, "response", None) body = getattr(response, "body", None) if response is not None else None if isinstance(body, dict): return body if isinstance(body, str): try: parsed = json.loads(body) if isinstance(parsed, dict): return parsed except Exception: return {} return {} def _history_texts(self, messages: list) -> list[str]: texts: list[str] = [] for msg in messages: if not isinstance(msg, dict): continue body = msg.get("body") text = "" if isinstance(body, dict): maybe_text = body.get("text") if isinstance(maybe_text, str): text = maybe_text elif isinstance(msg.get("text"), str): text = str(msg.get("text")) text = text.strip() if text: texts.append(text) return texts def _collect_chat_panel_texts(self, tab, max_items: int = 80) -> list[str]: """从当前聊天面板读取可见消息文本,补充接口历史消息。""" texts: list[str] = [] try: items = tab.eles("css:.message-item", timeout=1) except Exception: return texts if not items: return texts for e in items[-max_items:]: try: text = (e.text or "").strip() except Exception: text = "" if text: texts.append(text) return texts def _has_contact_keyword(self, messages: list) -> bool: for text in self._history_texts(messages): if any(k in text for k in CONTACT_KEYWORDS): return True return False def _extract_contacts(self, messages: list, extra_texts: Optional[list[str]] = None) -> dict: wechats: list[str] = [] phones: list[str] = [] all_texts = self._history_texts(messages) if extra_texts: all_texts.extend([str(t).strip() for t in extra_texts if str(t).strip()]) for text in all_texts: wechats.extend(self._extract_wechat(text)) phones.extend(self._extract_phone(text)) wechats = list(dict.fromkeys(wechats)) phones = list(dict.fromkeys(phones)) return { "wechat": wechats[0] if wechats else "", "phone": phones[0] if phones else "", } @staticmethod def _extract_wechat(text: str) -> list: if not text or not text.strip(): return [] found = [] patterns = [ r"微信号[::\s]*([a-zA-Z0-9_\-]{6,20})", r"微信[::\s]*([a-zA-Z0-9_\-]{6,20})", r"wx[::\s]*([a-zA-Z0-9_\-]{6,20})", r"wechat[::\s]*([a-zA-Z0-9_\-]{6,20})", ] for pattern in patterns: for match in re.finditer(pattern, text, re.IGNORECASE): value = match.group(1).strip() if match.lastindex else match.group(0).strip() if value and value not in found and len(value) >= 6: found.append(value) return found[:3] @staticmethod def _extract_phone(text: str) -> list: if not text or not text.strip(): return [] found = [] raw_candidates = re.findall(r"1[3-9][\d\-\s]{9,15}", text) for raw in raw_candidates: digits = re.sub(r"\D", "", raw) if len(digits) == 11 and digits.startswith("1") and digits not in found: found.append(digits) return found[:3] def _apply_filters(self, friend_list: list) -> list: """应用筛选条件过滤候选人列表。""" try: from server.models import FilterConfig # 获取启用的筛选配置 filter_config = FilterConfig.objects.filter(is_active=True).first() if not filter_config: self.logger.info("未找到启用的筛选配置,跳过筛选") return friend_list filtered = [] for friend in friend_list: # 筛选活跃度(最后上线时间) last_time = friend.get("lastTime", "") if not self._check_activity(last_time, filter_config.activity): continue # 从简历信息中获取年龄、学历、期望职位 resume = friend.get("resume", {}) or {} # 筛选年龄 age = resume.get("age") if age and not (filter_config.age_min <= int(age) <= filter_config.age_max): continue # 筛选学历 education = resume.get("education", "") if filter_config.education != "不限" and education: if not self._check_education(education, filter_config.education): continue # 筛选期望职位 job_name = friend.get("jobName", "") if filter_config.positions and job_name: if not any(pos in job_name for pos in filter_config.positions): continue filtered.append(friend) self.logger.info("筛选前: %d 人,筛选后: %d 人", len(friend_list), len(filtered)) return filtered except Exception as e: self.logger.error("应用筛选条件失败: %s,返回原列表", e) return friend_list def _check_activity(self, last_time: str, activity_filter: str) -> bool: """检查活跃度是否符合要求。""" if activity_filter == "不限": return True try: # 解析时间字符串 now = datetime.now() if "昨天" in last_time: last_active = now - timedelta(days=1) elif "今天" in last_time or "刚刚" in last_time: last_active = now elif "月" in last_time and "日" in last_time: # 格式如 "03月03日" match = re.search(r"(\d+)月(\d+)日", last_time) if match: month = int(match.group(1)) day = int(match.group(2)) year = now.year # 如果月份大于当前月份,说明是去年的 if month > now.month: year -= 1 last_active = datetime(year, month, day) else: return True else: return True # 计算天数差 days_diff = (now - last_active).days # 根据筛选条件判断 if activity_filter == "今天活跃": return days_diff == 0 elif activity_filter == "3天内活跃": return days_diff <= 3 elif activity_filter == "本周活跃": return days_diff <= 7 elif activity_filter == "本月活跃": return days_diff <= 30 return True except Exception as e: self.logger.warning("解析活跃度时间失败: %s, last_time=%s", e, last_time) return True @staticmethod def _check_education(candidate_edu: str, required_edu: str) -> bool: """检查学历是否符合要求。""" edu_levels = ["初中", "高中", "中专", "大专", "本科", "硕士", "博士"] try: candidate_level = next((i for i, edu in enumerate(edu_levels) if edu in candidate_edu), -1) required_level = next((i for i, edu in enumerate(edu_levels) if edu in required_edu), -1) if candidate_level == -1 or required_level == -1: return True return candidate_level >= required_level except Exception: return True def _filter_my_messages(self, messages: list) -> list: """过滤掉自己发送的消息,只保留对方的消息。""" filtered = [] for msg in messages: if not isinstance(msg, dict): continue # from_id 为 0 表示是对方发送的消息 from_id = msg.get("fromId", 0) if from_id == 0: filtered.append(msg) return filtered def _handle_follow_up_chat(self, tab, name: str, job_name: str, contact_id: int = None) -> dict: """处理复聊管理,根据配置发送多轮话术。""" result = { "follow_up_attempted": False, "got_reply": False, "extracted_contact_from_reply": False, } if not contact_id: return result try: from server.models import FollowUpConfig, FollowUpScript, FollowUpRecord from django.utils import timezone # 获取该岗位的复聊配置 config = FollowUpConfig.objects.filter( position=job_name, is_active=True ).first() if not config: # 尝试获取通用配置 config = FollowUpConfig.objects.filter( position="通用", is_active=True ).first() if not config: self.logger.info("[%s] 未找到复聊配置,跳过复聊", name) return result # 获取该联系人的复聊记录 last_record = FollowUpRecord.objects.filter( contact_id=contact_id, config_id=config.id ).order_by('-sent_at').first() # 确定当前是第几天 if not last_record: # 第一次复聊 day_number = 1 else: # 计算距离上次发送的时间 hours_since_last = (timezone.now() - last_record.sent_at).total_seconds() / 3600 # 获取上次使用的话术的间隔时间 last_script = FollowUpScript.objects.filter(id=last_record.script_id).first() if last_script and hours_since_last < last_script.interval_hours: self.logger.info("[%s] 距离上次复聊不足 %d 小时,跳过", name, last_script.interval_hours) return result # 下一天 day_number = last_record.day_number + 1 # 获取该天的话术 script = FollowUpScript.objects.filter( config_id=config.id, day_number=day_number, is_active=True ).order_by('order').first() # 如果没有该天的话术,尝试获取"往后一直"的话术(day_number=0) if not script: script = FollowUpScript.objects.filter( config_id=config.id, day_number=0, is_active=True ).order_by('order').first() if not script: self.logger.info("[%s] 未找到第 %d 天的复聊话术", name, day_number) return result # 发送话术 result["follow_up_attempted"] = True send_success = self._send_message(tab, script.content) if send_success: # 记录发送 record = FollowUpRecord.objects.create( contact_id=contact_id, config_id=config.id, script_id=script.id, day_number=day_number, content=script.content, ) # 等待回复 reply_info = self._wait_for_reply(tab, script.content) if reply_info["got_reply"]: result["got_reply"] = True result["extracted_contact_from_reply"] = reply_info["has_contact"] # 更新记录 record.got_reply = True record.reply_content = reply_info["reply_text"] record.replied_at = timezone.now() record.save() self.logger.info("[%s] 第 %d 天复聊得到回复", name, day_number) except Exception as e: self.logger.error("复聊管理失败: %s", e) return result def _save_contact_record(self, name: str, job_name: str, contacts: dict, action_state: dict) -> int: """保存联系人记录到数据库,返回contact_id。""" try: from server.models import ContactRecord from django.utils import timezone contact_value = contacts.get("wechat") or contacts.get("phone") or "" if not contact_value: return None # 检查是否已存在 existing = ContactRecord.objects.filter( name=name, contact=contact_value ).first() if existing: # 更新现有记录 existing.wechat_exchanged = action_state.get("exchange_confirmed", False) existing.reply_status = "已回复" if action_state.get("got_reply", False) else "未回复" existing.save() self.logger.info("更新联系人记录: %s - %s", name, contact_value) return existing.id else: # 创建新记录 record = ContactRecord.objects.create( name=name, position=job_name, contact=contact_value, reply_status="已回复" if action_state.get("got_reply", False) else "未回复", wechat_exchanged=action_state.get("exchange_confirmed", False), contacted_at=timezone.now(), notes=f"自动招聘获取 - 微信: {contacts.get('wechat', '')}, 手机: {contacts.get('phone', '')}" ) self.logger.info("保存新联系人记录: %s - %s", name, contact_value) return record.id except Exception as e: self.logger.error("保存联系人记录失败: %s", e) return None def _send_message(self, tab, message: str) -> bool: """发送消息的通用方法。""" try: input_box = tab.ele('x://*[@id="boss-chat-editor-input"]', timeout=2) if not input_box: return False try: input_box.click(by_js=True) input_box.clear() except Exception: pass input_box.input(message) time.sleep(random.uniform(1, 2)) return self._send_with_confirm(tab, input_box=input_box, message=message) except Exception as e: self.logger.error("发送消息失败: %s", e) return False def _wait_for_reply(self, tab, sent_message: str, max_wait: int = 30) -> dict: """等待对方回复并提取信息。""" result = { "got_reply": False, "has_contact": False, "reply_text": "", } try: check_interval = 3 # 每3秒检查一次 for _ in range(max_wait // check_interval): time.sleep(check_interval) # 重新获取聊天面板的消息 panel_texts = self._collect_chat_panel_texts(tab, max_items=10) # 检查最后几条消息 for text in panel_texts[-5:]: # 过滤掉我们发送的消息(包含发送的话术内容) if sent_message in text: continue # 过滤掉包含"微信号"关键词但不是真实微信号的消息 if "微信号" in text and not self._extract_wechat(text): continue # 尝试提取联系方式 wechats = self._extract_wechat(text) phones = self._extract_phone(text) if wechats or phones: result["got_reply"] = True result["has_contact"] = True result["reply_text"] = text return result # 即使没有联系方式,只要有新消息也算回复 if text and text not in [sent_message, ASK_WECHAT_TEXT]: result["got_reply"] = True result["reply_text"] = text return result except Exception as e: self.logger.error("等待回复失败: %s", e) return result