# Files
# boss_dp/worker/tasks/boss_recruit.py
# 2026-03-06 10:05:49 +08:00
#
# 506 lines
# 18 KiB
# Python
#
# -*- coding: utf-8 -*-
"""
BOSS recruit task handler (new greeting flow).
This flow is aligned with `1.py -> main`:
1) open recommend page
2) iterate positions
3) apply selected filters
4) greet candidates until target count is reached
"""
from __future__ import annotations
import asyncio
import json
import random
import time
from datetime import datetime
from typing import Any, Callable, Coroutine, Dict, List, Optional, Tuple
from common.protocol import TaskType
from worker.bit_browser import BitBrowserAPI
from worker.browser_control import connect_browser, human_delay, safe_click
from worker.tasks.base import BaseTaskHandler, TaskCancelledError
# Page opened at the start of the flow (step 1 of the module docstring).
RECOMMEND_URL = "https://www.zhipin.com/web/chat/recommend"

# URL fragment handed to ``page.listen.start`` to capture candidate-list responses.
GEEK_LIST_API = "wapi/zpjob/rec/geek/list"

# Text typed into the quick-reply input box after a greeting.
FAST_REPLY_TEXT = "您好,我们目前有相关岗位机会,方便了解一下吗?"

# Keep selectors consistent with 1.py logic.
# Tried in order; the first selector yielding 1..100 elements wins.
JOB_LIST_SELECTORS = [
    "x://*[contains(@class,'job-item') or contains(@class,'position-item')]",
    "x://li[contains(@class,'job')]",
    "x://div[contains(@class,'job-list')]/div",
    "x://ul[contains(@class,'job')]/li",
    "x://*[contains(@class,'recommend-job')]//*[contains(@class,'item')]",
    "x://*[contains(@class,'job-list')]/*",
    "x://a[contains(@href,'job')]",
]
class BossRecruitHandler(BaseTaskHandler):
"""Recruit by greeting candidates from recommend list."""
task_type = TaskType.BOSS_RECRUIT.value
async def execute(
    self,
    task_id: str,
    params: Dict[str, Any],
    progress_cb: Callable[[str, str], Coroutine],
) -> Any:
    """Validate task parameters and run the blocking recruit flow in a worker thread.

    Keys read from ``params``: ``job_title``, ``account_name``, ``account_id``,
    ``bit_api_base``, ``worker_id``, ``greet_target``, ``position_names``,
    ``selected_filters`` (or ``filters``) and ``_cancel_event``.

    Args:
        task_id: Identifier forwarded to ``progress_cb``.
        params: Raw task parameters (see keys above).
        progress_cb: Awaitable callback receiving ``(task_id, message)``.

    Returns:
        The summary dict produced by ``_run_sync``.

    Raises:
        ValueError: If no filter condition was supplied.
    """
    job_title = str(params.get("job_title", "") or "").strip() or "相关岗位"
    account_name = str(params.get("account_name", "") or "").strip()
    account_id = str(params.get("account_id", "") or "").strip()
    # Apply the default for a missing key AND for an explicit None / blank
    # value; previously a present-but-empty key produced an empty base URL.
    bit_api_base = (
        str(params.get("bit_api_base") or "").strip() or "http://127.0.0.1:54345"
    )
    cancel_event = params.get("_cancel_event")
    worker_id = str(params.get("worker_id", "") or "").strip()

    selected_filters = self._normalize_selected_filters(params)
    if not selected_filters:
        raise ValueError("招聘前必须先选择至少一个筛选条件")

    greet_target = self._parse_positive_int(params.get("greet_target"), default=20)
    position_names = self._normalize_string_list(params.get("position_names"))

    self.ensure_not_cancelled(cancel_event)
    await progress_cb(task_id, f"正在启动招聘流程,目标打招呼人数: {greet_target}")

    # We are inside a coroutine, so the running loop is the one to use.
    loop = asyncio.get_running_loop()
    # The browser flow is synchronous; keep it off the event loop thread.
    return await loop.run_in_executor(
        None,
        self._run_sync,
        job_title,
        account_name,
        account_id,
        bit_api_base,
        selected_filters,
        position_names,
        greet_target,
        worker_id,
        cancel_event,
    )
def _run_sync(
    self,
    job_title: str,
    account_name: str,
    account_id: str,
    bit_api_base: str,
    selected_filters: List[str],
    position_names: List[str],
    greet_target: int,
    worker_id: str,
    cancel_event,
) -> Dict[str, Any]:
    """Attach to the browser, run the greeting flow and build the result summary.

    Blocking counterpart of ``execute``. The returned dict carries the flow's
    counters plus ``error_count``, the first 20 ``errors`` and a ``success``
    flag that is true only when no error was reported.
    """
    self.ensure_not_cancelled(cancel_event)

    # Resolve the browser by id and/or name; empty strings are passed as None.
    api_client = BitBrowserAPI(bit_api_base)
    _, browser_port = api_client.get_browser_for_drission(
        browser_id=account_id or None,
        name=account_name or None,
    )
    tab = connect_browser(port=browser_port).latest_tab

    outcome = self._run_main_like_flow(
        page=tab,
        job_title=job_title,
        selected_filters=selected_filters,
        position_names=position_names,
        greet_target=greet_target,
        worker_id=worker_id,
        account_name=account_name,
        cancel_event=cancel_event,
    )

    reported = outcome.get("errors", [])
    summary: Dict[str, Any] = {
        "job_title": job_title,
        "target_greet_count": greet_target,
    }
    summary["actual_greeted"] = outcome.get("actual_greeted", 0)
    summary["rounds"] = outcome.get("rounds", 0)
    summary["selected_filters"] = selected_filters
    summary["positions"] = outcome.get("positions", [])
    summary["details"] = outcome.get("details", [])
    summary["contact_records_created"] = outcome.get("contact_records_created", 0)
    summary["error_count"] = len(reported)
    summary["errors"] = reported[:20]  # cap the payload size
    summary["success"] = len(reported) == 0
    return summary
def _run_main_like_flow(
    self,
    *,
    page,
    job_title: str,
    selected_filters: List[str],
    position_names: List[str],
    greet_target: int,
    worker_id: str,
    account_name: str,
    cancel_event,
) -> Dict[str, Any]:
    """Open the recommend page and greet candidates position by position.

    Rounds over all positions are repeated until ``greet_target`` greetings
    were made or a full round adds nobody new.

    Per-position failures while loading the candidate list, and failures while
    persisting contact records, are collected in the returned ``errors`` list
    instead of aborting the run, so already-greeted candidates are still
    reported. Cancellation (``TaskCancelledError``) always propagates.

    Returns:
        Dict with ``actual_greeted``, ``rounds``, ``positions``, ``details``,
        ``contact_records_created`` and ``errors``.
    """
    details: List[dict] = []
    errors: List[str] = []
    self.ensure_not_cancelled(cancel_event)

    page.listen.start(GEEK_LIST_API)
    try:
        page.get(RECOMMEND_URL)
        human_delay(1.2, 2.4)

        container = self._get_container(page)
        positions = self._build_positions(container, position_names)
        if not positions:
            return {
                "actual_greeted": 0,
                "rounds": 0,
                "positions": [],
                "details": [],
                "contact_records_created": 0,
                "errors": ["未识别到岗位列表,无法开始招聘"],
            }

        greeted_keys: set[str] = set()
        total_greeted = 0
        round_num = 0
        contact_records_created = 0

        while True:
            self.ensure_not_cancelled(cancel_event)
            round_num += 1
            round_added = 0

            for pos_type, pos_value in positions:
                self.ensure_not_cancelled(cancel_event)
                if total_greeted >= greet_target:
                    break

                container = self._get_container(page)
                label = self._activate_position(page, container, pos_type, pos_value)
                if not label:
                    continue
                human_delay(1.0, 2.1)

                # Re-resolve the container after the position click.
                container = self._get_container(page)
                try:
                    packet = self._load_geek_list_packet(page, container, selected_filters)
                except TaskCancelledError:
                    raise
                except Exception as exc:
                    # Record and move on to the next position rather than
                    # losing everything greeted so far.
                    errors.append(f"{label}: 加载候选人列表失败: {exc}")
                    continue

                # Greet one candidate at a time so the run stops exactly at
                # the target instead of finishing the whole list.
                for item in self._extract_geek_list(packet):
                    if total_greeted >= greet_target:
                        break
                    added, new_records = self._greet_geek_list(
                        page=page,
                        container=container,
                        geek_list=[item],
                        greeted_keys=greeted_keys,
                        position_label=label,
                        default_job=job_title,
                        cancel_event=cancel_event,
                    )
                    if not added:
                        continue
                    total_greeted += added
                    round_added += added
                    details.extend(new_records)
                    try:
                        contact_records_created += self._save_new_greet_contacts(
                            records=new_records,
                            worker_id=worker_id,
                            account_name=account_name,
                        )
                    except TaskCancelledError:
                        raise
                    except Exception as exc:
                        errors.append(f"{label}: 保存联系记录失败: {exc}")

            # Stop when the target is met or a whole round found nobody new.
            if total_greeted >= greet_target or round_added == 0:
                break
            human_delay(0.8, 1.6)

        return {
            "actual_greeted": total_greeted,
            "rounds": round_num,
            "positions": [self._position_label(t, v) for t, v in positions],
            "details": details,
            "contact_records_created": contact_records_created,
            "errors": errors,
        }
    finally:
        # Best-effort: release the listener started above without masking
        # whatever exception may already be propagating.
        try:
            page.listen.stop()
        except Exception:
            self.logger.debug("failed to stop page listener", exc_info=True)
@staticmethod
def _packet_body(packet) -> dict:
if not packet:
return {}
response = getattr(packet, "response", None)
body = getattr(response, "body", None) if response is not None else None
if isinstance(body, dict):
return body
if isinstance(body, str):
try:
parsed = json.loads(body)
return parsed if isinstance(parsed, dict) else {}
except Exception:
return {}
return {}
@staticmethod
def _normalize_string_list(value: Any) -> List[str]:
if value is None:
return []
if isinstance(value, list):
return [str(v).strip() for v in value if str(v).strip()]
if isinstance(value, str):
raw = value.strip()
if not raw:
return []
if raw.startswith("[") and raw.endswith("]"):
try:
parsed = json.loads(raw)
if isinstance(parsed, list):
return [str(v).strip() for v in parsed if str(v).strip()]
except Exception:
pass
return [raw]
return []
def _normalize_selected_filters(self, params: Dict[str, Any]) -> List[str]:
filters = params.get("selected_filters")
if filters is None:
filters = params.get("filters")
return self._normalize_string_list(filters)
@staticmethod
def _parse_positive_int(value: Any, default: int) -> int:
try:
parsed = int(value)
return parsed if parsed > 0 else default
except Exception:
return default
@staticmethod
def _get_container(page):
try:
return page.get_frame("recommendFrame")
except Exception:
return page
def _get_all_position_elements(self, container) -> List[Any]:
    """Return the position elements found by the first usable selector.

    Selectors from ``JOB_LIST_SELECTORS`` are tried in order; a result is
    usable when it holds between 1 and 100 elements. A selector that raises is
    skipped. Returns ``[]`` when none is usable.
    """
    for xpath in JOB_LIST_SELECTORS:
        try:
            found = container.eles(xpath, timeout=2)
            usable = bool(found) and 1 <= len(found) <= 100
        except Exception:
            continue
        if usable:
            return found
    return []
def _build_positions(self, container, position_names: List[str]) -> List[Tuple[str, Any]]:
if position_names:
return [("name", name) for name in position_names]
job_eles = self._get_all_position_elements(container)
if not job_eles:
return [("current", None)]
return [("index", i) for i in range(len(job_eles))]
def _position_label(self, pos_type: str, pos_value: Any) -> str:
if pos_type == "name":
return str(pos_value)
if pos_type == "index":
return f"岗位{int(pos_value) + 1}"
return "当前岗位"
def _activate_position(self, page, container, pos_type: str, pos_value: Any) -> str:
label = self._position_label(pos_type, pos_value)
try:
if pos_type == "name":
ele = container.ele(f'x://*[contains(text(),"{pos_value}")]', timeout=5)
if not ele:
return ""
if not safe_click(page, ele):
return ""
elif pos_type == "index":
job_eles = self._get_all_position_elements(container)
if int(pos_value) >= len(job_eles):
return ""
if not safe_click(page, job_eles[int(pos_value)]):
return ""
return label
except Exception:
return ""
def _apply_filter_and_confirm(self, page, container, selected_filters: List[str]) -> None:
trigger = container.ele("x://*[contains(text(),'筛选')]", timeout=3)
if not trigger:
raise RuntimeError("未找到筛选按钮")
if not safe_click(page, trigger):
raise RuntimeError("点击筛选按钮失败")
human_delay(1.3, 2.0)
for item in selected_filters:
option = container.ele(f"x://*[contains(text(),'{item}')]", timeout=2)
if not option:
self.logger.warning("筛选项未命中: %s", item)
continue
safe_click(page, option)
human_delay(0.35, 0.9)
confirm = container.ele("x://*[contains(text(),'确定')]", timeout=3)
if not confirm:
raise RuntimeError("未找到筛选确认按钮")
if not safe_click(page, confirm):
raise RuntimeError("点击筛选确认失败")
def _load_geek_list_packet(self, page, container, selected_filters: List[str]):
if selected_filters:
self._apply_filter_and_confirm(page, container, selected_filters)
packets = page.listen.wait(count=2, timeout=30)
if isinstance(packets, list) and packets:
return packets[-1]
return packets
return page.listen.wait(timeout=30)
def _extract_geek_list(self, packet) -> List[dict]:
body = self._packet_body(packet)
zp_data = body.get("zpData", {}) if isinstance(body, dict) else {}
geek_list = zp_data.get("geekList", []) if isinstance(zp_data, dict) else []
return geek_list if isinstance(geek_list, list) else []
@staticmethod
def _geek_key(item: dict) -> str:
card = item.get("geekCard") or {}
return str(
card.get("encryptGeekId")
or card.get("geekId")
or card.get("geekName")
or ""
).strip()
def _greet_geek_list(
self,
*,
page,
container,
geek_list: List[dict],
greeted_keys: set[str],
position_label: str,
default_job: str,
cancel_event,
) -> Tuple[int, List[dict]]:
added = 0
records: List[dict] = []
for item in geek_list or []:
self.ensure_not_cancelled(cancel_event)
geek_key = self._geek_key(item)
if not geek_key or geek_key in greeted_keys:
continue
if not self._greet_one_geek(page, container, item):
continue
greeted_keys.add(geek_key)
added += 1
card = item.get("geekCard") or {}
name = str(card.get("geekName", "")).strip() or geek_key
position = str(card.get("expectPositionName", "")).strip() or position_label or default_job
records.append(
{
"name": name,
"position": position,
"geek_key": geek_key,
"source": "new_greet",
"greeted_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
}
)
return added, records
def _greet_one_geek(self, page, container, item: dict) -> bool:
geek_name = str((item.get("geekCard") or {}).get("geekName", "")).strip()
if not geek_name:
return False
try:
name_ele = container.ele(f'x://span[contains(text(),"{geek_name}")]', timeout=5)
if not name_ele:
name_ele = container.ele(f'x://span[text()="{geek_name}"]', timeout=2)
if not name_ele:
return False
try:
name_ele.run_js("this.scrollIntoView()")
except Exception:
pass
if not safe_click(page, name_ele):
return False
human_delay(2.2, 3.6)
panel = page.get_frame(1)
human_delay(0.4, 1.0)
greet_btn = panel.ele('x://*[contains(text(),"打招呼")]', timeout=3)
if not greet_btn or not safe_click(panel, greet_btn):
return False
human_delay(0.4, 1.0)
collect_btn = panel.ele('x://*[contains(text(),"收藏")]', timeout=2)
if collect_btn:
safe_click(panel, collect_btn)
human_delay(0.3, 0.8)
for _ in range(3):
extra_btn = panel.ele('x://*[@class="btn-v2 btn-outline-v2"]', timeout=2)
if extra_btn:
safe_click(panel, extra_btn)
human_delay(0.3, 0.8)
input_box = page.ele('x://*[@data-placeholder="快速回复"]', timeout=2)
if input_box:
input_box.input(FAST_REPLY_TEXT)
send_btn = page.ele('x://*[contains(text(),"发送")]', timeout=2)
if send_btn:
safe_click(page, send_btn)
human_delay(0.3, 0.8)
close_side = page.ele('x://*[@class="iboss iboss-close"]', timeout=1)
if close_side:
safe_click(page, close_side)
human_delay(0.2, 0.6)
close_popup = panel.ele('x://*[@class="boss-popup__close"]', timeout=1)
if close_popup:
safe_click(panel, close_popup)
human_delay(0.2, 0.6)
return True
except Exception:
return False
def _save_new_greet_contacts(self, *, records: List[dict], worker_id: str, account_name: str) -> int:
if not records:
return 0
from django.utils import timezone
from server.models import ContactRecord
created = 0
for rec in records:
geek_key = str(rec.get("geek_key", "")).strip()
if not geek_key:
continue
contact_key = f"greet:{geek_key}"
defaults = {
"name": str(rec.get("name", "")).strip() or contact_key,
"position": str(rec.get("position", "")).strip(),
"reply_status": "新打招呼",
"wechat_exchanged": False,
"worker_id": worker_id,
"contacted_at": timezone.now(),
"notes": f"新打招呼记录; account={account_name}",
}
obj, was_created = ContactRecord.objects.update_or_create(
contact=contact_key,
defaults=defaults,
)
if was_created:
created += 1
elif not obj.contacted_at:
obj.contacted_at = timezone.now()
obj.save(update_fields=["contacted_at"])
return created