Files
boss_dp/worker/tasks/boss_recruit.py

506 lines
18 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
"""
2026-03-06 10:05:49 +08:00
BOSS recruit task handler (new greeting flow).
This flow is aligned with `1.py -> main`:
1) open recommend page
2) iterate positions
3) apply selected filters
4) greet candidates until target count is reached
"""
from __future__ import annotations
import asyncio
2026-03-03 10:20:02 +08:00
import json
import random
import time
2026-03-06 10:05:49 +08:00
from datetime import datetime
from typing import Any, Callable, Coroutine, Dict, List, Optional, Tuple
from common.protocol import TaskType
from worker.bit_browser import BitBrowserAPI
2026-03-06 10:05:49 +08:00
from worker.browser_control import connect_browser, human_delay, safe_click
2026-03-03 10:50:32 +08:00
from worker.tasks.base import BaseTaskHandler, TaskCancelledError
2026-03-06 10:05:49 +08:00
# Page that the recruit flow opens before iterating positions.
RECOMMEND_URL = "https://www.zhipin.com/web/chat/recommend"
# URL fragment handed to the page's network listener to capture the
# candidate ("geek") list responses.
GEEK_LIST_API = "wapi/zpjob/rec/geek/list"
# Message typed into the quick-reply input after greeting a candidate.
FAST_REPLY_TEXT = "您好,我们目前有相关岗位机会,方便了解一下吗?"
# Candidate XPath locators for the position list, kept consistent with the
# 1.py logic. They are tried in order; the first one that yields between
# 1 and 100 elements is used.
JOB_LIST_SELECTORS = [
"x://*[contains(@class,'job-item') or contains(@class,'position-item')]",
"x://li[contains(@class,'job')]",
"x://div[contains(@class,'job-list')]/div",
"x://ul[contains(@class,'job')]/li",
"x://*[contains(@class,'recommend-job')]//*[contains(@class,'item')]",
"x://*[contains(@class,'job-list')]/*",
"x://a[contains(@href,'job')]",
]
class BossRecruitHandler(BaseTaskHandler):
    """Recruit by greeting candidates from the recommend list.

    The handler opens the recommend page, walks the configured (or detected)
    positions, applies the selected filters, and greets candidates until
    ``greet_target`` is reached or a full round adds nobody new.
    """

    task_type = TaskType.BOSS_RECRUIT.value

    # Local BitBrowser API endpoint used when the task params carry none.
    DEFAULT_BIT_API_BASE = "http://127.0.0.1:54345"

    async def execute(
        self,
        task_id: str,
        params: Dict[str, Any],
        progress_cb: Callable[[str, str], Coroutine],
    ) -> Any:
        """Validate task params and run the blocking flow in an executor.

        Args:
            task_id: Identifier forwarded to ``progress_cb``.
            params: Task parameters. Keys read here: ``job_title``,
                ``account_name``, ``account_id``, ``bit_api_base``,
                ``_cancel_event``, ``worker_id``, ``selected_filters`` /
                ``filters``, ``greet_target`` and ``position_names``.
            progress_cb: Awaitable callback receiving ``(task_id, message)``.

        Returns:
            The summary dict produced by ``_run_sync``.

        Raises:
            ValueError: If no filter was selected.
        """
        job_title = str(params.get("job_title", "") or "").strip() or "相关岗位"
        account_name = str(params.get("account_name", "") or "").strip()
        account_id = str(params.get("account_id", "") or "").strip()
        # Fall back to the default endpoint also when the key is present but
        # empty/None; previously that produced an empty base URL.
        bit_api_base = (
            str(params.get("bit_api_base") or "").strip() or self.DEFAULT_BIT_API_BASE
        )
        cancel_event = params.get("_cancel_event")
        worker_id = str(params.get("worker_id", "") or "").strip()

        selected_filters = self._normalize_selected_filters(params)
        if not selected_filters:
            raise ValueError("招聘前必须先选择至少一个筛选条件")
        greet_target = self._parse_positive_int(params.get("greet_target"), default=20)
        position_names = self._normalize_string_list(params.get("position_names"))

        self.ensure_not_cancelled(cancel_event)
        await progress_cb(task_id, f"正在启动招聘流程,目标打招呼人数: {greet_target}")

        # We are inside a coroutine, so the running loop is the right one to
        # schedule the blocking browser automation on.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            self._run_sync,
            job_title,
            account_name,
            account_id,
            bit_api_base,
            selected_filters,
            position_names,
            greet_target,
            worker_id,
            cancel_event,
        )

    def _run_sync(
        self,
        job_title: str,
        account_name: str,
        account_id: str,
        bit_api_base: str,
        selected_filters: List[str],
        position_names: List[str],
        greet_target: int,
        worker_id: str,
        cancel_event,
    ) -> Dict[str, Any]:
        """Connect to the browser, run the greeting flow and build the summary.

        Returns:
            A dict with the target/actual greet counts, rounds, positions,
            per-candidate details, created contact records, and the collected
            errors (at most the first 20 are included; ``error_count`` holds
            the full number). ``success`` is True only when no error occurred.
        """
        self.ensure_not_cancelled(cancel_event)

        bit_api = BitBrowserAPI(bit_api_base)
        _, port = bit_api.get_browser_for_drission(
            browser_id=account_id or None,
            name=account_name or None,
        )
        browser = connect_browser(port=port)
        page = browser.latest_tab

        flow = self._run_main_like_flow(
            page=page,
            job_title=job_title,
            selected_filters=selected_filters,
            position_names=position_names,
            greet_target=greet_target,
            worker_id=worker_id,
            account_name=account_name,
            cancel_event=cancel_event,
        )
        errors = flow.get("errors", [])

        return {
            "job_title": job_title,
            "target_greet_count": greet_target,
            "actual_greeted": flow.get("actual_greeted", 0),
            "rounds": flow.get("rounds", 0),
            "selected_filters": selected_filters,
            "positions": flow.get("positions", []),
            "details": flow.get("details", []),
            "contact_records_created": flow.get("contact_records_created", 0),
            "error_count": len(errors),
            "errors": errors[:20],
            "success": len(errors) == 0,
        }

    def _run_main_like_flow(
        self,
        *,
        page,
        job_title: str,
        selected_filters: List[str],
        position_names: List[str],
        greet_target: int,
        worker_id: str,
        account_name: str,
        cancel_event,
    ) -> Dict[str, Any]:
        """Greet candidates position by position until the target is met.

        Rounds over all positions repeat until ``greet_target`` is reached or
        a whole round greets nobody new. A failure while handling one position
        (or while saving contact records) is recorded in ``errors`` instead of
        aborting the run, so progress already made is still reported.

        Returns:
            Dict with ``actual_greeted``, ``rounds``, ``positions``,
            ``details``, ``contact_records_created`` and ``errors``.

        Raises:
            TaskCancelledError: Propagated from cancellation checks.
        """
        details: List[dict] = []
        errors: List[str] = []
        self.ensure_not_cancelled(cancel_event)

        page.listen.start(GEEK_LIST_API)
        try:
            page.get(RECOMMEND_URL)
            human_delay(1.2, 2.4)

            container = self._get_container(page)
            positions = self._build_positions(container, position_names)
            if not positions:
                return {
                    "actual_greeted": 0,
                    "rounds": 0,
                    "positions": [],
                    "details": [],
                    "contact_records_created": 0,
                    "errors": ["未识别到岗位列表,无法开始招聘"],
                }

            greeted_keys: set[str] = set()
            total_greeted = 0
            round_num = 0
            contact_records_created = 0

            while True:
                self.ensure_not_cancelled(cancel_event)
                round_num += 1
                round_added = 0

                for pos_type, pos_value in positions:
                    self.ensure_not_cancelled(cancel_event)
                    remaining = greet_target - total_greeted
                    if remaining <= 0:
                        break

                    label = self._position_label(pos_type, pos_value)
                    try:
                        added, new_records = self._greet_position(
                            page=page,
                            pos_type=pos_type,
                            pos_value=pos_value,
                            selected_filters=selected_filters,
                            greeted_keys=greeted_keys,
                            default_job=job_title,
                            cancel_event=cancel_event,
                            max_count=remaining,
                        )
                    except TaskCancelledError:
                        raise
                    except Exception as exc:
                        self.logger.exception("岗位处理失败: %s", label)
                        self._record_error(errors, f"{label}: {exc}")
                        continue

                    if not added:
                        continue

                    total_greeted += added
                    round_added += added
                    details.extend(new_records)

                    # The greetings were already sent; a persistence failure
                    # must not discard them from the reported result.
                    try:
                        contact_records_created += self._save_new_greet_contacts(
                            records=new_records,
                            worker_id=worker_id,
                            account_name=account_name,
                        )
                    except Exception as exc:
                        self.logger.exception("保存打招呼联系记录失败")
                        self._record_error(errors, f"保存联系记录失败: {exc}")

                if total_greeted >= greet_target:
                    break
                if round_added == 0:
                    # Nothing new in a full pass: further rounds would loop forever.
                    break
                human_delay(0.8, 1.6)

            return {
                "actual_greeted": total_greeted,
                "rounds": round_num,
                "positions": [self._position_label(t, v) for t, v in positions],
                "details": details,
                "contact_records_created": contact_records_created,
                "errors": errors,
            }
        finally:
            # Pair every listen.start() with a stop so the listener does not
            # outlive the task; a failure to stop is not worth failing the run.
            try:
                page.listen.stop()
            except Exception:
                self.logger.debug("停止网络监听失败", exc_info=True)

    @staticmethod
    def _record_error(errors: List[str], message: str) -> None:
        """Append ``message`` to ``errors`` unless it is already recorded."""
        if message not in errors:
            errors.append(message)

    def _greet_position(
        self,
        *,
        page,
        pos_type: str,
        pos_value: Any,
        selected_filters: List[str],
        greeted_keys: set[str],
        default_job: str,
        cancel_event,
        max_count: Optional[int] = None,
    ) -> Tuple[int, List[dict]]:
        """Activate one position, load its candidate list and greet from it.

        Returns:
            ``(added, records)``; ``(0, [])`` when the position could not be
            activated or no candidate list was captured.
        """
        container = self._get_container(page)
        label = self._activate_position(page, container, pos_type, pos_value)
        if not label:
            return 0, []

        human_delay(1.0, 2.1)
        # Re-resolve the container after the click, as the original flow does.
        container = self._get_container(page)

        packet = self._load_geek_list_packet(page, container, selected_filters)
        geek_list = self._extract_geek_list(packet)
        if not geek_list:
            return 0, []

        return self._greet_geek_list(
            page=page,
            container=container,
            geek_list=geek_list,
            greeted_keys=greeted_keys,
            position_label=label,
            default_job=default_job,
            cancel_event=cancel_event,
            max_count=max_count,
        )

    @staticmethod
    def _packet_body(packet) -> dict:
        """Return the JSON object carried by a captured packet, or ``{}``.

        Accepts a body that is already a dict, or a str/bytes payload holding
        a JSON object. Anything else (including invalid JSON) yields ``{}``.
        """
        if not packet:
            return {}
        response = getattr(packet, "response", None)
        body = getattr(response, "body", None) if response is not None else None
        if isinstance(body, dict):
            return body
        if isinstance(body, (str, bytes, bytearray)):
            try:
                parsed = json.loads(body)
            except ValueError:  # JSONDecodeError / UnicodeDecodeError
                return {}
            return parsed if isinstance(parsed, dict) else {}
        return {}

    @staticmethod
    def _normalize_string_list(value: Any) -> List[str]:
        """Coerce ``value`` into a list of non-empty, stripped strings.

        Supported inputs: ``None`` (empty list), a list/tuple, a JSON array
        encoded as a string, or a plain string (single-item list). ``None``
        items are skipped rather than turned into the text ``"None"``.
        """
        if value is None:
            return []
        if isinstance(value, str):
            raw = value.strip()
            if not raw:
                return []
            parsed = None
            if raw.startswith("[") and raw.endswith("]"):
                try:
                    parsed = json.loads(raw)
                except ValueError:
                    parsed = None
            if not isinstance(parsed, list):
                return [raw]
            value = parsed
        if isinstance(value, (list, tuple)):
            stripped = (str(v).strip() for v in value if v is not None)
            return [text for text in stripped if text]
        return []

    def _normalize_selected_filters(self, params: Dict[str, Any]) -> List[str]:
        """Read the filters from ``selected_filters``, falling back to ``filters``."""
        filters = params.get("selected_filters")
        if filters is None:
            filters = params.get("filters")
        return self._normalize_string_list(filters)

    @staticmethod
    def _parse_positive_int(value: Any, default: int) -> int:
        """Return ``int(value)`` when it is a positive integer, else ``default``."""
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return parsed if parsed > 0 else default

    @staticmethod
    def _xpath_literal(text: str) -> str:
        """Quote ``text`` as an XPath 1.0 string literal.

        XPath has no escape character, so text containing both quote kinds is
        assembled with ``concat()``.
        """
        if '"' not in text:
            return f'"{text}"'
        if "'" not in text:
            return f"'{text}'"
        pieces = (f'"{part}"' for part in text.split('"'))
        return "concat(" + ", '\"', ".join(pieces) + ")"

    @staticmethod
    def _get_container(page):
        """Return the ``recommendFrame`` frame, or ``page`` if the lookup raises."""
        try:
            return page.get_frame("recommendFrame")
        except Exception:
            return page

    def _get_all_position_elements(self, container) -> List[Any]:
        """Return position elements from the first selector that matches sanely.

        A selector is accepted when it yields between 1 and 100 elements;
        selectors that raise are skipped.
        """
        for selector in JOB_LIST_SELECTORS:
            try:
                eles = container.eles(selector, timeout=2)
            except Exception:
                continue
            if eles and 1 <= len(eles) <= 100:
                return eles
        return []

    def _build_positions(self, container, position_names: List[str]) -> List[Tuple[str, Any]]:
        """Build the ``(kind, value)`` position descriptors to iterate.

        ``("name", text)`` for explicitly requested positions, otherwise
        ``("index", i)`` per detected element, or a single ``("current", None)``
        when nothing was detected.
        """
        if position_names:
            return [("name", name) for name in position_names]

        job_eles = self._get_all_position_elements(container)
        if not job_eles:
            return [("current", None)]
        return [("index", i) for i, _ in enumerate(job_eles)]

    def _position_label(self, pos_type: str, pos_value: Any) -> str:
        """Return the human-readable label for a position descriptor."""
        if pos_type == "name":
            return str(pos_value)
        if pos_type == "index":
            return f"岗位{int(pos_value) + 1}"
        return "当前岗位"

    def _activate_position(self, page, container, pos_type: str, pos_value: Any) -> str:
        """Click the given position and return its label, or ``""`` on failure.

        A ``"current"`` descriptor needs no click and always succeeds.
        """
        label = self._position_label(pos_type, pos_value)
        try:
            if pos_type == "name":
                # Quote via _xpath_literal so names containing quotes do not
                # break the expression.
                literal = self._xpath_literal(str(pos_value))
                ele = container.ele(f"x://*[contains(text(),{literal})]", timeout=5)
                if not ele or not safe_click(page, ele):
                    return ""
            elif pos_type == "index":
                job_eles = self._get_all_position_elements(container)
                index = int(pos_value)
                if index >= len(job_eles):
                    return ""
                if not safe_click(page, job_eles[index]):
                    return ""
            return label
        except Exception:
            self.logger.debug("切换岗位失败: %s", label, exc_info=True)
            return ""

    def _apply_filter_and_confirm(self, page, container, selected_filters: List[str]) -> None:
        """Open the filter panel, click each selected option and confirm.

        Options that cannot be found are logged and skipped.

        Raises:
            RuntimeError: If the filter trigger or the confirm button is
                missing or cannot be clicked.
        """
        trigger = container.ele("x://*[contains(text(),'筛选')]", timeout=3)
        if not trigger:
            raise RuntimeError("未找到筛选按钮")
        if not safe_click(page, trigger):
            raise RuntimeError("点击筛选按钮失败")
        human_delay(1.3, 2.0)

        for item in selected_filters:
            literal = self._xpath_literal(str(item))
            option = container.ele(f"x://*[contains(text(),{literal})]", timeout=2)
            if not option:
                self.logger.warning("筛选项未命中: %s", item)
                continue
            safe_click(page, option)
            human_delay(0.35, 0.9)

        confirm = container.ele("x://*[contains(text(),'确定')]", timeout=3)
        if not confirm:
            raise RuntimeError("未找到筛选确认按钮")
        if not safe_click(page, confirm):
            raise RuntimeError("点击筛选确认失败")

    def _load_geek_list_packet(self, page, container, selected_filters: List[str]):
        """Wait for the candidate-list response, applying filters first if any.

        With filters, two packets are awaited and the last one is returned;
        without filters a single packet is awaited.
        """
        if not selected_filters:
            return page.listen.wait(timeout=30)

        self._apply_filter_and_confirm(page, container, selected_filters)
        # NOTE(review): presumably the first packet is the unfiltered list and
        # the second the filtered one - confirm against the listener queue.
        packets = page.listen.wait(count=2, timeout=30)
        if isinstance(packets, list) and packets:
            return packets[-1]
        return packets

    def _extract_geek_list(self, packet) -> List[dict]:
        """Return ``zpData.geekList`` from the packet body, or ``[]``."""
        body = self._packet_body(packet)
        zp_data = body.get("zpData", {}) if isinstance(body, dict) else {}
        geek_list = zp_data.get("geekList", []) if isinstance(zp_data, dict) else []
        return geek_list if isinstance(geek_list, list) else []

    @staticmethod
    def _geek_key(item: dict) -> str:
        """Return a dedup key for a candidate, or ``""`` if none is available.

        Preference order: ``encryptGeekId``, ``geekId``, ``geekName`` from the
        item's ``geekCard``. Malformed (non-dict) items yield ``""``.
        """
        if not isinstance(item, dict):
            return ""
        card = item.get("geekCard") or {}
        if not isinstance(card, dict):
            return ""
        return str(
            card.get("encryptGeekId")
            or card.get("geekId")
            or card.get("geekName")
            or ""
        ).strip()

    def _greet_geek_list(
        self,
        *,
        page,
        container,
        geek_list: List[dict],
        greeted_keys: set[str],
        position_label: str,
        default_job: str,
        cancel_event,
        max_count: Optional[int] = None,
    ) -> Tuple[int, List[dict]]:
        """Greet not-yet-greeted candidates from ``geek_list``.

        Args:
            greeted_keys: Keys already greeted in this run; updated in place.
            max_count: Optional cap on how many candidates to greet in this
                call (``None`` means no cap), so the overall target is not
                overshot.

        Returns:
            ``(added, records)`` where ``records`` describes each new greeting.
        """
        added = 0
        records: List[dict] = []

        for item in geek_list or []:
            if max_count is not None and added >= max_count:
                break
            self.ensure_not_cancelled(cancel_event)

            geek_key = self._geek_key(item)
            if not geek_key or geek_key in greeted_keys:
                continue
            if not self._greet_one_geek(page, container, item):
                continue

            greeted_keys.add(geek_key)
            added += 1

            card = item.get("geekCard") or {}
            name = str(card.get("geekName") or "").strip() or geek_key
            position = (
                str(card.get("expectPositionName") or "").strip()
                or position_label
                or default_job
            )
            records.append(
                {
                    "name": name,
                    "position": position,
                    "geek_key": geek_key,
                    "source": "new_greet",
                    "greeted_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                }
            )

        return added, records

    @staticmethod
    def _click_if_present(owner, locator: str, *, timeout: float, pause: Tuple[float, float]) -> bool:
        """Click ``locator`` inside ``owner`` if it exists, then pause.

        Returns:
            True when the element was found (a click was attempted).
        """
        target = owner.ele(locator, timeout=timeout)
        if not target:
            return False
        safe_click(owner, target)
        human_delay(*pause)
        return True

    def _greet_one_geek(self, page, container, item: dict) -> bool:
        """Open one candidate's card and send the greeting.

        Returns:
            True once the greet button was clicked; False if the candidate
            or the greet button could not be found/clicked, or on any error.
        """
        geek_name = str((item.get("geekCard") or {}).get("geekName") or "").strip()
        if not geek_name:
            return False

        try:
            literal = self._xpath_literal(geek_name)
            # Exact match first so a name that is merely a substring of
            # another candidate's name does not select the wrong card.
            name_ele = container.ele(f"x://span[text()={literal}]", timeout=2)
            if not name_ele:
                name_ele = container.ele(f"x://span[contains(text(),{literal})]", timeout=5)
            if not name_ele:
                return False

            # Best effort only: scrolling may fail without preventing the click.
            try:
                name_ele.run_js("this.scrollIntoView()")
            except Exception:
                pass

            if not safe_click(page, name_ele):
                return False
            human_delay(2.2, 3.6)

            panel = page.get_frame(1)
            human_delay(0.4, 1.0)

            greet_btn = panel.ele('x://*[contains(text(),"打招呼")]', timeout=3)
            if not greet_btn or not safe_click(panel, greet_btn):
                return False
            human_delay(0.4, 1.0)

            # Everything below is optional follow-up; missing elements are skipped.
            self._click_if_present(
                panel, 'x://*[contains(text(),"收藏")]', timeout=2, pause=(0.3, 0.8)
            )
            for _ in range(3):
                self._click_if_present(
                    panel, 'x://*[@class="btn-v2 btn-outline-v2"]', timeout=2, pause=(0.3, 0.8)
                )

            input_box = page.ele('x://*[@data-placeholder="快速回复"]', timeout=2)
            if input_box:
                input_box.input(FAST_REPLY_TEXT)
                self._click_if_present(
                    page, 'x://*[contains(text(),"发送")]', timeout=2, pause=(0.3, 0.8)
                )

            self._click_if_present(
                page, 'x://*[@class="iboss iboss-close"]', timeout=1, pause=(0.2, 0.6)
            )
            self._click_if_present(
                panel, 'x://*[@class="boss-popup__close"]', timeout=1, pause=(0.2, 0.6)
            )
            return True
        except Exception:
            self.logger.warning("打招呼失败: %s", geek_name, exc_info=True)
            return False

    def _save_new_greet_contacts(self, *, records: List[dict], worker_id: str, account_name: str) -> int:
        """Persist greeting records as ``ContactRecord`` rows.

        Each record is upserted under the contact key ``greet:<geek_key>``;
        records without a ``geek_key`` are skipped.

        Returns:
            The number of rows that were newly created.
        """
        if not records:
            return 0

        # Imported lazily, as in the original, so the module can be loaded
        # without the Django app being importable.
        from django.utils import timezone
        from server.models import ContactRecord

        created = 0
        for rec in records:
            geek_key = str(rec.get("geek_key", "")).strip()
            if not geek_key:
                continue

            contact_key = f"greet:{geek_key}"
            defaults = {
                "name": str(rec.get("name", "")).strip() or contact_key,
                "position": str(rec.get("position", "")).strip(),
                "reply_status": "新打招呼",
                "wechat_exchanged": False,
                "worker_id": worker_id,
                "contacted_at": timezone.now(),
                "notes": f"新打招呼记录; account={account_name}",
            }
            obj, was_created = ContactRecord.objects.update_or_create(
                contact=contact_key,
                defaults=defaults,
            )
            if was_created:
                created += 1
            elif not obj.contacted_at:
                # NOTE(review): update_or_create already writes contacted_at
                # from `defaults`, so this branch looks unreachable. It would
                # only matter with get_or_create - confirm which was intended.
                obj.contacted_at = timezone.now()
                obj.save(update_fields=["contacted_at"])

        return created
2026-03-05 10:27:28 +08:00