This commit is contained in:
ddrwode
2026-02-27 13:56:15 +08:00
parent afa6e56630
commit 530d7fe135
9 changed files with 272 additions and 4 deletions

View File

@@ -268,6 +268,7 @@ Set-Cookie: auth_token=a1b2c3d4e5f6...; HttpOnly; Max-Age=31536000; SameSite=Lax
| browser_id | string | 比特浏览器窗口 ID | | browser_id | string | 比特浏览器窗口 ID |
| browser_name | string | 浏览器环境名称 | | browser_name | string | 浏览器环境名称 |
| boss_username | string | BOSS 直聘用户名(检测后填充) | | boss_username | string | BOSS 直聘用户名(检测后填充) |
| boss_id | string | BOSS 直聘用户 ID检测登录成功时填充 |
| is_logged_in | boolean | 是否已登录 BOSS | | is_logged_in | boolean | 是否已登录 BOSS |
| current_task_id | string/null | 当前关联的任务 ID | | current_task_id | string/null | 当前关联的任务 ID |
| current_task_status | string/null | 当前任务状态:`pending` / `dispatched` / `running` / `success` / `failed` | | current_task_status | string/null | 当前任务状态:`pending` / `dispatched` / `running` / `success` / `failed` |

View File

@@ -0,0 +1,173 @@
# 设计BOSS 列表增加 boss_id + 电脑/环境字段同步
## 一、需求概述
1. **boss_id 字段**:在 BOSS 账号列表中加入 BOSS 直聘用户 ID
2. **电脑/环境同步**:电脑、环境字段以比特浏览器 / BOSS 平台的实际数据为准,而不是仅依赖添加账号时的一次填写
---
## 二、boss_id 字段设计
### 2.1 数据来源
BOSS 直聘页面前端将用户 ID 放在 `window._PAGE` 中,可通过:
```javascript
window._PAGE?.uid
```
获取。该对象在用户登录后加载,检测登录任务执行时页面已就绪,可在此阶段提取。
### 2.2 数据库变更
| 项目 | 说明 |
|------|------|
| 模型 | `BossAccount` |
| 新字段 | `boss_id``CharField(max_length=64, default="", blank=True)` |
| 含义 | BOSS 直聘平台用户 ID检测登录成功时写入 |
| 唯一性 | 不设唯一约束(同一 BOSS 账号可绑定多台电脑/环境) |
### 2.3 修改点
| 文件 | 修改内容 |
|------|----------|
| `server/models.py` | 增加 `boss_id` 字段 |
| `server/migrations/` | 新建迁移 |
| `worker/tasks/check_login.py` | 页面加载后执行 JS 读取 `window._PAGE?.uid`,写入返回值 |
| `server/ws/consumers.py` | `_upsert_account_status` 中增加 `boss_id` 的更新 |
| `server/serializers.py` | `BossAccountSerializer.fields` 中加入 `"boss_id"` |
| `API文档.md` | 补充 `boss_id` 字段说明 |
### 2.4 check_login 返回值扩展
```python
# worker/tasks/check_login.py 返回
{
"browser_id": "...",
"browser_name": "...",
"boss_username": "...",
"boss_id": "12345678", # 新增:从 window._PAGE?.uid 读取
"is_logged_in": True,
}
```
### 2.5 提取 boss_id 的 JS 示例
```javascript
(function() {
try {
var page = window._PAGE;
if (page && page.uid) return String(page.uid);
} catch (e) {}
return "";
})();
```
使用 DrissionPage 的 `tab.run_js()` 执行上述脚本即可。
---
## 三、电脑 / 环境字段同步设计
### 3.1 当前数据流
| 字段 | 来源 | 更新时机 |
|------|------|----------|
| 电脑 (worker_name) | `worker_manager` 运行时状态 | 每次 GET /api/accounts 时通过 `_enrich` 实时补充 ✓ |
| 环境 (browser_name) | `BossAccount.browser_name` | 添加账号、check_login 完成时写入 |
电脑字段已经是实时数据,无需改动。
### 3.2 环境同步缺口
- 用户在比特浏览器中修改环境名称后,`BossAccount.browser_name` 仍为旧值
- 系统已有 `BROWSER_LIST_UPDATE` 消息类型,但 Worker 目前未发送
### 3.3 同步策略
以比特浏览器为权威来源Worker 定期/按需上报最新浏览器列表Server 据此更新 BossAccount。
```
Worker 拉取 BitBrowser API
对比上次上报的 browsers
若有变化 → 发送 BROWSER_LIST_UPDATE
Server 更新 worker_manager.browsers
Server 同步更新 BossAccount (browser_id, browser_name)
```
### 3.4 实现要点
#### 3.4.1 Worker 端
| 文件 | 修改内容 |
|------|----------|
| `worker/ws_client.py` | 在心跳逻辑中周期性拉取比特浏览器列表;若与上次不同,发送 `BROWSER_LIST_UPDATE` |
建议逻辑:
- 每 N 次心跳(如每 3 次,约 90 秒)拉取一次 `bit_api.list_browsers()`
- 与上次 `_last_browsers` 比较(按 id+name 或 id 的字典列表)
- 有变化则发送 `browser_list_update`,并更新 `_last_browsers`
#### 3.4.2 Server 端
| 文件 | 修改内容 |
|------|----------|
| `server/ws/consumers.py` | 收到 `BROWSER_LIST_UPDATE` 时,除更新 `worker_manager` 外,同步更新 `BossAccount` |
更新规则:
- 该 Worker 下的 BossAccount`browser_id` 匹配
- 若新列表中有同 id 的项:用新 `name` 更新 `browser_name`
- 若某 `browser_id` 在新列表中不存在:可选保留不更新,或记录日志(环境可能已删除)
---
## 四、实现顺序建议
1. **Phase 1boss_id**
- 模型 + 迁移
- check_login 中提取并返回 boss_id
- consumers 中 upsert boss_id
- serializer + API 文档
2. **Phase 2环境同步**
- Worker 心跳中增加周期性拉取并发送 BROWSER_LIST_UPDATE
- consumers 中处理 BROWSER_LIST_UPDATE 时同步 BossAccount
---
## 五、API 变更示例
### GET /api/accounts 响应新增字段
```json
{
"id": 1,
"worker_id": "pc-a",
"browser_id": "abc123",
"browser_name": "第一个",
"boss_id": "12345678",
"boss_username": "某用户",
"worker_name": "电脑A",
"worker_online": true,
...
}
```
| 字段 | 类型 | 说明 |
|------|------|------|
| boss_id | string | BOSS 直聘用户 ID检测登录成功时填充 |
---
## 六、注意事项
1. **boss_id 为空**:未登录或页面未暴露 uid 时为空字符串,前端需兼容
2. **环境被删除**比特浏览器中删除环境后BossAccount 仍可能保留;可后续增加「环境不存在」的标记或清理逻辑
3. **window._PAGE 结构**BOSS 直聘可能改版,建议加 try-catch取不到时静默返回空

View File

@@ -4,6 +4,11 @@
import os import os
import sys import sys
# 将项目根目录加入路径,确保能 import server
_project_root = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
if _project_root not in sys.path:
sys.path.insert(0, _project_root)
def main(): def main():
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "server.settings") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "server.settings")

View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Generated manually for boss_id field
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('server', '0002_chatscript_contactrecord_filterconfig_systemconfig'),
]
operations = [
migrations.AddField(
model_name='bossaccount',
name='boss_id',
field=models.CharField(blank=True, default='', max_length=64, verbose_name='BOSS 直聘用户 ID'),
),
]

View File

@@ -24,6 +24,7 @@ class BossAccount(models.Model):
browser_id = models.CharField(max_length=128, default="", verbose_name="比特浏览器窗口 ID") browser_id = models.CharField(max_length=128, default="", verbose_name="比特浏览器窗口 ID")
browser_name = models.CharField(max_length=128, default="", verbose_name="比特浏览器窗口名称(环境名)") browser_name = models.CharField(max_length=128, default="", verbose_name="比特浏览器窗口名称(环境名)")
boss_username = models.CharField(max_length=128, default="", verbose_name="BOSS 直聘用户名") boss_username = models.CharField(max_length=128, default="", verbose_name="BOSS 直聘用户名")
boss_id = models.CharField(max_length=64, default="", blank=True, verbose_name="BOSS 直聘用户 ID")
is_logged_in = models.BooleanField(default=False, verbose_name="是否已登录") is_logged_in = models.BooleanField(default=False, verbose_name="是否已登录")
current_task_id = models.CharField(max_length=32, null=True, blank=True, verbose_name="当前检测任务 ID") current_task_id = models.CharField(max_length=32, null=True, blank=True, verbose_name="当前检测任务 ID")
current_task_status = models.CharField(max_length=32, null=True, blank=True, verbose_name="当前检测任务状态") current_task_status = models.CharField(max_length=32, null=True, blank=True, verbose_name="当前检测任务状态")

View File

@@ -19,7 +19,7 @@ class BossAccountSerializer(serializers.ModelSerializer):
model = BossAccount model = BossAccount
fields = [ fields = [
"id", "worker_id", "browser_id", "browser_name", "id", "worker_id", "browser_id", "browser_name",
"boss_username", "is_logged_in", "boss_username", "boss_id", "is_logged_in",
"current_task_id", "current_task_status", "current_task_id", "current_task_status",
"checked_at", "created_at", "updated_at", "checked_at", "created_at", "updated_at",
"worker_name", "worker_online", "worker_name", "worker_online",

View File

@@ -93,6 +93,10 @@ class WorkerConsumer(AsyncWebsocketConsumer):
elif msg_type == MsgType.BROWSER_LIST_UPDATE.value: elif msg_type == MsgType.BROWSER_LIST_UPDATE.value:
worker_manager.update_browsers(self.worker_id, data.get("browsers", [])) worker_manager.update_browsers(self.worker_id, data.get("browsers", []))
try:
self._sync_boss_account_browsers(self.worker_id, data.get("browsers", []))
except Exception as sync_err:
logger.warning("同步 BossAccount 环境失败: %s", sync_err)
elif msg_type == MsgType.TASK_PROGRESS.value: elif msg_type == MsgType.TASK_PROGRESS.value:
task_id = data.get("task_id", "") task_id = data.get("task_id", "")
@@ -158,6 +162,7 @@ class WorkerConsumer(AsyncWebsocketConsumer):
browser_id = result.get("browser_id", "") browser_id = result.get("browser_id", "")
browser_name = result.get("browser_name", "") browser_name = result.get("browser_name", "")
boss_username = result.get("boss_username", "") boss_username = result.get("boss_username", "")
boss_id = result.get("boss_id", "")
is_logged_in = result.get("is_logged_in", False) is_logged_in = result.get("is_logged_in", False)
# 优先按 worker_id + browser_name 匹配 # 优先按 worker_id + browser_name 匹配
@@ -176,6 +181,8 @@ class WorkerConsumer(AsyncWebsocketConsumer):
account.browser_id = browser_id or account.browser_id account.browser_id = browser_id or account.browser_id
account.browser_name = browser_name or account.browser_name account.browser_name = browser_name or account.browser_name
account.boss_username = boss_username account.boss_username = boss_username
if boss_id:
account.boss_id = boss_id
account.is_logged_in = is_logged_in account.is_logged_in = is_logged_in
account.checked_at = now account.checked_at = now
account.save() account.save()
@@ -185,10 +192,38 @@ class WorkerConsumer(AsyncWebsocketConsumer):
browser_id=browser_id or f"name:{browser_name}", browser_id=browser_id or f"name:{browser_name}",
browser_name=browser_name, browser_name=browser_name,
boss_username=boss_username, boss_username=boss_username,
boss_id=boss_id,
is_logged_in=is_logged_in, is_logged_in=is_logged_in,
checked_at=now, checked_at=now,
) )
logger.info( logger.info(
"账号状态更新: worker=%s, browser=%s(%s), username=%s, logged_in=%s", "账号状态更新: worker=%s, browser=%s(%s), username=%s, boss_id=%s, logged_in=%s",
self.worker_id, browser_name, browser_id, boss_username, is_logged_in, self.worker_id, browser_name, browser_id, boss_username, boss_id, is_logged_in,
) )
@staticmethod
def _sync_boss_account_browsers(worker_id: str, browsers: list) -> None:
"""根据 Worker 上报的浏览器列表,同步更新 BossAccount 的 browser_id、browser_name。"""
from server.models import BossAccount
if not browsers:
return
browser_map = {str(b.get("id", "")).strip(): b for b in browsers if b.get("id")}
if not browser_map:
return
accounts = BossAccount.objects.filter(worker_id=worker_id)
updated = 0
for acc in accounts:
bid = (acc.browser_id or "").strip()
if not bid:
continue
new_info = browser_map.get(bid)
if new_info:
new_name = (new_info.get("name") or "").strip()
if new_name and new_name != acc.browser_name:
acc.browser_name = new_name
acc.save(update_fields=["browser_name"])
updated += 1
if updated:
logger.info("BossAccount 环境同步: worker=%s, 更新 %d", worker_id, updated)

View File

@@ -115,6 +115,7 @@ class CheckLoginHandler(BaseTaskHandler):
# ── 5. 查找 .user-name 元素,判断是否已登录 ── # ── 5. 查找 .user-name 元素,判断是否已登录 ──
boss_username = "" boss_username = ""
boss_id = ""
is_logged_in = False is_logged_in = False
try: try:
@@ -123,6 +124,17 @@ class CheckLoginHandler(BaseTaskHandler):
boss_username = user_name_ele.text.strip() boss_username = user_name_ele.text.strip()
is_logged_in = bool(boss_username) is_logged_in = bool(boss_username)
self.logger.info("环境 %s 已登录: %s", browser_name, boss_username) self.logger.info("环境 %s 已登录: %s", browser_name, boss_username)
# 提取 BOSS 直聘用户 IDwindow._PAGE?.uid
try:
uid_result = tab.run_js(
"(function(){try{var p=window._PAGE;if(p&&p.uid)return String(p.uid);}catch(e){}return '';})()"
)
if uid_result is not None and str(uid_result).strip():
boss_id = str(uid_result).strip()
self.logger.info("环境 %s boss_id=%s", browser_name, boss_id)
except Exception as js_err:
self.logger.debug("提取 boss_id 失败: %s", js_err)
else: else:
self.logger.info("环境 %s 未检测到 .user-name账号未登录", browser_name) self.logger.info("环境 %s 未检测到 .user-name账号未登录", browser_name)
except Exception as e: except Exception as e:
@@ -132,5 +144,6 @@ class CheckLoginHandler(BaseTaskHandler):
"browser_id": browser_id, "browser_id": browser_id,
"browser_name": browser_name, "browser_name": browser_name,
"boss_username": boss_username, "boss_username": boss_username,
"boss_id": boss_id,
"is_logged_in": is_logged_in, "is_logged_in": is_logged_in,
} }

View File

@@ -39,6 +39,8 @@ class WorkerWSClient:
self._ws: Optional[websockets.WebSocketClientProtocol] = None self._ws: Optional[websockets.WebSocketClientProtocol] = None
self._running = False self._running = False
self._reconnect_delay = config.RECONNECT_DELAY self._reconnect_delay = config.RECONNECT_DELAY
self._last_browsers: List[dict] = []
self._heartbeat_count = 0
# ────────────────────────── 主循环 ────────────────────────── # ────────────────────────── 主循环 ──────────────────────────
@@ -93,6 +95,7 @@ class WorkerWSClient:
async def _register(self, ws) -> None: async def _register(self, ws) -> None:
"""发送注册消息。""" """发送注册消息。"""
browsers = self._fetch_browser_list() browsers = self._fetch_browser_list()
self._last_browsers = browsers
msg = make_msg( msg = make_msg(
MsgType.REGISTER, MsgType.REGISTER,
worker_id=self.worker_id, worker_id=self.worker_id,
@@ -112,16 +115,34 @@ class WorkerWSClient:
# ────────────────────────── 心跳 ────────────────────────── # ────────────────────────── 心跳 ──────────────────────────
async def _heartbeat_loop(self, ws) -> None: async def _heartbeat_loop(self, ws) -> None:
"""定期发送心跳。""" """定期发送心跳;每 3 次心跳检查并上报浏览器列表变更"""
while True: while True:
try: try:
await asyncio.sleep(config.HEARTBEAT_INTERVAL) await asyncio.sleep(config.HEARTBEAT_INTERVAL)
msg = make_msg(MsgType.HEARTBEAT, worker_id=self.worker_id) msg = make_msg(MsgType.HEARTBEAT, worker_id=self.worker_id)
await ws.send(json.dumps(msg)) await ws.send(json.dumps(msg))
logger.debug("心跳已发送") logger.debug("心跳已发送")
self._heartbeat_count += 1
if self._heartbeat_count >= 3:
self._heartbeat_count = 0
await self._maybe_send_browser_list_update(ws)
except Exception: except Exception:
break break
async def _maybe_send_browser_list_update(self, ws) -> None:
"""拉取比特浏览器列表,若有变化则发送 BROWSER_LIST_UPDATE。"""
try:
new_list = self._fetch_browser_list()
key = lambda b: (b.get("id", ""), b.get("name", ""))
if sorted(new_list, key=key) != sorted(self._last_browsers, key=key):
self._last_browsers = new_list
msg = make_msg(MsgType.BROWSER_LIST_UPDATE, browsers=new_list)
await ws.send(json.dumps(msg))
logger.info("浏览器列表变更,已上报 %d 个环境", len(new_list))
except Exception as e:
logger.debug("检查浏览器列表变更失败: %s", e)
# ────────────────────────── 消息处理 ────────────────────────── # ────────────────────────── 消息处理 ──────────────────────────
async def _handle_message(self, ws, data: dict) -> None: async def _handle_message(self, ws, data: dict) -> None: