diff --git a/API文档.md b/API文档.md index 2846cae..39d5291 100644 --- a/API文档.md +++ b/API文档.md @@ -268,6 +268,7 @@ Set-Cookie: auth_token=a1b2c3d4e5f6...; HttpOnly; Max-Age=31536000; SameSite=Lax | browser_id | string | 比特浏览器窗口 ID | | browser_name | string | 浏览器环境名称 | | boss_username | string | BOSS 直聘用户名(检测后填充) | +| boss_id | string | BOSS 直聘用户 ID(检测登录成功时填充) | | is_logged_in | boolean | 是否已登录 BOSS | | current_task_id | string/null | 当前关联的任务 ID | | current_task_status | string/null | 当前任务状态:`pending` / `dispatched` / `running` / `success` / `failed` | diff --git a/docs/design-bossid-and-sync.md b/docs/design-bossid-and-sync.md new file mode 100644 index 0000000..fafccdf --- /dev/null +++ b/docs/design-bossid-and-sync.md @@ -0,0 +1,173 @@ +# 设计:BOSS 列表增加 boss_id + 电脑/环境字段同步 + +## 一、需求概述 + +1. **boss_id 字段**:在 BOSS 账号列表中加入 BOSS 直聘用户 ID +2. **电脑/环境同步**:电脑、环境字段以比特浏览器 / BOSS 平台的实际数据为准,而不是仅依赖添加账号时的一次填写 + +--- + +## 二、boss_id 字段设计 + +### 2.1 数据来源 + +BOSS 直聘页面前端将用户 ID 放在 `window._PAGE` 中,可通过: + +```javascript +window._PAGE?.uid +``` + +获取。该对象在用户登录后加载,检测登录任务执行时页面已就绪,可在此阶段提取。 + +### 2.2 数据库变更 + +| 项目 | 说明 | +|------|------| +| 模型 | `BossAccount` | +| 新字段 | `boss_id`,`CharField(max_length=64, default="", blank=True)` | +| 含义 | BOSS 直聘平台用户 ID,检测登录成功时写入 | +| 唯一性 | 不设唯一约束(同一 BOSS 账号可绑定多台电脑/环境) | + +### 2.3 修改点 + +| 文件 | 修改内容 | +|------|----------| +| `server/models.py` | 增加 `boss_id` 字段 | +| `server/migrations/` | 新建迁移 | +| `worker/tasks/check_login.py` | 页面加载后执行 JS 读取 `window._PAGE?.uid`,写入返回值 | +| `server/ws/consumers.py` | `_upsert_account_status` 中增加 `boss_id` 的更新 | +| `server/serializers.py` | `BossAccountSerializer.fields` 中加入 `"boss_id"` | +| `API文档.md` | 补充 `boss_id` 字段说明 | + +### 2.4 check_login 返回值扩展 + +```python +# worker/tasks/check_login.py 返回 +{ + "browser_id": "...", + "browser_name": "...", + "boss_username": "...", + "boss_id": "12345678", # 新增:从 window._PAGE?.uid 读取 + "is_logged_in": True, +} +``` + +### 2.5 提取 boss_id 的 JS 示例 + +```javascript +(function() { + try { + var page = window._PAGE; + if (page && page.uid) return String(page.uid); + } catch (e) {} + return ""; +})(); +``` + +使用 DrissionPage 的 `tab.run_js()` 执行上述脚本即可。 + +--- + +## 三、电脑 / 环境字段同步设计 + +### 3.1 当前数据流 + +| 字段 | 来源 | 更新时机 | +|------|------|----------| +| 电脑 (worker_name) | `worker_manager` 运行时状态 | 每次 GET /api/accounts 时通过 `_enrich` 实时补充 ✓ | +| 环境 (browser_name) | `BossAccount.browser_name` | 添加账号、check_login 完成时写入 | + +电脑字段已经是实时数据,无需改动。 + +### 3.2 环境同步缺口 + +- 用户在比特浏览器中修改环境名称后,`BossAccount.browser_name` 仍为旧值 +- 系统已有 `BROWSER_LIST_UPDATE` 消息类型,但 Worker 目前未发送 + +### 3.3 同步策略 + +以比特浏览器为权威来源,Worker 定期/按需上报最新浏览器列表,Server 据此更新 BossAccount。 + +``` +Worker 拉取 BitBrowser API + ↓ +对比上次上报的 browsers + ↓ +若有变化 → 发送 BROWSER_LIST_UPDATE + ↓ +Server 更新 worker_manager.browsers + ↓ +Server 同步更新 BossAccount (browser_id, browser_name) +``` + +### 3.4 实现要点 + +#### 3.4.1 Worker 端 + +| 文件 | 修改内容 | +|------|----------| +| `worker/ws_client.py` | 在心跳逻辑中周期性拉取比特浏览器列表;若与上次不同,发送 `BROWSER_LIST_UPDATE` | + +建议逻辑: + +- 每 N 次心跳(如每 3 次,约 90 秒)拉取一次 `bit_api.list_browsers()` +- 与上次 `_last_browsers` 比较(按 id+name 或 id 的字典列表) +- 有变化则发送 `browser_list_update`,并更新 `_last_browsers` + +#### 3.4.2 Server 端 + +| 文件 | 修改内容 | +|------|----------| +| `server/ws/consumers.py` | 收到 `BROWSER_LIST_UPDATE` 时,除更新 `worker_manager` 外,同步更新 `BossAccount` | + +更新规则: + +- 该 Worker 下的 BossAccount,按 `browser_id` 匹配 +- 若新列表中有同 id 的项:用新 `name` 更新 `browser_name` +- 若某 `browser_id` 在新列表中不存在:可选保留不更新,或记录日志(环境可能已删除) + +--- + +## 四、实现顺序建议 + +1. **Phase 1:boss_id** + - 模型 + 迁移 + - check_login 中提取并返回 boss_id + - consumers 中 upsert boss_id + - serializer + API 文档 + +2. **Phase 2:环境同步** + - Worker 心跳中增加周期性拉取并发送 BROWSER_LIST_UPDATE + - consumers 中处理 BROWSER_LIST_UPDATE 时同步 BossAccount + +--- + +## 五、API 变更示例 + +### GET /api/accounts 响应新增字段 + +```json +{ + "id": 1, + "worker_id": "pc-a", + "browser_id": "abc123", + "browser_name": "第一个", + "boss_id": "12345678", + "boss_username": "某用户", + "worker_name": "电脑A", + "worker_online": true, + ... +} +``` + +| 字段 | 类型 | 说明 | +|------|------|------| +| boss_id | string | BOSS 直聘用户 ID,检测登录成功时填充 | + +--- + +## 六、注意事项 + +1. **boss_id 为空**:未登录或页面未暴露 uid 时为空字符串,前端需兼容 +2. **环境被删除**:比特浏览器中删除环境后,BossAccount 仍可能保留;可后续增加「环境不存在」的标记或清理逻辑 +3. **window._PAGE 结构**:BOSS 直聘可能改版,建议加 try-catch,取不到时静默返回空 diff --git a/server/manage.py b/server/manage.py index 3e717e5..99f7218 100644 --- a/server/manage.py +++ b/server/manage.py @@ -4,6 +4,11 @@ import os import sys +# 将项目根目录加入路径,确保能 import server +_project_root = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) +if _project_root not in sys.path: + sys.path.insert(0, _project_root) + def main(): os.environ.setdefault("DJANGO_SETTINGS_MODULE", "server.settings") diff --git a/server/migrations/0003_add_boss_id.py b/server/migrations/0003_add_boss_id.py new file mode 100644 index 0000000..3f2a0f4 --- /dev/null +++ b/server/migrations/0003_add_boss_id.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +# Generated manually for boss_id field + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('server', '0002_chatscript_contactrecord_filterconfig_systemconfig'), + ] + + operations = [ + migrations.AddField( + model_name='bossaccount', + name='boss_id', + field=models.CharField(blank=True, default='', max_length=64, verbose_name='BOSS 直聘用户 ID'), + ), + ] diff --git a/server/models.py b/server/models.py index 9147146..1e63695 100644 --- a/server/models.py +++ b/server/models.py @@ -24,6 +24,7 @@ class BossAccount(models.Model): browser_id = models.CharField(max_length=128, default="", verbose_name="比特浏览器窗口 ID") browser_name = models.CharField(max_length=128, default="", verbose_name="比特浏览器窗口名称(环境名)") boss_username = models.CharField(max_length=128, default="", verbose_name="BOSS 直聘用户名") + boss_id = models.CharField(max_length=64, default="", blank=True, verbose_name="BOSS 直聘用户 ID") is_logged_in = models.BooleanField(default=False, verbose_name="是否已登录") current_task_id = models.CharField(max_length=32, null=True, blank=True, verbose_name="当前检测任务 ID") current_task_status = models.CharField(max_length=32, null=True, blank=True, verbose_name="当前检测任务状态") diff --git a/server/serializers.py b/server/serializers.py index dd09b38..5108bbd 100644 --- a/server/serializers.py +++ b/server/serializers.py @@ -19,7 +19,7 @@ class BossAccountSerializer(serializers.ModelSerializer): model = BossAccount fields = [ "id", "worker_id", "browser_id", "browser_name", - "boss_username", "is_logged_in", + "boss_username", "boss_id", "is_logged_in", "current_task_id", "current_task_status", "checked_at", "created_at", "updated_at", "worker_name", "worker_online", diff --git a/server/ws/consumers.py b/server/ws/consumers.py index 7505f7c..86a7095 100644 --- a/server/ws/consumers.py +++ b/server/ws/consumers.py @@ -93,6 +93,10 @@ class WorkerConsumer(AsyncWebsocketConsumer): elif msg_type == MsgType.BROWSER_LIST_UPDATE.value: worker_manager.update_browsers(self.worker_id, data.get("browsers", [])) + try: + self._sync_boss_account_browsers(self.worker_id, data.get("browsers", [])) + except Exception as sync_err: + logger.warning("同步 BossAccount 环境失败: %s", sync_err) elif msg_type == MsgType.TASK_PROGRESS.value: task_id = data.get("task_id", "") @@ -158,6 +162,7 @@ class WorkerConsumer(AsyncWebsocketConsumer): browser_id = result.get("browser_id", "") browser_name = result.get("browser_name", "") boss_username = result.get("boss_username", "") + boss_id = result.get("boss_id", "") is_logged_in = result.get("is_logged_in", False) # 优先按 worker_id + browser_name 匹配 @@ -176,6 +181,8 @@ class WorkerConsumer(AsyncWebsocketConsumer): account.browser_id = browser_id or account.browser_id account.browser_name = browser_name or account.browser_name account.boss_username = boss_username + if boss_id: + account.boss_id = boss_id account.is_logged_in = is_logged_in account.checked_at = now account.save() @@ -185,10 +192,38 @@ class WorkerConsumer(AsyncWebsocketConsumer): browser_id=browser_id or f"name:{browser_name}", browser_name=browser_name, boss_username=boss_username, + boss_id=boss_id, is_logged_in=is_logged_in, checked_at=now, ) logger.info( - "账号状态更新: worker=%s, browser=%s(%s), username=%s, logged_in=%s", - self.worker_id, browser_name, browser_id, boss_username, is_logged_in, + "账号状态更新: worker=%s, browser=%s(%s), username=%s, boss_id=%s, logged_in=%s", + self.worker_id, browser_name, browser_id, boss_username, boss_id, is_logged_in, ) + + @staticmethod + def _sync_boss_account_browsers(worker_id: str, browsers: list) -> None: + """根据 Worker 上报的浏览器列表,同步更新 BossAccount 的 browser_id、browser_name。""" + from server.models import BossAccount + + if not browsers: + return + browser_map = {str(b.get("id", "")).strip(): b for b in browsers if b.get("id")} + if not browser_map: + return + + accounts = BossAccount.objects.filter(worker_id=worker_id) + updated = 0 + for acc in accounts: + bid = (acc.browser_id or "").strip() + if not bid: + continue + new_info = browser_map.get(bid) + if new_info: + new_name = (new_info.get("name") or "").strip() + if new_name and new_name != acc.browser_name: + acc.browser_name = new_name + acc.save(update_fields=["browser_name"]) + updated += 1 + if updated: + logger.info("BossAccount 环境同步: worker=%s, 更新 %d 条", worker_id, updated) diff --git a/worker/tasks/check_login.py b/worker/tasks/check_login.py index 7425d20..88fafb7 100644 --- a/worker/tasks/check_login.py +++ b/worker/tasks/check_login.py @@ -115,6 +115,7 @@ class CheckLoginHandler(BaseTaskHandler): # ── 5. 查找 .user-name 元素,判断是否已登录 ── boss_username = "" + boss_id = "" is_logged_in = False try: @@ -123,6 +124,17 @@ class CheckLoginHandler(BaseTaskHandler): boss_username = user_name_ele.text.strip() is_logged_in = bool(boss_username) self.logger.info("环境 %s 已登录: %s", browser_name, boss_username) + + # 提取 BOSS 直聘用户 ID(window._PAGE?.uid) + try: + uid_result = tab.run_js( + "(function(){try{var p=window._PAGE;if(p&&p.uid)return String(p.uid);}catch(e){}return '';})()" + ) + if uid_result is not None and str(uid_result).strip(): + boss_id = str(uid_result).strip() + self.logger.info("环境 %s boss_id=%s", browser_name, boss_id) + except Exception as js_err: + self.logger.debug("提取 boss_id 失败: %s", js_err) else: self.logger.info("环境 %s 未检测到 .user-name,账号未登录", browser_name) except Exception as e: @@ -132,5 +144,6 @@ class CheckLoginHandler(BaseTaskHandler): "browser_id": browser_id, "browser_name": browser_name, "boss_username": boss_username, + "boss_id": boss_id, "is_logged_in": is_logged_in, } diff --git a/worker/ws_client.py b/worker/ws_client.py index 1c0daed..a88a807 100644 --- a/worker/ws_client.py +++ b/worker/ws_client.py @@ -39,6 +39,8 @@ class WorkerWSClient: self._ws: Optional[websockets.WebSocketClientProtocol] = None self._running = False self._reconnect_delay = config.RECONNECT_DELAY + self._last_browsers: List[dict] = [] + self._heartbeat_count = 0 # ────────────────────────── 主循环 ────────────────────────── @@ -93,6 +95,7 @@ class WorkerWSClient: async def _register(self, ws) -> None: """发送注册消息。""" browsers = self._fetch_browser_list() + self._last_browsers = browsers msg = make_msg( MsgType.REGISTER, worker_id=self.worker_id, @@ -112,16 +115,34 @@ class WorkerWSClient: # ────────────────────────── 心跳 ────────────────────────── async def _heartbeat_loop(self, ws) -> None: - """定期发送心跳。""" + """定期发送心跳;每 3 次心跳检查并上报浏览器列表变更。""" while True: try: await asyncio.sleep(config.HEARTBEAT_INTERVAL) msg = make_msg(MsgType.HEARTBEAT, worker_id=self.worker_id) await ws.send(json.dumps(msg)) logger.debug("心跳已发送") + + self._heartbeat_count += 1 + if self._heartbeat_count >= 3: + self._heartbeat_count = 0 + await self._maybe_send_browser_list_update(ws) except Exception: break + async def _maybe_send_browser_list_update(self, ws) -> None: + """拉取比特浏览器列表,若有变化则发送 BROWSER_LIST_UPDATE。""" + try: + new_list = self._fetch_browser_list() + key = lambda b: (b.get("id", ""), b.get("name", "")) + if sorted(new_list, key=key) != sorted(self._last_browsers, key=key): + self._last_browsers = new_list + msg = make_msg(MsgType.BROWSER_LIST_UPDATE, browsers=new_list) + await ws.send(json.dumps(msg)) + logger.info("浏览器列表变更,已上报 %d 个环境", len(new_list)) + except Exception as e: + logger.debug("检查浏览器列表变更失败: %s", e) + # ────────────────────────── 消息处理 ────────────────────────── async def _handle_message(self, ws, data: dict) -> None: