# boss_dp/scripts/test_recruit_features.py

# -*- coding: utf-8 -*-
"""
功能验证脚本：测试消息过滤与联系方式提取功能
"""

import os
import sys
from datetime import datetime, timedelta

# Add the project path: prepend the parent of the directory that contains this
# script to sys.path, so it is searched first by subsequent imports.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Time-parsing check
def test_time_parsing(now=None):
    """Print how sample "last active" labels resolve to calendar dates.

    Each sample label is resolved relative to ``now`` and the resulting date
    is printed as ``YYYY-MM-DD``. A label that cannot be resolved is reported
    with a ``[FAIL]`` line instead of being silently counted as a pass.

    Args:
        now: Reference ``datetime`` used as "the current moment". Defaults to
            ``datetime.now()``; pass a fixed value for reproducible output.

    Returns:
        None. All results are written to stdout.
    """
    if now is None:
        now = datetime.now()

    print("=" * 60)
    print("测试时间解析功能")
    print("=" * 60)

    test_cases = [
        ("03月03日", "应该解析为今年或去年的3月3日"),
        ("昨天", "应该解析为昨天"),
        ("今天", "应该解析为今天"),
        ("刚刚", "应该解析为今天"),
        ("12月25日", "应该解析为去年或今年的12月25日"),
    ]

    for time_str, expected in test_cases:
        print(f"\n输入: {time_str}")
        print(f"期望: {expected}")

        last_active = _parse_last_active(time_str, now)
        if last_active is None:
            # Do not claim success for a label that produced no date.
            print("[FAIL] 无法解析")
            continue

        print(f"解析结果: {last_active.strftime('%Y-%m-%d')}")
        print("[OK] 通过")


def _parse_last_active(time_str, now):
    """Resolve a relative or month/day label to a datetime not after ``now``.

    Args:
        time_str: Label such as "昨天", "今天", "刚刚" or "MM月DD日".
        now: Reference ``datetime``.

    Returns:
        The resolved ``datetime``, or ``None`` when the label is not
        recognised or names a date that does not exist in either the current
        or the previous year (e.g. "02月30日").
    """
    import re

    if "昨天" in time_str:
        return now - timedelta(days=1)
    if "今天" in time_str or "刚刚" in time_str:
        return now

    match = re.search(r"(\d+)月(\d+)日", time_str)
    if match is None:
        return None

    month = int(match.group(1))
    day = int(match.group(2))
    today = now.date()
    # A month/day label carries no year. Prefer the current year and fall back
    # to the previous one when the date would lie in the future; comparing the
    # full date (not just the month) also covers later days of this month.
    for year in (now.year, now.year - 1):
        try:
            candidate = datetime(year, month, day)
        except ValueError:
            # Not a real calendar date in this year (e.g. Feb 29).
            continue
        if candidate.date() <= today:
            return candidate
    return None


# Message-filtering check
def test_message_filtering():
    """Print a sample conversation before and after dropping own messages.

    A message counts as sent by the other party when its ``fromId`` is 0 (or
    missing); every other message is treated as sent by ourselves and is
    removed by the filter. Returns None; all output goes to stdout.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("测试消息过滤功能")
    print(divider)

    def text_of(message):
        # Message text lives under body.text; both levels may be absent.
        return message.get("body", {}).get("text", "")

    def sent_by_peer(message):
        return message.get("fromId", 0) == 0

    # Sample conversation: fromId 0 is the other party, 12345 is ourselves.
    conversation = [
        {"fromId": 0, "body": {"text": "你好，我的微信是 wx123456"}},
        {"fromId": 12345, "body": {"text": "后续沟通会更及时，您方便留一下您的微信号吗？"}},
        {"fromId": 0, "body": {"text": "好的，我的微信号是 test_wx_001"}},
        {"fromId": 12345, "body": {"text": "我的微信是 my_wechat"}},
    ]

    print("\n原始消息列表:")
    for index, message in enumerate(conversation, 1):
        label = "对方" if sent_by_peer(message) else "自己"
        print(f"  {index}. [{label}] {text_of(message)}")

    # Keep only what the other party sent.
    peer_messages = list(filter(sent_by_peer, conversation))

    print("\n过滤后的消息列表（只保留对方的消息）:")
    for index, message in enumerate(peer_messages, 1):
        print(f"  {index}. [对方] {text_of(message)}")

    total = len(conversation)
    kept = len(peer_messages)
    print(f"\n[OK] 过滤前: {total} 条消息")
    print(f"[OK] 过滤后: {kept} 条消息")
    print(f"[OK] 成功过滤掉 {total - kept} 条自己发送的消息")


# Contact-extraction check
def test_contact_extraction():
    """Print the WeChat ID and phone number extracted from sample chat texts.

    For every sample text the extracted WeChat ID and/or mobile number is
    printed; a text yielding neither is reported as having no contact info.

    The WeChat patterns accept an optional "是"/"为" connector after the
    keyword, so phrasing like "我的微信号是 wx123456" yields the whole ID
    ("wx123456") rather than falling through to the bare ``wx`` fallback,
    which would strip the "wx" prefix.

    Returns:
        None. All results are written to stdout.
    """
    import re

    print("\n" + "=" * 60)
    print("测试联系方式提取功能")
    print("=" * 60)

    test_texts = [
        "我的微信号是 wx123456",
        "微信：test_wechat_001",
        "手机号：13812345678",
        "你可以加我微信 hello_world_123",
        "我的电话是 138-1234-5678",
        "后续沟通会更及时，您方便留一下您的微信号吗？",  # nothing should be extracted
    ]

    # Compiled once and tried in order: the most specific keyword first, the
    # bare "wx" abbreviation last.
    wechat_patterns = [
        re.compile(r"微信号\s*(?:是|为)?[：:\s]*([a-zA-Z0-9_\-]{6,20})", re.IGNORECASE),
        re.compile(r"微信\s*(?:是|为)?[：:\s]*([a-zA-Z0-9_\-]{6,20})", re.IGNORECASE),
        re.compile(r"wx[：:\s]*([a-zA-Z0-9_\-]{6,20})", re.IGNORECASE),
    ]
    phone_pattern = re.compile(r"1[3-9]\d{9}")

    def extract_wechat(text):
        """Return the first WeChat ID found in ``text``, or None."""
        for pattern in wechat_patterns:
            match = pattern.search(text)
            if match:
                return match.group(1)
        return None

    def extract_phone(text):
        """Return the first 11-digit mobile number in ``text``, or None."""
        # Drop hyphens and spaces first so "138-1234-5678" is recognised.
        match = phone_pattern.search(text.replace("-", "").replace(" ", ""))
        return match.group(0) if match else None

    for text in test_texts:
        print(f"\n文本: {text}")
        wechat = extract_wechat(text)
        phone = extract_phone(text)

        if wechat:
            print(f"  [OK] 提取到微信: {wechat}")
        if phone:
            print(f"  [OK] 提取到手机: {phone}")
        if not wechat and not phone:
            print("  [-] 未提取到联系方式")


def main():
    """Run every feature check in order and return a process exit code.

    Returns:
        0 when all checks finish without raising, 1 when any of them raises
        (the error message and traceback are printed first).
    """
    banner = "=" * 60
    print("\n" + banner)
    print("BOSS招聘自动化 - 功能验证")
    print(banner)

    try:
        checks = (
            test_time_parsing,
            test_message_filtering,
            test_contact_extraction,
        )
        for run_check in checks:
            run_check()

        print("\n" + banner)
        print("所有测试完成！")
        print(banner)
    except Exception as exc:
        # Top-level boundary: report the failure and signal it via exit code.
        import traceback

        print(f"\n[ERROR] 测试失败: {exc}")
        traceback.print_exc()
        return 1
    else:
        return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())