Files
boss_dp/scripts/test_recruit_features.py
ddrwode 7b351039f8 haha
2026-03-06 10:47:46 +08:00

160 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
功能验证脚本:测试消息过滤与联系方式提取功能
"""
import os
import sys
from datetime import datetime, timedelta
# Add the project path: put the directory one level above this script's own
# folder at the front of sys.path.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# 测试时间解析功能
def test_time_parsing(now=None):
    """Resolve sample "last active" labels to calendar dates and print them.

    Each sample is either a relative word ("昨天", "今天", "刚刚") or a
    year-less "MM月DD日" date.  A year-less date is resolved to its most
    recent occurrence that is not later than the reference day.

    Args:
        now: Reference moment treated as "today".  Defaults to
            ``datetime.now()``; pass a fixed value for reproducible output.

    Raises:
        AssertionError: If a sample label cannot be resolved to a date.
    """
    import re

    reference = datetime.now() if now is None else now
    # Compiled once instead of re-importing/re-parsing on every iteration.
    month_day_pattern = re.compile(r"(\d+)月(\d+)日")

    def resolve(label):
        """Return the datetime that *label* refers to, or None if unknown."""
        if "昨天" in label:
            return reference - timedelta(days=1)
        if "今天" in label or "刚刚" in label:
            return reference

        found = month_day_pattern.search(label)
        if found is None:
            return None
        month, day = int(found.group(1)), int(found.group(2))

        # The label has no year.  Compare whole dates, not just months: a
        # later day inside the current month also belongs to last year.
        for year in (reference.year, reference.year - 1):
            try:
                candidate = datetime(year, month, day)
            except ValueError:
                # Impossible date for this year (e.g. 02月29日, 13月01日).
                continue
            if candidate.date() <= reference.date():
                return candidate
        return None

    print("=" * 60)
    print("测试时间解析功能")
    print("=" * 60)

    test_cases = [
        ("03月03日", "应该解析为今年或去年的3月3日"),
        ("昨天", "应该解析为昨天"),
        ("今天", "应该解析为今天"),
        ("刚刚", "应该解析为今天"),
        ("12月25日", "应该解析为去年或今年的12月25日"),
    ]

    for time_str, expected in test_cases:
        print(f"\n输入: {time_str}")
        print(f"期望: {expected}")

        last_active = resolve(time_str)
        if last_active is None:
            # Do not report success for a label that was never resolved.
            raise AssertionError(f"无法解析时间: {time_str}")

        print(f"解析结果: {last_active.strftime('%Y-%m-%d')}")
        print("[OK] 通过")
# 测试消息过滤功能
def test_message_filtering():
    """Filter a sample conversation down to the other party's messages.

    A message counts as sent by the other party when its ``fromId`` is 0 (or
    absent); any other ``fromId`` is treated as sent by ourselves.  The
    conversation is printed before and after filtering, and the result is
    verified before success is reported.

    Raises:
        AssertionError: If the filtered list differs from the expected one.
    """

    def is_from_peer(msg):
        """Return True when *msg* was sent by the other party."""
        return msg.get("fromId", 0) == 0

    def text_of(msg):
        """Return the message text, tolerating a missing or None body."""
        return (msg.get("body") or {}).get("text", "")

    print("\n" + "=" * 60)
    print("测试消息过滤功能")
    print("=" * 60)

    # Simulated conversation.
    messages = [
        # sent by the other party
        {"fromId": 0, "body": {"text": "你好,我的微信是 wx123456"}},
        # sent by ourselves
        {"fromId": 12345, "body": {"text": "后续沟通会更及时,您方便留一下您的微信号吗?"}},
        # sent by the other party
        {"fromId": 0, "body": {"text": "好的,我的微信号是 test_wx_001"}},
        # sent by ourselves (must be filtered out)
        {"fromId": 12345, "body": {"text": "我的微信是 my_wechat"}},
    ]
    expected_texts = [
        "你好,我的微信是 wx123456",
        "好的,我的微信号是 test_wx_001",
    ]

    print("\n原始消息列表:")
    for i, msg in enumerate(messages, 1):
        sender = "对方" if is_from_peer(msg) else "自己"
        print(f" {i}. [{sender}] {text_of(msg)}")

    # Keep only what the other party wrote.
    filtered = [msg for msg in messages if is_from_peer(msg)]

    print("\n过滤后的消息列表(只保留对方的消息):")
    for i, msg in enumerate(filtered, 1):
        print(f" {i}. [对方] {text_of(msg)}")

    # Verify before claiming success; previously "[OK]" was unconditional.
    kept_texts = [text_of(msg) for msg in filtered]
    if kept_texts != expected_texts:
        raise AssertionError(f"过滤结果不符合预期: {kept_texts}")

    print(f"\n[OK] 过滤前: {len(messages)} 条消息")
    print(f"[OK] 过滤后: {len(filtered)} 条消息")
    print(f"[OK] 成功过滤掉 {len(messages) - len(filtered)} 条自己发送的消息")
# 测试联系方式提取
def test_contact_extraction():
    """Extract WeChat IDs and mobile numbers from sample texts and verify them.

    For every sample the extracted values are printed and compared with the
    expected ones, so a regression in the patterns makes the check fail
    instead of silently printing a wrong value.

    Raises:
        AssertionError: If an extraction result differs from the expectation.
    """
    import re

    print("\n" + "=" * 60)
    print("测试联系方式提取功能")
    print("=" * 60)

    wechat_patterns = [
        # "微信" / "微信号", optionally followed by "是"/"为" and an ASCII or
        # full-width colon, then the ID ("微信号是 wx123456", "微信:abc_123").
        re.compile(
            r"微信号?[::\s]*[是为]?[::\s]*([a-zA-Z0-9_\-]{6,20})",
            re.IGNORECASE,
        ),
        # "wx" used as a label with an explicit separator ("wx: abc_123").
        re.compile(
            r"(?<![a-zA-Z0-9_\-])wx[::\s]+([a-zA-Z0-9_\-]{6,20})",
            re.IGNORECASE,
        ),
        # A bare ID that itself starts with "wx": keep the prefix.  The
        # look-behind stops it from slicing the middle of another ID.
        re.compile(
            r"(?<![a-zA-Z0-9_\-])(wx[a-zA-Z0-9_\-]{4,18})",
            re.IGNORECASE,
        ),
    ]
    # 11-digit mainland mobile number, optional +86 prefix, optional space or
    # hyphen separators; digit boundaries avoid matching inside longer runs.
    phone_pattern = re.compile(
        r"(?<!\d)(?:\+?86[\s-]?)?(1[3-9]\d(?:[\s-]?\d){8})(?!\d)"
    )

    def extract_wechat(text):
        """Return the first WeChat ID found in *text*, or None."""
        for pattern in wechat_patterns:
            found = pattern.search(text)
            if found:
                return found.group(1)
        return None

    def extract_phone(text):
        """Return the first mobile number in *text* as bare digits, or None."""
        found = phone_pattern.search(text)
        if found is None:
            return None
        return re.sub(r"[\s-]", "", found.group(1))

    # (text, expected WeChat ID, expected phone number)
    samples = [
        ("我的微信号是 wx123456", "wx123456", None),
        ("微信test_wechat_001", "test_wechat_001", None),
        ("手机号13812345678", None, "13812345678"),
        ("你可以加我微信 hello_world_123", "hello_world_123", None),
        ("我的电话是 138-1234-5678", None, "13812345678"),
        # A question that mentions WeChat must not yield an ID.
        ("后续沟通会更及时,您方便留一下您的微信号吗?", None, None),
    ]

    for text, expected_wechat, expected_phone in samples:
        print(f"\n文本: {text}")
        wechat = extract_wechat(text)
        phone = extract_phone(text)

        if wechat:
            print(f" [OK] 提取到微信: {wechat}")
        if phone:
            print(f" [OK] 提取到手机: {phone}")
        if not wechat and not phone:
            print(" [-] 未提取到联系方式")

        if (wechat, phone) != (expected_wechat, expected_phone):
            raise AssertionError(
                f"提取结果不符合预期: {text!r} -> "
                f"微信={wechat!r}, 手机={phone!r}"
            )
def main():
    """Run every feature check in order and report an exit status.

    Returns:
        int: 0 when all checks finish, 1 if any of them raised (the error
        and its traceback are printed).
    """
    rule = "=" * 60

    print(f"\n{rule}")
    print("BOSS招聘自动化 - 功能验证")
    print(rule)

    try:
        checks = (
            test_time_parsing,
            test_message_filtering,
            test_contact_extraction,
        )
        for check in checks:
            check()

        print(f"\n{rule}")
        print("所有测试完成!")
        print(rule)
    except Exception as exc:
        # Top-level boundary: report the failure and turn it into a status.
        print(f"\n[ERROR] 测试失败: {exc}")
        import traceback

        traceback.print_exc()
        return 1

    return 0
# Script entry point: use main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())