# codex_jxs_code/find_captcha.py

"""
验证码图片匹配脚本
在 1.jpg 中找到 2.png、3.png、4.png 的位置
"""
import cv2
import numpy as np
import os

def find_template_in_image(main_img_path, template_path, threshold=0.5):
    """Locate a template image inside a main image using multi-scale matching.

    The template is tried at its original size and at 17 evenly spaced
    scales between 0.6 and 1.4. The single highest-scoring placement
    (``cv2.TM_CCOEFF_NORMED``) across all scales is kept.

    Args:
        main_img_path: Path of the image to search in.
        template_path: Path of the template image. It is read with
            ``cv2.IMREAD_UNCHANGED`` so an alpha channel is preserved; when
            a 4-channel template has all-zero colour channels, the alpha
            channel is used as the template's grayscale shape.
        threshold: Minimum score the best match must reach to be reported.

    Returns:
        ``[]`` when either image cannot be read, when no scaled template
        fits inside the main image, or when the best score is below
        ``threshold``. Otherwise a one-element list holding a dict with the
        keys ``x``, ``y`` (top-left corner), ``w``, ``h`` (scaled template
        size), ``confidence``, ``scale``, ``center_x`` and ``center_y``.
    """
    main_img = cv2.imread(main_img_path)
    template = cv2.imread(template_path, cv2.IMREAD_UNCHANGED)

    if main_img is None:
        print(f"无法读取主图: {main_img_path}")
        return []
    if template is None:
        print(f"无法读取模板: {template_path}")
        return []

    # IMREAD_UNCHANGED keeps the file's bit depth, but matchTemplate needs the
    # template to share the 8-bit type of the main image, so reduce 16-bit
    # data to its high byte.
    if template.dtype == np.uint16:
        template = (template >> 8).astype(np.uint8)

    h, w = template.shape[:2]
    main_gray = cv2.cvtColor(main_img, cv2.COLOR_BGR2GRAY)
    main_h, main_w = main_gray.shape[:2]

    if template.ndim == 2:
        # Single-channel template: already grayscale and has no shape[2].
        tmpl_gray = template
    elif template.shape[2] == 4:
        if not template[:, :, :3].any():
            # Colour channels are entirely black: the alpha channel is the
            # only place the shape is stored.
            tmpl_gray = template[:, :, 3]
        else:
            tmpl_gray = cv2.cvtColor(template[:, :, :3], cv2.COLOR_BGR2GRAY)
    else:
        tmpl_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    tmpl_gray = np.ascontiguousarray(tmpl_gray)

    best = None
    tried_sizes = set()

    # Original size first, then the scale sweep.
    scales = [1.0] + list(np.linspace(0.6, 1.4, 17))
    for scale in scales:
        rw, rh = int(w * scale), int(h * scale)
        if rw < 5 or rh < 5:
            continue
        if rh > main_h or rw > main_w:
            continue
        # Different scales can truncate to the same pixel size (1.0 is in the
        # list twice). The score would be identical and the strict ">" below
        # keeps the first one, so skip the repeated work.
        if (rw, rh) in tried_sizes:
            continue
        tried_sizes.add((rw, rh))

        resized_tmpl = cv2.resize(tmpl_gray, (rw, rh))
        res = cv2.matchTemplate(main_gray, resized_tmpl, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(res)

        # A non-finite score would win every later comparison by default
        # (x > nan is False) and also slip past the threshold check, so it
        # must never become the best candidate.
        if not np.isfinite(max_val):
            continue

        if best is None or max_val > best["confidence"]:
            best = {
                "x": max_loc[0],
                "y": max_loc[1],
                "w": rw,
                "h": rh,
                "confidence": max_val,
                "scale": scale,
                "center_x": max_loc[0] + rw // 2,
                "center_y": max_loc[1] + rh // 2,
            }

    if best is None or best["confidence"] < threshold:
        return []
    return [best]


def main():
    """Match the captcha templates against the main image and report results.

    Looks in the ``images`` directory next to this script for ``1.jpg`` and
    the templates ``2.png``, ``3.png`` and ``4.png``. For each template the
    best match is printed. An annotated copy of the main image is written to
    ``images/result.jpg`` when the main image can be read, and a summary of
    the click coordinates (match centres) is printed at the end.
    """
    base_dir = os.path.join(os.path.dirname(__file__), "images")
    main_img_path = os.path.join(base_dir, "1.jpg")
    templates = ["2.png", "3.png", "4.png"]

    print(f"主图: {main_img_path}\n")

    all_results = {}
    for tmpl_name in templates:
        tmpl_path = os.path.join(base_dir, tmpl_name)
        matches = find_template_in_image(main_img_path, tmpl_path, threshold=0.5)
        all_results[tmpl_name] = matches

        if matches:
            m = matches[0]
            print(f"[{tmpl_name}] 找到匹配:")
            print(f"  位置: ({m['x']}, {m['y']})")
            print(f"  中心点: ({m['center_x']}, {m['center_y']})")
            print(f"  尺寸: {m['w']}x{m['h']}")
            print(f"  置信度: {m['confidence']:.4f}")
            print(f"  缩放比例: {m['scale']:.2f}")
        else:
            print(f"[{tmpl_name}] 未找到匹配（置信度不足）")
        print()

    # Visualise the results and save an annotated image.
    main_img = cv2.imread(main_img_path)
    if main_img is None:
        # imread returns None on failure; passing that to imwrite would raise
        # and abort before the summary below is printed.
        print(f"无法读取主图，跳过标注: {main_img_path}")
    else:
        colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]  # BGR: red, green, blue
        for i, tmpl_name in enumerate(templates):
            color = colors[i % len(colors)]
            for m in all_results.get(tmpl_name, []):
                x, y, w, h = m["x"], m["y"], m["w"], m["h"]
                cv2.rectangle(main_img, (x, y), (x + w, y + h), color, 2)
                # Keep the label baseline inside the image when the match
                # touches the top edge.
                label_y = max(y - 5, 15)
                cv2.putText(main_img, tmpl_name, (x, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
                # Mark the centre point.
                cx, cy = m["center_x"], m["center_y"]
                cv2.circle(main_img, (cx, cy), 5, color, -1)

        output_path = os.path.join(base_dir, "result.jpg")
        # imwrite reports failure through its return value, not an exception.
        if cv2.imwrite(output_path, main_img):
            print(f"标注结果已保存到: {output_path}")
        else:
            print(f"标注结果保存失败: {output_path}")

    # Print the click-coordinate summary (for automated clicking).
    print("\n=== 点击坐标汇总 ===")
    for tmpl_name in templates:
        matches = all_results.get(tmpl_name, [])
        if matches:
            m = matches[0]
            print(f"{tmpl_name}: 点击 ({m['center_x']}, {m['center_y']})")
        else:
            print(f"{tmpl_name}: 未找到")

if __name__ == "__main__":
    main()