Files
haha/count_text_xpath.py
27942 52e53706c7 gui
第一版完整版
2026-01-26 22:22:36 +08:00

66 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
使用XPath语法统计HTML文件中"视频上传成功"文本的出现次数
"""
from lxml import etree
import html
def count_text_with_xpath(html_file, search_text):
    """
    Count occurrences of ``search_text`` in the text nodes of an HTML file.

    The file is read as UTF-8, parsed with lxml's HTML parser, and every
    text node that contains ``search_text`` is located with an XPath query.
    A short preview (first 100 characters) of each matching text node is
    printed to stdout together with its per-node occurrence count.

    Args:
        html_file: Path of the HTML file to scan.
        search_text: The text to search for.

    Returns:
        A tuple ``(total_count, node_count)`` where ``total_count`` is the
        number of non-overlapping occurrences summed over all matching text
        nodes and ``node_count`` is the number of text nodes that contain
        the text at least once. An empty ``search_text`` yields ``(0, 0)``.
    """
    # An empty needle matches every text node in XPath's contains(), and
    # str.count('') returns len + 1, so the totals would be meaningless.
    if not search_text:
        return 0, 0

    # Read the HTML file.
    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # The bytes handed to the parser are always UTF-8 because they are
    # encoded right here. Declaring that explicitly keeps the parser from
    # guessing a legacy encoding when the page has no <meta charset>, which
    # would garble non-ASCII text and make the search miss.
    parser = etree.HTMLParser(encoding='utf-8')
    tree = etree.fromstring(html_content.encode('utf-8'), parser)

    # Bind the needle as an XPath variable instead of splicing it into the
    # expression, so quotes inside search_text cannot break or alter the
    # query.
    results = tree.xpath("//text()[contains(., $needle)]", needle=search_text)

    # A single text node may contain the text more than once, so count
    # occurrences inside each node rather than just counting nodes.
    total_count = 0
    for text_node in results:
        count = text_node.count(search_text)
        total_count += count
        print(f"找到文本节点,内容片段: ...{text_node[:100]}... (包含 {count} 次)")
    return total_count, len(results)
if __name__ == "__main__":
    # Script entry point: scan a fixed HTML file for a fixed phrase and
    # print a summary of what the XPath search found.
    html_file = "1.html"
    search_text = "视频上传成功"
    separator = "=" * 60

    print(f"正在使用XPath查找文本: '{search_text}'")
    print(separator)
    counts = count_text_with_xpath(html_file, search_text)
    total_count, node_count = counts
    print(separator)

    print("XPath查询结果:")
    print(f" - 包含该文本的文本节点数量: {node_count}")
    print(f" - 文本总出现次数: {total_count}")

    # Also list a few alternative XPath expressions for reference.
    example_expressions = (
        f"//text()[contains(., '{search_text}')]",
        f"//*[contains(text(), '{search_text}')]",
        f"//*[normalize-space(text())='{search_text}']",
    )
    print(f"\n{separator}")
    print("其他可用的XPath表达式示例:")
    for index, expression in enumerate(example_expressions, start=1):
        print(f" {index}. {expression}")