Files
haha/count_text_xpath.py

66 lines
2.0 KiB
Python
Raw Normal View History

2026-01-26 22:22:36 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
使用XPath语法统计HTML文件中"视频上传成功"文本的出现次数
"""
from lxml import etree
import html
def count_text_with_xpath(html_file, search_text):
    """
    Count occurrences of a literal text in the text nodes of an HTML file.

    The file is read as UTF-8, parsed with lxml's HTML parser, and every
    text node containing ``search_text`` is selected with XPath. A short
    preview of each matching node is printed to stdout.

    Args:
        html_file: Path of the HTML file to scan (must be UTF-8 encoded).
        search_text: Literal text to look for. It may contain quote
            characters; it is passed to XPath as a variable, not spliced
            into the expression.

    Returns:
        A tuple ``(total_count, node_count)`` where ``total_count`` is the
        number of non-overlapping occurrences of ``search_text`` across all
        matching text nodes and ``node_count`` is the number of text nodes
        that contain it. ``(0, 0)`` is returned for an empty ``search_text``.
    """
    # An empty needle "matches" every text node and str.count("") returns
    # len + 1, so the totals would be meaningless; report no matches instead.
    if not search_text:
        return 0, 0

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # The bytes handed to the parser are known to be UTF-8, so say so
    # explicitly instead of letting libxml2 guess the charset (which can
    # garble non-ASCII text when the document declares no encoding).
    parser = etree.HTMLParser(encoding='utf-8')
    tree = etree.fromstring(html_content.encode('utf-8'), parser)
    # NOTE(review): depending on the lxml version, input without any
    # parsable content may yield None instead of a root element.
    if tree is None:
        return 0, 0

    # Bind the search text as an XPath variable. Building the expression
    # with string formatting breaks (or changes meaning) as soon as the
    # text contains a single quote.
    results = tree.xpath("//text()[contains(., $needle)]", needle=search_text)

    # A single text node may contain the search text more than once.
    total_count = 0
    for text_node in results:
        count = text_node.count(search_text)
        total_count += count
        print(f"找到文本节点,内容片段: ...{text_node[:100]}... (包含 {count} 次)")
    return total_count, len(results)
if __name__ == "__main__":
    # Script entry point: scan a fixed file for a fixed phrase and print
    # a summary of what count_text_with_xpath found.
    divider = "=" * 60
    source_path = "1.html"
    needle = "视频上传成功"

    print(f"正在使用XPath查找文本: '{needle}'")
    print(divider)
    outcome = count_text_with_xpath(source_path, needle)
    total_hits, matching_nodes = outcome
    print(divider)

    print("XPath查询结果:")
    print(f" - 包含该文本的文本节点数量: {matching_nodes}")
    print(f" - 文本总出现次数: {total_hits}")

    # Also list a few alternative XPath expressions for reference.
    print("\n" + divider)
    print("其他可用的XPath表达式示例:")
    sample_expressions = (
        f"//text()[contains(., '{needle}')]",
        f"//*[contains(text(), '{needle}')]",
        f"//*[normalize-space(text())='{needle}']",
    )
    for position, expression in enumerate(sample_expressions, start=1):
        print(f" {position}. {expression}")