66 lines
2.0 KiB
Python
66 lines
2.0 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
使用XPath语法统计HTML文件中"视频上传成功"文本的出现次数
|
||
"""
|
||
|
||
from lxml import etree
|
||
import html
|
||
|
||
def count_text_with_xpath(html_file, search_text):
    """Count occurrences of ``search_text`` in the text nodes of an HTML file.

    The file is read as UTF-8, parsed with lxml's HTML parser, and every text
    node containing ``search_text`` is selected with XPath. For each matching
    node a short preview line is printed to stdout.

    Args:
        html_file: Path of the HTML file to scan.
        search_text: Literal text to look for. It may contain quote
            characters; it is passed to XPath as a variable, never spliced
            into the expression.

    Returns:
        A ``(total_count, node_count)`` tuple: the total number of
        non-overlapping occurrences across all matching text nodes, and the
        number of text nodes that contain the text at least once.
        An empty ``search_text`` yields ``(0, 0)`` without reading the file.
    """
    # An empty needle "matches" every text node and str.count('') returns
    # len + 1, so there is no meaningful occurrence count to report.
    if not search_text:
        return 0, 0

    # Read the HTML file (decoding validates that it really is UTF-8).
    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # Parse the HTML. The bytes handed to the parser are known to be UTF-8,
    # so say so explicitly instead of letting the parser guess or follow a
    # possibly conflicting <meta charset> declaration.
    parser = etree.HTMLParser(encoding='utf-8')
    tree = etree.fromstring(html_content.encode('utf-8'), parser)
    if tree is None:
        # NOTE(review): some lxml versions appear to return None for a
        # document without any element; treat that as "nothing found".
        return 0, 0

    # Select every text node containing the needle. Using an XPath variable
    # ($needle) keeps quotes in search_text from breaking the expression.
    results = tree.xpath("//text()[contains(., $needle)]", needle=search_text)

    # A single text node may contain the needle several times, so count
    # occurrences per node rather than just counting nodes.
    total_count = 0
    for text_node in results:
        count = text_node.count(search_text)
        total_count += count
        print(f"找到文本节点,内容片段: ...{text_node[:100]}... (包含 {count} 次)")

    return total_count, len(results)
|
||
|
||
if __name__ == "__main__":
    import sys

    # Optional command-line overrides; with no arguments the script behaves
    # exactly as before:
    #   argv[1] -> path of the HTML file   (default "1.html")
    #   argv[2] -> text to search for      (default "视频上传成功")
    html_file = sys.argv[1] if len(sys.argv) > 1 else "1.html"
    search_text = sys.argv[2] if len(sys.argv) > 2 else "视频上传成功"

    print(f"正在使用XPath查找文本: '{search_text}'")
    print("=" * 60)

    total_count, node_count = count_text_with_xpath(html_file, search_text)

    # Summary of the XPath query.
    print("=" * 60)
    print("XPath查询结果:")
    print(f" - 包含该文本的文本节点数量: {node_count}")
    print(f" - 文本总出现次数: {total_count}")

    # Also show a few alternative XPath expressions for reference; these are
    # only printed, not executed.
    print("\n" + "=" * 60)
    print("其他可用的XPath表达式示例:")
    print(f" 1. //text()[contains(., '{search_text}')]")
    print(f" 2. //*[contains(text(), '{search_text}')]")
    print(f" 3. //*[normalize-space(text())='{search_text}']")
|