#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count how often a piece of text occurs in an HTML file, using XPath.

When run as a script, the file ``1.html`` is searched for the text
"视频上传成功" and a short report is printed.
"""

import html

from lxml import etree


def count_text_with_xpath(html_file, search_text):
    """Count occurrences of ``search_text`` in the text nodes of an HTML file.

    The file is read as UTF-8, parsed with lxml's HTML parser, and every
    text node containing ``search_text`` is selected with XPath. One line is
    printed per matching text node, showing its first 100 characters and how
    many times the text occurs in it.

    Args:
        html_file: Path of the HTML file to read.
        search_text: Text to search for. Must be non-empty; it may contain
            quote characters.

    Returns:
        A tuple ``(total_count, node_count)``: the total number of
        non-overlapping occurrences of ``search_text`` summed over all
        matching text nodes, and the number of text nodes that contain it.

    Raises:
        ValueError: If ``search_text`` is empty (every node would "contain"
            it and the counts would be meaningless).
    """
    if not search_text:
        raise ValueError("search_text must be a non-empty string")

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # Nothing to search; this also avoids handing an empty document to the
    # parser.
    if not html_content.strip():
        return 0, 0

    # The bytes given to the parser are always UTF-8 because we encode them
    # ourselves, so declare that explicitly instead of relying on the
    # parser's own charset detection.
    parser = etree.HTMLParser(encoding='utf-8')
    tree = etree.fromstring(html_content.encode('utf-8'), parser)
    if tree is None:
        return 0, 0

    # Pass the search text as an XPath variable rather than formatting it
    # into the expression, so quotes in the text cannot break or alter the
    # query.
    matching_nodes = tree.xpath(
        "//text()[contains(., $search_text)]",
        search_text=search_text,
    )

    # A single text node may contain the search text more than once.
    total_count = 0
    for text_node in matching_nodes:
        occurrences = text_node.count(search_text)
        total_count += occurrences
        print(f"找到文本节点,内容片段: ...{text_node[:100]}... (包含 {occurrences} 次)")

    return total_count, len(matching_nodes)


if __name__ == "__main__":
    html_file = "1.html"
    search_text = "视频上传成功"

    print(f"正在使用XPath查找文本: '{search_text}'")
    print("=" * 60)

    total_count, node_count = count_text_with_xpath(html_file, search_text)

    print("=" * 60)
    print("XPath查询结果:")
    print(f" - 包含该文本的文本节点数量: {node_count}")
    print(f" - 文本总出现次数: {total_count}")

    # Show a few alternative XPath expressions for reference (display only).
    print("\n" + "=" * 60)
    print("其他可用的XPath表达式示例:")
    print(f" 1. //text()[contains(., '{search_text}')]")
    print(f" 2. //*[contains(text(), '{search_text}')]")
    print(f" 3. //*[normalize-space(text())='{search_text}']")