# 2025-12-15 14:54:55 +08:00  (stray timestamp artifact — commented out so the file parses)
|
|
|
import time
|
|
|
|
|
from loguru import *
|
|
|
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from DrissionPage import *
|
# 2025-11-27 15:15:44 +08:00  (stray timestamp artifact — commented out so the file parses)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
2025-12-01 17:22:53 +08:00
|
|
|
|
2025-12-15 14:54:55 +08:00
|
|
|
co = ChromiumOptions()
|
|
|
|
|
co.set_local_port(9999)
|
|
|
|
|
|
|
|
|
|
page = ChromiumPage(addr_or_opts=co)
|
|
|
|
|
|
|
|
|
|
# 读取 Excel 文件
|
|
|
|
|
excel_file = pd.ExcelFile('sample_items_1000_api.xlsx')
|
|
|
|
|
|
|
|
|
|
# 获取指定工作表中的数据
|
|
|
|
|
df = excel_file.parse('in')
|
|
|
|
|
|
|
|
|
|
# 读取第一列数据
|
|
|
|
|
first_column = df.iloc[:, 0]
|
|
|
|
|
|
|
|
|
|
# 逐行遍历第一列数据
|
|
|
|
|
n = 0
|
|
|
|
|
n1 = 0
|
|
|
|
|
for value in first_column:
|
|
|
|
|
print(value)
|
|
|
|
|
|
|
|
|
|
# 解析 URL
|
|
|
|
|
parsed_url = urlparse(value)
|
|
|
|
|
|
|
|
|
|
# 提取查询参数部分
|
|
|
|
|
query_params = parsed_url.query
|
|
|
|
|
|
|
|
|
|
# 解析查询参数为字典
|
|
|
|
|
param_dict = parse_qs(query_params)
|
|
|
|
|
|
|
|
|
|
a = time.time()
|
|
|
|
|
tab = page.new_tab()
|
|
|
|
|
tab.listen.start("shopee.tw/api/v4/pdp/get_pc")
|
|
|
|
|
tab.get(f"https://shopee.tw/product/{param_dict['shop_id'][0]}/{param_dict['item_id'][0]}")
|
|
|
|
|
res = tab.listen.wait(timeout=15)
|
|
|
|
|
print(res.response.body)
|
|
|
|
|
if res.response.body.get("data", {}).get("item", {}):
|
|
|
|
|
n += 1
|
|
|
|
|
n1 += time.time() - a
|
|
|
|
|
|
|
|
|
|
logger.info(f"成功第{n}个,耗时{time.time() - a}秒")
|
|
|
|
|
logger.success(f"成功第{n}个,一共耗时{n1}秒")
|
|
|
|
|
|
|
|
|
|
tab.close()
|