65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
import time
|
|
from loguru import *
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
import pandas as pd
|
|
from DrissionPage import *
|
|
|
|
def extract_shop_item_ids(url):
    """Return the ``(shop_id, item_id)`` query parameters of *url*.

    Args:
        url: A URL string whose query string carries ``shop_id`` and
            ``item_id`` parameters.

    Returns:
        A ``(shop_id, item_id)`` tuple of strings (the first value of each
        parameter), or ``None`` when *url* is not a string (e.g. the NaN
        pandas yields for an empty cell) or when either parameter is absent
        or blank.
    """
    if not isinstance(url, str):
        return None
    # parse_qs drops blank values by default, so a key that is present
    # always maps to a non-empty list.
    params = parse_qs(urlparse(url).query)
    try:
        return params['shop_id'][0], params['item_id'][0]
    except KeyError:
        return None


if __name__ == '__main__':
    # Script entry point: configure and start a Chromium page, read the URLs
    # stored in the first column of sheet 'in' of the workbook, and print the
    # shop_id / item_id query parameters of each URL.
    co = ChromiumOptions()
    co.set_local_port(9999)
    co.headless(False)  # test in headed mode first to see whether it still gets triggered
    co.incognito(False)  # avoid incognito mode
    co.set_argument('--disable-blink-features=AutomationControlled')
    co.set_argument('--no-sandbox')
    co.set_argument('--disable-infobars')
    co.set_argument('--disable-extensions')
    co.set_argument('--start-maximized')
    co.set_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")  # real UA

    # `page` is only used by the disabled fetch step below; it is still
    # created so the start-up behaviour of the script is unchanged.
    page = ChromiumPage(addr_or_opts=co)

    # Read the workbook; the context manager closes the file handle once the
    # sheet has been loaded into memory.
    with pd.ExcelFile('sample_items_1000_api.xlsx') as excel_file:
        # Load the data of the named worksheet.
        df = excel_file.parse('in')

    # First column of the sheet.
    first_column = df.iloc[:, 0]

    # Counters referenced only by the disabled fetch step below:
    # n  - number of successful fetches, n1 - accumulated elapsed seconds.
    n = 0
    n1 = 0

    # Walk the first column row by row.
    for value in first_column:
        print(value)

        ids = extract_shop_item_ids(value)
        if ids is None:
            # Skip empty cells and URLs without both parameters instead of
            # aborting the whole run with a KeyError/AttributeError.
            logger.warning(f"skipping row without shop_id/item_id: {value!r}")
            continue
        shop_id, item_id = ids

        print(shop_id)
        print(item_id)

        # NOTE(review): disabled fetch step, kept for reference. The nesting
        # below was reconstructed from a paste that lost its indentation -
        # confirm before re-enabling.
        #
        # a = time.time()
        # tab = page.new_tab()
        # tab.listen.start("shopee.tw/api/v4/pdp/get_pc")
        # tab.get(f"https://shopee.tw/product/{shop_id}/{item_id}")
        # res = tab.listen.wait(timeout=15)
        # print(res.response.body)
        # if res.response.body.get("data", {}).get("item", {}):
        #     n += 1
        #     n1 += time.time() - a
        #     # https://shopee.tw/api/v4/pdp/get_pc?item_id=22577587881&shop_id=1014505717&tz_offset_minutes=480&detail_level=0&
        #     logger.info(f"success #{n}, took {time.time() - a} s")
        # logger.success(f"success #{n}, total elapsed {n1} s")
        #
        # tab.close()