"""Read Shopee product URLs from an Excel sheet and print their shop/item IDs.

The script attaches to (or launches) a Chromium instance through DrissionPage,
loads the first column of the ``in`` sheet of ``sample_items_1000_api.xlsx``
and, for every URL found there, prints the URL followed by its ``shop_id`` and
``item_id`` query parameters.

The code that actually opens each product page and captures the
``api/v4/pdp/get_pc`` response is currently disabled; it is kept as a
commented block inside ``main`` for reference.
"""
import time
from typing import Optional, Tuple
from urllib.parse import parse_qs, urlparse

import pandas as pd

EXCEL_PATH = 'sample_items_1000_api.xlsx'
SHEET_NAME = 'in'
DEBUG_PORT = 9999
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
CHROMIUM_ARGUMENTS = (
    '--disable-blink-features=AutomationControlled',
    '--no-sandbox',
    '--disable-infobars',
    '--disable-extensions',
    '--start-maximized',
)


def extract_ids(url: object) -> Optional[Tuple[str, str]]:
    """Return ``(shop_id, item_id)`` taken from the query string of *url*.

    Returns ``None`` instead of raising when *url* is not a string (pandas
    yields a float NaN for blank cells) or when either parameter is absent
    or empty, so one bad row cannot abort the whole run.
    """
    if not isinstance(url, str):
        return None
    params = parse_qs(urlparse(url).query)
    try:
        # parse_qs drops blank values, so a present key always has >= 1 item.
        return params['shop_id'][0], params['item_id'][0]
    except KeyError:
        return None


def build_options():
    """Build the ChromiumOptions used to start/attach to the browser."""
    # Imported lazily so the URL helpers above stay importable on machines
    # that do not have the browser automation stack installed.
    from DrissionPage import ChromiumOptions

    co = ChromiumOptions()
    co.set_local_port(DEBUG_PORT)
    # Headed mode for now, to check whether "it" still triggers
    # (presumably the site's bot detection -- TODO confirm).
    co.headless(False)
    co.incognito(False)  # avoid incognito mode
    for argument in CHROMIUM_ARGUMENTS:
        co.set_argument(argument)
    co.set_user_agent(USER_AGENT)  # realistic desktop Chrome UA
    return co


def main() -> None:
    """Start the browser, then print the IDs of every URL in the sheet."""
    from DrissionPage import ChromiumPage
    from loguru import logger

    # Only the disabled fetch block below uses `page`, but creating it is what
    # starts/attaches to the browser, so it is kept.
    page = ChromiumPage(addr_or_opts=build_options())

    # The context manager closes the workbook handle once the sheet is loaded.
    with pd.ExcelFile(EXCEL_PATH) as excel_file:
        df = excel_file.parse(SHEET_NAME)
    first_column = df.iloc[:, 0]

    # Counters used only by the disabled fetch block; re-enable them together.
    # n = 0   # number of successful fetches
    # n1 = 0  # cumulative seconds spent on successful fetches

    for value in first_column:
        print(value)
        ids = extract_ids(value)
        if ids is None:
            logger.warning(f"Skipping row without shop_id/item_id: {value!r}")
            continue
        shop_id, item_id = ids
        print(shop_id)
        print(item_id)

        # --- Disabled: fetch the product page and capture the API response.
        # NOTE(review): indentation was reconstructed from a flattened
        # source; verify the nesting before re-enabling.
        # a = time.time()
        # tab = page.new_tab()
        # tab.listen.start("shopee.tw/api/v4/pdp/get_pc")
        # tab.get(f"https://shopee.tw/product/{shop_id}/{item_id}")
        # res = tab.listen.wait(timeout=15)
        # print(res.response.body)
        # if res.response.body.get("data", {}).get("item", {}):
        #     n += 1
        #     n1 += time.time() - a
        #     # Example API URL: https://shopee.tw/api/v4/pdp/get_pc?item_id=22577587881&shop_id=1014505717&tz_offset_minutes=480&detail_level=0&
        #     logger.info(f"成功第{n}个,耗时{time.time() - a}秒")
        #     logger.success(f"成功第{n}个,一共耗时{n1}秒")
        # tab.close()


if __name__ == '__main__':
    main()