方法一: 先登录
方法二:携带登录好的数据(cookies, local_storage, session)
本文讲述第二种方法。
- 利用Chrome extension “Storage & Cookie Exporter” 下载已经登录网站的cookies, storage, session为json格式文件。
- 启动浏览器并注入 cookies、local storage、session storage。
- 先尝试只在context注入cookie,启动浏览器看能否正常进入账号,如果不行,就注入storage, session。
async def _load_cookies_to_context(self, context):
    """Inject the exported cookies into a Playwright browser context.

    Reads the ``"cookieStorageData"`` list from ``self._cookie_data`` and
    passes every entry to ``context.add_cookies`` after adding two fields:

    * ``expires`` -- ``-1`` for session cookies, otherwise the entry's
      ``expirationDate`` (``-1`` as well when that key is absent).
    * ``sameSite`` -- ``"Lax"`` / ``"Strict"`` when the exported value is
      ``lax`` / ``strict`` in any letter case; every other value (or a
      missing one) becomes ``"None"``.

    All original keys of each entry are forwarded unchanged next to the two
    fields above.

    Args:
        context: Object exposing awaitable ``add_cookies(list)`` and
            ``cookies()`` (a Playwright ``BrowserContext`` in the
            surrounding code).

    Returns:
        None. Nothing is injected when ``self._cookie_data`` is empty or
        holds no cookies.
    """
    if not self._cookie_data:
        logging.debug("No cookie data available")
        return

    cookies_data = self._cookie_data.get("cookieStorageData", [])
    if not cookies_data:
        return

    playwright_cookies = []
    for cookie in cookies_data:
        # Normalise once so a missing / null / oddly-cased value cannot raise.
        same_site = str(cookie.get("sameSite") or "").lower()
        playwright_cookies.append(
            {
                **cookie,
                "expires": -1 if cookie.get("session") else cookie.get("expirationDate", -1),
                "sameSite": same_site.capitalize() if same_site in ("lax", "strict") else "None",
            }
        )
    await context.add_cookies(playwright_cookies)

    # Read the cookies back so the debug log shows what the context holds.
    cookies = await context.cookies()
    # The value must go through a %s placeholder; passing it as a bare extra
    # argument makes the logging module fail while formatting the record.
    logging.debug("Current Cookies: %s", cookies)
    logging.debug("Cookies loaded successfully")
这样可以正常启动浏览器,并通过 new_context() 创建新的 context,同时可以设置 locale、时区、user agent 等参数:
# Scrape one store: launch Chromium, create a browser context, inject the
# saved cookies, open a page and wrap it with AgentQL.
# NOTE(review): this excerpt is truncated -- it stops at the opening `try:`,
# so the scraping logic and the value returned are not visible here.
async def scrape_store_data(self, store_data: StoreData, idx: int)->Optional[OutputStoreData] :
async with async_playwright() as p:
# Launch a new Chromium instance (this starts a browser, it does not attach to a running one)
browser = await p.chromium.launch(
headless=False, # keep False to watch the browser window
args=["--disable-blink-features=AutomationControlled"],
)
# Create a fresh browser context with a fixed locale, time zone and user agent
context = await browser.new_context(
locale='en-US',
timezone_id='America/Los_Angeles', # pin the time zone to America/Los_Angeles
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
)
# Cookies are injected at context level, before any page is opened
await self._load_cookies_to_context(context)
pg = await context.new_page()
# Optional second step, disabled here: also inject local/session storage into the page
# await self._load_storage_to_page(pg)
# Wrap the page with AgentQL using its async wrapper
async with await agentql.wrap_async(pg) as agentql_page:
try:
如果需要注入 local storage 和 session storage,则需要先打开目标页面,再在 page 上注入:
async def load_storage_to_page(page, root_path):
    """Inject exported local/session storage entries into ``page``.

    The page is first navigated to ``target_url`` and then every entry of
    ``storage_data["localStorageData"]`` and
    ``storage_data["sessionStorageData"]`` is written with ``setItem``.
    Afterwards the cookies of ``page.context`` and both storages are read
    back and logged at DEBUG level.

    The storage is written into the page that was passed in. Session storage
    is not shared between tabs, so filling a separately created page would
    leave the caller's page empty.

    NOTE(review): ``target_url`` and ``storage_data`` are not defined in this
    snippet; they are assumed to be provided by the enclosing module --
    confirm against the full file.

    Args:
        page: Playwright page (must expose ``goto``, ``evaluate`` and
            ``context``).
        root_path: Not used by the visible code; kept so existing callers
            keep working.

    Returns:
        None.
    """
    # Web Storage belongs to an origin, so the page has to be on the target
    # site before anything is written.
    await page.goto(target_url)

    local_storage_data = storage_data.get("localStorageData", {})
    if local_storage_data:
        await _fill_web_storage(page, "localStorage", local_storage_data)
        logging.debug("Local storage loaded successfully")

    session_storage_data = storage_data.get("sessionStorageData", {})
    if session_storage_data:
        await _fill_web_storage(page, "sessionStorage", session_storage_data)
        logging.debug("Session storage loaded successfully")

    # Read everything back so the debug log shows what the browser holds.
    # Values go through %s placeholders; passing them as bare extra arguments
    # makes the logging module fail while formatting the record.
    cookies = await page.context.cookies()
    logging.debug("Current Cookies: %s", cookies)
    local_storage = await page.evaluate("() => Object.fromEntries(Object.entries(localStorage))")
    logging.debug("Current Local Storage: %s", local_storage)
    session_storage = await page.evaluate("() => Object.fromEntries(Object.entries(sessionStorage))")
    logging.debug("Current Session Storage: %s", session_storage)


async def _fill_web_storage(page, storage_name, items):
    """Write each key/value pair of ``items`` into ``window[storage_name]``."""
    import json

    for key, value in items.items():
        # Web Storage only holds strings; non-string values are serialised as
        # JSON rather than as a Python repr such as "True" or "{'a': 1}".
        value_str = value if isinstance(value, str) else json.dumps(value)
        # Key and value travel as evaluate() arguments instead of being
        # spliced into the script text, so quotes, backslashes and newlines
        # cannot break (or alter) the JavaScript that runs.
        await page.evaluate(
            "([name, k, v]) => window[name].setItem(k, v)",
            [storage_name, key, value_str],
        )
此时启动 context 需要携带 user_data_dir(使用 launch_persistent_context):
import shutil
user_data_dir="../output/user_data"
if not os.path.exists(self._user_data_dir):
os.makedirs(self._user_data_dir, exist_ok=True)
else:
shutil.rmtree(user_data_dir, ignore_errors=True)
context = await p.chromium.launch_persistent_context(
user_data_dir,
headless=False,
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
至此,可以正常启动并自动进入账号了。