真实场景案例
从实际需求出发的完整代码示例,覆盖自动化测试、数据采集、流程自动化等常见场景。
// 案例 01
E2E 登录测试(pytest 集成)
使用 pytest-playwright 编写完整的登录流程端到端测试,包含成功和失败场景。
E2E 测试
pytest
断言
test_login.py
# pip install pytest-playwright
# pytest test_login.py --headed --browser chromium
import re

import pytest
from playwright.sync_api import Page, expect


class TestLogin:
    """End-to-end tests for the login page.

    Every test receives the ``page`` fixture supplied by pytest-playwright
    and starts by navigating to ``URL``.
    """

    URL = "https://example.com/login"

    def test_successful_login(self, page: Page):
        """Valid credentials redirect to the dashboard and show a welcome text."""
        page.goto(self.URL)

        # Fill in and submit the login form
        page.get_by_label("用户名").fill("admin")
        page.get_by_label("密码").fill("password123")
        page.get_by_role("button", name="登录").click()

        # to_have_url() compares a plain string verbatim (glob patterns are
        # only understood by wait_for_url), so "any URL ending in /dashboard"
        # has to be expressed as a regular expression.
        expect(page).to_have_url(re.compile(r".*/dashboard$"))
        expect(page.get_by_text("欢迎回来")).to_be_visible()

    def test_wrong_password(self, page: Page):
        """A wrong password shows an error message and stays on the login page."""
        page.goto(self.URL)

        page.get_by_label("用户名").fill("admin")
        page.get_by_label("密码").fill("wrong")
        page.get_by_role("button", name="登录").click()

        # The error banner is visible with the expected text, and the URL is
        # still exactly the login URL.
        error = page.locator(".error-message")
        expect(error).to_be_visible()
        expect(error).to_contain_text("密码错误")
        expect(page).to_have_url(self.URL)

    def test_empty_fields_validation(self, page: Page):
        """Submitting the empty form shows both required-field hints."""
        page.goto(self.URL)

        page.get_by_role("button", name="登录").click()

        expect(page.get_by_text("请输入用户名")).to_be_visible()
        expect(page.get_by_text("请输入密码")).to_be_visible()
conftest.py — 共享 Fixture
import pytest


# pytest-playwright provides the `page` fixture automatically;
# browser settings can be customised here in conftest.py.
@pytest.fixture(scope="session")
def browser_context_args(browser_context_args):
    """Return the incoming context arguments with viewport and locale overridden."""
    overrides = {
        "viewport": {"width": 1920, "height": 1080},
        "locale": "zh-CN",
    }
    return {**browser_context_args, **overrides}
// 案例 02
动态页面数据采集
抓取 JavaScript 动态渲染的商品列表,支持翻页和数据导出。
数据采集
动态渲染
CSV 导出
scraper.py
import csv

from playwright.sync_api import sync_playwright


def _read_card(card):
    """Read the name, price and rating from one product-card locator."""
    title = card.locator("h3").text_content()
    cost = card.locator(".price").text_content()
    score = card.locator(".rating").get_attribute("data-score")
    return {
        "name": title.strip(),
        "price": cost.strip(),
        "rating": score,
    }


def scrape_products():
    """Walk the paginated product listing and export all rows to products.csv.

    Launches headless Chromium, reads every ``.product-card`` on each page,
    follows the ``a.next-page`` link until it is missing or disabled, then
    writes the collected rows to ``products.csv`` and prints the total.
    """
    products = []

    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) ..."
        )
        page = context.new_page()

        # Abort image and font requests to speed up loading
        page.route(
            "**/*.{png,jpg,jpeg,gif,svg,woff,woff2}",
            lambda route: route.abort(),
        )
        page.goto("https://example.com/products", wait_until="networkidle")

        has_next_page = True
        while has_next_page:
            # Wait for the product cards to be present
            page.wait_for_selector(".product-card")

            # Extract every product on the current page
            products.extend(
                _read_card(card) for card in page.locator(".product-card").all()
            )

            # Move on only when a usable "next" link exists
            next_link = page.locator("a.next-page")
            has_next_page = next_link.count() != 0 and not next_link.is_disabled()
            if has_next_page:
                next_link.click()
                page.wait_for_load_state("networkidle")

        browser.close()

    # Export to CSV
    with open("products.csv", "w", newline="", encoding="utf-8") as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=["name", "price", "rating"])
        csv_writer.writeheader()
        csv_writer.writerows(products)

    print(f"共采集 {len(products)} 个商品")


if __name__ == "__main__":
    scrape_products()
// 案例 03
表单自动填写与截图监控
自动登录后台系统,填写表单提交数据,并截图记录执行结果。
自动化办公
表单填写
截图
form_automation.py
from datetime import datetime

from playwright.sync_api import sync_playwright


def submit_daily_report():
    """Fill in and submit the daily report form, then save a screenshot.

    Uses the login state stored in ``auth.json``, submits the form at the
    report URL, waits for the ``.success-toast`` element and writes a
    full-page screenshot named after today's date.
    """
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)

        # Reuse the previously saved login state
        context = browser.new_context(
            storage_state="auth.json",
            viewport={"width": 1920, "height": 1080},
        )
        page = context.new_page()

        # Open the report form
        page.goto("https://oa.company.com/report/new")

        # Fill in the report fields
        today = datetime.now().strftime("%Y-%m-%d")
        work_done = (
            "1. 完成用户模块接口开发\n"
            "2. 修复登录页面样式问题\n"
            "3. 参加项目周会"
        )
        next_steps = (
            "1. 编写单元测试\n"
            "2. 代码 Review"
        )
        page.get_by_label("日期").fill(today)
        page.get_by_label("今日工作").fill(work_done)
        page.get_by_label("明日计划").fill(next_steps)
        page.select_option("#project", label="核心平台")

        # Submit
        page.get_by_role("button", name="提交").click()

        # Wait for the success toast, then capture the whole page
        page.wait_for_selector(".success-toast", state="visible")
        page.screenshot(
            path=f"screenshots/report_{today}.png",
            full_page=True,
        )
        print(f"日报提交成功: {today}")

        context.close()
        browser.close()


if __name__ == "__main__":
    submit_daily_report()
// 案例 04
拦截 API 获取数据
对于 SPA 应用,可以直接拦截 XHR/Fetch 请求获取结构化的 JSON 数据,比解析 DOM 更高效可靠。
网络拦截
API 数据
高效采集
api_intercept.py
import json

from playwright.sync_api import sync_playwright


def capture_api_data():
    """Collect product items from API responses while browsing the SPA.

    Listens to page responses, keeps the ``items`` of every HTTP 200
    response whose URL contains ``/api/v1/products``, clicks the
    ``#load-more`` button up to five times, and dumps everything gathered
    to ``api_data.json``.
    """
    captured_data = []

    def on_response(response):
        """Store the items of a matching, successful API response."""
        # Only the target API is of interest
        if "/api/v1/products" not in response.url:
            return
        if response.status != 200:
            return
        data = response.json()
        captured_data.extend(data["items"])
        print(f"捕获 {len(data['items'])} 条数据")

    with sync_playwright() as pw:
        browser = pw.chromium.launch()
        page = browser.new_page()

        # Register the response listener before navigating
        page.on("response", on_response)

        # Loading the page triggers the first API requests
        page.goto("https://spa-app.com/products")
        page.wait_for_load_state("networkidle")

        # Trigger "load more" while the button is visible, at most 5 times
        load_more = page.locator("#load-more")
        for _ in range(5):
            if not load_more.is_visible():
                break
            load_more.click()
            page.wait_for_load_state("networkidle")

        browser.close()

    # Persist the result
    with open("api_data.json", "w", encoding="utf-8") as out_file:
        json.dump(captured_data, out_file, ensure_ascii=False, indent=2)

    print(f"共捕获 {len(captured_data)} 条数据")


if __name__ == "__main__":
    capture_api_data()
// 案例 05
异步并发 — 多页面同时操作
利用 async API 同时操作多个页面,大幅提升采集或测试效率。
async
并发
高性能
parallel_scrape.py
import asyncio

from playwright.async_api import async_playwright


async def scrape_page(context, url):
    """Fetch the title and meta description of a single page.

    Opens a new page in *context* and navigates to *url* with a 15 second
    timeout. The page is closed again in every case.

    Returns:
        ``{"url", "title", "description"}`` on success, or
        ``{"url", "error"}`` carrying the exception text when anything
        raised, so one failing URL does not abort the whole batch.
    """
    page = await context.new_page()
    try:
        await page.goto(url, timeout=15000)
        title = await page.title()
        desc_el = page.locator("meta[name='description']")
        if await desc_el.count() > 0:
            # get_attribute() returns None when the tag has no `content`
            # attribute; normalise to "" to match the missing-tag case.
            desc = (await desc_el.get_attribute("content")) or ""
        else:
            desc = ""
        return {"url": url, "title": title, "description": desc}
    except Exception as e:
        return {"url": url, "error": str(e)}
    finally:
        await page.close()


async def main():
    """Scrape a fixed list of URLs concurrently and print one line per result."""
    urls = [
        "https://github.com",
        "https://stackoverflow.com",
        "https://python.org",
        "https://playwright.dev",
        "https://docs.pytest.org",
    ]

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        context = await browser.new_context()

        # Scrape all pages concurrently; gather() keeps the order of `urls`
        tasks = [scrape_page(context, url) for url in urls]
        results = await asyncio.gather(*tasks)

        for r in results:
            if "error" in r:
                print(f"✘ {r['url']}: {r['error']}")
            else:
                print(f"✔ {r['title']}")

        await browser.close()


# Guarded so that importing this module does not launch a browser.
if __name__ == "__main__":
    asyncio.run(main())
// 实用技巧
Playwright 实战小贴士
💡 Codegen 快速生成代码
# Record browser actions and write them out as a Python script
playwright codegen https://example.com \
  --target python \
  --output script.py
录制浏览器操作自动生成 Python 代码,适合快速生成初始脚本后再手动优化。
🔎 Debug 模式
# Option 1: environment variable (run from a shell)
#   PWDEBUG=1 python test.py

# Option 2: launch arguments
browser = p.chromium.launch(headless=False, slow_mo=500)
PWDEBUG=1 会打开 Inspector,可以逐步执行和检查选择器。slow_mo 让操作变慢便于观察。
🚀 CI/CD 集成
# GitHub Actions example
- name: Install Playwright
  run: |
    pip install playwright pytest-playwright
    playwright install --with-deps chromium
- name: Run Tests
  run: pytest --tracing=on
--with-deps 会同时安装浏览器所需的系统依赖。--tracing=on 会为每个测试录制追踪文件;若只想在测试失败时保留追踪文件,请改用 --tracing=retain-on-failure。
📦 环境隔离
# Use a separate BrowserContext per role to isolate tests
admin_ctx = browser.new_context(storage_state="admin_auth.json")
user_ctx = browser.new_context(storage_state="user_auth.json")
# The two contexts are fully isolated from each other
多个 BrowserContext 共享一个浏览器进程但完全隔离,比启动多个浏览器更高效。