# -*- encoding:utf-8 -*-

'''
@Author : dingjiawen
@Date : 2023/12/7 15:12
@Usage :
@Desc : Common Playwright operations
'''
import re
import time  # no longer used by the demos below; kept in case other code relies on it

from playwright.sync_api import sync_playwright


# Event listener
def on_response(response):
    """Print the status code and URL of a response."""
    # NOTE(review): 'Statue' looks like a typo for 'Status'; left unchanged
    # so the printed output stays the same.
    print(f'Statue {response.status}: {response.url}')


# Capture Ajax responses
def on_response1(response):
    """Print the JSON body of successful responses whose URL contains '/api/movie/'."""
    if '/api/movie/' in response.url and response.status == 200:
        print(response.json())


with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    # Listen for the 'response' event, which fires every time a network
    # request receives a response.
    # Alternative handler that logs every response:
    # page.on('response', on_response)
    page.on('response', on_response1)
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    browser.close()


# Get the page source
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    html = page.content()
    print(html)
    browser.close()


# Get a node's attribute
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    # Look up the <a> node with class "name"; the second argument 'href'
    # asks for the value of its hyperlink attribute.
    href = page.get_attribute('a.name', 'href')
    print(href)
    browser.close()


# Get multiple nodes
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    elements = page.query_selector_all('a.name')
    for element in elements:
        print(element.get_attribute('href'))
        print(element.text_content())
    browser.close()


# Network interception
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()

    def canel_request(route, request):
        """Abort the intercepted request."""
        route.abort()

    # The route must be registered before navigation so that requests whose
    # URL matches .png / .jpg are aborted while the page loads.
    page.route(re.compile(r"(\.png)|(\.jpg)"), canel_request)
    page.goto("https://spa6.scrape.center/")
    page.wait_for_load_state("networkidle")
    page.screenshot(path='no_picture.png')
    browser.close()


# Intercept a request and answer it with our own content
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()

    def modify_response(route, request):
        """Fulfill the intercepted request with the local custom_response.html file."""
        route.fulfill(path="./custom_response.html")

    # NOTE(review): '/' is used as the URL pattern here; confirm it matches
    # the page URL with the installed Playwright version.
    page.route('/', modify_response)
    page.goto("https://spa6.scrape.center/")
    # Keep the browser open for 10 seconds so the result can be inspected.
    # Playwright's sync API docs recommend page.wait_for_timeout() over
    # time.sleep(), which stalls Playwright's event processing.
    page.wait_for_timeout(10000)
    browser.close()