# -*- encoding:utf-8 -*-
|
||
|
||
'''
@Author : dingjiawen
@Date : 2023/12/7 15:12
@Usage :
@Desc : Common Playwright operations
'''
|
||
|
||
from playwright.sync_api import sync_playwright
|
||
|
||
|
||
# Event listener
def on_response(response):
    """Print the status code and URL of a response.

    Intended as a handler for the page's 'response' event.

    Args:
        response: Object exposing ``status`` and ``url`` attributes.
    """
    print(f'Status {response.status}: {response.url}')
|
||
|
||
|
||
# Capture Ajax responses
def on_response1(response):
    """Print the decoded JSON body of successful movie-API responses.

    A response is printed only when its URL contains ``/api/movie/`` and its
    status is 200; every other response is ignored.

    Args:
        response: Object exposing ``url``, ``status`` and a ``json()`` method.
    """
    if '/api/movie/' in response.url and response.status == 200:
        print(response.json())
|
||
|
||
|
||
with sync_playwright() as p:
    # Launch Chromium with a visible window (headless=False) and open a page.
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    # Listen for the 'response' event, which fires every time a network
    # request receives a response.
    # Alternative handler that logs every response instead:
    # page.on('response', on_response)
    page.on('response', on_response1)
    page.goto('https://spa6.scrape.center/')
    # Wait for the 'networkidle' load state before closing the browser.
    page.wait_for_load_state('networkidle')
    browser.close()
|
||
|
||
# Get the page source
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    # Wait for the 'networkidle' load state before reading the page content.
    page.wait_for_load_state('networkidle')
    html = page.content()
    print(html)
    browser.close()
|
||
|
||
# Get the content of a node
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    # 'a.name' selects an <a> node whose class is "name"; the second
    # argument 'href' asks for the value of its hyperlink attribute.
    href = page.get_attribute('a.name', 'href')
    print(href)
    browser.close()
|
||
|
||
# Get multiple nodes
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    # Collect every <a> node whose class is "name" and print each one's
    # href attribute followed by its text content.
    elements = page.query_selector_all('a.name')
    for element in elements:
        print(element.get_attribute('href'))
        print(element.text_content())
    browser.close()
|
||
|
||
# Network interception
import re

with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()


    def canel_request(route, request):
        """Abort the intercepted request; ``request`` is unused."""
        route.abort()


    # Route every request whose URL contains ".png" or ".jpg" to the
    # aborting handler.
    page.route(re.compile(r"(\.png)|(\.jpg)"), canel_request)
    page.goto("https://spa6.scrape.center/")
    page.wait_for_load_state("networkidle")
    # Save a screenshot of the page rendered with those requests aborted.
    page.screenshot(path='no_picture.png')
    browser.close()
|
||
|
||
# Intercept a request and fulfil it with our own response
import time

with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()


    def modify_response(route, request):
        """Fulfil the intercepted request from a local HTML file; ``request`` is unused."""
        route.fulfill(path="./custom_response.html")


    # NOTE(review): how the '/' pattern is matched against the full URL may
    # depend on the Playwright version - confirm it intercepts the root
    # document request.
    page.route('/', modify_response)
    page.goto("https://spa6.scrape.center/")
    # Pause for 10 seconds before closing the browser.
    time.sleep(10)
    browser.close()
|