一、启动
chromium --remote-debugging-port=9223 --user-data-dir=/home/abc/chromium-data
由于 chromium 的远程调试端口只在 localhost 上监听,其他机器无法直接访问,因此还需要用 socat 把对外的 9222 端口转发到本机的 9223 端口
socat tcp-listen:9222,fork tcp:localhost:9223
假设运行 chromium 的机器 IP 为 10.10.10.10,那么可以用 http://10.10.10.10:9222 连接
二、代码调用
from playwright.sync_api import sync_playwright
from typing import Optional, Dict, Any
class PlaywrightRequests:
    """Minimal requests-like HTTP client backed by a running Chromium instance.

    The instance attaches to an already running Chromium over the Chrome
    DevTools Protocol (CDP), creates a new browser context (optionally with a
    proxy) and one page in it, and sends HTTP requests through that page's
    ``request`` API.

    The object can be used as a context manager; ``close()`` runs on exit.
    """

    def __init__(self, ws_endpoint: str = "http://10.10.10.10:9222", proxy: Optional[Dict[str, str]] = None):
        """
        Initialize the PlaywrightRequests instance and connect immediately.

        Args:
            ws_endpoint: CDP endpoint of the running Chromium instance. It is
                passed unchanged to ``connect_over_cdp``; the default is an
                HTTP URL such as ``http://10.10.10.10:9222``.
            proxy: Proxy configuration dictionary. Example:
                {'server': 'http://proxy-server:8080'} or
                {'server': 'socks5://proxy-server:1080'}
        """
        self.ws_endpoint = ws_endpoint
        self.proxy = proxy
        self._browser = None
        self._context = None
        self._page = None
        self._playwright = None
        self.connect()

    def connect(self):
        """Connect to the running Chromium instance and open a context and page.

        Raises:
            Exception: Whatever Playwright raises while connecting. Anything
                that was already started is released before re-raising.
        """
        self._playwright = sync_playwright().start()
        try:
            self._browser = self._playwright.chromium.connect_over_cdp(self.ws_endpoint)
            # Create a new context with proxy settings
            self._context = self._browser.new_context(proxy=self.proxy)
            # Create a new page in the context
            self._page = self._context.new_page()
        except Exception:
            # Do not leave the Playwright driver running when the connection
            # (or context/page creation) fails half-way.
            self.close()
            raise

    def close(self):
        """Close the context, the browser connection and Playwright.

        Safe to call more than once: references are cleared before anything is
        closed, so a second call is a no-op. Each later step still runs when
        an earlier one raises.
        """
        context = self._context
        browser = self._browser
        playwright = self._playwright
        self._page = None
        self._context = None
        self._browser = None
        self._playwright = None
        try:
            if context:
                context.close()
        finally:
            try:
                if browser:
                    browser.close()
            finally:
                if playwright:
                    playwright.stop()

    def _request_context(self):
        """Return the page's request API, or raise if there is no open page."""
        if self._page is None:
            raise RuntimeError("PlaywrightRequests is not connected; call connect() first")
        return self._page.request

    @staticmethod
    def _to_result(response) -> Dict[str, Any]:
        """Convert a Playwright response into the ``{'status', 'html'}`` dict."""
        return {
            'status': response.status,
            'html': response.text()
        }

    def get(self, url: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """
        Perform GET request

        Args:
            url: Target URL
            headers: Optional request headers

        Returns:
            Dictionary with the response status code under 'status' and the
            response body text under 'html'

        Raises:
            RuntimeError: If the instance is not connected.
        """
        response = self._request_context().get(url, headers=headers)
        return self._to_result(response)

    def post(self, url: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
             headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """
        Perform POST request

        Args:
            url: Target URL
            data: Form data to send (used only when ``json`` is None)
            json: JSON data to send; takes precedence over ``data``
            headers: Optional request headers

        Returns:
            Dictionary with the response status code under 'status' and the
            response body text under 'html'

        Raises:
            RuntimeError: If the instance is not connected.
        """
        request = self._request_context()
        if json is not None:
            # The ``json`` parameter shadows the stdlib ``json`` module here, so
            # ``json.dumps`` cannot be called on it. Hand the object to
            # Playwright as ``data`` and let it do the JSON serialization.
            response = request.post(url, data=json, headers=headers)
        else:
            response = request.post(url, form=data, headers=headers)
        return self._to_result(response)

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection on leaving the ``with`` block."""
        self.close()
# Example usage:
if __name__ == "__main__":
    # Example with an HTTP proxy
    http_proxy = {
        'server': 'http://192.168.82.1:7788'
    }
    # Example with SOCKS5 proxy (pass it as ``proxy=`` instead of http_proxy)
    socks5_proxy = {
        'server': 'socks5://192.168.82.1:7788'
    }
    headers = {
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
    }
    # Use the context manager so close() runs even when the request raises;
    # a plain pr.close() at the end would be skipped on an exception.
    with PlaywrightRequests(ws_endpoint="http://10.10.10.10:9222", proxy=http_proxy) as pr:
        response = pr.get("https://www.jd.com/", headers=headers)
        print(f"Status: {response['status']}")
        print(f"HTML: {response['html']}")