一、启动

chromium --remote-debugging-port=9223 --user-data-dir=/home/abc/chromium-data

由于 chromium 的远程调试端口(9223)只在 localhost 上监听,其他机器无法直接访问,因此还需要用 socat 把对外的 9222 端口转发到本机的 9223 端口

socat tcp-listen:9222,fork tcp:localhost:9223

假设运行 chromium 的机器 IP 为 10.10.10.10,那么可以用 http://10.10.10.10:9222 连接

二、代码调用

from playwright.sync_api import sync_playwright
from typing import Optional, Dict, Any

class PlaywrightRequests:
    """Small requests-style client that sends HTTP calls through a remote Chromium.

    The instance attaches to an already running Chromium over CDP, opens a
    dedicated browser context (optionally with a proxy) and one page, and then
    issues GET/POST calls through that page's ``request`` API.

    The object is a context manager; leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(self, ws_endpoint: str = "http://10.10.10.10:9222", proxy: Optional[Dict[str, str]] = None):
        """
        Initialize the PlaywrightRequests instance and connect immediately
        Args:
            ws_endpoint: Endpoint URL of the running Chrome instance; it is
                  passed unchanged to ``chromium.connect_over_cdp``
            proxy: Proxy configuration dictionary. Example:
                  {'server': 'http://proxy-server:8080'} or
                  {'server': 'socks5://proxy-server:1080'}
        """
        self.ws_endpoint = ws_endpoint
        self.proxy = proxy
        self._browser = None
        self._context = None
        self._page = None
        self._playwright = None
        self.connect()

    def connect(self):
        """
        Connect to the running Chrome instance
        Starts the Playwright driver, attaches over CDP, then creates a new
        context (with the configured proxy) and a new page inside it.
        Raises:
            Whatever Playwright raises on failure; anything that was already
            started is torn down before the exception propagates.
        """
        self._playwright = sync_playwright().start()
        try:
            self._browser = self._playwright.chromium.connect_over_cdp(self.ws_endpoint)
            # Create a new context with proxy settings
            self._context = self._browser.new_context(proxy=self.proxy)
            # Create a new page in the context
            self._page = self._context.new_page()
        except Exception:
            # Do not leave a started driver / half-open connection behind
            self.close()
            raise

    def close(self):
        """
        Close the connection
        Safe to call more than once. Every teardown step is attempted even if
        an earlier one raises.
        """
        context, browser, playwright = self._context, self._browser, self._playwright
        # Drop the references first so a second close() is a no-op
        self._page = None
        self._context = None
        self._browser = None
        self._playwright = None
        try:
            if context is not None:
                context.close()
        finally:
            try:
                if browser is not None:
                    browser.close()
            finally:
                if playwright is not None:
                    playwright.stop()

    def _require_page(self):
        """Return the active page, or raise RuntimeError when not connected."""
        if self._page is None:
            raise RuntimeError("PlaywrightRequests is not connected; call connect() first")
        return self._page

    @staticmethod
    def _to_result(response) -> Dict[str, Any]:
        """Convert a Playwright API response into the dictionary returned to callers."""
        return {
            'status': response.status,
            'html': response.text()
        }

    def get(self, url: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """
        Perform GET request
        Args:
            url: Target URL
            headers: Optional request headers
        Returns:
            Dictionary with 'status' (status code) and 'html' (response body text)
        Raises:
            RuntimeError: If the client is not connected (e.g. after close())
        """
        response = self._require_page().request.get(url, headers=headers)
        return self._to_result(response)

    def post(self, url: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
             headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """
        Perform POST request
        Args:
            url: Target URL
            data: Form data to send (used only when ``json`` is None)
            json: JSON data to send; takes precedence over ``data``
            headers: Optional request headers
        Returns:
            Dictionary with 'status' (status code) and 'html' (response body text)
        Raises:
            RuntimeError: If the client is not connected (e.g. after close())
        """
        request = self._require_page().request
        if json is not None:
            # The ``json`` parameter shadows the json module, so it must not be
            # used as a module here. Playwright serializes a non-str/bytes
            # ``data`` object to JSON itself and sets the JSON content type.
            response = request.post(url, data=json, headers=headers)
        else:
            response = request.post(url, form=data, headers=headers)
        return self._to_result(response)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

# Example usage:
if __name__ == "__main__":
    # Proxy used for the demo request below
    http_proxy = {
        'server': 'http://192.168.82.1:7788'
    }

    # Example with SOCKS5 proxy (pass it as proxy= instead of http_proxy)
    socks5_proxy = {
        'server': 'socks5://192.168.82.1:7788'
    }

    headers = {
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
    }

    # The context manager guarantees close() runs even if the request raises
    with PlaywrightRequests(ws_endpoint="http://10.10.10.10:9222", proxy=http_proxy) as pr:
        response = pr.get("https://www.jd.com/", headers=headers)
        print(f"Status: {response['status']}")
        print(f"HTML: {response['html']}")