"""
Web Browser Automation Handler
This module provides enhanced browser automation capabilities using pure Selenium
with custom timeout configurations, network monitoring, tab management, and interactive element discovery.
Required dependencies:
pip install qufe[web]
This installs: selenium>=4.0.0
Classes:
Browser: Base class for browser automation with common functionality and tab management
Chrome: Chrome browser implementation with advanced configuration options
Firefox: Firefox browser implementation with profile management
"""
import os
import sys
import json
import time
import shutil
from urllib.parse import urlparse, parse_qs
from typing import List, Dict, Any, Optional, Union
# Lazy imports for external dependencies
def _import_selenium_dependencies():
    """
    Import Selenium on demand and hand back the pieces this module uses.

    The imports happen inside the function so that merely importing this
    module does not require Selenium to be installed.

    Returns:
        Dict mapping short names ('webdriver', 'By', 'WebDriverWait', 'EC',
        'ActionChains', the Firefox/Chrome Options and Service classes,
        'Select', and the commonly handled Selenium exception classes) to the
        corresponding Selenium objects.

    Raises:
        ImportError: If Selenium cannot be imported; the message contains the
            install command and the original error is chained as the cause.
    """
    try:
        from selenium import webdriver
        from selenium.common.exceptions import (
            ElementNotInteractableException,
            NoSuchElementException,
            StaleElementReferenceException,
            TimeoutException,
            WebDriverException,
        )
        from selenium.webdriver.chrome.options import Options as ChromeOptions
        from selenium.webdriver.chrome.service import Service as ChromeService
        from selenium.webdriver.common.action_chains import ActionChains
        from selenium.webdriver.common.by import By
        from selenium.webdriver.firefox.options import Options as FirefoxOptions
        from selenium.webdriver.firefox.service import Service as FirefoxService
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import Select, WebDriverWait
    except ImportError as exc:
        raise ImportError(
            "Web browser automation requires Selenium. "
            "Install with: pip install qufe[web]"
        ) from exc

    # Name/object pairs, kept in the same order callers have always seen.
    exported = (
        ('webdriver', webdriver),
        ('By', By),
        ('WebDriverWait', WebDriverWait),
        ('EC', EC),
        ('ActionChains', ActionChains),
        ('FirefoxOptions', FirefoxOptions),
        ('FirefoxService', FirefoxService),
        ('ChromeOptions', ChromeOptions),
        ('ChromeService', ChromeService),
        ('Select', Select),
        ('WebDriverException', WebDriverException),
        ('TimeoutException', TimeoutException),
        ('NoSuchElementException', NoSuchElementException),
        ('StaleElementReferenceException', StaleElementReferenceException),
        ('ElementNotInteractableException', ElementNotInteractableException),
    )
    return dict(exported)
[docs]
class TimeoutConfig:
    """
    Central place for the timeout values (in seconds) used by this module.

    These replace the SeleniumBase defaults with longer values; the trailing
    comment on each constant records the SeleniumBase value it supersedes.
    """

    # Waits, from shortest to longest.
    MINI_TIMEOUT = 5             # was 2s
    SMALL_TIMEOUT = 20           # was 7s
    LARGE_TIMEOUT = 40           # was 10s
    EXTREME_TIMEOUT = 80         # was 30s

    # Page-load limits.
    PAGE_LOAD_TIMEOUT = 180      # was 120s
    SAFE_PAGE_LOAD_TIMEOUT = 30  # used when a caller asks for the timeout-based ("safe") load
[docs]
def help():
    """
    Display help information for web browser automation.

    Prints a header, then checks whether the Selenium dependencies can be
    imported. If they cannot, only the installation hint is printed and the
    function returns early. Otherwise it prints the available classes,
    features, selector shortcuts, tab/page-loading helpers, platform notes
    and a usage example.

    The usage example only shows calls that the classes in this module
    actually define.
    """
    print("qufe.wbhandler - Web Browser Automation")
    print("=" * 45)
    print()

    try:
        _import_selenium_dependencies()
    except ImportError:
        print("✗ Dependencies: MISSING")
        print(" Install with: pip install qufe[web]")
        print(" This installs: selenium>=4.0.0")
        print()
        return
    print("✓ Dependencies: INSTALLED")

    # One entry per printed line; "" reproduces a bare print().
    lines = (
        "",
        "AVAILABLE CLASSES:",
        " • Browser: Base class for browser automation with tab management",
        " • Firefox: Firefox browser with profile management",
        " • Chrome: Chrome browser with advanced configuration options",
        "",
        "FEATURES:",
        " • Cross-platform support (x86, x64, ARM including Raspberry Pi)",
        " • Gradual element finding with automatic fallback",
        " • Network request monitoring via JavaScript injection",
        " • Interactive element discovery and automation",
        " • URL parameter extraction and parsing",
        " • Timeout-based page loading for compatibility",
        "",
        "ELEMENT FINDING:",
        " • .find_element() - Fast direct find with gradual fallback",
        " • .wait_for_element() - Explicit wait for dynamic content",
        " • .select_option_by_text() - Dropdown selection with retry logic",
        "",
        "SELECTOR SHORTCUTS:",
        " • XPath: '//button[text()=\"Submit\"]' (starts with //)",
        " • CSS: '$#my-id' or '$ .my-class' (starts with $)",
        " • Legacy: by='css' or by='xpath' (still supported)",
        "",
        "TAB MANAGEMENT:",
        " • browser.open_new_tab('https://example.com')",
        " • browser.switch_to_tab(0) # Switch to first tab",
        " • browser.get_tab_count() # Get number of tabs",
        " • browser.close_current_tab()",
        "",
        "PAGE LOADING OPTIONS:",
        " • browser.open(url) # Default timeout handling",
        " • browser.open(url, safe_timeout=True) # Shorter timeout",
        " • browser.open(url, timeout=15) # Custom timeout",
        " • browser.wait_for_network_idle() # Wait for network activity to settle",
        "",
        "PLATFORM SUPPORT:",
        " • ARM systems (Raspberry Pi, Apple Silicon): Automatic driver detection",
        " • x86/x64 systems: Selenium Manager with fallback support",
        " • WebDriver installation:",
        " - Raspberry Pi: sudo apt install firefox-geckodriver",
        " - Ubuntu/Debian: sudo apt install firefox-geckodriver chromium-driver",
        " - macOS: brew install geckodriver chromedriver",
        "",
        "USAGE EXAMPLE:",
        " from qufe.wbhandler import Chrome",
        " ",
        # The previous example chained configure_no_automation() and
        # configure_detach(), which Chrome does not define.
        " # Start browser (private mode is the default)",
        " browser = Chrome()",
        " ",
        " # Safe loading for protected environments",
        " browser.open('https://example.com', safe_timeout=True)",
        " browser.wait_for_network_idle(idle_time=2.0)",
        " ",
        " # Tab management",
        " browser.open_new_tab('https://github.com')",
        " browser.switch_to_tab(0) # Back to first tab",
        " ",
        " # Auto-detect selectors",
        " browser.click('//button[text()=\"Login\"]') # XPath",
        " browser.type_text('$#username', 'user') # CSS ID",
        " browser.click('$ .submit-btn') # CSS Class",
        " ",
        " # Element finding",
        " browser.select_option_by_text('#country', 'Korea') # Dropdown",
        " ",
        " # Clean up",
        " browser.quit()",
        "",
        "NOTE: Requires WebDriver (ChromeDriver/GeckoDriver) to be installed",
    )
    for line in lines:
        print(line)
[docs]
class Browser:
    """
    Base browser automation class with enhanced functionality including tab management.

    Provides network monitoring, element discovery, tab management, and automation utilities
    built on top of pure Selenium WebDriver with auto-detecting selectors.

    Subclasses must implement ``_init_webdriver`` and assign ``self.driver`` there.

    Attributes:
        selenium: Dict of lazily imported Selenium objects (see
            ``_import_selenium_dependencies``)
        driver: Selenium WebDriver instance for browser automation
        wait: WebDriverWait instance for explicit waits
        window_handles: List of window handles for tab management
    """

    def __init__(
        self,
        private_mode: bool = True,
        mobile_mode: bool = False,
        headless: bool = False,
        window_size: str = "1920,1080",
        window_position: str = "10,10"
    ):
        """
        Initialize browser instance.

        The driver is created immediately (via ``_init_webdriver``) and the
        default timeouts are applied before the constructor returns.

        Args:
            private_mode: Enable private/incognito browsing mode
            mobile_mode: Enable mobile device emulation
            headless: Run browser in headless mode
            window_size: Browser window size as "width,height"
            window_position: Browser window position as "x,y"

        Raises:
            ImportError: If required dependencies are not installed
        """
        # Import required dependencies
        self.selenium = _import_selenium_dependencies()

        self._private_mode = private_mode
        self._mobile_mode = mobile_mode
        self._headless = headless
        self._window_size = window_size
        self._window_position = window_position

        # Initialize driver and tab management
        self.driver = None
        self.wait = None
        self.window_handles = []
        self._init_webdriver()

        # Configure timeouts
        self._configure_timeouts()

    def _init_webdriver(self) -> None:
        """Initialize webdriver with specified configuration (subclass hook)."""
        raise NotImplementedError("Subclasses must implement _init_webdriver method")

    def _configure_timeouts(self) -> None:
        """
        Apply the default timeouts and seed the tab list.

        Does nothing when no driver has been created. Otherwise sets the
        implicit wait and page-load timeout from ``TimeoutConfig``, creates
        ``self.wait`` and records the current window handle.
        """
        if not self.driver:
            return

        self.driver.implicitly_wait(TimeoutConfig.MINI_TIMEOUT)
        self.driver.set_page_load_timeout(TimeoutConfig.PAGE_LOAD_TIMEOUT)
        self.wait = self.selenium['WebDriverWait'](self.driver, TimeoutConfig.SMALL_TIMEOUT)

        # Initialize window handles list
        try:
            self.window_handles = [self.driver.current_window_handle]
        except self.selenium['WebDriverException']:
            self.window_handles = []

    def _parse_selector(self, selector: str, by: Optional[str] = None) -> tuple:
        """
        Parse selector and determine the appropriate By strategy.

        Auto-detects selector type based on SeleniumBase conventions:
        - Starts with '//' → XPath
        - Starts with '$' → CSS Selector (the '$' and following spaces are removed)
        - Otherwise → Use explicit 'by' parameter or default to CSS

        The prefix check runs first, so a selector starting with '//' is
        treated as XPath even when ``by='css'`` is passed.

        Args:
            selector: The selector string
            by: Explicit selector type ('css', 'xpath', or None for auto-detect)

        Returns:
            Tuple of (By strategy, cleaned selector)

        Raises:
            ValueError: If selector is empty (including a bare '$' prefix with
                nothing after it) or by parameter is invalid
        """
        if not selector:
            raise ValueError("Selector cannot be empty")

        by_cls = self.selenium['By']

        # Auto-detect based on selector prefix
        if selector.startswith('//'):
            return (by_cls.XPATH, selector)
        if selector.startswith('$'):
            # Remove $ prefix and handle space after $ for class selectors
            cleaned_selector = selector[1:].lstrip()
            if not cleaned_selector:
                # '$' alone would otherwise reach the driver as an empty selector
                raise ValueError("Selector cannot be empty")
            return (by_cls.CSS_SELECTOR, cleaned_selector)

        # Fall back to explicit 'by' parameter (CSS when omitted)
        if by is None:
            strategy = "css"
        elif isinstance(by, str):
            strategy = by.lower()
        else:
            raise ValueError("by parameter must be 'css' or 'xpath'")

        if strategy == "css":
            return (by_cls.CSS_SELECTOR, selector)
        if strategy == "xpath":
            return (by_cls.XPATH, selector)
        raise ValueError("by parameter must be 'css' or 'xpath'")

    def open(
        self,
        url: str,
        safe_timeout: bool = False,
        timeout: Optional[int] = None,
        wait_for_idle: bool = False,
        idle_time: float = 2.0
    ) -> None:
        """
        Navigate to the specified URL with enhanced timeout handling.

        The page-load timeout is changed only for this navigation and restored
        afterwards. If the load times out but the document is already
        'interactive' or 'complete' (and the browser has left the initial
        'data:,' page), a notice is printed and the call returns normally.

        Args:
            url: URL to navigate to
            safe_timeout: Use shorter timeout for protected environments
            timeout: Custom timeout in seconds (overrides safe_timeout)
            wait_for_idle: Wait for network activity to settle after loading
            idle_time: Time to wait for network idle (seconds)

        Raises:
            RuntimeError: If driver not initialized or page fails to load
        """
        if not self.driver:
            raise RuntimeError("Driver not initialized")

        # Determine timeout to use
        if timeout is not None:
            load_timeout = timeout
        elif safe_timeout:
            load_timeout = TimeoutConfig.SAFE_PAGE_LOAD_TIMEOUT
        else:
            load_timeout = TimeoutConfig.PAGE_LOAD_TIMEOUT

        # Store original timeout to restore later
        original_timeout = self.driver.timeouts.page_load

        try:
            # Set temporary timeout
            self.driver.set_page_load_timeout(load_timeout)
            # Navigate to URL
            self.driver.get(url)
        except self.selenium['TimeoutException'] as e:
            # Check if page is partially loaded and usable. Only the status
            # queries are guarded here, so the "failed to load" error below is
            # not mistaken for a failure to query the status.
            try:
                ready_state = self.driver.execute_script("return document.readyState")
                current_url = self.driver.current_url
            except Exception as status_error:
                raise RuntimeError(
                    f"Page loading failed and unable to check status: {e}"
                ) from status_error

            if (ready_state in ['interactive', 'complete']) and (current_url != 'data:,'):
                print(f"Page partially loaded but continuing (ready state: {ready_state})")
            else:
                raise RuntimeError(f"Page failed to load properly: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Unexpected error during page loading: {e}") from e
        finally:
            # Restore original timeout
            try:
                self.driver.set_page_load_timeout(original_timeout)
            except Exception:
                # If restoration fails, set back to default
                self.driver.set_page_load_timeout(TimeoutConfig.PAGE_LOAD_TIMEOUT)

        # Optional network idle wait
        if wait_for_idle:
            self.wait_for_network_idle(idle_time)

    def wait_for_network_idle(self, idle_time: float = 2.0, timeout: int = 30) -> bool:
        """
        Wait for network activity to settle by monitoring JavaScript activity.

        This method checks for jQuery activity and document ready state,
        then waits for a period of inactivity. "Activity" is approximated by
        the number of <script> tags in the document changing between polls.

        Args:
            idle_time: Time in seconds to wait for network to be idle
            timeout: Maximum time to wait for network to become idle

        Returns:
            True if network became idle within timeout, False otherwise
            (including when there is no driver or any wait step fails)
        """
        if not self.driver:
            return False

        end_time = time.time() + timeout

        try:
            # First wait for basic document ready state
            self.wait_for_ready_state_complete(timeout=min(10, timeout))

            # Then wait for jQuery if present
            if time.time() < end_time:
                remaining_timeout = int(end_time - time.time())
                self.wait_for_ajax(timeout=min(remaining_timeout, 10))

            # Finally wait for idle period
            last_activity_time = time.time()
            # Kept local so a count left over from a previous page/call cannot
            # register as activity on the first poll.
            last_script_count = None

            while time.time() < end_time:
                current_time = time.time()

                # Check if we've been idle long enough
                if (current_time - last_activity_time) >= idle_time:
                    return True

                # Check for ongoing network activity (simplified check)
                try:
                    script_count = self.driver.execute_script(
                        "return document.getElementsByTagName('script').length"
                    )
                    # Simple heuristic: if script count changes, reset idle timer
                    if last_script_count is not None and script_count != last_script_count:
                        last_activity_time = current_time
                    last_script_count = script_count
                except Exception:
                    # If we can't check, assume activity has stopped
                    pass

                time.sleep(0.5)  # Check every 500ms

            return False  # Timeout reached
        except Exception:
            # Best effort: any failure in the wait steps is reported as "not idle"
            return False

    def wait_for_element(
        self,
        selector: str,
        by: Optional[str] = None,
        timeout: Optional[int] = None,
        condition: str = 'visibility'
    ):
        """
        Wait for element to be present and meet the specified condition.

        Args:
            selector: Element selector (auto-detects XPath: //, CSS: $)
            by: Explicit selection method ('css' or 'xpath', optional)
            timeout: Custom timeout in seconds; None uses
                TimeoutConfig.SMALL_TIMEOUT (0 is honoured as "check once")
            condition: Wait condition ('visibility', 'presence', 'clickable')

        Returns:
            WebElement when found

        Raises:
            TimeoutException: If element not found within timeout
            ValueError: If invalid selector or condition specified
        """
        # `is None` rather than `or`, so an explicit timeout of 0 is not replaced
        wait_timeout = TimeoutConfig.SMALL_TIMEOUT if timeout is None else timeout
        wait = self.selenium['WebDriverWait'](self.driver, wait_timeout)

        locator = self._parse_selector(selector, by)
        conditions = self.selenium['EC']

        # Select appropriate expected condition
        if condition == 'visibility':
            wait_condition = conditions.visibility_of_element_located(locator)
        elif condition == 'presence':
            wait_condition = conditions.presence_of_element_located(locator)
        elif condition == 'clickable':
            wait_condition = conditions.element_to_be_clickable(locator)
        else:
            raise ValueError(f"Invalid condition: {condition}. Must be 'visibility', 'presence', or 'clickable'")

        return wait.until(wait_condition)

    def find_element(
        self,
        selector: str,
        by: Optional[str] = None,
        wait_if_needed: bool = True,
        timeout: int = 10
    ):
        """
        Find a single element with gradual approach.

        Implements fail-fast + gradual fallback pattern:
        1. Try immediate find (fast path for static content)
        2. If not found and wait_if_needed=True, wait for element (safe path for dynamic content)

        Args:
            selector: Element selector (auto-detects XPath: //, CSS: $)
            by: Explicit selection method ('css' or 'xpath', optional)
            wait_if_needed: Enable gradual fallback with waiting
            timeout: Timeout for waiting phase

        Returns:
            WebElement if found

        Raises:
            NoSuchElementException: If element not found
            ValueError: If selector is invalid
        """
        (by_strategy, cleaned_selector) = self._parse_selector(selector, by)
        not_found = self.selenium['NoSuchElementException']

        try:
            # Fast path: immediate find for static content
            return self.driver.find_element(by_strategy, cleaned_selector)
        except not_found:
            if not wait_if_needed:
                raise

        # Fallback: use wait_for_element for dynamic content
        try:
            return self.wait_for_element(selector, by, timeout, condition='presence')
        except self.selenium['TimeoutException'] as e:
            raise not_found(
                f"Element not found after waiting {timeout}s: {selector}"
            ) from e

    def find_elements(self, selector: str, by: Optional[str] = None):
        """
        Find multiple elements using auto-detected or explicit selector type.

        Args:
            selector: Element selector (auto-detects XPath: //, CSS: $)
            by: Explicit selection method ('css' or 'xpath', optional)

        Returns:
            List of WebElements
        """
        (by_strategy, cleaned_selector) = self._parse_selector(selector, by)
        return self.driver.find_elements(by_strategy, cleaned_selector)

    def select_option_by_text(
        self,
        dropdown_selector: str,
        option_text: str,
        by: Optional[str] = None,
        max_retries: int = 2,
        retry_delay: float = 0.5
    ) -> None:
        """
        Select dropdown option by visible text with retry logic.

        The first attempt looks the dropdown up without waiting; later
        attempts let ``find_element`` wait for it. Stale, non-interactable and
        not-found errors are retried; any other error fails immediately.

        Args:
            dropdown_selector: Selector for the <select> element
            option_text: Visible text of the option to select
            by: Explicit selection method ('css' or 'xpath', optional)
            max_retries: Maximum number of attempts (at least one is always made)
            retry_delay: Delay between retries in seconds

        Raises:
            RuntimeError: If selection fails after all retries
        """
        # NoSuchElementException is retryable: the fast first lookup raises it
        # for content that is still loading, and the retry is what waits.
        retryable = (
            self.selenium['StaleElementReferenceException'],
            self.selenium['ElementNotInteractableException'],
            self.selenium['NoSuchElementException'],
        )
        attempts = max(1, max_retries)
        last_exception = None

        for attempt in range(attempts):
            try:
                # Find element (with or without waiting based on attempt)
                element = self.find_element(
                    dropdown_selector,
                    by,
                    wait_if_needed=(attempt > 0)  # Use waiting on retries
                )
                # Try to select option
                self.selenium['Select'](element).select_by_visible_text(option_text)
                return  # Success
            except retryable as e:
                last_exception = e
                if attempt < attempts - 1:
                    time.sleep(retry_delay)
            except Exception as e:
                # For other exceptions, fail immediately
                raise RuntimeError(
                    f"Failed to select option '{option_text}' from dropdown '{dropdown_selector}': {e}"
                ) from e

        # If we get here, all retries failed
        raise RuntimeError(
            f"Failed to select option '{option_text}' after {max_retries} attempts. "
            f"Last error: {last_exception}"
        ) from last_exception

    def wait_for_ready_state_complete(self, timeout: Optional[int] = None) -> None:
        """
        Wait for page to reach ready state complete.

        Args:
            timeout: Maximum time to wait in seconds; None uses
                TimeoutConfig.SMALL_TIMEOUT (0 is honoured as "check once")
        """
        wait_timeout = TimeoutConfig.SMALL_TIMEOUT if timeout is None else timeout
        wait = self.selenium['WebDriverWait'](self.driver, wait_timeout)
        wait.until(lambda driver: driver.execute_script("return document.readyState") == "complete")

    def wait_for_ajax(self, timeout: int = 20) -> None:
        """
        Wait for AJAX requests to complete.

        Only jQuery requests are observed (``jQuery.active``); pages without
        jQuery pass immediately.

        Args:
            timeout: Maximum time to wait in seconds
        """
        wait = self.selenium['WebDriverWait'](self.driver, timeout)
        wait.until(
            lambda drv: drv.execute_script(
                "return window.jQuery ? jQuery.active == 0 : true"
            )
        )

    def click(self, selector: str, by: Optional[str] = None, timeout: int = 10) -> None:
        """
        Click on element identified by selector.

        Waits until the element is clickable before clicking.

        Args:
            selector: Element selector (auto-detects XPath: //, CSS: $)
            by: Explicit selection method ('css' or 'xpath', optional)
            timeout: Timeout for element finding
        """
        element = self.wait_for_element(selector, by, timeout, condition='clickable')
        element.click()

    def type_text(self, selector: str, text: str, by: Optional[str] = None, timeout: int = 10) -> None:
        """
        Type text into element identified by selector.

        Waits until the element is visible, clears it, then sends the text.

        Args:
            selector: Element selector (auto-detects XPath: //, CSS: $)
            text: Text to type
            by: Explicit selection method ('css' or 'xpath', optional)
            timeout: Timeout for element finding
        """
        element = self.wait_for_element(selector, by, timeout, condition='visibility')
        element.clear()
        element.send_keys(text)

    def quit(self) -> None:
        """
        Clean up and quit the browser driver.

        The driver reference and the tab list are reset even if the driver's
        own ``quit()`` raises; that exception still propagates to the caller.
        """
        if self.driver:
            try:
                self.driver.quit()
            finally:
                self.driver = None
                self.window_handles.clear()

    # ==================== TAB MANAGEMENT ====================

    def get_current_handle(self) -> Optional[str]:
        """Get current window handle, or None if unavailable."""
        if not self.driver:
            return None
        try:
            return self.driver.current_window_handle
        except self.selenium['WebDriverException']:
            return None

    def get_all_handles(self) -> List[str]:
        """Get all window handles, or an empty list if unavailable."""
        if not self.driver:
            return []
        try:
            return self.driver.window_handles
        except self.selenium['WebDriverException']:
            return []

    def switch_to_window(self, handle: str) -> bool:
        """
        Switch to window by handle.

        Args:
            handle: Window handle to switch to

        Returns:
            True if successful, False otherwise
        """
        if not self.driver:
            return False
        try:
            self.driver.switch_to.window(handle)
            return True
        except self.selenium['WebDriverException']:
            return False

    def open_new_tab(self, url: Optional[str] = None, safe_timeout: bool = False) -> bool:
        """
        Open new tab and optionally navigate to URL.

        Args:
            url: Optional URL to open in new tab
            safe_timeout: Use safe timeout for page loading

        Returns:
            True if successful, False if there is no driver or the tab could
            not be created

        Raises:
            RuntimeError: Propagated from ``open()`` when the tab was created
                but navigating to ``url`` failed
        """
        if not self.driver:
            return False
        try:
            self.driver.switch_to.new_window('tab')
            new_handle = self.driver.current_window_handle
            self.window_handles.append(new_handle)
            if url:
                self.open(url, safe_timeout=safe_timeout)
            return True
        except self.selenium['WebDriverException']:
            return False

    def switch_to_tab(self, index: int = -1) -> bool:
        """
        Switch to tab by index.

        Args:
            index: Tab index (-1 for last tab); negative indices count from the end

        Returns:
            True if successful, False otherwise
        """
        if not self.driver:
            return False
        try:
            # Refresh window handles list (keep the cached one if the query came back empty)
            all_handles = self.get_all_handles()
            if all_handles:
                self.window_handles = all_handles

            if -len(self.window_handles) <= index < len(self.window_handles):
                target_handle = self.window_handles[index]
                return self.switch_to_window(target_handle)
            return False
        except (self.selenium['WebDriverException'], IndexError):
            return False

    def get_tab_count(self) -> int:
        """Get current number of tabs."""
        return len(self.get_all_handles())

    def close_current_tab(self) -> bool:
        """
        Close current tab and switch to the last remaining tab.

        The final tab is never closed.

        Returns:
            True if successful, False otherwise
        """
        if not self.driver:
            return False
        try:
            if self.get_tab_count() <= 1:
                return False  # Don't close last tab

            current_handle = self.get_current_handle()
            self.driver.close()

            # Remove closed handle from our list
            if current_handle in self.window_handles:
                self.window_handles.remove(current_handle)

            # Switch to remaining tab
            remaining_handles = self.get_all_handles()
            if remaining_handles:
                return self.switch_to_window(remaining_handles[-1])
            return False
        except self.selenium['WebDriverException']:
            return False

    # ==================== NETWORK MONITORING ====================

    def inject_network_capture(self) -> None:
        """
        Inject JavaScript to capture fetch/XHR network requests.

        Creates a global __selenium_logs array that stores network request details
        including URL, status, method, request body, and response. The script
        wraps ``window.fetch`` and ``XMLHttpRequest`` in the current document,
        and each call resets ``__selenium_logs`` to an empty array.
        """
        inject_script = """
        window.__selenium_logs = [];
        (function() {
        const origFetch = window.fetch;
        window.fetch = function(...args) {
        return origFetch(...args).then(res => {
        const clone = res.clone();
        clone.text().then(body => {
        window.__selenium_logs.push({
        type: 'fetch', url: clone.url,
        status: clone.status,
        method: args[1]?.method||'GET',
        request: args[1]?.body||null,
        response: body
        });
        });
        return res;
        });
        };
        const _open = XMLHttpRequest.prototype.open;
        XMLHttpRequest.prototype.open = function(m,u) {
        this._m=m; this._u=u; return _open.apply(this, arguments);
        };
        const _send = XMLHttpRequest.prototype.send;
        XMLHttpRequest.prototype.send = function(b) {
        this.addEventListener('load', () => {
        window.__selenium_logs.push({
        type: 'xhr', url: this._u,
        status: this.status, method: this._m,
        request: b||null, response: this.responseText
        });
        });
        return _send.apply(this, arguments);
        };
        })();
        """
        self.driver.execute_script(inject_script)
        print('Network capture script injected successfully.')

    def get_network_logs(self) -> List[Dict[str, Any]]:
        """
        Retrieve captured network requests.

        Returns:
            List of network request dictionaries containing type, URL, status,
            method, request body, and response data. Empty when nothing was
            captured or the capture script was never injected.
        """
        # Ensure page is fully loaded before retrieving logs
        self.wait_for_ready_state_complete()
        self.wait_for_ajax()
        time.sleep(1.0)  # short grace period after the waits above
        logs = self.driver.execute_script("return window.__selenium_logs;")
        return logs or []

    # ==================== UTILITY METHODS ====================

    def find_element_info(self, selector: str, concat_text: bool = False) -> None:
        """
        Find and display information about elements matching the selector.

        Args:
            selector: CSS, XPath selector, or auto-detected (XPath: //, CSS: $)
            concat_text: If True, concatenate element text; if False, show detailed info
        """
        elements = self.find_elements(selector)
        if not elements:
            print(f'No elements found with selector: {selector}')
            return

        for element in elements:
            try:
                if not concat_text:
                    print(f'outerHTML: {element.get_attribute("outerHTML")}')
                    print(f'class: {element.get_attribute("class")}')
                    print(f'value: {element.get_attribute("value")}')
                    print(f'text: {element.text.strip()}', end='\n\n')
                else:
                    print(f"'{element.text.strip()}'", end=', ')
            except Exception:
                # Reading from one element failed; report it and move on to the next
                if not concat_text:
                    print("Error getting element info")
                else:
                    print("'[error]'", end=', ')

    @staticmethod
    def _xpath_literal(text: str) -> str:
        """Quote text as an XPath 1.0 string literal, using concat() when both quote kinds occur."""
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        # XPath 1.0 has no escape character: join the single-quoted pieces
        # with a double-quoted apostrophe.
        pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
        return f"concat({pieces})"

    @staticmethod
    def generate_text_selectors(
        texts: List[str],
        element_type: str,
    ) -> List[str]:
        """
        Generate XPath selectors for elements containing specific text.

        Text containing quote characters is quoted safely, so the resulting
        XPath stays valid.

        Args:
            texts: List of text content to match
            element_type: HTML element type (e.g., 'a', 'span', 'button')

        Returns:
            List of XPath selectors

        Example:
            generate_text_selectors(['Home', 'About'], 'a')
            # Returns: ["//a[normalize-space(.)='Home']", "//a[normalize-space(.)='About']"]
        """
        return [
            f"//{element_type}[normalize-space(.)={Browser._xpath_literal(text)}]"
            for text in texts
        ]

    def find_common_attribute(
        self,
        selectors: List[str],
        attribute: str,
        verbose: bool = False
    ) -> str:
        """
        Find the most common attribute value among elements matched by selectors.

        This method helps discover common patterns in element attributes,
        useful for building robust selectors when class names might change.

        Args:
            selectors: List of CSS, XPath, or auto-detected selectors
            attribute: Attribute name to analyze (e.g., 'class', 'id')
            verbose: Print detailed information if True

        Returns:
            Most frequently occurring attribute value (the first one seen wins
            a tie), or '' when no element has a non-empty value

        Example:
            names = ['RaspberryPi', 'BlackBerry', 'Apple']
            selectors = [f"//label[normalize-space(text())='{name}']" for name in names]
            common_class = browser.find_common_attribute(selectors, 'class')
        """
        attribute_counts = {}
        for selector in selectors:
            for element in self.find_elements(selector):
                try:
                    attr_value = element.get_attribute(attribute)
                except Exception:
                    # Skip elements whose attribute cannot be read
                    continue
                if attr_value:
                    attribute_counts[attr_value] = attribute_counts.get(attr_value, 0) + 1

        if not attribute_counts:
            return ''

        most_common = max(attribute_counts, key=attribute_counts.get)
        if verbose:
            print(f'Most common {attribute}: {most_common}')
            print(f'Attribute distribution: {attribute_counts}')
        return most_common
[docs]
class Chrome(Browser):
    """
    Chrome browser implementation with enhanced configuration options and method chaining.

    Attributes:
        options: The ChromeOptions object the driver was started with
            (None until ``_init_webdriver`` runs).
    """

    def __init__(
        self,
        private_mode: bool = True,
        mobile_mode: bool = False,
        headless: bool = False,
        window_size: str = "1920,1080",
        window_position: str = "10,10"
    ):
        """
        Initialize Chrome browser with enhanced configuration.

        Args:
            private_mode: Enable private/incognito browsing mode
            mobile_mode: Enable mobile device emulation
            headless: Run browser in headless mode
            window_size: Browser window size as "width,height"
            window_position: Browser window position as "x,y"
        """
        # Initialize options before calling parent (the parent constructor
        # calls _init_webdriver, which fills this in)
        self.options = None
        super().__init__(private_mode, mobile_mode, headless, window_size, window_position)

    def _init_webdriver(self) -> None:
        """
        Initialize Chrome webdriver with cross-platform support.

        Attempts multiple initialization methods, in order, to ensure
        compatibility across different platforms including ARM (Raspberry Pi)
        and x86/x64 systems:

        1. Service with the ``chromedriver`` found on PATH (skipped if none)
        2. Selenium Manager (no explicit service)
        3. Service without an explicit path

        Set the QUFE_DEBUG environment variable to print which method succeeded.

        Raises:
            RuntimeError: If every method fails; the message lists each
                attempt's error (truncated to 100 characters) and
                troubleshooting steps.
        """
        self.options = self.selenium['ChromeOptions']()
        self._setup_default_options()

        webdriver = self.selenium['webdriver']
        service_cls = self.selenium['ChromeService']

        # Each attempt: (label for the error list, debug message, factory)
        attempts = []
        chromedriver_path = shutil.which('chromedriver')
        if chromedriver_path:
            # Method 1: explicit chromedriver path (best for ARM/non-standard systems)
            attempts.append((
                f"Service with path ({chromedriver_path})",
                f"Chrome initialized with explicit driver path: {chromedriver_path}",
                lambda: webdriver.Chrome(
                    service=service_cls(executable_path=chromedriver_path),
                    options=self.options,
                ),
            ))
        # Method 2: let Selenium Manager handle it (best for standard x86/x64)
        attempts.append((
            "Selenium Manager",
            "Chrome initialized with Selenium Manager",
            lambda: webdriver.Chrome(options=self.options),
        ))
        # Method 3: Service but no explicit path (compatibility fallback)
        attempts.append((
            "Service without path",
            "Chrome initialized with Service (no explicit path)",
            lambda: webdriver.Chrome(service=service_cls(), options=self.options),
        ))

        initialization_errors = []
        for label, success_message, start_driver in attempts:
            try:
                self.driver = start_driver()
            except Exception as e:
                initialization_errors.append(f"{label}: {str(e)[:100]}")
                continue
            if os.environ.get('QUFE_DEBUG'):
                print(success_message)
            return

        raise RuntimeError(self._format_init_error(initialization_errors))

    @staticmethod
    def _format_init_error(initialization_errors) -> str:
        """Build the multi-line failure message listing each attempt and troubleshooting steps."""
        parts = ["Failed to initialize Chrome driver.\n", "\nAttempted methods:\n"]
        for i, err in enumerate(initialization_errors, 1):
            parts.append(f" {i}. {err}\n")
        parts.extend([
            "\n" + "=" * 50 + "\n",
            "TROUBLESHOOTING:\n",
            "1. Install Chrome/Chromium:\n",
            " - Ubuntu/Raspberry Pi: sudo apt install chromium-browser\n",
            " - macOS: brew install --cask google-chrome\n",
            " - Windows: Download from https://www.google.com/chrome/\n\n",
            "2. Install ChromeDriver:\n",
            " - Ubuntu/Raspberry Pi: sudo apt install chromium-driver\n",
            " - macOS: brew install chromedriver\n",
            " - Windows: Download from https://chromedriver.chromium.org/\n\n",
            "3. Verify installation:\n",
            " - Check Chrome: chromium-browser --version (or google-chrome --version)\n",
            " - Check driver: chromedriver --version\n",
            " - Check PATH: which chromedriver\n\n",
            "4. For ARM systems (Raspberry Pi, Apple Silicon):\n",
            " Ensure you have the ARM-compatible version of chromedriver\n",
            "=" * 50,
        ])
        return "".join(parts)

    def _setup_default_options(self) -> None:
        """
        Setup safe default Chrome options.

        Applies headless/incognito flags, a fixed set of stability flags, and
        either mobile emulation or the configured window size and position.

        Raises:
            ValueError: If window_size or window_position is not two
                comma-separated values.
        """
        # Basic options
        if self._headless:
            self.options.add_argument('--headless')
        if self._private_mode:
            self.options.add_argument('--incognito')

        # Safe performance and stability options
        for flag in (
            '--disable-dev-shm-usage',
            '--disable-gpu',
            '--disable-extensions',
            '--disable-plugins',
            '--disable-images',
        ):
            self.options.add_argument(flag)

        # Mobile emulation
        if self._mobile_mode:
            mobile_emulation = {
                "deviceMetrics": {"width": 375, "height": 667, "pixelRatio": 2.0},
                "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
            }
            self.options.add_experimental_option("mobileEmulation", mobile_emulation)

        # Window size and position (desktop mode only)
        if not self._mobile_mode:
            # Strip each part so "1920, 1080" does not put a space inside the flag
            width, height = (part.strip() for part in self._window_size.split(','))
            self.options.add_argument(f'--window-size={width},{height}')
            x, y = (part.strip() for part in self._window_position.split(','))
            self.options.add_argument(f'--window-position={x},{y}')

    # ==================== METHOD CHAINING CONFIGURATION ====================

    def add_argument(self, argument: str) -> 'Chrome':
        """
        Add Chrome argument with method chaining.

        Note: the driver is created inside ``__init__``, so an argument added
        afterwards is recorded on ``self.options`` but is not applied to the
        already-running browser by this class.

        Args:
            argument: Chrome command line argument

        Returns:
            Self for method chaining
        """
        if self.options is not None:
            self.options.add_argument(argument)
        return self

    def add_experimental_option(self, name: str, value: Any) -> 'Chrome':
        """
        Add Chrome experimental option.

        Note: the driver is created inside ``__init__``, so an option added
        afterwards is recorded on ``self.options`` but is not applied to the
        already-running browser by this class.

        Args:
            name: Option name
            value: Option value

        Returns:
            Self for method chaining
        """
        if self.options is not None:
            self.options.add_experimental_option(name, value)
        return self
[docs]
class Firefox(Browser):
    """Firefox browser implementation with profile management and enhanced ARM support."""

    def _init_webdriver(self) -> None:
        """
        Initialize Firefox webdriver with cross-platform support.

        Start-up strategies are tried in order until one succeeds:

        1. An explicit ``FirefoxService`` pointing at the ``geckodriver``
           found on PATH (skipped when none is found).
        2. Plain ``webdriver.Firefox(options=...)``, letting Selenium
           resolve the driver itself.
        3. A default ``FirefoxService`` without an explicit path.

        On success ``self.driver`` is set and, outside mobile mode, the
        window size and position are applied on a best-effort basis.

        Raises:
            RuntimeError: If every strategy fails. The message lists the
                (truncated) error of each attempt plus troubleshooting hints.
        """
        options = self._build_firefox_options()
        geckodriver_path = shutil.which('geckodriver')

        # Each factory performs its own lookups so that any failure is
        # recorded as a failed attempt instead of aborting the whole sequence.
        def start_with_explicit_path():
            service = self.selenium['FirefoxService'](executable_path=geckodriver_path)
            return self.selenium['webdriver'].Firefox(service=service, options=options)

        def start_with_selenium_manager():
            return self.selenium['webdriver'].Firefox(options=options)

        def start_with_default_service():
            service = self.selenium['FirefoxService']()
            return self.selenium['webdriver'].Firefox(service=service, options=options)

        # (label used in error report, debug message on success, factory)
        strategies = []
        if geckodriver_path:
            # Best for ARM/non-standard systems
            strategies.append((
                f"Service with path ({geckodriver_path})",
                f"Firefox initialized with explicit driver path: {geckodriver_path}",
                start_with_explicit_path,
            ))
        # Best for standard x86/x64
        strategies.append((
            "Selenium Manager",
            "Firefox initialized with Selenium Manager",
            start_with_selenium_manager,
        ))
        # Compatibility fallback
        strategies.append((
            "Service without path",
            "Firefox initialized with Service (no explicit path)",
            start_with_default_service,
        ))

        initialization_errors = []
        for label, debug_note, start_driver in strategies:
            try:
                self.driver = start_driver()
            except Exception as e:
                # Any failure simply moves on to the next strategy; the
                # message is truncated to keep the final report readable.
                initialization_errors.append(f"{label}: {str(e)[:100]}")
                continue
            if os.environ.get('QUFE_DEBUG'):
                print(debug_note)
            break
        else:
            raise RuntimeError(self._firefox_setup_help(initialization_errors))

        self._apply_window_geometry()

    def _build_firefox_options(self):
        """Build the Firefox options object from the instance configuration flags."""
        options = self.selenium['FirefoxOptions']()

        # Basic options
        if self._headless:
            options.add_argument('--headless')

        # Profile configuration
        profile_path = self._find_firefox_profile()
        if profile_path:
            options.add_argument('-profile')
            options.add_argument(profile_path)

        # Private browsing
        if self._private_mode:
            options.add_argument('-private')
            options.set_preference('browser.privatebrowsing.autostart', True)

        # Safe performance preferences (keeping security features enabled)
        options.set_preference('network.proxy.type', 0)  # No proxy
        options.set_preference('dom.webdriver.enabled', False)
        options.set_preference('useAutomationExtension', False)

        # Mobile emulation for Firefox (basic user agent change)
        if self._mobile_mode:
            mobile_user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
            options.set_preference("general.useragent.override", mobile_user_agent)

        return options

    def _apply_window_geometry(self) -> None:
        """Apply the configured window size/position; failures are non-critical."""
        try:
            if not self._mobile_mode:
                width, height = map(int, self._window_size.split(','))
                x, y = map(int, self._window_position.split(','))
                self.driver.set_window_size(width, height)
                self.driver.set_window_position(x, y)
        except Exception as e:
            # Non-critical error, just log if debug mode
            if os.environ.get('QUFE_DEBUG'):
                print(f"Warning: Could not set window size/position: {e}")

    @staticmethod
    def _firefox_setup_help(initialization_errors: List[str]) -> str:
        """Compose the error text listing failed attempts and troubleshooting steps."""
        rule = "=" * 50
        parts = [
            "Failed to initialize Firefox driver.\n",
            "\nAttempted methods:\n",
        ]
        parts.extend(
            f" {i}. {err}\n" for i, err in enumerate(initialization_errors, 1)
        )
        parts.extend([
            "\n" + rule + "\n",
            "TROUBLESHOOTING:\n",
            "1. Install Firefox:\n",
            " - Ubuntu/Raspberry Pi: sudo apt install firefox\n",
            " - macOS: brew install --cask firefox\n",
            " - Windows: Download from https://www.mozilla.org/firefox/\n\n",
            "2. Install GeckoDriver:\n",
            " - Ubuntu/Raspberry Pi: sudo apt install firefox-geckodriver\n",
            " - macOS: brew install geckodriver\n",
            " - Windows: Download from https://github.com/mozilla/geckodriver/releases\n\n",
            "3. Verify installation:\n",
            " - Check Firefox: firefox --version\n",
            " - Check driver: geckodriver --version\n",
            " - Check PATH: which geckodriver\n\n",
            "4. For ARM systems (Raspberry Pi, Apple Silicon):\n",
            " Ensure you have the ARM-compatible version of geckodriver\n",
            " On Raspberry Pi: sudo apt update && sudo apt install firefox-geckodriver\n",
            rule,
        ])
        return "".join(parts)

    @staticmethod
    def _find_firefox_profile() -> Optional[str]:
        """
        Find Firefox default profile path across different operating systems.

        Looks in the platform specific profile directory for sub-directories
        whose name ends with ``.default-release``. When several match, the
        lexicographically first one is returned so the result is stable
        between runs (``os.listdir`` order is arbitrary).

        Returns:
            Path to Firefox profile directory or None if not found
        """
        if sys.platform == "darwin":  # macOS
            profile_dir = os.path.expanduser("~/Library/Application Support/Firefox/Profiles/")
        elif sys.platform == "win32":  # Windows
            profile_dir = os.path.expanduser("~/AppData/Roaming/Mozilla/Firefox/Profiles/")
        else:  # Linux and other Unix-like systems
            profile_dir = os.path.expanduser("~/.mozilla/firefox/")

        try:
            candidates = sorted(
                name for name in os.listdir(profile_dir)
                if name.endswith('.default-release')
                and os.path.isdir(os.path.join(profile_dir, name))
            )
        except OSError:
            # Profile detection is best-effort: a missing or unreadable
            # directory just means "no profile".
            return None

        if candidates:
            return os.path.join(profile_dir, candidates[0])
        return None
# Example usage demonstrating the enhanced functionality.
# Runs a short Chrome session (navigation, network capture, element lookup,
# tab handling, URL parameter parsing) followed by an optional headless
# Firefox smoke test. Requires network access and installed browsers/drivers.
if __name__ == '__main__':
    print("qufe.wbhandler Example Usage with Cross-Platform Support")
    print("=" * 60)

    # Optional: Enable debug output for driver initialization
    # os.environ['QUFE_DEBUG'] = '1'

    # Example with Chrome and method chaining
    chrome = Chrome()
    try:
        print("Configuring Chrome with secure defaults...")
        chrome.configure_new_window().configure_no_automation().configure_detach()

        print("\nOpening first page with safe timeout...")
        chrome.open("https://httpbin.org/get", safe_timeout=True)

        # Inject network capture
        chrome.inject_network_capture()
        print("✓ Network capture injected")

        # Wait for network to settle
        if chrome.wait_for_network_idle(idle_time=2.0):
            print("✓ Network activity settled")
        else:
            print("⚠ Network timeout, but continuing...")

        # Demonstrate improved element finding
        print("\nDemonstrating improved element finding...")

        # Example 1: Static content (fast path)
        try:
            chrome.find_element("body", wait_if_needed=False)
            print("✓ Static element found immediately (fast path)")
        except Exception as e:
            print(f"✗ Static element failed: {e}")

        # Example 2: Dynamic content with fallback
        try:
            # This will use fast path first, then fallback if needed
            chrome.find_element("//body", wait_if_needed=True, timeout=5)
            print("✓ Element found with fallback available")
        except Exception as e:
            print(f"✗ Element finding failed: {e}")

        # Demonstrate tab management
        print(f"\nCurrent tab count: {chrome.get_tab_count()}")
        print("Opening new tab with safe timeout...")
        if chrome.open_new_tab("https://httpbin.org/html", safe_timeout=True):
            print("✓ New tab opened")
            print(f"Updated tab count: {chrome.get_tab_count()}")

            # Switch between tabs
            print("Switching to first tab...")
            if chrome.switch_to_tab(0):
                print("✓ Switched to first tab")

        # Demonstrate improved dropdown selection
        try:
            # This would work if there was a select element on the page
            # chrome.select_option_by_text("#country", "Korea", max_retries=3)
            print("✓ Dropdown selection method available with retry logic")
        except Exception:
            print("⚠ No dropdown to test, but method is implemented")

        # Demonstrate URL parameter extraction.
        # The query separator must be a literal '&'; an HTML-decoded '¶'
        # would make 'param2' part of param1's value.
        test_url = "https://example.com?param1=value1&param2=value2,value3"
        params = Chrome.extract_url_parameters(test_url, 'param2', ',')
        print(f"Extracted params: {params}")

        print("\nSession completed successfully")
    except KeyboardInterrupt:
        print("\nSession interrupted by user")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        print("Cleaning up...")
        chrome.quit()
        print("Session ended")

    # Optional: Quick Firefox test to verify ARM compatibility
    print("\n" + "=" * 60)
    print("Quick Firefox compatibility test (optional)...")
    try:
        firefox = Firefox(headless=True)
        try:
            firefox.open("https://httpbin.org/get", safe_timeout=True)
            print("✓ Firefox works on this platform")
        finally:
            # Always shut the browser down, even when open() fails, so no
            # headless Firefox process is left behind.
            firefox.quit()
    except Exception as e:
        print(f"⚠ Firefox not available or configured: {str(e)[:100]}")