Python + Selenium 動態爬蟲

試用 Python 試作簡單的動態爬蟲。使用工具:Jupyter Notebook、Selenium、WebDriver。

安裝 Selenium

pip install selenium

前置安裝請參考

Python 第一支爬蟲(Jupyter Notebook)


指令紀錄

開啟 Google 搜尋網頁

# 載入需要的套件
from selenium import webdriver

# Start a Chrome browser session driven by Selenium.
driver = webdriver.Chrome()

# Navigate to the target page: the Google search engine.
GOOGLE_URL = "https://www.google.com/"
driver.get(GOOGLE_URL)

填入關鍵字並送出搜尋

from selenium.webdriver.common.by import By

# Locate the search box by its form-field name ("q") instead of an absolute
# XPath; an absolute path breaks as soon as the page layout changes.
element = driver.find_element(By.NAME, "q")
# Type the search keywords into the box.
element.send_keys("Python 教學")

# Submit the search through the nearest <form> that encloses the search box.
searchForm = element.find_element(By.XPATH, "./ancestor::form[1]")
searchForm.submit()

解析搜尋結果/清單

# Imported here so this cell can be run on its own in the notebook.
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)

# Collect every "div.MjjYud" container on the results page, then print the
# title (innerHTML of "h3.LC20lb") and the link href found inside each one.
infoList = driver.find_elements(By.CSS_SELECTOR, "div.MjjYud")
for info in infoList:
    try:
        title = info.find_element(By.CSS_SELECTOR, "h3.LC20lb")
        print(title.get_attribute("innerHTML"))
        link = info.find_element(By.CSS_SELECTOR, "a[jsname=UWckNb]")
        print(link.get_attribute("href"))
    except (NoSuchElementException, StaleElementReferenceException):
        # Best effort: skip containers that lack a title/link or that were
        # re-rendered while being read. Other errors are no longer hidden.
        continue

關閉瀏覽器視窗

# End the WebDriver session. quit() closes every window and shuts down the
# background driver process; close() would only close the current window.
driver.quit()


沒圖沒真相

參考資料

動態網頁爬蟲第一道鎖 — Selenium教學:如何使用Webdriver、send_keys(附Python 程式碼) ------ 指令已過時,開發流程仍有參考性。

AttributeError: 'WebDriver' object has no attribute 'find_element_by_xpath' ------ 新舊版指令遷移,如:

.find_element_by_class_name(
.find_element(By.CLASS_NAME, 

.find_element_by_css_selector(
.find_element(By.CSS_SELECTOR, 

.find_element_by_id(
.find_element(By.ID, 

.find_element_by_link_text(
.find_element(By.LINK_TEXT, 

.find_element_by_name(
.find_element(By.NAME, 

.find_element_by_partial_link_text(
.find_element(By.PARTIAL_LINK_TEXT, 

.find_element_by_tag_name(
.find_element(By.TAG_NAME, 

.find_element_by_xpath(
.find_element(By.XPATH, 

.find_elements_by_class_name(
.find_elements(By.CLASS_NAME, 

.find_elements_by_css_selector(
.find_elements(By.CSS_SELECTOR, 

.find_elements_by_id(
.find_elements(By.ID, 

.find_elements_by_link_text(
.find_elements(By.LINK_TEXT, 

.find_elements_by_name(
.find_elements(By.NAME, 

.find_elements_by_partial_link_text(
.find_elements(By.PARTIAL_LINK_TEXT, 

.find_elements_by_tag_name(
.find_elements(By.TAG_NAME, 

.find_elements_by_xpath(
.find_elements(By.XPATH,

(EOF)

Last updated