图虫网站有很多好看的图片,于是我尝试用 Python 批量下载了一些(仅用于自己欣赏)。如果有同学跟我一样睡不着,又想批量下载好看的图片,不妨试试下面的代码(再次声明:仅限于个人欣赏):
'''
抓取图虫网站中的风景图片
https://stock.tuchong.com/search?availableOnly=&page=1&platform=image&relevance_guarantee=false&search_id=7172918711916020000&size=100&sortBy=0&source=tc_pc_home_search&term=%E9%A3%8E%E6%99%AF&topic_id=
'''
import os
import re

import requests
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# User-agent string presenting the HTTP client as a desktop Chrome browser.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'

# Headers attached to the image download requests.
headers = {'user-agent': _USER_AGENT}

# Module-wide Selenium Chrome session used to load the search result pages.
driver = webdriver.Chrome()
def get_page(page):
    """Load one search result page and download every image listed on it.

    The page is opened in the shared Selenium ``driver``; once the
    ``.justified-layout`` container is present, the rendered HTML is parsed
    with pyquery and each item's ``data-lazy-url`` is passed to
    ``download_img``.

    Args:
        page: Page number substituted into the ``page`` query parameter.

    Raises:
        selenium.common.exceptions.TimeoutException: If the result container
            does not appear within 10 seconds.
    """
    url = f'https://stock.tuchong.com/search?availableOnly=&page={page}&platform=image&relevance_guarantee=false&search_id=7172918711916020000&size=100&sortBy=0&source=tc_pc_home_search&term=%E9%A3%8E%E6%99%AF&topic_id='
    driver.get(url)

    # Explicit wait: continue only once the result container exists in the DOM.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.justified-layout'))
    )

    doc = pq(driver.page_source)
    for item in doc.find('.justified-layout .justified-layout__item').items():
        src = item.find('.image-item').attr('data-lazy-url')
        if not src:
            # Item without a lazy-load URL: nothing to download.
            continue

        # The file name is the path segment between "/ml/" and ".webp".
        matched = re.match(r'.*?/ml/(.*?)\.webp', src)
        if matched is None:
            # URL does not follow the expected layout; skip it instead of crashing.
            continue

        # The scraped URLs start with "//"; strip only that leading prefix
        # (and a scheme, if one is present) because download_img prepends
        # "http://" itself.
        img_src = re.sub(r'^(?:https?:)?//', '', src)
        download_img(matched.group(1), img_src)
# Download a single image
def download_img(name, src, save_dir='images', timeout=10):
    """Fetch one image over HTTP and save it as ``<save_dir>/<name>.jpg``.

    Args:
        name: File name (without extension) for the saved image.
        src: Image address. A scheme-less ``host/path`` value gets
            ``http://`` prepended; protocol-relative (``//host/path``) and
            absolute ``http(s)://`` addresses are accepted as well.
        save_dir: Directory the image is written to; created when missing.
        timeout: Seconds to wait for the server before giving up.

    A failed request (connection error, timeout, HTTP error status) is
    reported with ``print`` and skipped, so one bad image does not abort
    the whole crawl and no error page is saved as an image.
    """
    if src.startswith(('http://', 'https://')):
        url = src
    elif src.startswith('//'):
        url = 'http:' + src
    else:
        url = 'http://' + src

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'Failed to download {url}: {exc}')
        return

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)
    # NOTE(review): get_page passes URLs ending in ".webp" but the file is
    # saved with a ".jpg" extension; confirm the actual image format.
    with open(os.path.join(save_dir, name + '.jpg'), 'wb') as file:
        file.write(response.content)
if __name__ == '__main__':
    # Fetch the first 3 result pages (the search URL requests 100 images per
    # page). Numbering starts at 1, matching the sample URL in the module
    # docstring (page=1); range(4) would have requested pages 0-3 instead.
    try:
        for i in range(1, 4):
            get_page(i)
    finally:
        # Always shut the browser down, even if a page fails to load.
        driver.quit()