爬虫爬取某网站壁纸 + Requests 库的使用 + 图片下载
[toc]
网站 url:壁纸链接
完整代码
import requests
# Paging state for the wallpaper API: offset of the first record and page size.
start = 1
count = 12

# Browser-like User-Agent sent with every request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}


def getInterface(url):
    """Fetch one page of the wallpaper JSON API and return its image URLs.

    Args:
        url: Full API URL, with the ``start`` and ``count`` values filled in.

    Returns:
        A string containing the ``img_1600_900`` value of every entry in the
        response's ``data`` list, each followed by a newline.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    r = requests.get(url, headers=header, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    r.raise_for_status()
    r.encoding = 'utf-8'
    data = r.json()
    return "".join(str(item['img_1600_900']) + "\n" for item in data['data'])


def download():
    """Download every image URL listed in ``./data2.txt`` into ``./图片/``.

    The URL file is expected to hold one URL per line; blank lines are
    skipped. Images are saved as ``0.jpg``, ``1.jpg``, ... in file order.
    The target directory is created when it does not exist.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status for any image.
    """
    import os

    savepath = "./图片/"
    os.makedirs(savepath, exist_ok=True)

    # Read the list up front and close the file before any network work.
    with open("./data2.txt", "r") as f:
        urls = [line for line in f.read().split("\n") if line.strip()]

    # Iterate over what is actually in the file rather than a fixed count.
    for index, image_url in enumerate(urls):
        cont = requests.get(url=image_url, headers=header, timeout=10)
        # Do not store an HTML error page under a .jpg name.
        cont.raise_for_status()
        with open(savepath + str(index) + ".jpg", "wb") as image_file:
            image_file.write(cont.content)


if __name__ == '__main__':
    # The with-block closes data2.txt even if one of the page fetches raises.
    with open("./data2.txt", "w") as f:
        for _ in range(0, 2):
            url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(
                start, count)
            print(url)
            str2 = getInterface(url)
            f.write(str2)
            # Advance the offset by one page for the next request.
            start += count
            print(str(start) + '->')
    print("写入完毕")
    download()
|
补充一下 Requests 库知识
import requests

# Request headers that make the call look like it comes from a desktop browser.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
}

url2 = 'https://www.baidu.com/s?'
# Passed via params=, so requests encodes it into the query string (wd=python).
question = {'wd': 'python'}

# A timeout keeps the script from hanging forever if the server never answers.
r3 = requests.get(url2, params=question, headers=headers, timeout=10)
r3.encoding = 'utf-8'
print(r3.text)
|
获取源站url

打开这个 url(注:浏览器已安装 JSON 插件),发现返回的是 JSON 格式的数据。

分析一下这个 url:start 为起始位置(从第几条记录开始),count 为一页展示多少条。
于是我们可以把这两个参数作为变量,每次请求后让 start 增加 count,这样就可以爬取多张图片了。
import requests

# Browser-like User-Agent sent along with the request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Paging values substituted into the query string below.
start = 1
count = 10

# {0} receives start and {1} receives count.
url = (
    'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'
    .format(start, count)
)
|
图片链接保存代码
import requests
# Paging state for the wallpaper API: offset of the first record and page size.
start = 1
count = 10


def getInterface(url):
    """Fetch one page of the wallpaper JSON API and return its image URLs.

    Args:
        url: Full API URL, with the ``start`` and ``count`` values filled in.

    Returns:
        A string containing the ``img_1600_900`` value of every entry in the
        response's ``data`` list, each followed by a newline.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    r = requests.get(url, headers=header, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    r.raise_for_status()
    r.encoding = 'utf-8'
    data = r.json()
    return "".join(str(item['img_1600_900']) + "\n" for item in data['data'])


if __name__ == '__main__':
    # The with-block closes data2.txt even if one of the page fetches raises.
    with open("./data2.txt", "w") as f:
        for _ in range(0, 2):
            url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(
                start, count)
            print(url)
            str2 = getInterface(url)
            f.write(str2)
            # Advance the offset by one page for the next request.
            start += count
            print(str(start) + '->')
    print("写入完毕")
|

图片下载
def download():
    """Download every image URL listed in ``./data2.txt`` into ``./图片/``.

    The URL file is expected to hold one URL per line; blank lines are
    skipped. Images are saved as ``0.jpg``, ``1.jpg``, ... in file order.
    The target directory is created when it does not exist. The request
    headers come from the module-level ``header`` dict.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status for any image.
    """
    import os

    savepath = "./图片/"
    os.makedirs(savepath, exist_ok=True)

    # Read the list up front and close the file before any network work.
    with open("./data2.txt", "r") as f:
        urls = [line for line in f.read().split("\n") if line.strip()]

    # Iterate over what is actually in the file rather than a fixed count of
    # 24, which fails as soon as fewer links were saved.
    for index, image_url in enumerate(urls):
        cont = requests.get(url=image_url, headers=header, timeout=10)
        # Do not store an HTML error page under a .jpg name.
        cont.raise_for_status()
        with open(savepath + str(index) + ".jpg", "wb") as image_file:
            image_file.write(cont.content)
|
效果展示

