爬虫爬取某网站壁纸+Requests库的使用+图片下载

[toc]

网站 url：壁纸链接

完整代码

import requests

start = 1  # initial value for the API's `start` query parameter; advanced by `count` after each request
count = 12  # number of images requested per call (sent as the `count` query parameter)
# Browser-like User-Agent sent with every request made by this script.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
def getInterface(url):
    """Fetch one page of the wallpaper API and return its image links.

    Args:
        url: Full API URL to request.

    Returns:
        A string holding the ``img_1600_900`` value of every entry in the
        response's ``data`` list, each one followed by a newline.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(url, headers=header, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    r.raise_for_status()
    r.encoding = 'utf-8'
    data = r.json()
    # Every link keeps its own trailing newline so that the results of
    # consecutive pages can be written back to back into one file.
    return "".join(str(item['img_1600_900']) + "\n" for item in data['data'])
def download():
    """Download every image link listed in ``./data2.txt`` into ``./图片/``.

    The link file is read as one link per line. Images are saved as
    ``0.jpg``, ``1.jpg``, ... in the order the links appear.

    Raises:
        requests.HTTPError: If an image request answers with an error status.
    """
    import os  # local import: only this function needs it

    savepath = "./图片/"
    # Create the target folder up front; opening the image files would
    # fail if it did not exist yet.
    os.makedirs(savepath, exist_ok=True)

    with open("./data2.txt", "r") as f:
        # split() leaves an empty entry after the final newline; drop blank
        # entries instead of sending them to requests as URLs.
        links = [line for line in f.read().split("\n") if line.strip()]

    # Iterate over the links actually present rather than a fixed count,
    # so a shorter (or longer) link file no longer breaks the download.
    for item, link in enumerate(links):
        cont = requests.get(url=link, headers=header, timeout=10)
        # Do not store an error page under a .jpg name.
        cont.raise_for_status()
        with open(savepath + str(item) + ".jpg", "wb") as image_file:
            image_file.write(cont.content)
if __name__ == '__main__':
    # Collect the image links of two consecutive API pages into data2.txt,
    # then download them. The with-block guarantees the file is flushed and
    # closed even if one of the requests raises.
    with open("./data2.txt", "w+") as f:
        for _ in range(2):
            url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(
                start, count)
            print(url)
            f.write(getInterface(url))
            # Move the `start` parameter forward for the next request.
            start += count
            print(str(start) + '->')
    print("写入完毕")
    download()

补充一下Requests库知识

import requests
headers = {"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}  # send a User-Agent in the request headers to mimic a browser
# r = requests.get('http://wallpaper.apc.360.cn/index.php?c=WallPaper&start=18&count=15&from=360chrome&a=getAppsByCategory&cid=26',headers=headers)
# print(r.status_code)
# r.encoding = 'utf-8'  # decode the response body as utf-8
# print(r.encoding)
# # print(r.text)  # response body as text
# print(r.json())  # parsed body, usable when the response is JSON
# print(r.headers) # headers of the response

#  Sending a request with query parameters
# 1. Append them directly to the URL, for example:

# url = 'https://www.baidu.com/s?wd=python'
# #  timeout: give up if no response arrives within this many seconds
# r2 = requests.get(url, headers=headers,timeout=5)
# print(r2.text)

# 2. Pass them as a dict through the `params` argument
url2 = 'https://www.baidu.com/s?'
question = {'wd':'python'}
r3 = requests.get(url2,params=question,headers=headers)
r3.encoding = 'utf-8'
print(r3.text)

获取源站url

打开这个 url（注：浏览器已安装 JSON 插件），发现返回的是 JSON 格式的数据。

分析一下这个 url：start 为开始页数，count 为一页展示多少条。
于是我们可以把这两个作为变量，然后就可以爬取多张图片了。

import requests
# Paging values substituted into the API URL below.
start = 1  # value for the `start` query parameter
count = 10  # value for the `count` query parameter (images per request)

# Browser-like User-Agent for the request headers.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    ),
}

# Fill the two positional placeholders with the paging values.
_api_template = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'
url = _api_template.format(start, count)

图片链接保存代码

import requests

start = 1  # initial value for the API's `start` query parameter; advanced by `count` after each request
count = 10  # number of images requested per call (sent as the `count` query parameter)

def getInterface(url):
    """Fetch one page of the wallpaper API and return its image links.

    Args:
        url: Full API URL to request.

    Returns:
        A string holding the ``img_1600_900`` value of every entry in the
        response's ``data`` list, each one followed by a newline.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Browser-like User-Agent sent with the request.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    r = requests.get(url, headers=header, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    r.raise_for_status()
    r.encoding = 'utf-8'
    data = r.json()
    # Every link keeps its own trailing newline so that the results of
    # consecutive pages can be written back to back into one file.
    return "".join(str(item['img_1600_900']) + "\n" for item in data['data'])
if __name__ == '__main__':
    # Collect the image links of two consecutive API pages into data2.txt.
    # The with-block guarantees the file is flushed and closed even if one
    # of the requests raises.
    with open("./data2.txt", "w+") as f:
        for _ in range(2):
            url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(
                start, count)
            print(url)
            f.write(getInterface(url))
            # Move the `start` parameter forward for the next request.
            start += count
            print(str(start) + '->')
    print("写入完毕")

图片下载

def download():
    """Download every image link listed in ``./data2.txt`` into ``./图片/``.

    The link file is read as one link per line. Images are saved as
    ``0.jpg``, ``1.jpg``, ... in the order the links appear.

    Raises:
        requests.HTTPError: If an image request answers with an error status.
    """
    import os  # local import: only this function needs it

    savepath = "./图片/"
    # Create the target folder up front; opening the image files would
    # fail if it did not exist yet.
    os.makedirs(savepath, exist_ok=True)

    with open("./data2.txt", "r") as f:
        # split() leaves an empty entry after the final newline; drop blank
        # entries instead of sending them to requests as URLs.
        links = [line for line in f.read().split("\n") if line.strip()]

    # Iterate over the links actually present rather than a fixed count,
    # so a link file with fewer than 24 entries no longer breaks the download.
    for item, link in enumerate(links):
        cont = requests.get(url=link, headers=header, timeout=10)
        # Do not store an error page under a .jpg name.
        cont.raise_for_status()
        with open(savepath + str(item) + ".jpg", "wb") as image_file:
            image_file.write(cont.content)