import json
import os
import time

import requests

requestUrl = 'https://www.kuaishou.com/graphql'
folder_path = r'D:\kuaishou'  # local folder the videos are saved under
# Cookie captured from the Kuaishou H5 site (the value starting with ktrace-context)
cookie = ''


def post(Cookie, pcursor):
    # Fetch one page of liked videos from the visionProfileLikePhotoList GraphQL endpoint.
    data = {
        "operationName": "visionProfileLikePhotoList",
        "variables": {"pcursor": pcursor, "page": "profile"},
        "query": "query visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\n  visionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      type\n      author {\n        id\n        name\n        following\n        headerUrl\n        headerUrls {\n          cdn\n          url\n          __typename\n        }\n        __typename\n      }\n      tags {\n        type\n        name\n        __typename\n      }\n      photo {\n        id\n        duration\n        caption\n        likeCount\n        realLikeCount\n        coverUrl\n        coverUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrl\n        liked\n        timestamp\n        expTag\n        animatedCoverUrl\n        stereoType\n        videoRatio\n        __typename\n      }\n      canAddComment\n      currentPcursor\n      llsid\n      status\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
    }
    headers = {
        'Host': 'www.kuaishou.com',
        'Connection': 'keep-alive',
        # Content-Length is filled in automatically by requests
        'accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4621.0 Safari/537.36',
        'content-type': 'application/json',
        'Origin': 'https://www.kuaishou.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://www.kuaishou.com/profile',  # profile page listing your own liked videos
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cookie': Cookie,
    }
    r = requests.post(requestUrl, data=json.dumps(data), headers=headers)
    r.encoding = 'UTF-8'
    return r.text


def down(feeds, keyword):
    # Download every video on one feed page, skipping files that already exist.
    for feed in feeds:
        filename = str(feed['photo']['duration']) + '.mp4'
        filepath = folder_path + '/' + keyword + '/'
        if not os.path.exists(filepath + filename):
            progressbar(feed['photo']['photoUrl'], filepath, filename)
            print(filename + ", download finished")
        else:
            print(filename + ", already exists, skipped")


def url_response(url, filepath, filename):
    # Unused alternative downloader based on the third-party progressbar2 package,
    # kept for reference (pip install progressbar2).
    import progressbar as pb
    r = requests.get(url, stream=True)
    total_length = int(r.headers['content-length'])
    widgets = ['Progress: ', pb.Percentage(), ' ',
               pb.Bar(marker='#', left='[', right=']'),
               ' ', pb.ETA(), ' ', pb.FileTransferSpeed()]
    pbar = pb.ProgressBar(widgets=widgets, maxval=total_length).start()
    size = 0
    with open(filepath + filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                f.flush()
                size += len(chunk)
                pbar.update(size)
    pbar.finish()


def progressbar(url, filepath, filename):
    # Stream one video to disk while printing a simple text progress bar.
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    start = time.time()
    response = requests.get(url, stream=True)
    size = 0
    chunk_size = 1024
    content_size = int(response.headers['content-length'])
    if response.status_code == 200:
        print('Start download, [File size]: {size:.2f} MB'.format(size=content_size / chunk_size / 1024))
        filename = filename.replace("\n", "")
        filepath = filepath + filename
        try:
            with open(filepath, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    print('\r' + '[Download progress]: %s %.2f%%'
                          % ('>' * int(size * 50 / content_size), float(size / content_size * 100)), end=' ')
            end = time.time()
            print('Download completed! Time: %.2f s' % (end - start))
        except Exception:
            pass


if __name__ == "__main__":
    keyWork = 'zan'  # sub-folder that the liked videos are saved into
    links = []
    index = ''

    while True:
        pcursor = index
        result = post(cookie, pcursor)
        data = json.loads(result)

        # Stop when the response no longer contains the liked-video list
        if "visionProfileLikePhotoList" not in data['data']:
            print('success')
            break

        # Stop when there is no cursor for the next page
        if data['data']['visionProfileLikePhotoList']['pcursor'] == '':
            print('success')
            break

        # Remember the cursor for the next page
        index = data['data']['visionProfileLikePhotoList']['pcursor']
        feeds = data['data']['visionProfileLikePhotoList']['feeds']

        if len(feeds) == 0:
            print(data['data'])
            print('no videos')
            break
        print(feeds)

        links.append(feeds)

    for link in links:
        down(link, keyWork)
    print('while done')
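
Before kicking off the full crawl, it can be worth checking that the cookie and the GraphQL request are accepted at all. A minimal sketch, assuming the script above is saved as kuaishou_likes.py (a hypothetical file name) and that cookie has been filled in:

import json

# Hypothetical module name for the script above; adjust it to wherever you saved the file.
from kuaishou_likes import post, cookie

# Request the first page of liked videos (an empty cursor means the first page)
# and print how many feeds came back plus the cursor for the next page.
data = json.loads(post(cookie, ''))
page = data['data']['visionProfileLikePhotoList']
print('feeds returned:', len(page['feeds']))
print('next pcursor:', page['pcursor'])

Note that the main loop above treats a response whose data object lacks visionProfileLikePhotoList as the end of the crawl, so an empty or expired cookie will most likely end the run quietly rather than fail loudly.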
 
 
 