# Download all videos liked by the logged-in Kuaishou account via the www.kuaishou.com GraphQL endpoint.
import json
import os
import re
import time

import requests

requestUrl = 'https://www.kuaishou.com/graphql'
folder_path = r'D:\kuaishou'
# Cookie: the string starting with ktrace-context, captured from the Kuaishou H5 (mobile web) site
cookie = ''
pcursor = '1'


def post(Cookie, pcursor):
    # GraphQL request for one page of the logged-in profile's liked videos
    data = {"operationName":"visionProfileLikePhotoList","variables":{"pcursor":pcursor,"page":"profile"},"query":"query visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\n visionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n photoUrl\n liked\n timestamp\n expTag\n animatedCoverUrl\n stereoType\n videoRatio\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"}
    failed = {'msg': 'failed...'}
    headers = {
        'Host': 'www.kuaishou.com',
        'Connection': 'keep-alive',
        'accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4621.0 Safari/537.36',
        'content-type': 'application/json',
        'Origin': 'https://www.kuaishou.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://www.kuaishou.com/profile',  # profile page URL of the account whose liked videos are downloaded
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cookie': Cookie,
    }
    r = requests.post(requestUrl, data=json.dumps(data), headers=headers)
    r.encoding = 'UTF-8'
    return r.text


def down(feeds, keywork):
    # Download every video in a page of feeds, skipping files that already exist
    for feed in feeds:
        filename = str(feed['photo']['duration']) + '.mp4'
        filepath = folder_path + '/' + keywork + '/'
        if not os.path.exists(filepath + filename):
            progressbar(feed['photo']['photoUrl'], filepath, filename)
            print(filename + ', download finished')
        else:
            print(filename + ', already exists, skipped')


def url_response(url, filepath, filename):
    # Unused alternative downloader; it depends on the third-party progressbar2 package,
    # whose module name clashes with the progressbar() function defined below.
    r = requests.get(url, stream=True)
    total_length = int(r.headers.get('content-length', 0))
    with open(filepath, 'wb') as f:
        widgets = ['Progress: ', progressbar.Percentage(), ' ',
                   progressbar.Bar(marker='#', left='[', right=']'), ' ',
                   progressbar.ETA(), ' ', progressbar.FileTransferSpeed()]
        pbar = progressbar.ProgressBar(widgets=widgets, maxval=total_length).start()
        downloaded = 0
        for chunk in r.iter_content(chunk_size=1):
            if chunk:
                f.write(chunk)
                f.flush()
                downloaded += len(chunk)
                pbar.update(downloaded)
        pbar.finish()


def progressbar(url, filepath, filename):
    # Stream the video to disk and print a simple text progress bar
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    start = time.time()
    response = requests.get(url, stream=True)
    size = 0
    chunk_size = 1024
    content_size = int(response.headers['content-length'])
    if response.status_code == 200:
        print('Start download, [File size]: {size:.2f} MB'.format(size=content_size / chunk_size / 1024))
        filename = filename.replace('\n', '')
        filepath = filepath + filename
        try:
            with open(filepath, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    print('\r' + '[Download progress]: %s%.2f%%' % ('>' * int(size * 50 / content_size), float(size / content_size * 100)), end=' ')
            end = time.time()
            print('Download completed! Time: %.2f s' % (end - start))
        except Exception:
            pass


if __name__ == "__main__":
    keyWork = 'zan'  # sub-folder name for the downloaded videos
    links = []
    index = ''
    # Leftover experiment: parsing screen coordinates such as '[44,1261][63,1299]'
    # a = ['[44,1261][63,1299]', '[44,2237][63,2276]', '[561,1104][577,1143]', '[561,2080][577,2119]']
    a = '[44,1261][63,1299]'
    pattern = r'\[(\d+),(\d+)\].*\[(\d+),(\d+)\]'
    # match = re.search(pattern, a)
    # print(match.group(3))  # prints 63
    # print(match.group(4))  # prints 1299
    # exit()
    while pcursor != False:  # effectively an endless loop; it only exits through the break statements below
        pcursor = index
        result = post(cookie, pcursor)
        data = json.loads(result)
        # Check whether the response still contains the liked-photo list
        if "visionProfileLikePhotoList" not in data['data']:
            print('success')
            break
        # Check whether there is a cursor for the next page
        if data['data']['visionProfileLikePhotoList']['pcursor'] == '':
            print('success')
            break
        # Remember the cursor for the next page
        index = data['data']['visionProfileLikePhotoList']['pcursor']
        feeds = data['data']['visionProfileLikePhotoList']['feeds']
        if len(feeds) == 0:
            print(data['data'])
            print('no videos')
            break
        print(feeds)
        links.append(feeds)
    for link in links:
        down(link, keyWork)
    print('while done')
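If the cookie is wrong or has expired, the script tends to fail on the very first request or silently print 'success', which is hard to diagnose. The snippet below is a minimal, optional sanity check, a sketch that reuses the post() function and the cookie variable from the script above; the field names simply mirror the ones read in the main loop. It fetches only the first page of liked videos and reports how many feeds and which cursor came back before you commit to the full download loop.

import json

# Quick cookie sanity check: request only the first page (empty cursor) and inspect the response.
# Assumes post() and cookie from the script above are already defined in the same module.
first_page = json.loads(post(cookie, ''))
payload = (first_page.get('data') or {}).get('visionProfileLikePhotoList')
if payload is None:
    print('No visionProfileLikePhotoList in the response - the cookie is probably invalid or expired')
else:
    print('Feeds on the first page:', len(payload['feeds']))
    print('Cursor for the next page:', payload['pcursor'])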