0
点赞
收藏
分享

微信扫一扫

Python3 使用多线程利用 Instaloader 从 Instagram 上下载数据


1.上次做了一个利用Instaloader下载数据的示例,我发现这方面的教程很少。由于单线程下载速度慢,我这里利用多线程来加速下载数据。注意worker的数量不要设置太多,不然会报以下错误,错误信息为:

JSON Query to explore/locations/245942146/: 429 Too Many Requests

2.如果想知道hashtags.txt里面是啥:它是一个UTF-8编码的文本文件,每行一个hashtag(带不带#号都可以,程序读取时会自动去掉#)。

还是一样,前提是租一台国外的虚拟主机,国内下载不了。以下为多线程下载的代码,文件名为demo.py

from threading import Thread
from time import time, sleep
from queue import Queue
from datetime import datetime
import instaloader



# for HASHTAG in hashtags:
# try:
# posts = L.get_hashtag_posts(HASHTAG)
# count=0
# print(HASHTAG)
# for post in posts:
# if(post.is_video):
# continue
# if(count==1000):
# break
# # print(post.date)
# L.download_post(post, target='#'+HASHTAG)
# count+=1
# except Exception as e:
# print(e)
# Module-level Instaloader instance; download_tweets() uses it both to query
# hashtag posts and to download each post.
L = instaloader.Instaloader()
def download_tweets(HASHTAG, max_posts=1000):
    """Download up to ``max_posts`` non-video posts for one Instagram hashtag.

    Posts are queried through the module-level ``L`` Instaloader instance and
    each one is downloaded with the target ``'#' + HASHTAG``.  Any exception
    raised while querying or downloading is printed and appended to
    ``error.txt`` rather than propagated.

    Args:
        HASHTAG: Hashtag name without the leading ``#``.
        max_posts: Maximum number of posts to download.  Defaults to 1000,
            the limit that was previously hard-coded.
    """
    try:
        posts = L.get_hashtag_posts(HASHTAG)
        count = 0
        print(HASHTAG)
        for post in posts:
            # Videos are skipped and do not count towards the limit.
            if post.is_video:
                continue
            if count >= max_posts:
                break
            L.download_post(post, target='#' + HASHTAG)
            count += 1
    except Exception as e:
        print(e)
        # Context manager closes the log even if the write fails; explicit
        # UTF-8 keeps non-ASCII error messages from failing on platforms with
        # a narrower default encoding.
        with open("error.txt", "a", encoding="utf-8") as fp:
            fp.write(str(e) + "\n")


class DownloadWorker(Thread):
    """Worker thread that processes hashtags taken from a shared queue.

    Each item pulled from the queue is passed to ``download_tweets``.  A
    ``None`` item is a stop signal: the worker acknowledges it and exits.

    Args:
        queue: Queue of hashtag strings (or ``None`` to stop the worker).
        sleep: Seconds to pause after each processed item.
    """

    def __init__(self, queue, sleep=1):
        super().__init__()
        self.queue = queue
        # Not used by this class; kept (misspelling included) so any external
        # code reading the attribute keeps working.
        self.numPicrures = 0
        self.sleep = sleep

    def run(self):
        """Consume queue items until a ``None`` stop signal is received."""
        while True:
            item = self.queue.get()
            try:
                if item is None:
                    break
                download_tweets(item)
            finally:
                # Every get() must be matched by task_done(), including the
                # stop signal and items whose processing raised; otherwise
                # queue.join() in the producer would block forever.
                self.queue.task_done()
            # Pause between items; `sleep` here is time.sleep, self.sleep is
            # the configured delay in seconds.
            sleep(self.sleep)

if __name__ == "__main__":
    # Read one hashtag per line, dropping any '#' characters and surrounding
    # whitespace.  Blank lines are skipped so that empty hashtags are never
    # queued for download.
    with open('hashtags.txt', encoding="utf-8") as f:
        cleaned = (line.replace('#', '').strip() for line in f)
        hashtags = [tag for tag in cleaned if tag]

    ts = time()
    task_queue = Queue()
    for _ in range(5):
        worker = DownloadWorker(task_queue, 2)
        # Setting daemon to True will let the main thread exit even though the
        # workers are blocking
        worker.daemon = True
        worker.start()

    for hashtag in hashtags:
        task_queue.put(hashtag)
    # Block until every queued hashtag has been marked done by a worker.
    task_queue.join()
    print('Took {}s'.format(time() - ts))

 

举报

相关推荐

0 条评论