开发者

Downloading files in twisted using queue

开发者 https://www.devze.com 2023-03-25 05:48 出处:网络
I want to download many files from a queue using Twisted and (for example) 20 client threads. Any example? from twisted.internet.defer import inlineCallbacks, DeferredQueue

I want to download many files from a queue using Twisted and (for example) 20 client threads. Any example?


from twisted.internet.defer import inlineCallbacks, DeferredQueue

@inlineCallbacks
def worker(queue):
    """Consume URLs from *queue* until a ``None`` sentinel arrives.

    Each URL taken from the queue is fetched with ``download`` and the
    result is handed to ``process``.  When the worker receives ``None``
    it puts ``None`` back on the queue, so the other workers sharing the
    queue receive it as well, and then exits.
    """
    while True:
        # Suspend until the queue hands over its next item.
        url = yield queue.get()

        if url is not None:
            # Suspend until the download completes, then handle the payload.
            data = yield download(url)
            process(data)
            continue

        # Shutdown sentinel: pass it along before stopping this worker.
        queue.put(None)
        break


# Number of workers pulling from the queue concurrently.
MAX = 20

# Shared queue the workers take their URLs from.
queue = DeferredQueue()

# Start all the workers up front; each one waits on the queue for work.
workers = [worker(queue) for _ in range(MAX)]


Here's a translation of https://github.com/caolan/async to Python.

from twisted.internet import defer
class Queue:
    """Feed queued tasks to a worker, limiting how many run at once.

    ``worker`` is called with each task's data.  It may return a Deferred
    (the task is finished when that Deferred fires) or a plain value.

    Optional hooks, all ``None`` by default and called with no arguments:

    * ``saturated`` -- called from ``push`` when the number of waiting
      tasks equals ``concurrency``.
    * ``empty`` -- called when the last waiting task is handed to the
      worker.
    * ``drain`` -- called when a task finishes and nothing is waiting or
      running any more.
    """

    def __init__(self, worker, concurrency):
        """Create a queue that runs at most *concurrency* tasks at a time."""
        self.worker = worker
        self.concurrency = concurrency
        # Per-instance state.  A class-level list would be shared by every
        # Queue, so tasks pushed to one queue would show up in all of them.
        self.workers = 0
        self.tasks = []
        self.saturated = None
        self.empty = None
        self.drain = None

    def push(self, data):
        """Queue *data* for the worker.

        Returns a Deferred that fires with the worker's result for this
        task, or with its failure if the worker failed.
        """
        deferred = defer.Deferred()
        self.tasks.append({'data': data, 'callback': deferred})
        if self.saturated and len(self.tasks) == self.concurrency:
            self.saturated()
        self.process()
        return deferred

    def task_finished(self, *args):
        """Release a worker slot, fire ``drain`` if idle, start the next task.

        Returns its first argument unchanged (``None`` when called with no
        arguments) so that, used as a Deferred callback, the worker's
        result or failure continues down the callback chain.
        """
        self.workers = self.workers - 1
        if self.drain and len(self.tasks) + self.workers == 0:
            self.drain()
        self.process()
        return args[0] if args else None

    def process(self):
        """Start the next waiting task if a worker slot is free."""
        if self.workers >= self.concurrency or not self.tasks:
            return
        task = self.tasks.pop(0)
        if self.empty and not self.tasks:
            self.empty()
        self.workers = self.workers + 1
        # maybeDeferred turns a synchronous exception or a plain return
        # value into a Deferred, so bookkeeping below always runs.
        d = defer.maybeDeferred(self.worker, task['data'])
        # addBoth: the slot must be released on failure as well as success,
        # otherwise one failed task would stall the queue.
        d.addBoth(self.task_finished)
        # Forward the outcome, success or failure, to the Deferred that
        # push() handed back to the caller.
        d.addCallbacks(task['callback'].callback, task['callback'].errback)

from twisted.web import client
from twisted.internet import reactor
def dl_worker(data):
    """Start downloading one file and return the Deferred tracking it.

    ``data`` is an indexable pair: ``data[0]`` is the URL to fetch and
    ``data[1]`` is the file name handed to ``client.downloadPage``.
    """
    url = data[0]
    fname = data[1]
    # A single-argument, parenthesised print emits the same text under both
    # the Python 2 print statement and the Python 3 print function.
    print("Download file: %s" % (fname,))
    # The Deferred must be returned: the caller uses it to learn when the
    # download has finished.
    return client.downloadPage(url, fname)

# Allow at most two downloads to be in flight at once.
q = Queue(dl_worker, 2)
# Stop the reactor once the queue reports that it has drained.
q.drain = reactor.stop
# Queue the same URL three times, each saved under its own file name.
for i in range(3):
    q.push(["http://download.thinkbroadband.com/5MB.zip", "file" + str(i)])
# Start the event loop; q.drain calls reactor.stop when the work is done.
reactor.run()

I hope this passes Glyph's QC :D Cheers!


Use the select module to do this with polling, or the threading module to do this with threads.

0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号