Path: csiph.com!v102.xanadu-bbs.net!xanadu-bbs.net!goblin2!goblin.stu.neva.ru!newsfeed.xs4all.nl!newsfeed4a.news.xs4all.nl!xs4all!newsgate.cistron.nl!newsgate.news.xs4all.nl!post.news.xs4all.nl!not-for-mail Return-Path: X-Original-To: python-list@python.org Delivered-To: python-list@mail.python.org X-Spam-Status: OK 0.007 X-Spam-Evidence: '*H*': 0.99; '*S*': 0.00; 'skip:[ 20': 0.04; 'fixes': 0.07; 'none:': 0.07; 'parameter': 0.09; 'try:': 0.09; 'cc:addr :python-list': 0.11; 'def': 0.12; 'concurrent': 0.16; 'set()': 0.16; 'subject: \n ': 0.16; 'subject:tasks': 0.16; 'timeout': 0.16; 'solution.': 0.20; 'import': 0.22; 'cc:addr:python.org': 0.22; 'cc:2**0': 0.24; 'cc:no real name:2**0': 0.24; "i've": 0.25; 'pending': 0.26; 'header:In-Reply-To:1': 0.27; 'raise': 0.29; 'see,': 0.30; 'message-id:@mail.gmail.com': 0.30; 'skip:_ 10': 0.34; 'skip:d 20': 0.34; 'except': 0.35; 'received:google.com': 0.35; 'false': 0.36; 'version:': 0.36; 'yield': 0.36; 'done': 0.36; 'thanks': 0.36; 'subject:?': 0.36; 'skip:o 20': 0.38; 'skip:& 10': 0.38; 'needed': 0.38; 'rather': 0.38; 'skip:& 20': 0.39; 'stock': 0.39; 'skip:a 30': 0.61; 'skip:n 10': 0.64; 'great': 0.65; 'to:addr:gmail.com': 0.65; 'here': 0.66; 'respect': 0.70 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:in-reply-to:references:from:date:message-id:subject:to :cc:content-type; bh=CEYkdgHshzFi5318iEqY+nc0u3lO+LLS7SlAGiXokA8=; b=vIvs5mguI2XfdwTLOnXlugjyIv/DuNJEXOwoOJ4P/o989TWakBOGFlsAnmwatRtETP TNPRCE6yBoQnOPkNh4CV3R5Hlq1bmMoJ5svpPHBlaXG++8w75jBMmwYtEtPblNAF+28C IW6nWfNsAhBEHkEuh/zvriqqKkwTYNLjjVHzGcUcjqM06aON24THwUiP4eZdJSvSSary NvWE33KhFAk7nqpa+QBXFy/k2+k+YAntksor5Ps/j132fQYPh0ngU8GIJrBtX+Pj3EL+ oN5HvvR8jbM487vQkyZx/kgfeF/UM9nO+XPWVwqwiOH7QPoyeXHJlzFtIwsDS/roF4td ObGA== X-Received: by 10.224.168.13 with SMTP id s13mr15361298qay.80.1405858604895; Sun, 20 Jul 2014 05:16:44 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: References: From: Valery Khamenya Date: Sun, 20 Jul 2014 14:16:24 +0200 Subject: Re: Anything better than asyncio.as_completed() and asyncio.wait() to manage execution of large amount of tasks? To: Maxime Steisel Content-Type: multipart/alternative; boundary=089e01493b2abfd4bd04fe9ef667 Cc: python-list@python.org X-BeenThere: python-list@python.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: General discussion list for the Python programming language List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Newsgroups: comp.lang.python Message-ID: Lines: 158 NNTP-Posting-Host: 2001:888:2000:d::a6 X-Trace: 1405858609 news.xs4all.nl 2861 [2001:888:2000:d::a6]:35706 X-Complaints-To: abuse@xs4all.nl Xref: csiph.com comp.lang.python:74879 --089e01493b2abfd4bd04fe9ef667 Content-Type: text/plain; charset=UTF-8 Hi Maxime, many thanks for your great solution. It would be so great to have it in stock asyncio and use it out-of-the-box... I've made 4 fixes to it that are rather of "cosmetic" nature. Here is the final version: import asyncio from concurrent import futures def as_completed_with_max_workers(tasks, *, loop=None, max_workers=5, timeout=None): loop = loop if loop is not None else asyncio.get_event_loop() workers = [] pending = set() done = asyncio.Queue(maxsize=max_workers, loop=loop) # Valery: respect the "loop" parameter exhausted = False timeout_handle = None # Valery: added to see, if we indeed have to call timeout_handle.cancel() @asyncio.coroutine def _worker(): nonlocal exhausted while not exhausted: try: t = next(tasks) pending.add(t) yield from t yield from done.put(t) pending.remove(t) except StopIteration: exhausted = True def _on_timeout(): for f in workers: f.cancel() workers.clear() # Wake up _wait_for_one() done.put_nowait(None) @asyncio.coroutine def _wait_for_one(): f = yield from done.get() if f is None: raise futures.TimeoutError() return f.result() workers = [asyncio.async(_worker(), loop=loop) for _ in range(max_workers)] # Valery: respect the "loop" parameter if workers and timeout is not None: timeout_handle = loop.call_later(timeout, _on_timeout) while not exhausted or pending or not done.empty(): yield _wait_for_one() if timeout_handle: # Valery: call timeout_handle.cancel() only if it is needed timeout_handle.cancel() best regards -- Valery A.Khamenya --089e01493b2abfd4bd04fe9ef667 Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: quoted-printable
Hi Maxime,

many thanks for your great s= olution. It would be so great to have it in stock asyncio and use it out-of= -the-box...
I've made 4 fixes to it that are rather of "= cosmetic" nature. Here is the final version:

import asyncio
from concur= rent import futures


def as_completed_with_max_workers(task= s, *, loop=3DNone, max_workers=3D5, timeout=3DNone):
=C2=A0 =C2=A0 loop =3D loop if loop is not None else asyncio.get_= event_loop()
=C2=A0 =C2=A0 workers =3D []
=C2=A0 =C2=A0 pending =3D set()
=C2=A0 =C2=A0 done =3D asyncio.Queue(maxsize=3Dmax_workers, loop=3Dloop) = # Valery: respect the "loop" parameter
=C2=A0 =C2=A0 exhausted =3D False
=C2=A0 =C2=A0 timeout_handle =3D None # Valery: added to = see, if we indeed have to call timeout_handle.cancel()

=C2=A0 =C2=A0 @asyncio.coroutine
=C2=A0 =C2= =A0 def _worker():
=C2=A0 =C2=A0 =C2=A0 =C2= =A0 nonlocal exhausted
=C2=A0 =C2=A0 =C2=A0= =C2=A0 while not exhausted:
=C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 try:
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 t =3D next(tasks)
=C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 pending.add(t)
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 yield fr= om t
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 yield from done.put(t)
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 pending.remove(t)
=C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 except StopIteration:
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 exhausted =3D T= rue

=C2=A0 =C2=A0 def _on_timeout():
=C2=A0 =C2=A0 =C2=A0 =C2=A0 for f in workers:
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 f.cancel()
=C2=A0 =C2=A0 =C2=A0 =C2=A0 workers.clear()
=C2=A0 =C2=A0 =C2=A0 =C2=A0 # Wake up _wait_for_one()
=C2=A0 =C2=A0 =C2=A0 =C2=A0 done.put_nowait(None)

=C2=A0 =C2=A0 @async= io.coroutine
=C2=A0 =C2=A0 def _wait_for_on= e():
=C2=A0 =C2=A0 =C2=A0 =C2=A0 f =3D yield from don= e.get()
=C2=A0 =C2=A0 =C2=A0 =C2=A0 if f is= None:
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 raise futures.TimeoutError()
=C2=A0 = =C2=A0 =C2=A0 =C2=A0 return f.result()

=C2=A0 =C2= =A0 workers =3D [asyncio.async(_worker(), loop=3Dloop) for _ in range(max_w= orkers)] # Valery: respect the "loop" parameter

=C2=A0 =C2=A0 if workers and timeout is no= t None:
=C2=A0 =C2=A0 =C2=A0 =C2=A0 timeout= _handle =3D loop.call_later(timeout, _on_timeout)

=C2=A0 =C2=A0 while not exhausted or pending or not done.empty():
=C2=A0 =C2=A0 =C2=A0 =C2=A0 yield _wait_for_one()

=C2=A0 = =C2=A0 if timeout_handle: # Valery: call timeout_handle.cancel() only if it= is needed
=C2=A0 =C2=A0 =C2=A0 =C2=A0 timeout_handle.cance= l()


=
best regards
--
Valery A.Khamenya


--089e01493b2abfd4bd04fe9ef667--