From fa0c5866d4ba14c84ef32c3cbf6e85fa3d962fac Mon Sep 17 00:00:00 2001 From: Anders Blomdell <anders.blomdell@control.lth.se> Date: Fri, 10 Mar 2023 22:52:15 +0100 Subject: [PATCH] Improve urlgrabber_compat load balancing --- src/mio/urlgrabber_compat.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/mio/urlgrabber_compat.py b/src/mio/urlgrabber_compat.py index c5654e3..e9dfe07 100755 --- a/src/mio/urlgrabber_compat.py +++ b/src/mio/urlgrabber_compat.py @@ -1,14 +1,12 @@ +#!/usr/bin/python3 # # Hackish compat library when urlgrabber is missing (MacOS) # -try: - # Python 3 - from urllib.request import urlopen -except ImportError: - # Python 2 fallback - from urllib2 import urlopen +from urllib.request import urlopen +import urllib.parse import calendar import time +import random INFO_FILETIME = object() @@ -29,6 +27,7 @@ class CurlWrapper: def __getattr__(self, what): return getattr(self.url, what) + pass class Compat: @@ -40,15 +39,23 @@ class Compat: class MirrorGroup: def __init__(self, grabber, urls): - self.urls = urls + self.urls = list(urls) + random.shuffle(self.urls) + self.mru = dict([ (u,0) for u in self.urls ]) def urlopen(self, path): errors = [] - for u in self.urls: + for u in reversed(sorted(self.urls, key=lambda u: self.mru[u])): url = "%s/%s" % (u, path) try: - return CurlWrapper(urlopen(url)) + # print(u, self.mru[u], path) + result = CurlWrapper(urlopen(url)) + self.mru[u] += 1 + return result except IOError as e: + # print(e.__class__, e) + # TODO, penalize unreachable hosts? + self.mru[u] /= 2 errors.append((url,e)) pass raise IOError("Failed to get '%s' (%s)" % (path, errors)) -- GitLab