Commit f7d87f35 authored by Russ Cox's avatar Russ Cox

codereview: speed upload

Cuts time to upload trivial 160-file CL by 5x,
from 250 seconds to 50 seconds.

R=r
CC=golang-dev
https://golang.org/cl/1991047
parent d3a2dcf5
...@@ -237,13 +237,11 @@ class CL(object): ...@@ -237,13 +237,11 @@ class CL(object):
("subject", self.Subject()), ("subject", self.Subject()),
] ]
# NOTE(rsc): This duplicates too much of RealMain,
# but RealMain doesn't have the most reusable interface.
if self.name != "new": if self.name != "new":
form_fields.append(("issue", self.name)) form_fields.append(("issue", self.name))
vcs = None vcs = None
if self.files: if self.files:
vcs = MercurialVCS(upload_options, repo.root) vcs = MercurialVCS(upload_options, ui, repo)
data = vcs.GenerateDiff(self.files) data = vcs.GenerateDiff(self.files)
files = vcs.GetBaseFiles(data) files = vcs.GetBaseFiles(data)
if len(data) > MAX_UPLOAD_SIZE: if len(data) > MAX_UPLOAD_SIZE:
...@@ -722,7 +720,7 @@ def ReplacementForCmdutilMatch(repo, pats=[], opts={}, globbed=False, default='r ...@@ -722,7 +720,7 @@ def ReplacementForCmdutilMatch(repo, pats=[], opts={}, globbed=False, default='r
cl, err = LoadCL(repo.ui, repo, clname, web=False) cl, err = LoadCL(repo.ui, repo, clname, web=False)
if err != '': if err != '':
raise util.Abort("loading CL " + clname + ": " + err) raise util.Abort("loading CL " + clname + ": " + err)
if cl.files == None: if not cl.files:
raise util.Abort("no files in CL " + clname) raise util.Abort("no files in CL " + clname)
files = Add(files, cl.files) files = Add(files, cl.files)
pats = Sub(pats, taken) + ['path:'+f for f in files] pats = Sub(pats, taken) + ['path:'+f for f in files]
...@@ -1556,7 +1554,6 @@ def MySend(request_path, payload=None, ...@@ -1556,7 +1554,6 @@ def MySend(request_path, payload=None,
time.sleep(2) time.sleep(2)
return MySend1(request_path, payload, content_type, timeout, force_auth, **kwargs) return MySend1(request_path, payload, content_type, timeout, force_auth, **kwargs)
# Like upload.py Send but only authenticates when the # Like upload.py Send but only authenticates when the
# redirect is to www.google.com/accounts. This keeps # redirect is to www.google.com/accounts. This keeps
# unnecessary redirects from happening during testing. # unnecessary redirects from happening during testing.
...@@ -2427,6 +2424,39 @@ class VersionControlSystem(object): ...@@ -2427,6 +2424,39 @@ class VersionControlSystem(object):
StatusUpdate(" --> %s" % response_body) StatusUpdate(" --> %s" % response_body)
sys.exit(1) sys.exit(1)
# Don't want to spawn too many threads, nor do we want to
# hit Rietveld too hard, or it will start serving 500 errors.
# When 8 works, it's no better than 4, and sometimes 8 is
# too many for Rietveld to handle.
MAX_PARALLEL_UPLOADS = 4
sema = threading.BoundedSemaphore(MAX_PARALLEL_UPLOADS)
upload_threads = []
finished_upload_threads = []
class UploadFileThread(threading.Thread):
def __init__(self, args):
threading.Thread.__init__(self)
self.args = args
def run(self):
UploadFile(*self.args)
finished_upload_threads.append(self)
sema.release()
def StartUploadFile(*args):
sema.acquire()
while len(finished_upload_threads) > 0:
t = finished_upload_threads.pop()
upload_threads.remove(t)
t.join()
t = UploadFileThread(args)
upload_threads.append(t)
t.start()
def WaitForUploads():
for t in upload_threads:
t.join()
patches = dict() patches = dict()
[patches.setdefault(v, k) for k, v in patch_list] [patches.setdefault(v, k) for k, v in patch_list]
for filename in patches.keys(): for filename in patches.keys():
...@@ -2437,9 +2467,10 @@ class VersionControlSystem(object): ...@@ -2437,9 +2467,10 @@ class VersionControlSystem(object):
file_id_str = file_id_str[file_id_str.rfind("_") + 1:] file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
file_id = int(file_id_str) file_id = int(file_id_str)
if base_content != None: if base_content != None:
UploadFile(filename, file_id, base_content, is_binary, status, True) StartUploadFile(filename, file_id, base_content, is_binary, status, True)
if new_content != None: if new_content != None:
UploadFile(filename, file_id, new_content, is_binary, status, False) StartUploadFile(filename, file_id, new_content, is_binary, status, False)
WaitForUploads()
def IsImage(self, filename): def IsImage(self, filename):
"""Returns true if the filename has an image extension.""" """Returns true if the filename has an image extension."""
...@@ -2458,14 +2489,25 @@ class VersionControlSystem(object): ...@@ -2458,14 +2489,25 @@ class VersionControlSystem(object):
return False return False
return not mimetype.startswith("text/") return not mimetype.startswith("text/")
class FakeMercurialUI(object):
def __init__(self):
self.quiet = True
self.output = ''
def write(self, s):
self.output += s
use_hg_shell = False # set to True to shell out to hg always; slower
class MercurialVCS(VersionControlSystem): class MercurialVCS(VersionControlSystem):
"""Implementation of the VersionControlSystem interface for Mercurial.""" """Implementation of the VersionControlSystem interface for Mercurial."""
def __init__(self, options, repo_dir): def __init__(self, options, ui, repo):
super(MercurialVCS, self).__init__(options) super(MercurialVCS, self).__init__(options)
self.ui = ui
self.repo = repo
# Absolute path to repository (we can be in a subdir) # Absolute path to repository (we can be in a subdir)
self.repo_dir = os.path.normpath(repo_dir) self.repo_dir = os.path.normpath(repo.root)
# Compute the subdir # Compute the subdir
cwd = os.path.normpath(os.getcwd()) cwd = os.path.normpath(os.getcwd())
assert cwd.startswith(self.repo_dir) assert cwd.startswith(self.repo_dir)
...@@ -2532,7 +2574,14 @@ class MercurialVCS(VersionControlSystem): ...@@ -2532,7 +2574,14 @@ class MercurialVCS(VersionControlSystem):
is_binary = False is_binary = False
oldrelpath = relpath = self._GetRelPath(filename) oldrelpath = relpath = self._GetRelPath(filename)
# "hg status -C" returns two lines for moved/copied files, one otherwise # "hg status -C" returns two lines for moved/copied files, one otherwise
if use_hg_shell:
out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath]) out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
else:
fui = FakeMercurialUI()
ret = commands.status(fui, self.repo, *[relpath], **{'rev': [self.base_rev], 'copies': True})
if ret:
raise util.Abort(ret)
out = fui.output
out = out.splitlines() out = out.splitlines()
# HACK: strip error message about missing file/directory if it isn't in # HACK: strip error message about missing file/directory if it isn't in
# the working copy # the working copy
...@@ -2547,13 +2596,15 @@ class MercurialVCS(VersionControlSystem): ...@@ -2547,13 +2596,15 @@ class MercurialVCS(VersionControlSystem):
else: else:
base_rev = self.base_rev base_rev = self.base_rev
if status != "A": if status != "A":
base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], if use_hg_shell:
silent_ok=True) base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], silent_ok=True)
else:
base_content = str(self.repo[base_rev][filename].data())
is_binary = "\0" in base_content # Mercurial's heuristic is_binary = "\0" in base_content # Mercurial's heuristic
if status != "R": if status != "R":
new_content = open(relpath, "rb").read() new_content = open(relpath, "rb").read()
is_binary = is_binary or "\0" in new_content is_binary = is_binary or "\0" in new_content
if is_binary and base_content: if is_binary and base_content and use_hg_shell:
# Fetch again without converting newlines # Fetch again without converting newlines
base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
silent_ok=True, universal_newlines=False) silent_ok=True, universal_newlines=False)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment