1
0
mirror of https://git.yoctoproject.org/poky synced 2026-06-02 01:19:52 +00:00

bitbake: bitbake: Rework hash equivalence

Reworks the hash equivalence server to address performance issues that
were encountered with the REST mechanism used previously, particularly
during the heavy request load encountered during signature generation.
Notable changes are:

1) The server protocol is no longer HTTP based. Instead, it uses a
   simpler JSON over a streaming protocol link. This protocol has much
   lower overhead than HTTP since it eliminates the HTTP headers.
2) The hash equivalence server can either bind to a TCP port, or a Unix
   domain socket. Unix domain sockets are more efficient for local
   communication, and so are preferred if the user enables hash
   equivalence only for the local build. The arguments to the
   'bitbake-hashserve' command have been updated accordingly.
3) The value to which BB_HASHSERVE should be set to enable a local hash
   equivalence server is changed to "auto" instead of "localhost:0". The
   latter didn't make sense when the local server was using a Unix
   domain socket.
4) Clients are expected to keep a persistent connection to the server
   instead of creating a new connection each time a request is made for
   optimal performance.
5) Most of the client logic has been moved to the hashserve module in
   bitbake. This makes it easier to share the client code.
6) A new bitbake command has been added called 'bitbake-hashclient'.
   This command can be used to query a hash equivalence server, including
   fetching the statistics and running a performance stress test.
7) The table indexes in the SQLite database have been updated to
   optimize hash lookups. This change is backward compatible, as the
   database will delete the old indexes first if they exist.
8) The server has been reworked to use python async to maximize
   performance with persistently connected clients. This requires Python
   3.5 or later.

(Bitbake rev: 2124eec3a5830afe8e07ffb6f2a0df6a417ac973)

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Joshua Watt
2019-09-17 08:37:11 -05:00
committed by Richard Purdie
parent 34923e4f77
commit 20f032338f
11 changed files with 949 additions and 343 deletions
+58 -195
View File
@@ -3,203 +3,21 @@
# SPDX-License-Identifier: GPL-2.0-only
#
from http.server import BaseHTTPRequestHandler, HTTPServer
import contextlib
import urllib.parse
from contextlib import closing
import re
import sqlite3
import json
import traceback
import logging
import socketserver
import queue
import threading
import signal
import socket
import struct
from datetime import datetime
logger = logging.getLogger('hashserv')
UNIX_PREFIX = "unix://"
class HashEquivalenceServer(BaseHTTPRequestHandler):
def log_message(self, f, *args):
logger.debug(f, *args)
ADDR_TYPE_UNIX = 0
ADDR_TYPE_TCP = 1
def opendb(self):
self.db = sqlite3.connect(self.dbname)
self.db.row_factory = sqlite3.Row
self.db.execute("PRAGMA synchronous = OFF;")
self.db.execute("PRAGMA journal_mode = MEMORY;")
def do_GET(self):
try:
if not self.db:
self.opendb()
p = urllib.parse.urlparse(self.path)
if p.path != self.prefix + '/v1/equivalent':
self.send_error(404)
return
query = urllib.parse.parse_qs(p.query, strict_parsing=True)
method = query['method'][0]
taskhash = query['taskhash'][0]
d = None
with contextlib.closing(self.db.cursor()) as cursor:
cursor.execute('SELECT taskhash, method, unihash FROM tasks_v2 WHERE method=:method AND taskhash=:taskhash ORDER BY created ASC LIMIT 1',
{'method': method, 'taskhash': taskhash})
row = cursor.fetchone()
if row is not None:
logger.debug('Found equivalent task %s', row['taskhash'])
d = {k: row[k] for k in ('taskhash', 'method', 'unihash')}
self.send_response(200)
self.send_header('Content-Type', 'application/json; charset=utf-8')
self.end_headers()
self.wfile.write(json.dumps(d).encode('utf-8'))
except:
logger.exception('Error in GET')
self.send_error(400, explain=traceback.format_exc())
return
def do_POST(self):
try:
if not self.db:
self.opendb()
p = urllib.parse.urlparse(self.path)
if p.path != self.prefix + '/v1/equivalent':
self.send_error(404)
return
length = int(self.headers['content-length'])
data = json.loads(self.rfile.read(length).decode('utf-8'))
with contextlib.closing(self.db.cursor()) as cursor:
cursor.execute('''
-- Find tasks with a matching outhash (that is, tasks that
-- are equivalent)
SELECT taskhash, method, unihash FROM tasks_v2 WHERE method=:method AND outhash=:outhash
-- If there is an exact match on the taskhash, return it.
-- Otherwise return the oldest matching outhash of any
-- taskhash
ORDER BY CASE WHEN taskhash=:taskhash THEN 1 ELSE 2 END,
created ASC
-- Only return one row
LIMIT 1
''', {k: data[k] for k in ('method', 'outhash', 'taskhash')})
row = cursor.fetchone()
# If no matching outhash was found, or one *was* found but it
# wasn't an exact match on the taskhash, a new entry for this
# taskhash should be added
if row is None or row['taskhash'] != data['taskhash']:
# If a row matching the outhash was found, the unihash for
# the new taskhash should be the same as that one.
# Otherwise the caller provided unihash is used.
unihash = data['unihash']
if row is not None:
unihash = row['unihash']
insert_data = {
'method': data['method'],
'outhash': data['outhash'],
'taskhash': data['taskhash'],
'unihash': unihash,
'created': datetime.now()
}
for k in ('owner', 'PN', 'PV', 'PR', 'task', 'outhash_siginfo'):
if k in data:
insert_data[k] = data[k]
cursor.execute('''INSERT INTO tasks_v2 (%s) VALUES (%s)''' % (
', '.join(sorted(insert_data.keys())),
', '.join(':' + k for k in sorted(insert_data.keys()))),
insert_data)
logger.info('Adding taskhash %s with unihash %s', data['taskhash'], unihash)
self.db.commit()
d = {'taskhash': data['taskhash'], 'method': data['method'], 'unihash': unihash}
else:
d = {k: row[k] for k in ('taskhash', 'method', 'unihash')}
self.send_response(200)
self.send_header('Content-Type', 'application/json; charset=utf-8')
self.end_headers()
self.wfile.write(json.dumps(d).encode('utf-8'))
except:
logger.exception('Error in POST')
self.send_error(400, explain=traceback.format_exc())
return
class ThreadedHTTPServer(HTTPServer):
quit = False
def serve_forever(self):
self.requestqueue = queue.Queue()
self.handlerthread = threading.Thread(target=self.process_request_thread)
self.handlerthread.daemon = False
self.handlerthread.start()
signal.signal(signal.SIGTERM, self.sigterm_exception)
super().serve_forever()
os._exit(0)
def sigterm_exception(self, signum, stackframe):
self.server_close()
os._exit(0)
def server_bind(self):
HTTPServer.server_bind(self)
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, struct.pack('ii', 1, 0))
def process_request_thread(self):
while not self.quit:
try:
(request, client_address) = self.requestqueue.get(True)
except queue.Empty:
continue
if request is None:
continue
try:
self.finish_request(request, client_address)
except Exception:
self.handle_error(request, client_address)
finally:
self.shutdown_request(request)
os._exit(0)
def process_request(self, request, client_address):
self.requestqueue.put((request, client_address))
def server_close(self):
super().server_close()
self.quit = True
self.requestqueue.put((None, None))
self.handlerthread.join()
def create_server(addr, dbname, prefix=''):
class Handler(HashEquivalenceServer):
pass
db = sqlite3.connect(dbname)
def setup_database(database, sync=True):
db = sqlite3.connect(database)
db.row_factory = sqlite3.Row
Handler.prefix = prefix
Handler.db = None
Handler.dbname = dbname
with contextlib.closing(db.cursor()) as cursor:
with closing(db.cursor()) as cursor:
cursor.execute('''
CREATE TABLE IF NOT EXISTS tasks_v2 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -220,11 +38,56 @@ def create_server(addr, dbname, prefix=''):
UNIQUE(method, outhash, taskhash)
)
''')
cursor.execute('CREATE INDEX IF NOT EXISTS taskhash_lookup ON tasks_v2 (method, taskhash)')
cursor.execute('CREATE INDEX IF NOT EXISTS outhash_lookup ON tasks_v2 (method, outhash)')
cursor.execute('PRAGMA journal_mode = WAL')
cursor.execute('PRAGMA synchronous = %s' % ('NORMAL' if sync else 'OFF'))
ret = ThreadedHTTPServer(addr, Handler)
# Drop old indexes
cursor.execute('DROP INDEX IF EXISTS taskhash_lookup')
cursor.execute('DROP INDEX IF EXISTS outhash_lookup')
logger.info('Starting server on %s\n', ret.server_port)
# Create new indexes
cursor.execute('CREATE INDEX IF NOT EXISTS taskhash_lookup_v2 ON tasks_v2 (method, taskhash, created)')
cursor.execute('CREATE INDEX IF NOT EXISTS outhash_lookup_v2 ON tasks_v2 (method, outhash)')
return ret
return db
def parse_address(addr):
if addr.startswith(UNIX_PREFIX):
return (ADDR_TYPE_UNIX, (addr[len(UNIX_PREFIX):],))
else:
m = re.match(r'\[(?P<host>[^\]]*)\]:(?P<port>\d+)$', addr)
if m is not None:
host = m.group('host')
port = m.group('port')
else:
host, port = addr.split(':')
return (ADDR_TYPE_TCP, (host, int(port)))
def create_server(addr, dbname, *, sync=True):
from . import server
db = setup_database(dbname, sync=sync)
s = server.Server(db)
(typ, a) = parse_address(addr)
if typ == ADDR_TYPE_UNIX:
s.start_unix_server(*a)
else:
s.start_tcp_server(*a)
return s
def create_client(addr):
from . import client
c = client.Client()
(typ, a) = parse_address(addr)
if typ == ADDR_TYPE_UNIX:
c.connect_unix(*a)
else:
c.connect_tcp(*a)
return c