1
0
mirror of https://git.yoctoproject.org/poky synced 2026-05-30 12:29:55 +00:00

bitbake: bitbake: Rework hash equivalence

Reworks the hash equivalence server to address performance issues that
were encountered with the REST mechanism used previously, particularly
during the heavy request load encountered during signature generation.
Notable changes are:

1) The server protocol is no longer HTTP based. Instead, it uses a
   simpler JSON over a streaming protocol link. This protocol has much
   lower overhead than HTTP since it eliminates the HTTP headers.
2) The hash equivalence server can either bind to a TCP port, or a Unix
   domain socket. Unix domain sockets are more efficient for local
   communication, and so are preferred if the user enables hash
   equivalence only for the local build. The arguments to the
   'bitbake-hashserve' command have been updated accordingly.
3) The value to which BB_HASHSERVE should be set to enable a local hash
   equivalence server is changed to "auto" instead of "localhost:0". The
   latter didn't make sense when the local server was using a Unix
   domain socket.
4) Clients are expected to keep a persistent connection to the server
   instead of creating a new connection each time a request is made for
   optimal performance.
5) Most of the client logic has been moved to the hashserve module in
   bitbake. This makes it easier to share the client code.
6) A new bitbake command has been added called 'bitbake-hashclient'.
   This command can be used to query a hash equivalence server, including
   fetching the statistics and running a performance stress test.
7) The table indexes in the SQLite database have been updated to
   optimize hash lookups. This change is backward compatible, as the
   database will delete the old indexes first if they exist.
8) The server has been reworked to use python async to maximize
   performance with persistently connected clients. This requires Python
   3.5 or later.

(Bitbake rev: 2124eec3a5830afe8e07ffb6f2a0df6a417ac973)

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Joshua Watt
2019-09-17 08:37:11 -05:00
committed by Richard Purdie
parent 34923e4f77
commit 20f032338f
11 changed files with 949 additions and 343 deletions
+170
View File
@@ -0,0 +1,170 @@
#! /usr/bin/env python3
#
# Copyright (C) 2019 Garmin Ltd.
#
# SPDX-License-Identifier: GPL-2.0-only
#
import argparse
import hashlib
import logging
import os
import pprint
import sys
import threading
import time
try:
import tqdm
ProgressBar = tqdm.tqdm
except ImportError:
class ProgressBar(object):
def __init__(self, *args, **kwargs):
pass
def __enter__(self):
return self
def __exit__(self, *args, **kwargs):
pass
def update(self):
pass
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), 'lib'))
import hashserv
DEFAULT_ADDRESS = 'unix://./hashserve.sock'
METHOD = 'stress.test.method'
def main():
def handle_stats(args, client):
if args.reset:
s = client.reset_stats()
else:
s = client.get_stats()
pprint.pprint(s)
return 0
def handle_stress(args, client):
def thread_main(pbar, lock):
nonlocal found_hashes
nonlocal missed_hashes
nonlocal max_time
client = hashserv.create_client(args.address)
for i in range(args.requests):
taskhash = hashlib.sha256()
taskhash.update(args.taskhash_seed.encode('utf-8'))
taskhash.update(str(i).encode('utf-8'))
start_time = time.perf_counter()
l = client.get_unihash(METHOD, taskhash.hexdigest())
elapsed = time.perf_counter() - start_time
with lock:
if l:
found_hashes += 1
else:
missed_hashes += 1
max_time = max(elapsed, max_time)
pbar.update()
max_time = 0
found_hashes = 0
missed_hashes = 0
lock = threading.Lock()
total_requests = args.clients * args.requests
start_time = time.perf_counter()
with ProgressBar(total=total_requests) as pbar:
threads = [threading.Thread(target=thread_main, args=(pbar, lock), daemon=False) for _ in range(args.clients)]
for t in threads:
t.start()
for t in threads:
t.join()
elapsed = time.perf_counter() - start_time
with lock:
print("%d requests in %.1fs. %.1f requests per second" % (total_requests, elapsed, total_requests / elapsed))
print("Average request time %.8fs" % (elapsed / total_requests))
print("Max request time was %.8fs" % max_time)
print("Found %d hashes, missed %d" % (found_hashes, missed_hashes))
if args.report:
with ProgressBar(total=args.requests) as pbar:
for i in range(args.requests):
taskhash = hashlib.sha256()
taskhash.update(args.taskhash_seed.encode('utf-8'))
taskhash.update(str(i).encode('utf-8'))
outhash = hashlib.sha256()
outhash.update(args.outhash_seed.encode('utf-8'))
outhash.update(str(i).encode('utf-8'))
client.report_unihash(taskhash.hexdigest(), METHOD, outhash.hexdigest(), taskhash.hexdigest())
with lock:
pbar.update()
parser = argparse.ArgumentParser(description='Hash Equivalence Client')
parser.add_argument('--address', default=DEFAULT_ADDRESS, help='Server address (default "%(default)s")')
parser.add_argument('--log', default='WARNING', help='Set logging level')
subparsers = parser.add_subparsers()
stats_parser = subparsers.add_parser('stats', help='Show server stats')
stats_parser.add_argument('--reset', action='store_true',
help='Reset server stats')
stats_parser.set_defaults(func=handle_stats)
stress_parser = subparsers.add_parser('stress', help='Run stress test')
stress_parser.add_argument('--clients', type=int, default=10,
help='Number of simultaneous clients')
stress_parser.add_argument('--requests', type=int, default=1000,
help='Number of requests each client will perform')
stress_parser.add_argument('--report', action='store_true',
help='Report new hashes')
stress_parser.add_argument('--taskhash-seed', default='',
help='Include string in taskhash')
stress_parser.add_argument('--outhash-seed', default='',
help='Include string in outhash')
stress_parser.set_defaults(func=handle_stress)
args = parser.parse_args()
logger = logging.getLogger('hashserv')
level = getattr(logging, args.log.upper(), None)
if not isinstance(level, int):
raise ValueError('Invalid log level: %s' % args.log)
logger.setLevel(level)
console = logging.StreamHandler()
console.setLevel(level)
logger.addHandler(console)
func = getattr(args, 'func', None)
if func:
client = hashserv.create_client(args.address)
# Try to establish a connection to the server now to detect failures
# early
client.connect()
return func(args, client)
return 0
if __name__ == '__main__':
try:
ret = main()
except Exception:
ret = 1
import traceback
traceback.print_exc()
sys.exit(ret)
+15 -9
View File
@@ -11,20 +11,26 @@ import logging
import argparse
import sqlite3
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)),'lib'))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), 'lib'))
import hashserv
VERSION = "1.0.0"
DEFAULT_HOST = ''
DEFAULT_PORT = 8686
DEFAULT_BIND = 'unix://./hashserve.sock'
def main():
parser = argparse.ArgumentParser(description='HTTP Equivalence Reference Server. Version=%s' % VERSION)
parser.add_argument('--address', default=DEFAULT_HOST, help='Bind address (default "%(default)s")')
parser.add_argument('--port', type=int, default=DEFAULT_PORT, help='Bind port (default %(default)d)')
parser.add_argument('--prefix', default='', help='HTTP path prefix (default "%(default)s")')
parser = argparse.ArgumentParser(description='Hash Equivalence Reference Server. Version=%s' % VERSION,
epilog='''The bind address is the path to a unix domain socket if it is
prefixed with "unix://". Otherwise, it is an IP address
and port in form ADDRESS:PORT. To bind to all addresses, leave
the ADDRESS empty, e.g. "--bind :8686". To bind to a specific
IPv6 address, enclose the address in "[]", e.g.
"--bind [::1]:8686"'''
)
parser.add_argument('--bind', default=DEFAULT_BIND, help='Bind address (default "%(default)s")')
parser.add_argument('--database', default='./hashserv.db', help='Database file (default "%(default)s")')
parser.add_argument('--log', default='WARNING', help='Set logging level')
@@ -41,10 +47,11 @@ def main():
console.setLevel(level)
logger.addHandler(console)
server = hashserv.create_server((args.address, args.port), args.database, args.prefix)
server = hashserv.create_server(args.bind, args.database)
server.serve_forever()
return 0
if __name__ == '__main__':
try:
ret = main()
@@ -53,4 +60,3 @@ if __name__ == '__main__':
import traceback
traceback.print_exc()
sys.exit(ret)
+1 -1
View File
@@ -418,7 +418,7 @@ class BitbakeWorker(object):
bb.msg.loggerDefaultDomains = self.workerdata["logdefaultdomain"]
for mc in self.databuilder.mcdata:
self.databuilder.mcdata[mc].setVar("PRSERV_HOST", self.workerdata["prhost"])
self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.workerdata["hashservport"])
self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.workerdata["hashservaddr"])
def handle_newtaskhashes(self, data):
self.workerdata["newhashes"] = pickle.loads(data)