#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
 (c) 2017 - Copyright Red Hat Inc

 Authors:
   Pierre-Yves Chibon


This server listens to the messages sent to redis by the post-commit hook,
finds the list of files modified by the commits listed in each message and
syncs them into the database.

Using this mechanism, we no longer need to block the git push until all the
files have been uploaded (which when migrating some large projects over to
pagure can be really time-consuming).

"""

import json
import logging
import os
import traceback
import inspect

import trollius
import trollius_redis

from sqlalchemy.exc import SQLAlchemyError

_log = logging.getLogger(__name__)

# The configuration location has to be exported *before* pagure is imported,
# which is why the pagure imports below are not at the very top of the file.
if 'PAGURE_CONFIG' not in os.environ \
        and os.path.exists('/etc/pagure/pagure.cfg'):
    # Parenthesised form prints the same text with the Python 2 print
    # statement and with the print() function.
    print('Using configuration file `/etc/pagure/pagure.cfg`')
    os.environ['PAGURE_CONFIG'] = '/etc/pagure/pagure.cfg'

import pagure
import pagure.exceptions
import pagure.lib
# ``pagure.lib.git`` is used below; import it explicitly rather than relying
# on another pagure module having imported it as a side effect.
import pagure.lib.git
import pagure.lib.notify


def format_callstack():
    """ Format the current call stack as a multi-line string.

    The innermost frames are scanned and the ones whose top-level module is
    ``logging`` are counted so that they can be dropped from the output.

    :return: one entry per remaining frame, joined by newlines.
    """
    ind = 0
    for ind, frame in enumerate(f[0] for f in inspect.stack()):
        if '__name__' not in frame.f_globals:
            continue
        modname = frame.f_globals['__name__'].split('.')[0]
        if modname != "logging":
            break

    def _format_frame(frame):
        """ Format the frame. """
        # tuple() keeps the %-formatting working whether the stack entry is
        # a plain 4-tuple or a tuple-like summary object.
        return ' File "%s", line %i in %s\n %s' % tuple(frame)

    stack = traceback.extract_stack()
    # ``stack[:-0]`` is the empty list, so only slice when there really are
    # frames to drop; otherwise the whole stack would be discarded and an
    # empty string returned.
    if ind:
        stack = stack[:-ind]

    return "\n".join([_format_frame(frame) for frame in stack])


def get_files_to_load(title, new_commits_list, abspath):
    """ Return the files touched by the specified commits.

    :arg title: label used as prefix in the log messages.
    :arg new_commits_list: list of commit hashes; it is walked in reverse
        order and is left untouched.
    :arg abspath: path handed over to ``pagure.lib.git.read_git_lines``
        together with the git command to run.
    :return: the list of non-empty, stripped lines returned by
        ``git diff-tree`` for every commit (may contain duplicates).
    """
    _log.info('%s: Retrieve the list of files changed', title)
    file_list = []
    # Work on a reversed copy so the caller's list is not modified.
    commits = list(reversed(new_commits_list))
    n = len(commits)
    for idx, commit in enumerate(commits):
        if (idx % 100) == 0:
            _log.info(
                'Loading files change in commits for %s: %s/%s',
                title, idx, n)
        cmd = ['diff-tree', '--no-commit-id', '--name-only', '-r']
        if commit == commits[0]:
            # The first commit processed is diffed with ``--root`` so that
            # it lists its files even if it has no parent.
            cmd.append('--root')
        cmd.append(commit)
        filenames = pagure.lib.git.read_git_lines(cmd, abspath)
        for line in filenames:
            if line.strip():
                file_list.append(line.strip())
    return file_list


@trollius.coroutine
def handle_messages():
    ''' Handles connecting to redis and acting upon messages received.
    In this case, it means loading into the DB the files changed by the
    commits specified in the message for the specified repo.

    Messages that are not valid JSON or that lack one of the expected
    fields are logged and skipped.

    The currently accepted message format looks like:

    ::

        {
            "project": {
                "name": "foo",
                "namespace": null,
                "parent": null,
                "username": {
                    "name": "user"
                }
            },
            "abspath": "/srv/git/repositories/pagure.git",
            "commits": [
                "b7b4059c44d692d7df3227ce58ce01191e5407bd",
                "f8d0899bb6654590ffdef66b539fd3b8cf873b35",
                "9b6fdc48d3edab82d3de28953271ea52b0a96117"
            ],
            "data_type": "ticket",
            "agent": "pingou"
        }

    '''

    host = pagure.APP.config.get('REDIS_HOST', '0.0.0.0')
    port = pagure.APP.config.get('REDIS_PORT', 6379)
    dbname = pagure.APP.config.get('REDIS_DB', 0)
    connection = yield trollius.From(trollius_redis.Connection.create(
        host=host, port=port, db=dbname))

    # Create subscriber.
    subscriber = yield trollius.From(connection.start_subscribe())

    # Subscribe to channel.
    yield trollius.From(subscriber.subscribe(['pagure.loadjson']))

    # Inside a while loop, wait for incoming events.
    while True:
        reply = yield trollius.From(subscriber.next_published())
        _log.info(
            'Received: %s on channel: %s',
            repr(reply.value), reply.channel)
        _log.info('Loading the json')
        try:
            data = json.loads(reply.value)
        except ValueError:
            # One bad payload must not take the whole listener down.
            _log.exception('Could not decode the message as JSON, skipping')
            continue
        _log.info('Done: loading the json')

        try:
            commits = data['commits']
            abspath = data['abspath']
            repo = data['project']['name']
            # The username only matters for forks (projects with a parent).
            username = data['project']['username']['name'] \
                if data['project']['parent'] else None
            namespace = data['project']['namespace']
            data_type = data['data_type']
            agent = data['agent']
        except (KeyError, TypeError):
            _log.exception('Message is missing expected fields, skipping')
            continue

        if data_type not in ['ticket', 'pull-request']:
            _log.info('Invalid data_type retrieved: %s', data_type)
            continue

        session = pagure.lib.create_session(pagure.APP.config['DB_URL'])
        # Everything using the session lives in this try/finally so that it
        # is closed on every exit path, including the ``continue`` below.
        try:
            _log.info('Looking for project: %s%s of user: %s',
                      '%s/' % namespace if namespace else '',
                      repo, username)
            project = pagure.lib._get_project(
                session, repo, user=username, namespace=namespace)

            if not project:
                _log.info('No project found')
                continue

            _log.info('Found project: %s', project.fullname)

            _log.info(
                '%s: Processing %s commits in %s', project.fullname,
                len(commits), abspath)

            file_list = set(
                get_files_to_load(project.fullname, commits, abspath))
            n = len(file_list)
            _log.info('%s files to process', n)
            mail_body = []

            for idx, filename in enumerate(file_list):
                _log.info(
                    'Loading: %s: %s -- %s/%s',
                    project.fullname, filename, idx+1, n)
                tmp = 'Loading: %s -- %s/%s' % (filename, idx+1, n)
                json_data = None
                content = ''.join(
                    pagure.lib.git.read_git_lines(
                        ['show', 'HEAD:%s' % filename], abspath))
                if content and not filename.startswith('files/'):
                    try:
                        json_data = json.loads(content)
                    except ValueError:
                        # Not JSON: reported as skipped below.
                        pass
                if json_data:
                    try:
                        # Only tickets are imported here; for any other
                        # accepted data_type nothing is loaded but the file
                        # is still reported as done.
                        if data_type == 'ticket':
                            pagure.lib.git.update_ticket_from_git(
                                session,
                                reponame=repo,
                                namespace=namespace,
                                username=username,
                                issue_uid=filename,
                                json_data=json_data
                            )
                        tmp += ' ... ... Done'
                    except Exception as err:
                        _log.info('data: %s', json_data)
                        session.rollback()
                        _log.exception(err)
                        tmp += ' ... ... FAILED\n'
                        tmp += format_callstack()
                        # Stop at the first failure; the report built so
                        # far is still emailed below.
                        break
                    finally:
                        mail_body.append(tmp)
                else:
                    tmp += ' ... ... SKIPPED - No JSON data'
                    mail_body.append(tmp)

            try:
                session.commit()
                _log.info(
                    'Emailing results for %s to %s',
                    project.fullname, agent)
                try:
                    if not agent:
                        raise pagure.exceptions.PagureException(
                            'No agent found: %s' % agent)
                    user_obj = pagure.lib.get_user(session, agent)
                    pagure.lib.notify.send_email(
                        '\n'.join(mail_body),
                        'Issue import report',
                        user_obj.default_email)
                except pagure.exceptions.PagureException:
                    _log.exception('Could not find user %s', agent)
            except SQLAlchemyError:  # pragma: no cover
                session.rollback()
        finally:
            session.close()
        _log.info('Ready for another')


def main():
    ''' Start the main async loop. '''
    # Bound before the try block so that ``loop.close()`` below can never
    # hit an unbound name.
    loop = trollius.get_event_loop()
    # ``async`` is a reserved word on recent Python versions, so it is only
    # looked up by name, and only when ``ensure_future`` is not available.
    schedule = getattr(trollius, 'ensure_future', None) \
        or getattr(trollius, 'async')

    try:
        tasks = [
            schedule(handle_messages()),
        ]
        loop.run_until_complete(trollius.wait(tasks))
        loop.run_forever()
    except KeyboardInterrupt:
        pass
    except trollius.ConnectionResetError:
        pass

    _log.info("End Connection")
    loop.close()
    _log.info("End")


if __name__ == '__main__':
    formatter = logging.Formatter(
        "%(asctime)s %(levelname)s [%(module)s:%(lineno)d] %(message)s")

    logging.basicConfig(level=logging.DEBUG)

    # setup console logging
    _log.setLevel(logging.DEBUG)
    shellhandler = logging.StreamHandler()
    shellhandler.setLevel(logging.DEBUG)

    aslog = logging.getLogger("asyncio")
    aslog.setLevel(logging.DEBUG)
    aslog = logging.getLogger("trollius")
    aslog.setLevel(logging.DEBUG)

    # Turn down the logs coming from python-markdown
    mklog = logging.getLogger("MARKDOWN")
    mklog.setLevel(logging.WARN)

    shellhandler.setFormatter(formatter)
    _log.addHandler(shellhandler)
    main()