Browse Source

Add support for mirroring in git repositories from outside sources

Fixes https://pagure.io/pagure/issue/1987

Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>
Pierre-Yves Chibon 5 years ago
parent
commit
227f4ce98b

+ 35 - 0
alembic/versions/5993f9240bcf_allow_mirroring_project_in.py

@@ -0,0 +1,35 @@
+"""Allow mirroring project in
+
+Revision ID: 5993f9240bcf
+Revises: 1f24c9c8efa5
+Create Date: 2018-12-14 10:00:05.281979
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '5993f9240bcf'
+down_revision = '1f24c9c8efa5'
+
+
+def upgrade():
+    ''' Add the column mirrored_from to the table projects.
+    '''
+    op.add_column(
+        'projects',
+        sa.Column('mirrored_from', sa.Text, nullable=True)
+    )
+    op.add_column(
+        'projects',
+        sa.Column('mirrored_from_last_log', sa.Text, nullable=True)
+    )
+
+
+def downgrade():
+    ''' Remove the column mirrored_from from the table projects.
+    '''
+    op.drop_column('projects', 'mirrored_from')
+    op.drop_column('projects', 'mirrored_from_last_log')

+ 53 - 0
files/mirror_project_in.py

@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import os
+import argparse
+from datetime import datetime, timedelta
+
+from sqlalchemy.exc import SQLAlchemyError
+
+import pagure.config
+import pagure.lib.query
+import pagure.lib.notify
+import pagure.lib.model as model
+
+if 'PAGURE_CONFIG' not in os.environ \
+        and os.path.exists('/etc/pagure/pagure.cfg'):
+    print('Using configuration file `/etc/pagure/pagure.cfg`')
+    os.environ['PAGURE_CONFIG'] = '/etc/pagure/pagure.cfg'
+
+_config = pagure.config.reload_config()
+
+
+def main(check=False, debug=False):
+    ''' The function pulls in all the changes from upstream'''
+
+    session = pagure.lib.query.create_session(_config['DB_URL'])
+    projects = session.query(
+        model.Project
+    ).filter(
+        model.Project.mirrored_from != None
+    ).all()
+
+    for project in projects:
+        if debug:
+            print("Mirrorring %s" % project.fullname)
+        pagure.lib.git.mirror_pull_project(session, project, debug=debug)
+
+    session.remove()
+    if debug:
+        print('Done')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+            description='Script to send email before the api token expires')
+    parser.add_argument(
+        '--check', dest='check', action='store_true', default=False,
+        help='Print the some output but does not send any email')
+    parser.add_argument(
+        '--debug', dest='debug', action='store_true', default=False,
+        help='Print the debugging output')
+    args = parser.parse_args()
+    main(debug=args.debug)

+ 7 - 0
pagure/forms.py

@@ -156,6 +156,13 @@ class ProjectForm(ProjectFormSimplified):
     """ Form to create or edit project. """
 
     name = wtforms.StringField('Project name <span class="error">*</span>')
+    mirrored_from = wtforms.StringField(
+        "Mirror from URL",
+        [
+            wtforms.validators.optional(),
+            wtforms.validators.Regexp(urlpattern, flags=re.IGNORECASE),
+        ],
+    )
     create_readme = wtforms.BooleanField(
         "Create README",
         [wtforms.validators.optional()],

+ 133 - 3
pagure/lib/git.py

@@ -1211,10 +1211,12 @@ def read_output(cmd, abspath, input=None, keepends=False, error=False, **kw):
         cwd=abspath,
         **kw
     )
-    (out, err) = procs.communicate(input)
-    out = out.decode("utf-8")
-    err = err.decode("utf-8")
     retcode = procs.wait()
+    (out, err) = procs.communicate(input)
+    if not isinstance(out, str):
+        out = out.decode("utf-8")
+    if not isinstance(err, str):
+        err = err.decode("utf-8")
     if retcode:
         print("ERROR: %s =-- %s" % (cmd, retcode))
         print(out)
@@ -2760,3 +2762,131 @@ def generate_archive(project, commit, tag, name, archive_fmt):
             raise pagure.exceptions.PagureException(
                 "Un-support archive format requested: %s", archive_fmt
             )
+
+
+def mirror_pull_project(session, project, debug=False):
+    """ Mirror locally a project from a remote URL. """
+    remote = project.mirrored_from
+    repopath = tempfile.mkdtemp(prefix="pagure-mirror_in-")
+    lclrepopath = pagure.utils.get_repo_path(project)
+
+    def _run_command(command, logs):
+        _log.info("Running the command: %s" % command)
+        if debug:
+            print("Running the command: %s" % command)
+            print("  Running in: %s" % repopath)
+        (stdout, stderr) = pagure.lib.git.read_git_lines(
+            command, abspath=repopath, error=True
+        )
+        log = "Output from %s:\n  stdout: %s\n  stderr: %s" % (
+            command,
+            stdout,
+            stderr,
+        )
+        logs.append(log)
+        if debug:
+            print(log)
+        return logs
+
+    try:
+        # Pull
+        logs = []
+        logs = _run_command(["clone", "--mirror", remote, "."], logs)
+        logs = _run_command(["remote", "add", "local", lclrepopath], logs)
+
+        # Push the changes
+        _log.info("Pushing")
+        if debug:
+            print("Pushing to the local git repo")
+        extra = {}
+        if project.is_on_repospanner:
+            regioninfo = pagure_config["REPOSPANNER_REGIONS"][
+                project.repospanner_region
+            ]
+
+            extra.update(
+                {
+                    "username": "pagure",
+                    "repotype": "main",
+                    "project_name": project.name,
+                    "project_user": project.user.username
+                    if project.is_fork
+                    else "",
+                    "project_namespace": project.namespace or "",
+                }
+            )
+            args = []
+            for opt in extra:
+                args.extend(["--extra", opt, extra[opt]])
+            command = [
+                "git",
+                "-c",
+                "protocol.ext.allow=always",
+                "push",
+                "ext::%s %s %s"
+                % (
+                    pagure_config["REPOBRIDGE_BINARY"],
+                    " ".join(args),
+                    project._repospanner_repo_name("main"),
+                ),
+                "--repo",
+                repopath,
+            ]
+            environ = {
+                "USER": "pagure",
+                "REPOBRIDGE_CONFIG": ":environment:",
+                "REPOBRIDGE_BASEURL": regioninfo["url"],
+                "REPOBRIDGE_CA": regioninfo["ca"],
+                "REPOBRIDGE_CERT": regioninfo["push_cert"]["cert"],
+                "REPOBRIDGE_KEY": regioninfo["push_cert"]["key"],
+            }
+        else:
+            command = ["git", "push", "local", "--mirror"]
+            environ = {}
+
+        _log.debug("Running a git push to %s", project.fullname)
+        env = os.environ.copy()
+        env["GL_USER"] = "pagure"
+        env["GL_BYPASS_ACCESS_CHECKS"] = "1"
+        if pagure_config.get("GITOLITE_HOME"):
+            env["HOME"] = pagure_config["GITOLITE_HOME"]
+        env.update(environ)
+        env.update(extra)
+        out = subprocess.check_output(
+            command, cwd=repopath, stderr=subprocess.STDOUT, env=env
+        )
+        log = "Output from %s:" % command
+        logs.append(log)
+        logs.append(out)
+        _log.debug("Output: %s" % out)
+
+        project.mirrored_from_last_log = "\n".join(logs)
+        session.add(project)
+        session.commit()
+        _log.info("\n".join(logs))
+    except subprocess.CalledProcessError as err:
+        _log.debug(
+            "Rebase FAILED: {cmd} returned code {code} with the "
+            "following output: {output}".format(
+                cmd=err.cmd, code=err.returncode, output=err.output
+            )
+        )
+        # This should never really happen, since we control the repos, but
+        # this way, we can be sure to get the output logged
+        remotes = []
+        for line in err.output.decode("utf-8").split("\n"):
+            _log.info("Remote line: %s", line)
+            if line.startswith("remote: "):
+                _log.debug("Remote: %s" % line)
+                remotes.append(line[len("remote: ") :].strip())
+        if remotes:
+            _log.info("Remote rejected with: %s" % remotes)
+            raise pagure.exceptions.PagurePushDenied(
+                "Remote hook declined the push: %s" % "\n".join(remotes)
+            )
+        else:
+            # Something else happened, pass the original
+            _log.exception("Error pushing. Output: %s", err.output)
+            raise
+    finally:
+        shutil.rmtree(repopath)

+ 2 - 0
pagure/lib/model.py

@@ -380,6 +380,8 @@ class Project(BASE):
     _reports = sa.Column(sa.Text, nullable=True)
     _notifications = sa.Column(sa.Text, nullable=True)
     _close_status = sa.Column(sa.Text, nullable=True)
+    mirrored_from = sa.Column(sa.Text, nullable=True)
+    mirrored_from_last_log = sa.Column(sa.Text, nullable=True)
 
     date_created = sa.Column(
         sa.DateTime, nullable=False, default=datetime.datetime.utcnow

+ 2 - 0
pagure/lib/query.py

@@ -1631,6 +1631,7 @@ def new_project(
     avatar_email=None,
     parent_id=None,
     add_readme=False,
+    mirrored_from=None,
     userobj=None,
     prevent_40_chars=False,
     namespace=None,
@@ -1712,6 +1713,7 @@ def new_project(
         avatar_email=avatar_email if avatar_email else None,
         user_id=user_obj.id,
         parent_id=parent_id,
+        mirrored_from=mirrored_from,
         private=private,
         hook_token=pagure.lib.login.id_generator(40),
     )

+ 24 - 0
pagure/templates/new_project.html

@@ -23,6 +23,7 @@
             {% if config.get('PRIVATE_PROJECTS', False) %}
               {{ render_bootstrap_field(form.private, field_description="To mark the repo private") }}
             {% endif %}
+            {{ render_bootstrap_field(form.mirrored_from, field_description="Mirror this project from another git server") }}
             {{ render_bootstrap_field(form.create_readme, field_description="Create a README file automatically") }}
             {% if form.repospanner_region %}
               {{ render_bootstrap_field(form.repospanner_region, field_description="repoSpanner region to create the project in") }}
@@ -36,6 +37,15 @@
       </div>
     </div>
   </div>
+  <div class="row justify-content-around">
+    <div class="col-md-8">
+        <div class="mt-3">
+          <p id="mirrored_doc">
+            Note that Pagure only supports mirroring from a public server.
+          </p>
+        </div>
+    </div>
+  </div>
 </div>
 {% endblock %}
 
@@ -56,6 +66,20 @@ $('#private').change(function(){
     $('#namespace').removeAttr("disabled");
   }
 });
+function update_if_mirror() {
+  if ($('#mirrored_from').val()){
+    $('#create_readme').attr("disabled", "disabled");
+    $('#create_readme').prop('checked', false);
+    $('#mirrored_doc').show();
+  } else {
+    $('#create_readme').removeAttr("disabled");
+    $('#mirrored_doc').hide();
+  }
+};
+$('#mirrored_from').keyup(function(){
+  update_if_mirror();
+});
+update_if_mirror();
 </script>
 {% endblock %}
 {% endif %}

+ 8 - 1
pagure/templates/repo_info.html

@@ -279,7 +279,14 @@
                     {% endif %}
           </div>
       </div>
-    {% if g.repo_obj and g.repo_obj.is_empty %}
+    {% if g.repo_obj and g.repo_obj.is_empty and repo.mirrored_from %}
+        <div class="alert {% if category == 'error' %}alert-warning{% else %}alert-info{%endif%}" role="alert">
+          <p>This repo is brand new and meant to be mirrored from {{
+                repo.mirrored_from }} !</p>
+          <p>Mirrored projects are refreshed regularly, please sit tight, code will
+          land soon!</p>
+        </div>
+    {% elif g.repo_obj and g.repo_obj.is_empty %}
         <div class="alert {% if category == 'error' %}alert-warning{% else %}alert-info{%endif%}" role="alert">
           <p>This repo is brand new!</p>
             {% if g.authenticated and g.repo_committer %}

+ 3 - 0
pagure/ui/app.py

@@ -1041,6 +1041,8 @@ def new_project():
         else:
             ignore_existing_repos = False
 
+        mirrored_from = form.mirrored_from.data
+
         try:
             task = pagure.lib.query.new_project(
                 flask.g.session,
@@ -1055,6 +1057,7 @@ def new_project():
                 blacklist=pagure_config["BLACKLISTED_PROJECTS"],
                 allowed_prefix=pagure_config["ALLOWED_PREFIX"],
                 add_readme=create_readme,
+                mirrored_from=mirrored_from,
                 userobj=user,
                 prevent_40_chars=pagure_config.get(
                     "OLD_VIEW_COMMIT_ENABLED", False

+ 79 - 0
tests/test_pagure_flask_ui_app.py

@@ -281,6 +281,85 @@ class PagureFlaskApptests(tests.Modeltests):
 
         pagure.config.config['ENABLE_NEW_PROJECTS'] = True
 
+    def test_new_project_mirrored_invalid_url(self):
+        """ Test the new_project with a mirrored repo but an invalid URL. """
+
+        user = tests.FakeUser(username='foo')
+        with tests.user_set(self.app.application, user):
+            output = self.app.get('/new/')
+            self.assertEqual(output.status_code, 200)
+
+            csrf_token = self.get_csrf(output=output)
+
+            data = {
+                'description': 'Project #1',
+                'name': 'project-1',
+                'mirrored_from': 'abcd',
+                'csrf_token': csrf_token,
+            }
+
+            output = self.app.post('/new/', data=data, follow_redirects=True)
+            self.assertEqual(output.status_code, 200)
+            output_text = output.get_data(as_text=True)
+            self.assertIn(
+                '<title>New project - Pagure</title>', output_text)
+            self.assertIn(
+                'Invalid input.&nbsp;', output_text)
+
+    def test_new_project_mirrored_invalid_sshurl(self):
+        """ Test the new_project with a mirrored repo but an invalid
+        SSH-like url.
+        """
+
+        user = tests.FakeUser(username='foo')
+        with tests.user_set(self.app.application, user):
+            output = self.app.get('/new/')
+            self.assertEqual(output.status_code, 200)
+
+            csrf_token = self.get_csrf(output=output)
+
+            data = {
+                'description': 'Project #1',
+                'name': 'project-1',
+                'mirrored_from': 'ssh://git@server.org/foo/bar.git',
+                'csrf_token': csrf_token,
+            }
+
+            output = self.app.post('/new/', data=data, follow_redirects=True)
+            self.assertEqual(output.status_code, 200)
+            output_text = output.get_data(as_text=True)
+            self.assertIn(
+                '<title>New project - Pagure</title>', output_text)
+            self.assertIn(
+                'Invalid input.&nbsp;', output_text)
+
+    def test_new_project_mirrored_valid_url(self):
+        """ Test the new_project with a mirrored repo with a valid url. """
+
+        user = tests.FakeUser(username='foo')
+        with tests.user_set(self.app.application, user):
+            output = self.app.get('/new/')
+            self.assertEqual(output.status_code, 200)
+
+            csrf_token = self.get_csrf(output=output)
+
+            data = {
+                'description': 'Project #1',
+                'name': 'project-1',
+                'mirrored_from': 'https://example.com/foo/bar.git',
+                'csrf_token': csrf_token,
+            }
+
+            output = self.app.post('/new/', data=data, follow_redirects=True)
+            self.assertEqual(output.status_code, 200)
+            output_text = output.get_data(as_text=True)
+            self.assertIn(
+                '<title>Overview - project-1 - Pagure</title>',
+                output_text)
+            self.assertIn(
+                '<p>This repo is brand new and meant to be mirrored from '
+                'https://example.com/foo/bar.git !</p>', output_text)
+
     def test_new_project(self):
         """ Test the new_project endpoint. """
         # Before

+ 121 - 0
tests/test_pagure_lib_git_mirror_project.py

@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+
+"""
+ (c) 2018 - Copyright Red Hat Inc
+
+ Authors:
+   Pierre-Yves Chibon <pingou@pingoured.fr>
+
+"""
+
+from __future__ import unicode_literals
+
+__requires__ = ['SQLAlchemy >= 0.8']
+
+import pkg_resources
+
+import datetime
+import os
+import shutil
+import sys
+import tempfile
+import time
+import unittest
+
+import pygit2
+import six
+from mock import patch, MagicMock, ANY, call
+
+sys.path.insert(0, os.path.join(os.path.dirname(
+    os.path.abspath(__file__)), '..'))
+
+import pagure.lib.git
+import tests
+
+from pagure.lib.repo import PagureRepo
+
+
+class PagureLibGitMirrorProjecttests(tests.Modeltests):
+    """ Tests for pagure.lib.git.mirror_pull_project """
+
+    maxDiff = None
+
+    def setUp(self):
+        """ Set up the environnment, ran before every tests. """
+        super(PagureLibGitMirrorProjecttests, self).setUp()
+
+        tests.create_projects(self.session)
+        tests.create_projects_git(
+            os.path.join(self.path, "repos"),
+            bare=True
+        )
+
+        # Make the test project mirrored from elsewhere
+        self.project = pagure.lib.query.get_authorized_project(
+            self.session, 'test')
+        self.project.mirrored_from = "https://example.com/foo/bar.git"
+        self.session.add(self.project)
+        self.session.commit()
+
+    @patch('subprocess.Popen')
+    @patch('subprocess.check_output')
+    def test_mirror_pull_project(self, ck_out_mock, popen_mock):
+        """ Test the mirror_pull_project method of pagure.lib.git. """
+
+        tmp = MagicMock()
+        tmp.communicate.return_value = ('', '')
+        popen_mock.return_value = tmp
+        ck_out_mock.return_value = "all good"
+
+        output = pagure.lib.git.mirror_pull_project(
+            self.session,
+            self.project
+        )
+
+        self.assertEqual(
+            popen_mock.call_count,
+            2
+        )
+
+        calls = [
+            call(
+                [
+                    u'git', u'clone', u'--mirror',
+                    u'https://example.com/foo/bar.git', u'.'
+                ],
+                cwd=ANY,
+                stderr=-1,
+                stdin=None,
+                stdout=-1
+            ),
+            ANY,
+            ANY,
+            ANY,
+            ANY,
+            call(
+                [u'git', u'remote', u'add', u'local', ANY],
+                cwd=ANY,
+                stderr=-1,
+                stdin=None,
+                stdout=-1
+            ),
+            ANY,
+            ANY,
+            ANY,
+            ANY,
+        ]
+        self.assertEqual(
+            popen_mock.mock_calls,
+            calls
+        )
+
+        ck_out_mock.assert_called_once_with(
+            [u'git', u'push', u'local', u'--mirror'],
+            cwd=ANY,
+            env=ANY,
+            stderr=-2
+        )
+
+
+# Allow running this test module directly, with verbose output.
+if __name__ == '__main__':
+    unittest.main(verbosity=2)