Browse Source

checksrc: add COPYRIGHTYEAR check

Forgetting to bump the year in the copyright clause when hacking has
been quite common among curl developers, but a traditional checksrc
check isn't a good fit as it would penalize anyone hacking on January
1st (among other things). This adds a more selective COPYRIGHTYEAR
check which intends to only cover the currently hacked on changeset.

The check for updated copyright year is currently not enforced on all
files but only on files edited and/or committed locally. This is due to
the number of files which aren't updated with their correct copyright
year at the time of their respective commit.

To further avoid running this expensive check for every developer, it
adds a new local override mode for checksrc where a .checksrc file can
be used to turn on extended warnings locally.

Closes #3303
Reviewed-by: Daniel Stenberg <daniel@haxx.se>
Daniel Gustafsson 5 years ago
parent
commit
6381708970
4 changed files with 101 additions and 5 deletions
  1. 1 0
      .gitignore
  2. 13 0
      docs/CHECKSRC.md
  3. 86 5
      lib/checksrc.pl
  4. 1 0
      lib/md4.c

+ 1 - 0
.gitignore

@@ -56,3 +56,4 @@ scripts/_curl
 curl_fuzzer
 curl_fuzzer_seed_corpus.zip
 libstandaloneengine.a
+.checksrc

+ 13 - 0
docs/CHECKSRC.md

@@ -103,6 +103,19 @@ warnings are:
 - `UNUSEDIGNORE`: a checksrc inlined warning ignore was asked for but not used,
    that's an ignore that should be removed or changed to get used.
 
+### Extended warnings
+
+Some warnings are quite computationally expensive to perform, so they are
+turned off by default. To enable these warnings, place a `.checksrc` file in
+the directory where they should be activated with commands to enable the
+warnings you are interested in. The format of the file is to enable one
+warning per line like so: `enable <EXTENDEDWARNING>`
+
+Currently there is one extended warning which can be enabled:
+
+- `COPYRIGHTYEAR`: the current changeset hasn't updated the copyright year in
+   the source file
+
 ## Ignore certain warnings
 
 Due to the nature of the source code and the flaws of the checksrc tool, there

+ 86 - 5
lib/checksrc.pl

@@ -45,6 +45,10 @@ my %ignore_set;
 my %ignore_used;
 my @ignore_line;
 
+my %warnings_extended = (
+    'COPYRIGHTYEAR'    => 'copyright year incorrect',
+    );
+
 my %warnings = (
     'LONGLINE'         => "Line longer than $max_column",
     'TABS'             => 'TAB characters not allowed',
@@ -88,6 +92,35 @@ sub readwhitelist {
     close(W);
 }
 
+# Reads the .checksrc file in $dir, if there is one, and turns on the extended
+# warnings it asks for by copying them from %warnings_extended into %warnings.
+# A .checksrc that cannot be opened simply means "no extended warnings".
+#
+# Accepted lines:
+#   enable <WARNING>   turn on the named extended warning
+#   # text             comment, ignored
+#   (blank)            ignored
+# An unknown warning name is reported on STDERR and skipped; any other line
+# aborts the run. Trailing whitespace, including the CR of a CRLF line ending,
+# is tolerated.
+#
+# Currently there is no support for disabling warnings from the standard set,
+# and since that's already handled via !checksrc! commands there is probably
+# little use to add it.
+sub readlocalfile {
+    my $i = 0;
+
+    open(my $rcfile, "<", "$dir/.checksrc") or return;
+
+    while(my $entry = <$rcfile>) {
+        $i++;
+
+        # Drop the line ending and any trailing blanks so that files saved
+        # with CRLF line endings or stray spaces still parse
+        $entry =~ s/\s+$//;
+
+        # Blank lines and lines starting with '#' carry no commands
+        if($entry =~ /^\s*(?:#.*)?$/) {
+            next;
+        }
+        elsif($entry =~ /^\s*enable ([A-Z]+)$/) {
+            # Save the capture right away; $1 is clobbered by the next match
+            my $name = $1;
+            if(!defined($warnings_extended{$name})) {
+                print STDERR
+                    "invalid warning specified in .checksrc: \"$name\"\n";
+                next;
+            }
+            $warnings{$name} = $warnings_extended{$name};
+        }
+        else {
+            die "Invalid format in $dir/.checksrc on line $i\n";
+        }
+    }
+    close($rcfile);
+}
+
 sub checkwarn {
     my ($name, $num, $col, $file, $line, $msg, $error) = @_;
 
@@ -195,6 +228,7 @@ if(!$file) {
 }
 
 readwhitelist();
+readlocalfile();
 
 do {
     if("$wlist" !~ / $file /) {
@@ -314,7 +348,7 @@ sub scanfile {
     open(R, "<$file") || die "failed to open $file";
 
     my $incomment=0;
-    my $copyright=0;
+    my @copyright=();
     checksrc_clear(); # for file based ignores
     accept_violations();
 
@@ -330,9 +364,16 @@ sub scanfile {
             checksrc($cmd, $line, $file, $l)
         }
 
-        # check for a copyright statement
-        if(!$copyright && ($l =~ /copyright .* \d\d\d\d/i)) {
-            $copyright=1;
+        # check for a copyright statement and save the years
+        if($l =~ /\* +copyright .* \d\d\d\d/i) {
+            while($l =~ /([\d]{4})/g) {
+                push @copyright, {
+                  year => $1,
+                  line => $line,
+                  col => index($l, $1),
+                  code => $l
+                };
+            }
         }
 
         # detect long lines
@@ -650,9 +691,49 @@ sub scanfile {
         $prevl = $ol;
     }
 
-    if(!$copyright) {
+    if(!scalar(@copyright)) {
         checkwarn("COPYRIGHT", 1, 0, $file, "", "Missing copyright statement", 1);
     }
+
+    # COPYRIGHTYEAR is an extended warning so we must first see if it has been
+    # enabled in .checksrc
+    if(defined($warnings{"COPYRIGHTYEAR"})) {
+        # The check for updated copyrightyear is overly complicated in order to
+        # not punish current hacking for past sins. The copyright years are
+        # right now a bit behind, so enforcing copyright year checking on all
+        # files would cause hundreds of errors. Instead we only look at files
+        # which are tracked in the Git repo and edited in the workdir, or
+        # committed locally on the branch without being in upstream master.
+        #
+        # The simple and naive test is to simply check for the current year,
+        # but updating the year even without an edit is against project policy
+        # (and it would fail every file on January 1st).
+        #
+        # A rather more interesting, and correct, check would be to not test
+        # only locally committed files but inspect all files wrt the year of
+        # their last commit. Removing the `git rev-list origin/master..HEAD`
+        # condition below will enforce copyright year checks against the year
+        # the file was last committed (and thus edited to some degree).
+        my $commityear = undef;
+        @copyright = sort {$$b{year} cmp $$a{year}} @copyright;
+
+        if(`git status -s -- $file` =~ /^ [MARCU]/) {
+            $commityear = (localtime(time))[5] + 1900;
+        }
+        elsif (`git rev-list --count origin/master..HEAD -- $file` !~ /^0/) {
+            my $grl = `git rev-list --max-count=1 --timestamp HEAD -- $file`;
+            $commityear = (localtime((split(/ /, $grl))[0]))[5] + 1900;
+        }
+
+        if(defined($commityear) && scalar(@copyright) &&
+           $copyright[0]{year} != $commityear) {
+            checkwarn("COPYRIGHTYEAR", $copyright[0]{line}, $copyright[0]{col},
+                      $file, $copyright[0]{code},
+                      "Copyright year out of date, should be $commityear, " .
+                      "is $copyright[0]{year}", 1);
+        }
+    }
+
     if($incomment) {
         checkwarn("OPENCOMMENT", 1, 0, $file, "", "Missing closing comment", 1);
     }

+ 1 - 0
lib/md4.c

@@ -1,4 +1,5 @@
 /*
+ * !checksrc! disable COPYRIGHT
  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
  * MD4 Message-Digest Algorithm (RFC 1320).
  *