citadel

My dotfiles, scripts and nix configs
git clone git://jb55.com/citadel
Log | Files | Refs | README | LICENSE

showlinenum (16839B)


      1 #!/usr/bin/env bash
      2 #
      3 # Copyright (C) 2013 Jay Satiro <raysatiro@yahoo.com>
      4 # All rights reserved.
      5 #
      6 # This file is part of the showlinenum project.
      7 # https://github.com/jay/showlinenum/
      8 #
      9 # This file is free software: you can redistribute it and/or modify
     10 # it under the terms of the GNU General Public License as published by
     11 # the Free Software Foundation, either version 3 of the License, or
     12 # (at your option) any later version.
     13 #
     14 # This file is distributed in the hope that it will be useful,
     15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     17 # GNU General Public License for more details.
     18 #
     19 # You should have received a copy of the GNU General Public License
     20 # along with this file. If not, see <http://www.gnu.org/licenses/>.
     21 #
     22 #
     23 #
     24 # This gawk script changes the output of git diff to prepend the line number
     25 # for each line.
     26 #
     27 #
     28 #### Usage:
     29 #
     30 # git diff [options] | showlinenum.awk [options]
     31 #
     32 # All options for showlinenum require a value and are specified using the
     33 # format option=value.
     34 #
     35 ####
     36 #
     37 #
     38 #### Output:
     39 #
     40 # The diff line output is in this format:
     41 # [path:]<line number>:<diff line>
     42 #
     43 # When the path is shown it's the new version's file path. Line numbers are
     44 # shown for lines in the new version of the file (ie lines that are the same or
     45 # added). If a line appears only in the old version of the file (ie lines
     46 # removed) or the warning indicator is found then padding space is used in
     47 # place of a line number. If a file was removed a tilde ~ is used in place of a
     48 # line number.
     49 #
     50 # The first character in <diff line> is one of four indicators:
     51 # - : Line removed
     52 # + : Line added
     53 # <space> : Line same
     54 # \ : diff warning about previous line
     55 #
     56 # For example:
     57 #  :-removed
     58 # 7:+added
     59 # 8: common
     60 #  :\ No newline at end of file
     61 #
     62 # As far as I know the backslash indicator is only used for the missing newline
     63 # at EOF warning. When that warning appears it applies to the line immediately
     64 # above it. In the example above both the old and new version of the compared
     65 # file are missing the newline at EOF. If the line above a warning is a removed
     66 # line then the warning applies to the old version of the file, and if the line
     67 # above a warning is an added line then the warning applies to the new version
     68 # of the file.
     69 #
     70 # All errors are sent to standard error output (stderr). Currently all errors
     71 # are treated as fatal errors. On fatal error a line that starts with 'FATAL:'
     72 # is followed by script name and error message(s), which may be one or more
     73 # lines. This script then aborts with exit code 1.
     74 #
     75 ####
     76 #
     77 #
     78 #### Examples:
     79 #
     80 # Simple example. Line numbers are prepended to git diff's output.
     81 # git diff --cached | showlinenum.awk
     82 #
     83 # This script properly handles the ANSI escape color codes output by git diff.
     84 # To get color output you have to force git diff to send it by passing
     85 # --color=always. When that option is used the color output is always output so
     86 # it is not recommended unless you are either outputting to the terminal or
     87 # somewhere that can properly handle the color codes. Many scripts do not
     88 # function correctly when working with color coded input.
     89 #
     90 # This is the same as the first example, but with color output.
     91 # git diff --color=always --cached | showlinenum.awk
     92 #
     93 # Options can be passed to this script by using awk's -v option or the
     94 # traditional way (shown).
     95 # git diff --color=always HEAD~1 HEAD | showlinenum.awk show_header=0
     96 # git diff --color=always HEAD~1 HEAD | showlinenum.awk show_path=1 show_hunk=0
     97 #
     98 ####
     99 #
    100 #
    101 #### Options:
    102 #
    103 # @show_header [0,1] default: 1
    104 # Show diff headers.
    105 #
    106 # Example:
    107 # diff --git a/abc.c b/abc.c
    108 # index 285065f..2471f87 100644
    109 # --- a/abc.c
    110 # +++ b/abc.c
    111 #
    112 ##
    113 #
    114 # @show_hunk [0,1] default: ( show_header ? 1 : 0 )
    115 # Show line hunks.
    116 #
    117 # Example: @@ -0,0 +1,17 @@
    118 #
    119 ##
    120 #
    121 # @show_path [0,1] default: ( show_header ? 0 : 1 )
    122 # Show paths before line numbers.
    123 #
    124 # Example:
    125 # testdir/file:39:+some added text
    126 #
    127 ##
    128 #
    129 # @show_binary [0,1] default: ( show_path ? 1 : 0 )
    130 # Show a binary file that differs in an empty format. [path:][~]:
    131 #
    132 # Binary files have no concept of lines, therefore there is no line number or
    133 # diff line to show that a binary file differs. If the headers are shown you
    134 # can always see whether or not a binary file differs because there will be a
    135 # message "Binary files <old> and <new> differ". If the headers are not shown
    136 # however, that message is suppressed and a binary file that differs has an
    137 # "empty format" with no information, except for a tilde that will be shown if
    138 # the file was removed.
    139 #
    140 # Here are two examples of the empty format, one where the path is shown and
    141 # one where it isn't:
    142 # testdir/binary_file::
    143 # :
    144 #
    145 # Here is an example of a removed binary file, path shown:
    146 # calc.exe:~:
    147 #
    148 ##
    149 #
    150 # @allow_colons_in_path [0,1] default: ( show_path ? 0 : 1 )
    151 # Allow colons in path.
    152 #
    153 # If this option is off then abort if a path that contains a colon is
    154 # encountered. That's done to guarantee that this script's diff line output can
    155 # always be parsed with the first colon occurring immediately after the full
    156 # path. Note git diff paths may start with '<commit>:' like HEAD:./foo/bar, and
    157 # for such a path this option would need to be on.
    158 #
    159 ##
    160 #
    161 # @color_{line_number,path,separator} <num>[;num][;num]
    162 # Add color to some sections.
    163 #
    164 # Color the respective section using one or more ANSI color codes.
    165 # This is not recommended unless you are outputting to the terminal.
    166 # If semi-colons are present in these options your shell may need them quoted.
    167 # Example: "color_line_number=1;37;45" is bright white foreground (1;37) on
    168 # purple background (45).
    169 #
    170 ####
    171 #
    172 
    173 
{
# Polyglot launcher: this block is valid in both the bourne shell and gawk.
# When the shell interprets this file (see the shebang) the line below is an
# 'exec' of "gawk -f <this file> <arguments>" with LAUNCHER="" as a prefix
# assignment, so gawk replaces the shell and becomes this script's interpreter.
# When gawk interprets this file the same line is only a concatenation of
# string literals that is assigned to the variable LAUNCHER.
LAUNCHER="" "exec" "gawk" "-f" "$0" "$@"
}
    180 
    181 
    182 function reset_header_variables()
    183 {
    184   parsing_diff_header = 0;
    185   found_path = 0;
    186   path = 0;
    187   found_oldfile_path = 0;
    188   oldfile_path = 0;
    189   found_line = 0;
    190   line = 0;
    191   found_diff = 0;
    192   diff = 0;
    193 }
    194 
    195 function init()
    196 {
    197   reset_header_variables();
    198 
    199   # To determine whether or not a variable was defined on the command line and
    200   # is not an empty string it must be tested. Many versions of gawk will show a
    201   # warning if using option --lint and an undefined variable is evaluated.
    202   # Therefore this workaround to force define some variables as a string by
    203   # appending an empty string.
    204 
    205   # String variables.
    206   color_line_number = color_line_number "";
    207   color_path = color_path "";
    208   color_separator = color_separator "";
    209 
    210   die_if_bad_color(color_line_number);
    211   die_if_bad_color(color_path);
    212   die_if_bad_color(color_separator);
    213 
    214   # Bool variables are later converted back to a number by get_bool().
    215   show_header = show_header "";
    216   show_hunk = show_hunk "";
    217   show_path = show_path "";
    218   show_binary = show_binary "";
    219   allow_colons_in_path = allow_colons_in_path "";
    220 
    221   # Return the variable as a bool value unless it is empty then return its
    222   # default bool value.
    223   show_header = get_bool(show_header, 1);
    224   show_hunk = get_bool(show_hunk, (show_header ? 1 : 0));
    225   show_path = get_bool(show_path, (show_header ? 0 : 1));
    226   show_binary = get_bool(show_binary, (show_path ? 1 : 0));
    227   allow_colons_in_path = get_bool(allow_colons_in_path, (show_path ? 0 : 1));
    228 }
    229 
    230 function FATAL(a_msg)
    231 {
    232   print "";
    233   # Apparently there is no portable way to get this script's name at runtime?
    234   print strip_ansi_color_codes("FATAL: showlinenum: " a_msg) > "/dev/stderr";
    235   exit 1;
    236 }
    237 
    238 # this returns the bool numeric value of 'input' if it contains a numeric or
    239 # string bool value, otherwise it returns the numeric value of default_value.
    240 function get_bool(input, a_default_value)
    241 {
    242   if(a_default_value !~ /^[0-1]$/)
    243   {
    244     errmsg = "get_bool(): a_default_value must be a bool value." \
    245              "\n" "a_default_value: " a_default_value;
    246     FATAL(errmsg);
    247   }
    248 
    249   regex = "^[[:blank:]]*([0-1])[[:blank:]]*$";
    250   if(input ~ regex)
    251   {
    252     return gensub(regex, "\\1", 1, input) + 0;
    253   }
    254 
    255   return a_default_value + 0;
    256 }
    257 
    258 function die_if_bad_color(input)
    259 {
    260   if(input ~ /[^0-9;]/)
    261   {
    262     errmsg = "die_if_bad_color(): color parameters may contain only numbers " \
    263              "and semi-colons.";
    264     FATAL(errmsg);
    265   }
    266 }
    267 
    268 # Fix an extracted path.
    269 # eg '+++ b/foo/bar' the input is 'b/foo/bar' and the output is 'foo/bar'
    270 function fix_extracted_path(input)
    271 {
    272   if(input == "/dev/null")
    273   {
    274     return input;
    275   }
    276 
    277   if(input !~ /^\042?[abiwco]\//)
    278   {
    279     errmsg = "fix_extracted_path(): sanity check failed, expected [abiwco]/ " \
    280              "prefix." \
    281              "\n" "Path: " input;
    282     FATAL(errmsg);
    283   }
    284 
    285   if(!allow_colons_in_path && (input ~ /:/))
    286   {
    287     errmsg = "fix_extracted_path(): colons in path are forbidden ";
    288     if(show_path)
    289     {
    290       errmsg = errmsg "by default when show_path is on in deference to " \
    291                "scripts which may parse this script's output and rely on " \
    292                "the colon as a separator. To override use command line " \
    293                "option allow_colons_in_path=1.";
    294     }
    295     else
    296     {
    297       errmsg = errmsg "because allow_colons_in_path is off.";
    298     }
    299     errmsg = errmsg "\n" "Path: " input;
    300     FATAL(errmsg);
    301   }
    302 
    303   # Remove an erroneous trailing tab that git diff can add to some non-binary
    304   # paths. eg an unquoted 'b/a $b	' becomes 'b/a $b' if the diff line
    305   # only contains the latter.
    306   if((input ~ /\t$/) && !index(diff, input) && \
    307      index(diff, substr(input, 1, length(input) - 1)))
    308   {
    309     sub(/\t$/, "", input);
    310   }
    311 
    312   sub(/[abiwco]\//, "", input);
    313 
    314   return input;
    315 }
    316 
    317 # this returns a string with the ansi color codes removed
    318 function strip_ansi_color_codes(input)
    319 {
    320   return gensub(/\033\[[0-9;]*m/, "", "g", input);
    321 }
    322 
    323 function print_separator(a_separator)
    324 {
    325   if(color_separator)
    326   {
    327     printf "\033[%sm%s\033[m", color_separator, a_separator;
    328   }
    329   else
    330   {
    331     printf "%s", a_separator;
    332   }
    333 }
    334 
    335 function print_line_number(a_line_number)
    336 {
    337   if(color_line_number)
    338   {
    339     printf "\033[%sm", color_line_number;
    340   }
    341 
    342   if(a_line_number ~ /^[0-9]+$/)
    343   {
    344     # Awk stores all integers internally as floating point. If printf is passed
    345     # an integer it is allowed convert it to scientific notation which I don't
    346     # want for line numbers. I'm not sure how relevant that is since it seems
    347     # to vary between different versions of awk and only when the integer is
    348     # large (how large?).
    349     # The 'f' type specifier should show [-9007199254740992, 9007199254740992]
    350     printf "%.0f", a_line_number + 0;
    351   }
    352   else
    353   {
    354     printf "%s", a_line_number;
    355   }
    356 
    357   if(color_line_number)
    358   {
    359     printf "\033[m";
    360   }
    361 
    362   print_separator(":");
    363 }
    364 
    365 function print_path(a_path)
    366 {
    367   if(!show_path)
    368   {
    369     return;
    370   }
    371 
    372   if(color_path)
    373   {
    374     printf "\033[%sm%s\033[m", color_path, a_path;
    375   }
    376   else
    377   {
    378     printf "%s", a_path;
    379   }
    380 
    381   print_separator(":");
    382 }
    383 
    384 #
    385 # main
    386 #
    387 {
    388   if(NR == 1)
    389   {
    390     init();
    391   }
    392 
    393   if($0 ~ /^(\033\[[0-9;]*m)*diff /)
    394   {
    395     reset_header_variables();
    396     parsing_diff_header = 1;
    397 
    398     diff = strip_ansi_color_codes($0);
    399     found_diff = 1;
    400 
    401     if(show_header)
    402     {
    403       print;
    404     }
    405 
    406     next;
    407   }
    408 
    409   # check for combined diff line info
    410   if($0 ~ /^(\033\[[0-9;]*m)*@@@+ /)
    411   {
    412     FATAL("Combined diff format not supported.");
    413   }
    414 
    415   # check for diff line info
    416   if($0 ~ /^(\033\[[0-9;]*m)*@@ /)
    417   {
    418     line = 0;
    419     found_line = 0;
    420     parsing_diff_header = 0;
    421 
    422     if(!found_path || !found_oldfile_path)
    423     {
    424       FATAL("Line info found before path info.");
    425     }
    426 
    427     stripped = strip_ansi_color_codes($0);
    428 
    429     regex = "^@@ -[0-9]+(,[0-9]+)? \\+([0-9]+)(,[0-9]+)? @@.*$";
    430     if(stripped ~ regex)
    431     {
    432       line = gensub(regex, "\\2", 1, stripped);
    433       # Adding zero to line converts it from a string to an integer.
    434       # That only works when all color codes have been removed.
    435       line = line + 0;
    436       found_line = 1;
    437     }
    438 
    439     if(!found_line)
    440     {
    441       errmsg = "Unrecognized hunk info.";
    442       if(path == "/dev/null")
    443       {
    444         errmsg = errmsg "\n" "Removed file: " oldfile_path;
    445       }
    446       else
    447       {
    448         errmsg = errmsg "\n" "File: " path;
    449       }
    450       errmsg = errmsg "\n" "File's hunk info: " stripped;
    451       FATAL(errmsg);
    452     }
    453 
    454     if(show_hunk)
    455     {
    456       print;
    457     }
    458 
    459     next;
    460   }
    461 
    462   if(parsing_diff_header)
    463   {
    464     stripped = strip_ansi_color_codes($0);
    465 
    466     # Check for oldfile path
    467     regex = "^\\-\\-\\- (\\042?[aiwco]\\/.+|\\/dev\\/null)$";
    468     if(stripped ~ regex)
    469     {
    470       oldfile_path = fix_extracted_path(gensub(regex, "\\1", 1, stripped));
    471       found_oldfile_path = 1;
    472 
    473       if(show_header)
    474       {
    475         print;
    476       }
    477 
    478       next;
    479     }
    480 
    481     # Check for newfile path
    482     regex = "^\\+\\+\\+ (\\042?[biwco]\\/.+|\\/dev\\/null)$";
    483     if(stripped ~ regex)
    484     {
    485       path = fix_extracted_path(gensub(regex, "\\1", 1, stripped));
    486       found_path = 1;
    487 
    488       if(show_header)
    489       {
    490         print;
    491       }
    492 
    493       next;
    494     }
    495 
    496     # Check for binary old/newfile path
    497     regex = "^Binary files (.*) differ$";
    498     if(stripped ~ regex)
    499     {
    500       path = gensub(regex, "\\1", 1, stripped);
    501 
    502       found_path = 0;
    503       found_oldfile_path = 0;
    504 
    505       # Check for binary oldfile path.
    506       # The oldfile path only needs to be set if newfile is /dev/null (deleted
    507       # or moved file).
    508       if(match(path, / and \/dev\/null$/))
    509       {
    510         oldfile_path = substr(path, 1, length(path) - RLENGTH);
    511 
    512         if((oldfile_path ~ /^\042?[aiwco]\//) && index(diff, oldfile_path))
    513         {
    514           oldfile_path = fix_extracted_path(oldfile_path);
    515           found_oldfile_path = 1;
    516           path = "/dev/null";
    517           found_path = 1;
    518         }
    519       }
    520 
    521       # This gets the path for a binary file by digging through the first line
    522       # of the diff header ('diff') and the binary file notice line
    523       # ('stripped') to find the longest rightmost match between the two.
    524       while(!found_path && match(path, /and \042?[biwco]\/.+$/))
    525       {
    526         path_len = RLENGTH - 4;
    527         path = substr(path, RSTART + 4, path_len);
    528 
    529         diff_rstart = (length(diff) + 1) - path_len;
    530         if(diff_rstart < 1)
    531         {
    532           continue;
    533         }
    534 
    535         if(path == substr(diff, diff_rstart, path_len))
    536         {
    537           path = fix_extracted_path(path);
    538           found_path = 1;
    539           break;
    540         }
    541       }
    542 
    543       if(show_header)
    544       {
    545         print;
    546       }
    547 
    548       if(!found_path && !found_oldfile_path)
    549       {
    550         errmsg = "Path info for binary file not found in header lines." \
    551                  "\n" "Diff line: " diff \
    552                  "\n" "Current line: " stripped;
    553         FATAL(errmsg);
    554       }
    555 
    556       if(show_binary)
    557       {
    558         if(found_oldfile_path)
    559         {
    560           # Binary file removed: path/to/foo:~:
    561           print_path(oldfile_path);
    562           print_line_number("~");
    563         }
    564         else
    565         {
    566           # Binary file differs: path/to/foo::
    567           print_path(path);
    568           print_line_number("");
    569         }
    570 
    571         print "";
    572       }
    573 
    574       reset_header_variables();
    575       next;
    576     }
    577 
    578     if(show_header)
    579     {
    580       print;
    581     }
    582 
    583     next;
    584   }
    585 
    586   if(!found_path || !found_oldfile_path)
    587   {
    588     FATAL("Path info not found.");
    589   }
    590 
    591   if(!found_line)
    592   {
    593     FATAL("Line info not found.");
    594   }
    595 
    596   if(path == "/dev/null")
    597   {
    598     if($0 !~ /^(\033\[[0-9;]*m)*[\\-]/)
    599     {
    600       errmsg = "Expected negative or backslash indicator for removed file's " \
    601                "diff line." \
    602                "\n" "Removed file: " oldfile_path \
    603                "\n" "File's diff line: " $0;
    604       FATAL(errmsg);
    605     }
    606 
    607     # File removed: path/to/foo:~:
    608     print_path(oldfile_path);
    609     print_line_number("~");
    610 
    611     print;
    612     next;
    613   }
    614 
    615 
    616   # Extract the indicator. Unfortunately early versions of gawk (like the one
    617   # included with git for Windows) do not support an array parameter for
    618   # match() so the indicator must be extracted on success by using substr().
    619 
    620   if(($0 !~ /^(\033\[[0-9;]*m)*[\\ +-]/) || \
    621      !match($0, /[\\ +-]/) || (RLENGTH != 1))
    622   {
    623     errmsg = "Failed to extract indicator from diff line." \
    624              "\n" "File: " path \
    625              "\n" "File's diff line: " $0;
    626     FATAL(errmsg);
    627   }
    628 
    629   indicator = substr($0, RSTART, RLENGTH);
    630 
    631   if((indicator == "+") || (indicator == " "))
    632   {
    633     print_path(path);
    634     print_line_number(line++);
    635   }
    636   else if((indicator == "-") || (indicator == "\\"))
    637   {
    638     print_path(path);
    639     # Fill the line number section with padding.
    640     print_line_number(sprintf("%" length((line + 1) "") "s", " "));
    641   }
    642   else
    643   {
    644     errmsg = "Unexpected diff line indicator." \
    645              "\n" "Indicator: " indicator \
    646              "\n" "File: " path \
    647              "\n" "File's diff line: " $0;
    648     FATAL(errmsg);
    649   }
    650 
    651   print;
    652 }