showlinenum (16839B)
#!/usr/bin/env bash
#
# Copyright (C) 2013 Jay Satiro <raysatiro@yahoo.com>
# All rights reserved.
#
# This file is part of the showlinenum project.
# https://github.com/jay/showlinenum/
#
# This file is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
#
#
# This gawk script changes the output of git diff to prepend the line number
# for each line.
#
#
#### Usage:
#
# git diff [options] | showlinenum.awk [options]
#
# All options for showlinenum require a value and are specified using the
# format option=value.
#
####
#
#
#### Output:
#
# The diff line output is in this format:
# [path:]<line number>:<diff line>
#
# When the path is shown it's the new version's file path. Line numbers are
# shown for lines in the new version of the file (ie lines that are the same or
# added). If a line appears only in the old version of the file (ie lines
# removed) or the warning indicator is found then padding space is used in
# place of a line number. If a file was removed a tilde ~ is used in place of a
# line number.
#
# The first character in <diff line> is one of four indicators:
# -       : Line removed
# +       : Line added
# <space> : Line same
# \       : diff warning about previous line
#
# For example:
#  :-removed
# 7:+added
# 8: common
#  :\ No newline at end of file
#
# As far as I know the backslash indicator is only used for the missing newline
# at EOF warning. When that warning appears it applies to the line immediately
# above it. In the example above both the old and new version of the compared
# file are missing the newline at EOF. If the line above a warning is a removed
# line then the warning applies to the old version of the file, and if the line
# above a warning is an added line then the warning applies to the new version
# of the file.
#
# All errors are sent to standard error output (stderr). Currently all errors
# are treated as fatal errors. On fatal error a line that starts with 'FATAL:'
# is followed by script name and error message(s), which may be one or more
# lines. This script then aborts with exit code 1.
#
####
#
#
#### Examples:
#
# Simple example. Line numbers are prepended to git diff's output.
# git diff --cached | showlinenum.awk
#
# This script properly handles the ANSI escape color codes output by git diff.
# To get color output you have to force git diff to send it by passing
# --color=always. When that option is used the color output is always output so
# it is not recommended unless you are either outputting to the terminal or
# somewhere that can properly handle the color codes. Many scripts do not
# function correctly when working with color coded input.
#
# This is the same as the first example, but with color output.
# git diff --color=always --cached | showlinenum.awk
#
# Options can be passed to this script by using awk's -v option or the
# traditional way (shown).
# git diff --color=always HEAD~1 HEAD | showlinenum.awk show_header=0
# git diff --color=always HEAD~1 HEAD | showlinenum.awk show_path=1 show_hunk=0
#
####
#
#
#### Options:
#
# @show_header [0,1] default: 1
# Show diff headers.
#
# Example:
# diff --git a/abc.c b/abc.c
# index 285065f..2471f87 100644
# --- a/abc.c
# +++ b/abc.c
#
##
#
# @show_hunk [0,1] default: ( show_header ? 1 : 0 )
# Show line hunks.
#
# Example: @@ -0,0 +1,17 @@
#
##
#
# @show_path [0,1] default: ( show_header ? 0 : 1 )
# Show paths before line numbers.
#
# Example:
# testdir/file:39:+some added text
#
##
#
# @show_binary [0,1] default: ( show_path ? 1 : 0 )
# Show a binary file that differs in an empty format. [path:][~]:
#
# Binary files have no concept of lines, therefore there is no line number or
# diff line to show that a binary file differs. If the headers are shown you
# can always see whether or not a binary file differs because there will be a
# message "Binary files <old> and <new> differ". If the headers are not shown
# however, that message is suppressed and a binary file that differs has an
# "empty format" with no information, except for a tilde that will be shown if
# the file was removed.
#
# Here are two examples of the empty format, one where the path is shown and
# one where it isn't:
# testdir/binary_file::
# :
#
# Here is an example of a removed binary file, path shown:
# calc.exe:~:
#
##
#
# @allow_colons_in_path [0,1] default: ( show_path ? 0 : 1 )
# Allow colons in path.
#
# If this option is off then abort if a path that contains a colon is
# encountered. That's done to guarantee that this script's diff line output can
# always be parsed with the first colon occurring immediately after the full
# path.
# Note git diff paths may start with '<commit>:' like HEAD:./foo/bar, and
# for such a path this option would need to be on.
#
##
#
# @color_{line_number,path,separator} <num>[;num][;num]
# Add color to some sections.
#
# Color the respective section using one or more ANSI color codes.
# This is not recommended unless you are outputting to the terminal.
# If semi-colons are present in these options your shell may need them quoted.
# Example: "color_line_number=1;37;45" is bright white foreground (1;37) on
# purple background (45).
#
####
#


{
# This code block is compatible with both the bourne shell and gawk. If this
# gawk script is being interpreted by the bourne shell then gawk is executed to
# become its interpreter.
# In gawk the line below is a plain string concatenation assigned to LAUNCHER.
LAUNCHER="" "exec" "gawk" "-f" "$0" "$@"
}


# Reset all per-file parsing state.
#
# Every state variable is set to 0: the "in diff header" flag, the new and old
# file paths and their found flags, the current new-file line number and its
# found flag, and the saved 'diff ' header line and its found flag.
function reset_header_variables()
{
  parsing_diff_header = 0;
  found_path = 0;
  path = 0;
  found_oldfile_path = 0;
  oldfile_path = 0;
  found_line = 0;
  line = 0;
  found_diff = 0;
  diff = 0;
}

# One-time initialization: reset parsing state, validate the color options and
# resolve every bool option to 0 or 1 (applying the documented defaults).
#
# This is called from the main rule on the first record instead of from a
# BEGIN rule. Options given the traditional way (option=value operands after
# the script name) are assigned by awk only when it reaches that operand, which
# is after any BEGIN rule has run.
function init()
{
  reset_header_variables();

  # To determine whether or not a variable was defined on the command line and
  # is not an empty string it must be tested. Many versions of gawk will show a
  # warning if using option --lint and an undefined variable is evaluated.
  # Therefore this workaround to force define some variables as a string by
  # appending an empty string.

  # String variables.
  color_line_number = color_line_number "";
  color_path = color_path "";
  color_separator = color_separator "";

  die_if_bad_color(color_line_number);
  die_if_bad_color(color_path);
  die_if_bad_color(color_separator);

  # Bool variables are later converted back to a number by get_bool().
  show_header = show_header "";
  show_hunk = show_hunk "";
  show_path = show_path "";
  show_binary = show_binary "";
  allow_colons_in_path = allow_colons_in_path "";

  # Return the variable as a bool value unless it is empty then return its
  # default bool value.
  # The order matters: each default depends on an option resolved before it.
  show_header = get_bool(show_header, 1);
  show_hunk = get_bool(show_hunk, (show_header ? 1 : 0));
  show_path = get_bool(show_path, (show_header ? 0 : 1));
  show_binary = get_bool(show_binary, (show_path ? 1 : 0));
  allow_colons_in_path = get_bool(allow_colons_in_path, (show_path ? 0 : 1));
}

# Report a fatal error and abort with exit code 1.
#
# a_msg: the error message, which may contain multiple lines.
#
# An empty line is first printed to standard output, then the message (with any
# ANSI color codes removed) is printed to standard error prefixed by
# 'FATAL: showlinenum: '.
function FATAL(a_msg)
{
  print "";
  # Apparently there is no portable way to get this script's name at runtime?
  print strip_ansi_color_codes("FATAL: showlinenum: " a_msg) > "/dev/stderr";
  exit 1;
}

# this returns the bool numeric value of 'input' if it contains a numeric or
# string bool value, otherwise it returns the numeric value of default_value.
#
# input: the option value to test. It is accepted only if it is a single 0 or 1
#        optionally surrounded by blanks.
# a_default_value: must itself be 0 or 1, otherwise this is a fatal error.
#
# errmsg and regex are local variables (awk declares locals as extra
# parameters); callers must not pass them. Keeping them local stops this
# function from overwriting the 'regex' variable used by the main rule.
function get_bool(input, a_default_value,    errmsg, regex)
{
  if(a_default_value !~ /^[0-1]$/)
  {
    errmsg = "get_bool(): a_default_value must be a bool value." \
             "\n" "a_default_value: " a_default_value;
    FATAL(errmsg);
  }

  regex = "^[[:blank:]]*([0-1])[[:blank:]]*$";
  if(input ~ regex)
  {
    # Extract the digit and add zero to convert it from a string to a number.
    return gensub(regex, "\\1", 1, input) + 0;
  }

  return a_default_value + 0;
}

# Abort with a fatal error if 'input' contains any character other than a
# digit or a semi-colon. An empty string is accepted.
#
# errmsg is a local variable; callers must not pass it.
function die_if_bad_color(input,    errmsg)
{
  if(input ~ /[^0-9;]/)
  {
    errmsg = "die_if_bad_color(): color parameters may contain only numbers " \
             "and semi-colons.";
    FATAL(errmsg);
  }
}

# Fix an extracted path.
# eg '+++ b/foo/bar' the input is 'b/foo/bar' and the output is 'foo/bar'
#
# input: a path extracted from a diff header line. "/dev/null" is returned
#        unchanged. Any other path must start with one of the prefixes
#        [abiwco]/ (optionally preceded by a double quote) or it is a fatal
#        error.
#
# Reads the globals allow_colons_in_path, show_path and diff (the color
# stripped 'diff ' header line of the file currently being parsed).
#
# Returns the path with the first [abiwco]/ prefix removed. A leading double
# quote, if any, is kept.
#
# errmsg is a local variable; callers must not pass it.
function fix_extracted_path(input,    errmsg)
{
  if(input == "/dev/null")
  {
    return input;
  }

  if(input !~ /^\042?[abiwco]\//)
  {
    errmsg = "fix_extracted_path(): sanity check failed, expected [abiwco]/ " \
             "prefix." \
             "\n" "Path: " input;
    FATAL(errmsg);
  }

  if(!allow_colons_in_path && (input ~ /:/))
  {
    errmsg = "fix_extracted_path(): colons in path are forbidden ";
    if(show_path)
    {
      errmsg = errmsg "by default when show_path is on in deference to " \
               "scripts which may parse this script's output and rely on " \
               "the colon as a separator. To override use command line " \
               "option allow_colons_in_path=1.";
    }
    else
    {
      errmsg = errmsg "because allow_colons_in_path is off.";
    }
    errmsg = errmsg "\n" "Path: " input;
    FATAL(errmsg);
  }

  # Remove an erroneous trailing tab that git diff can add to some non-binary
  # paths. eg an unquoted 'b/a $b<tab>' becomes 'b/a $b' if the diff line
  # only contains the latter.
  if((input ~ /\t$/) && !index(diff, input) && \
     index(diff, substr(input, 1, length(input) - 1)))
  {
    sub(/\t$/, "", input);
  }

  # The sanity check above guarantees the first match is the leading prefix.
  sub(/[abiwco]\//, "", input);

  return input;
}

# this returns a string with the ansi color codes removed
function strip_ansi_color_codes(input)
{
  return gensub(/\033\[[0-9;]*m/, "", "g", input);
}

# Print a_separator without a trailing newline, wrapped in the color_separator
# color codes if that option was specified.
function print_separator(a_separator)
{
  if(color_separator)
  {
    printf "\033[%sm%s\033[m", color_separator, a_separator;
  }
  else
  {
    printf "%s", a_separator;
  }
}

# Print the line number section followed by a ":" separator, without a
# trailing newline.
#
# a_line_number: a line number, or a placeholder string (eg "~", padding
#                spaces or an empty string) which is printed as is.
#
# The number or placeholder is wrapped in the color_line_number color codes if
# that option was specified.
function print_line_number(a_line_number)
{
  if(color_line_number)
  {
    printf "\033[%sm", color_line_number;
  }

  if(a_line_number ~ /^[0-9]+$/)
  {
    # Awk stores all integers internally as floating point. If printf is passed
    # an integer it is allowed to convert it to scientific notation which I
    # don't want for line numbers. I'm not sure how relevant that is since it
    # seems to vary between different versions of awk and only when the integer
    # is large (how large?).
    # The 'f' type specifier should show [-9007199254740992, 9007199254740992]
    printf "%.0f", a_line_number + 0;
  }
  else
  {
    printf "%s", a_line_number;
  }

  if(color_line_number)
  {
    printf "\033[m";
  }

  print_separator(":");
}

# Print a_path followed by a ":" separator, without a trailing newline.
# Nothing is printed if the show_path option is off.
#
# The path is wrapped in the color_path color codes if that option was
# specified.
function print_path(a_path)
{
  if(!show_path)
  {
    return;
  }

  if(color_path)
  {
    printf "\033[%sm%s\033[m", color_path, a_path;
  }
  else
  {
    printf "%s", a_path;
  }

  print_separator(":");
}

#
# main
#
# This rule runs for every input record. Each record is one of: a 'diff '
# header line, hunk info ('@@ '), some other header line, or a diff line that
# belongs to the current hunk.
#
{
  if(NR == 1)
  {
    init();
  }

  # A 'diff ' line, optionally preceded by color codes, starts a new file.
  if($0 ~ /^(\033\[[0-9;]*m)*diff /)
  {
    reset_header_variables();
    parsing_diff_header = 1;

    diff = strip_ansi_color_codes($0);
    found_diff = 1;

    if(show_header)
    {
      print;
    }

    next;
  }

  # check for combined diff line info
  if($0 ~ /^(\033\[[0-9;]*m)*@@@+ /)
  {
    FATAL("Combined diff format not supported.");
  }

  # check for diff line info
  if($0 ~ /^(\033\[[0-9;]*m)*@@ /)
  {
    line = 0;
    found_line = 0;
    parsing_diff_header = 0;

    if(!found_path || !found_oldfile_path)
    {
      FATAL("Line info found before path info.");
    }

    stripped = strip_ansi_color_codes($0);

    # The second group captures the starting line number in the new file.
    regex = "^@@ -[0-9]+(,[0-9]+)? \\+([0-9]+)(,[0-9]+)? @@.*$";
    if(stripped ~ regex)
    {
      line = gensub(regex, "\\2", 1, stripped);
      # Adding zero to line converts it from a string to an integer.
      # That only works when all color codes have been removed.
      line = line + 0;
      found_line = 1;
    }

    if(!found_line)
    {
      errmsg = "Unrecognized hunk info.";
      if(path == "/dev/null")
      {
        errmsg = errmsg "\n" "Removed file: " oldfile_path;
      }
      else
      {
        errmsg = errmsg "\n" "File: " path;
      }
      errmsg = errmsg "\n" "File's hunk info: " stripped;
      FATAL(errmsg);
    }

    if(show_hunk)
    {
      print;
    }

    next;
  }

  if(parsing_diff_header)
  {
    stripped = strip_ansi_color_codes($0);

    # Check for oldfile path
    regex = "^\\-\\-\\- (\\042?[aiwco]\\/.+|\\/dev\\/null)$";
    if(stripped ~ regex)
    {
      oldfile_path = fix_extracted_path(gensub(regex, "\\1", 1, stripped));
      found_oldfile_path = 1;

      if(show_header)
      {
        print;
      }

      next;
    }

    # Check for newfile path
    regex = "^\\+\\+\\+ (\\042?[biwco]\\/.+|\\/dev\\/null)$";
    if(stripped ~ regex)
    {
      path = fix_extracted_path(gensub(regex, "\\1", 1, stripped));
      found_path = 1;

      if(show_header)
      {
        print;
      }

      next;
    }

    # Check for binary old/newfile path
    regex = "^Binary files (.*) differ$";
    if(stripped ~ regex)
    {
      # At this point path holds '<old> and <new>', not a single path.
      path = gensub(regex, "\\1", 1, stripped);

      found_path = 0;
      found_oldfile_path = 0;

      # Check for binary oldfile path.
      # The oldfile path only needs to be set if newfile is /dev/null (deleted
      # or moved file).
      if(match(path, / and \/dev\/null$/))
      {
        oldfile_path = substr(path, 1, length(path) - RLENGTH);

        if((oldfile_path ~ /^\042?[aiwco]\//) && index(diff, oldfile_path))
        {
          oldfile_path = fix_extracted_path(oldfile_path);
          found_oldfile_path = 1;
          path = "/dev/null";
          found_path = 1;
        }
      }

      # This gets the path for a binary file by digging through the first line
      # of the diff header ('diff') and the binary file notice line
      # ('stripped') to find the longest rightmost match between the two.
      while(!found_path && match(path, /and \042?[biwco]\/.+$/))
      {
        # Drop everything up to and including the 4 characters of "and ".
        path_len = RLENGTH - 4;
        path = substr(path, RSTART + 4, path_len);

        # The candidate must be identical to the tail of the 'diff ' line.
        diff_rstart = (length(diff) + 1) - path_len;
        if(diff_rstart < 1)
        {
          continue;
        }

        if(path == substr(diff, diff_rstart, path_len))
        {
          path = fix_extracted_path(path);
          found_path = 1;
          break;
        }
      }

      if(show_header)
      {
        print;
      }

      if(!found_path && !found_oldfile_path)
      {
        errmsg = "Path info for binary file not found in header lines." \
                 "\n" "Diff line: " diff \
                 "\n" "Current line: " stripped;
        FATAL(errmsg);
      }

      if(show_binary)
      {
        if(found_oldfile_path)
        {
          # Binary file removed: path/to/foo:~:
          print_path(oldfile_path);
          print_line_number("~");
        }
        else
        {
          # Binary file differs: path/to/foo::
          print_path(path);
          print_line_number("");
        }

        print "";
      }

      # A binary file has no hunks so parsing of this file is finished.
      reset_header_variables();
      next;
    }

    # Any other header line (eg index, mode) is passed through if wanted.
    if(show_header)
    {
      print;
    }

    next;
  }

  # From here on the record must be a diff line that belongs to a hunk.

  if(!found_path || !found_oldfile_path)
  {
    FATAL("Path info not found.");
  }

  if(!found_line)
  {
    FATAL("Line info not found.");
  }

  if(path == "/dev/null")
  {
    if($0 !~ /^(\033\[[0-9;]*m)*[\\-]/)
    {
      errmsg = "Expected negative or backslash indicator for removed file's " \
               "diff line." \
               "\n" "Removed file: " oldfile_path \
               "\n" "File's diff line: " $0;
      FATAL(errmsg);
    }

    # File removed: path/to/foo:~:
    print_path(oldfile_path);
    print_line_number("~");

    print;
    next;
  }


  # Extract the indicator. Unfortunately early versions of gawk (like the one
  # included with git for Windows) do not support an array parameter for
  # match() so the indicator must be extracted on success by using substr().

  if(($0 !~ /^(\033\[[0-9;]*m)*[\\ +-]/) || \
     !match($0, /[\\ +-]/) || (RLENGTH != 1))
  {
    errmsg = "Failed to extract indicator from diff line." \
             "\n" "File: " path \
             "\n" "File's diff line: " $0;
    FATAL(errmsg);
  }

  indicator = substr($0, RSTART, RLENGTH);

  if((indicator == "+") || (indicator == " "))
  {
    # The line exists in the new file: show its number then advance.
    print_path(path);
    print_line_number(line++);
  }
  else if((indicator == "-") || (indicator == "\\"))
  {
    print_path(path);
    # Fill the line number section with padding.
    # NOTE(review): the padding width is the number of digits in (line + 1),
    # not in line -- confirm that is the intended alignment.
    print_line_number(sprintf("%" length((line + 1) "") "s", " "));
  }
  else
  {
    errmsg = "Unexpected diff line indicator." \
             "\n" "Indicator: " indicator \
             "\n" "File: " path \
             "\n" "File's diff line: " $0;
    FATAL(errmsg);
  }

  print;
}