repair-utf8 (460B)
#!/usr/bin/perl

# repair-utf8: copy input to STDOUT as UTF-8, repairing mixed encodings.
#
# Input is read as octets, line by line, from the files named on the
# command line, or from STDIN when no file is given ("-" also means STDIN).
# Each run of well-formed UTF-8 is decoded as UTF-8.  Whenever UTF-8
# decoding stops at an octet it cannot decode, that single octet is
# interpreted as ISO-8859-1 (Latin-1) and UTF-8 decoding resumes after it.
# The repaired text is written to STDOUT encoded as UTF-8.

use strict;
use warnings;

use Encode qw( decode FB_QUIET );

# Run the filter only when executed as a script, so that the file can also
# be loaded (require/do) to call repair_utf8() directly.
main() unless caller;

# Entry point: repair every input source and print the result to STDOUT.
sub main {
    binmode STDOUT, ':encoding(UTF-8)';

    for my $source ( @ARGV ? @ARGV : '-' ) {
        my $in = open_octets($source) or next;
        while ( defined( my $line = <$in> ) ) {
            print repair_utf8($line);
        }
        close $in if $source ne '-';
    }
    return;
}

# Return a read handle for $source that yields undecoded octets, or nothing
# (after a warning) when the file cannot be opened.
#
# Files are opened explicitly rather than through <>: a binmode on STDIN
# does not affect the files <> opens on ARGV, so default input layers
# (for example ":utf8" from -C / PERL_UNICODE) would otherwise still apply
# to them.  Three-argument open also treats $source strictly as a file name.
sub open_octets {
    my ($source) = @_;

    if ( $source eq '-' ) {
        binmode STDIN, ':bytes';
        return \*STDIN;
    }

    open my $fh, '<:bytes', $source
        or do { warn "Can't open $source: $!"; return };
    return $fh;
}

# Decode a string of octets into characters.
#
#   $octets  - byte string to repair (the caller's copy is not modified)
#
# Returns the character string: valid UTF-8 sequences decoded as UTF-8,
# every other octet decoded as one Latin-1 character.
sub repair_utf8 {
    my ($octets) = @_;
    my $text = '';

    while ( length $octets ) {
        # With FB_QUIET, decode() returns what it could decode and leaves
        # the undecoded remainder (starting at the first error) in $octets.
        $text .= decode( 'utf-8', $octets, FB_QUIET );

        # Anything left begins with an octet that is not valid UTF-8 here.
        # Remove exactly that octet and take it as Latin-1, which maps
        # every octet to a character, so the loop always makes progress.
        $text .= decode( 'iso-8859-1', substr( $octets, 0, 1, '' ) )
            if length $octets;
    }

    return $text;
}