citadel

My dotfiles, scripts and nix configs
git clone git://jb55.com/citadel
Log | Files | Refs | README | LICENSE

repair-utf8 (460B)


      #!/usr/bin/perl

      # repair-utf8: filter that reads raw octets line by line and writes UTF-8.
      # Runs of well-formed UTF-8 are passed through as the characters they
      # encode; any octet that does not fit is reinterpreted as ISO-8859-1.

      use strict;
      use warnings;

      use Encode qw( decode FB_QUIET );

      # Read undecoded octets (all decoding is done by hand below) and encode
      # everything we print as UTF-8.
      binmode STDIN, ':bytes';
      binmode STDOUT, ':encoding(UTF-8)';

      while ( my $line = <> ) {
          print repair_line($line);
      }

      # Decode one line of raw octets into a character string.
      #   $octets - the undecoded input line
      # Returns the decoded text, with invalid UTF-8 octets read as Latin-1.
      sub repair_line {
          my ($octets) = @_;
          my $text = '';

          while ( length $octets ) {
              # With FB_QUIET, decode() stops quietly at the first malformed
              # sequence and leaves only the unprocessed remainder in $octets.
              $text .= decode( 'utf-8', $octets, FB_QUIET );
              last if !length $octets;

              # The leading octet is not valid UTF-8 here: strip it off the
              # front and decode it as Latin-1, where every octet is valid.
              $text .= decode( 'iso-8859-1', substr( $octets, 0, 1, '' ) );
          }

          return $text;
      }