citadel

My dotfiles, scripts and nix configs
git clone git://jb55.com/citadel
Log | Files | Refs | README | LICENSE

pandocweb (1166B)


      1 #!/usr/bin/env bash
      2 
      # Strict mode: stop on any failing command (errexit), on expansion of an
      # unset variable (nounset), and when any stage of a pipeline fails
      # (pipefail).
      set -o errexit -o nounset -o pipefail

      # Converter command to run; an existing non-empty PANDOC from the
      # environment takes precedence over the default.
      : "${PANDOC:=pandoc}"
      6 
      #######################################
      # Failure handler (installed on the ERR trap by this script): announce
      # that the run failed and print the current working directory so that
      # whatever was written there can be inspected.
      # Globals:   PWD (read)
      # Arguments: none
      # Outputs:   a "FAILED" notice, padded with blank lines, on stderr
      #######################################
      function onfail() {
        # PWD is passed as an argument rather than spliced into the format
        # string, so a directory name containing '%' or '\' prints verbatim.
        # The notice goes to stderr because stdout carries only the path of the
        # generated file.
        printf '\n\nFAILED, output dir: %s\n\n' "$PWD" >&2
      }
     10 
     #######################################
     # Build a default output filename inside the current directory, derived
     # from the SHA-256 digest of the URL so each URL maps to a stable name.
     # Globals:   url (read), PWD (read)
     # Arguments: none
     # Outputs:   "<PWD>/out-<first 8 hex digits of the digest>.epub" on stdout
     #######################################
     function get_tmpname() {
       local digest
       # The here-string appends a newline, so the digest covers "$url\n".
       # sha256sum prints the hex digest first, so its leading 8 characters
       # are the digest prefix.
       digest=$(sha256sum <<<"$url")
       # PWD is passed as an argument, not as part of the format string, so a
       # '%' or '\' in the directory name is printed verbatim.
       printf '%s/out-%s.epub\n' "$PWD" "${digest:0:8}"
     }
     15 
     # Main flow: mirror one web page (with its page requisites) into a
     # scratch directory using wget, convert the first HTML file found there
     # with $PANDOC, and print the path of the resulting file on stdout.
     # All progress messages go to stderr.
     #
     # Usage: pandocweb URL [OUTPUT]
     #   URL     page to download
     #   OUTPUT  output file; defaults to the name produced by get_tmpname
     trap onfail ERR

     if (( $# < 1 )) || [[ -z $1 ]]; then
       printf 'usage: %s URL [OUTPUT]\n' "${0##*/}" >&2
       exit 1
     fi

     url="$1"

     # Host used for wget's --domains filter: drop a leading "scheme://" when
     # present, then cut at the first "/" or ":" (path or port).
     hostname=$url
     if [[ $hostname =~ ^[A-Za-z][A-Za-z0-9+.-]*:// ]]; then
       hostname=${hostname#*://}
     fi
     hostname=${hostname%%[/:]*}

     # Resolve the output path before changing directory, so a relative
     # OUTPUT (and the get_tmpname default) refers to the caller's directory.
     out_name=$(readlink -f "${2:-"$(get_tmpname)"}")

     tmpdir=$(mktemp -d)

     cd "$tmpdir"

     # Best effort: wget's exit status is deliberately ignored here; whether
     # anything usable was fetched is decided by the HTML search below.
     wget \
          --page-requisites \
          --adjust-extension \
          --span-hosts \
          --convert-links \
          --restrict-file-names=windows \
          --domains "$hostname" \
          --no-parent \
          "$url" || :

     printf 'finding html file... \n' >&2
     # -quit stops at the first match, so no pipeline stage can be killed by
     # SIGPIPE (which pipefail would turn into a failure of the whole script).
     target=$(find . -type f -name '*.htm*' -print -quit)
     if [[ -z $target ]]; then
       printf 'no html file was downloaded from %s\n' "$url" >&2
       onfail
       exit 1
     fi
     printf 'found: %s\n' "$target" >&2
     file=$(basename -- "$target")

     # Convert from inside the page's own directory so the relative links
     # written by --convert-links resolve.
     cd -- "$(dirname -- "$target")"
     printf 'converting %s ... ' "$file" >&2
     # PANDOC stays unquoted, as before, so a value carrying extra arguments
     # still word-splits into command plus options.
     # shellcheck disable=SC2086
     $PANDOC "$file" -o "$out_name" >&2
     printf 'done. saved as:\n' >&2
     printf '%s\n' "$out_name"

     # Step out of the scratch directory before deleting it. "/" is used
     # instead of a bare `cd` so this does not depend on HOME being set.
     # On failure the directory is kept; onfail reports where it is.
     cd /
     rm -rf -- "${tmpdir:?}"