pandocweb (1166B)
#!/usr/bin/env bash
#
# pandocweb - download a web page together with its page requisites and
# convert the first HTML file found in the download to a document with pandoc.
#
# Usage: pandocweb URL [OUTPUT]
#
#   URL     page to fetch with wget
#   OUTPUT  destination file; defaults to "$PWD/out-<hash>.epub", where <hash>
#           is the first 8 hex digits of the SHA-256 of URL
#
# Environment:
#   PANDOC  pandoc command to run (default: pandoc)
#
# Progress messages go to stderr; the resolved path of the written file is the
# only thing printed on stdout. On failure the temporary download directory is
# kept and its location is reported so it can be inspected.

set -euo pipefail

PANDOC=${PANDOC:-pandoc}

# ERR trap handler: report the current directory (the download directory once
# the script has changed into it). Written to stderr so stdout only ever
# carries the result path.
onfail() {
  printf '\n\nFAILED, output dir: %s\n\n' "$PWD" >&2
}

# Print the default output path for the global $url.
# The here-string appends a newline before hashing, so the hash is that of
# the URL followed by "\n".
get_tmpname() {
  local hashpart
  hashpart=$(sha256sum <<<"$url" | awk '{print $1}')
  printf '%s/out-%s.epub\n' "$PWD" "${hashpart:0:8}"
}

# Print the host part of the URL given as $1: strips an optional scheme,
# everything from the first "/", "?" or "#", any "user:pass@" prefix and a
# trailing ":port".
url_host() {
  local rest=$1
  local -r scheme_re='^[A-Za-z][A-Za-z0-9+.-]*://'
  if [[ $rest =~ $scheme_re ]]; then
    rest=${rest#*://}
  fi
  rest=${rest%%[/?#]*}
  rest=${rest##*@}
  rest=${rest%%:*}
  printf '%s\n' "$rest"
}

trap onfail ERR

if (( $# < 1 )) || [[ -z "$1" ]]; then
  printf 'Usage: %s URL [OUTPUT]\n' "${0##*/}" >&2
  exit 2
fi

url=$1

# Resolve the output path now, while $PWD is still the caller's directory.
if [[ -n "${2:-}" ]]; then
  out_target=$2
else
  out_target=$(get_tmpname)
fi
out_name=$(readlink -f -- "$out_target")

host=$(url_host "$url")
if [[ -z "$host" ]]; then
  printf 'cannot determine host name from URL: %s\n' "$url" >&2
  exit 2
fi

tmpdir=$(mktemp -d)

cd "$tmpdir"

# wget's exit status is ignored on purpose (as in the original script); a
# download that produced nothing is caught by the HTML lookup below.
wget \
  --page-requisites \
  --adjust-extension \
  --span-hosts \
  --convert-links \
  --restrict-file-names=windows \
  --domains "$host" \
  --no-parent \
  "$url" || :

printf 'finding html file... \n' >&2
# -quit stops at the first match; this avoids a "find | head" pipeline, which
# can fail under pipefail when head exits early.
target=$(find . -name '*.htm*' -print -quit)
if [[ -z "$target" ]]; then
  printf 'no html file found in download of %s\n' "$url" >&2
  onfail
  exit 1
fi
printf 'found: %s\n' "$target" >&2
file=$(basename "$target")

# Run pandoc from the file's own directory so relative links to the
# downloaded requisites resolve.
cd "$(dirname "$target")"
printf 'converting %s ... ' "$file" >&2
# PANDOC is left unquoted so it may carry extra arguments.
# shellcheck disable=SC2086
$PANDOC "$file" -o "$out_name" 1>&2
printf 'done. saved as:\n' >&2
printf '%s\n' "$out_name"

# Leave the temporary tree before deleting it.
cd /
rm -rf -- "${tmpdir:?}"