#!/bin/bash
#
# scribd-dl - render a Scribd book as a single PDF file.
#
# Usage: scribd-dl scribd_book_url
#
# Steps performed:
#   1. Download the book's HTML page with wget.
#   2. Derive the output file name and the asset prefix from the HTML.
#   3. Collect the page identifiers referenced in the HTML.
#   4. Download every page image, convert it to a one-page PDF (ImageMagick).
#   5. Concatenate the per-page PDFs with pdftk into "<title>.pdf".
#
# Requires: wget, convert (ImageMagick), pdftk, and a stat that supports
# --printf (GNU coreutils).
# Exit status: 0 on success, 1 on usage error or any failure.

#WORKINGDIR="/tmp/${0##*/}-$$"
WORKINGDIR=".${0##*/}-$$"

# Maximum number of download attempts per page image.
MAX_RETRIES=10

if (( $# != 1 )) || [[ $1 = '--help' ]]; then
  printf '%s\n' \
    "Renders PDF files of books on Scribd." \
    "Usage: ${0##*/} scribd_book_url"
  exit 1
elif ! command -v wget > /dev/null; then
  echo "This script requires wget."
  exit 1
elif ! command -v convert > /dev/null; then
  echo "This script requires ImageMagick."
  exit 1
elif ! command -v pdftk > /dev/null; then
  echo "This script requires PDFTK."
  exit 1
fi

printf 'Getting HTML content... '
if ! html="$(wget -qO - "$1")"; then
  printf '%s\n' "error." "Could not download \"$1\"."
  exit 1
fi
echo "done."

# Use the page title as the output file name.
# NOTE(review): the delimiters are assumed to be the page's <title> tags;
# confirm against the upstream script.
filename=""
if [[ $html == *'<title>'*'</title>'* ]]; then
  filename="${html#*<title>}"
  filename="${filename%%</title>*}"
  # A slash would be interpreted as a directory separator.
  filename="${filename//\//-}"
fi
if [[ -z $filename ]]; then
  # No usable title: fall back to a fixed name instead of writing ".pdf".
  filename="scribd-book"
fi

echo "Destination PDF: \"$filename.pdf\"."

printf 'Searching for assetPrefix... '
# Literal marker that precedes the asset prefix in the page source.
regexPrefix="docManager.assetPrefix = \""
if [[ $html != *"$regexPrefix"* ]]; then
  printf '%s\n' "error." "\"$1\" is not a valid Scribd book URL."
  exit 1
fi
# The prefix is everything between the marker and the next double quote.
prefix="${html#*"$regexPrefix"}"
prefix="${prefix%%\"*}"
echo "\"$prefix\"."

printf 'Finding pages to download... '
regexJpg="http://html.scribd.com/$prefix/images/[0-9]+-[0-9a-z]+.jpg"
regexJsonp="http://html[1-4].scribdassets.com/$prefix/pages/[0-9]+-[0-9a-z]+.jsonp"
pages=()
# IFS= and -r keep each line intact (no whitespace trimming, no backslash
# processing) before it is matched against the page URL patterns.
while IFS= read -r line; do
  if [[ $line =~ $regexJsonp ]]; then
    page="${line##*/pages/}"
    page="${page%%.jsonp*}"
  elif [[ $line =~ $regexJpg ]]; then
    page="${line##*/images/}"
    page="${page%%.jpg*}"
  else
    continue
  fi
  pages+=("$page")
done <<< "$html"
totalPages="${#pages[@]}"
echo "$totalPages pages."

if (( totalPages == 0 )); then
  # Nothing to download; continuing would only produce an empty/failed merge.
  echo "No pages found in \"$1\"."
  exit 1
fi

# Removes the working directory and everything in it.
clean-up() {
  printf 'Cleaning up... '
  # -f: do not complain if the directory was never created.
  # :? aborts instead of running rm on an empty path.
  rm -rf -- "${WORKINGDIR:?}"
  echo "done."
}

trap 'echo; clean-up; exit 1' SIGINT SIGTERM

echo "Using working directory \"$WORKINGDIR\"."
if ! mkdir "$WORKINGDIR"; then
  echo "Could not create working directory \"$WORKINGDIR\"."
  exit 1
fi

# Zero-pad page file names to the width of the page count so that the
# shell glob passed to pdftk sorts them in page order.
padding="${#totalPages}"
for pageIndex in "${!pages[@]}"; do
  pageNumber="$((pageIndex + 1))"
  printf -v file "%.${padding}d" "$pageNumber"
  url="http://html.scribd.com/$prefix/images/${pages[pageIndex]}.jpg"

  printf 'Rendering PDF data for page %s (of %s)... ' "$pageNumber" "$totalPages"
  downloaded=0
  for (( retries = 0; retries < MAX_RETRIES; retries++ )); do
    if wget -qO "$WORKINGDIR/$file.jpg" "$url"; then
      downloaded=1
      break
    fi
    sleep 2
  done

  if (( ! downloaded )); then
    printf '%s\n' "error." "Error while downloading \"$url\"."
    clean-up
    exit 1
  fi

  if ! convert "$WORKINGDIR/$file.jpg" "$WORKINGDIR/$file.pdf"; then
    printf '%s\n' "error." "Could not convert \"$WORKINGDIR/$file.jpg\" to PDF."
    clean-up
    exit 1
  fi

  rm "$WORKINGDIR/$file.jpg"
  echo "$(($(stat --printf '%s' "$WORKINGDIR/$file.pdf") / 1024)) KB."
done

printf 'Combining all PDF files to one file... '
# The glob is expanded before pdftk runs, so the output file (created in the
# same directory) is not part of its own input list.
if ! pdftk "$WORKINGDIR/"*".pdf" cat output "$WORKINGDIR/$filename.pdf"; then
  printf '%s\n' "error." "Could not combine the page PDF files."
  clean-up
  exit 1
fi
if ! mv "$WORKINGDIR/$filename.pdf" "$filename.pdf"; then
  printf '%s\n' "error." "Could not move the PDF to \"$filename.pdf\"."
  clean-up
  exit 1
fi
echo "$(($(stat --printf '%s' "$filename.pdf") / 1024)) KB."

clean-up

echo "PDF file saved as \"$filename.pdf\"."