├── .dockerignore ├── .gitignore ├── .gitmodules ├── AUTHORS.md ├── CHANGELOG.md ├── Dockerfile ├── GITHUB.txt ├── INSTALL.txt ├── LICENSE ├── Makefile ├── README.md ├── amz-tradein.pl ├── cron.daily └── goodratings.example ├── friendgroup.pl ├── friendnet.pl ├── friendrated.pl ├── git-hooks ├── pre-commit └── pre-push ├── help ├── GOODTIPS.md ├── amz-tradein.md ├── friendgroup.md ├── friendnet.md ├── friendrated.md ├── img │ ├── friendgroup.png │ ├── friendrated.png │ ├── friendrated2.png │ ├── friendrated3.png │ ├── likeminded.png │ ├── search.png │ └── similarauth.png ├── likeminded.md ├── recentrated.md ├── savreviews.md ├── search.md └── similarauth.md ├── lib ├── Goodscrapes.html ├── Goodscrapes.pm └── Goodscrapes.pod ├── likeminded.pl ├── list-in ├── README.md ├── dict.lst ├── gram-en-l,word-en-1k.lst ├── gram-en-l.lst ├── gram-en-s.lst ├── test.lst ├── word-en-1k.lst ├── word-en-3k.lst └── word-en-s.lst ├── list-out └── README.md ├── recentrated.pl ├── savreviews.pl ├── search.pl ├── similarauth.pl └── t ├── README.md ├── config.pl-example ├── ghtmlxxx.t ├── gisxxx.t ├── glogin.t ├── gmeter.t ├── greadauthorbk.t ├── greadauthors.t ├── greadbook.t ├── greadcomments.t ├── greadfolls.t ├── greadreviews.t ├── greadshelf.t ├── greadshelfnames.t ├── greadsimilaraut.t ├── greaduser.t ├── greadusergrp.t ├── gsearch.t ├── gsocialnet.t ├── gverifyshelf.t └── gverifyxxx.t /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore everything: 2 | ** 3 | 4 | # Allow files and directories: 5 | !/LICENSE 6 | !/Dockerfile 7 | 8 | !/*.md 9 | 10 | !/Makefile 11 | 12 | !/friendgroup.pl 13 | !/friendnet.pl 14 | !/friendrated.pl 15 | !/likeminded.pl 16 | !/recentrated.pl 17 | !/savreviews.pl 18 | !/search.pl 19 | !/similarauth.pl 20 | 21 | !/lib/*.pm 22 | !/lib/*.pl 23 | !/lib/*.pod 24 | 25 | !/list-in/*.lst 26 | # "list-out" will be created empty 27 | 28 | !/help/*.md 29 | !/help/img/** 30 | 31 | !/t/*.t 32 | !/t/*.md 33 | 
!/t/config.pl-example 34 | 35 | 36 | # REMOVE LATER AGAIN! 37 | #!/t/config.pl 38 | 39 | 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.bak 3 | *.tmp 4 | *.csv 5 | *.swp 6 | *.xml 7 | *.html 8 | !lib/Goodscrapes.html 9 | *.log 10 | *.tar.gz 11 | *.zip 12 | *.sh 13 | *.xcf 14 | *.secret 15 | *.token 16 | lib/local/* 17 | t/config.pl 18 | .build 19 | .obsolete 20 | TODO.txt 21 | cve 22 | lib/Goodgrief* 23 | staff* 24 | savreviews-*.txt 25 | revs-*.txt 26 | q 27 | export.pl 28 | cron.daily/goodratings.sh 29 | list-out/recentrated* 30 | 31 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "www"] 2 | path = www 3 | url = https://github.com/andre-st/goodreads-www.git 4 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Authors/Contributors 2 | 3 | | Name | Contact | Dev | i18n | Test | Doc | Release | 4 | |---------------|------------------------------|:---:|:------:|:----:|:---:|:-------:| 5 | | André St. | | X | / | X | X | X | 6 | | ... 
| | | | | | | 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # The final image is around 138 MB, 2 | # Build time is around 7 minutes 3 | # 4 | FROM alpine:latest 5 | 6 | # ---------------------------------------------------------------------------- 7 | # Configuring the image: 8 | 9 | ENV PROGDIR=/root 10 | ENV HTPORT=80 11 | ENV HTDOCS=$PROGDIR/list-out 12 | ARG BUILD_DATE 13 | ARG PROJECT_VERSION 14 | VOLUME /tmp/FileCache 15 | EXPOSE $HTPORT 16 | 17 | # About: 18 | # http://label-schema.org/rc1/ 19 | LABEL org.label-schema.schema-version="1.0" 20 | LABEL org.label-schema.name="Andre's Goodreads Toolbox" 21 | LABEL org.label-schema.description="Tools for Goodreads.com, for finding people based on the books they've read, finding books popular among the people you follow, following new book reviews, etc" 22 | LABEL org.label-schema.maintainer="datakadabra@gmail.com" 23 | LABEL org.label-schema.build-date=$BUILD_DATE 24 | LABEL org.label-schema.version=$PROJECT_VERSION 25 | LABEL org.label-schema.url="https://github.com/andre-st/goodreads-toolbox/blob/master/README.md" 26 | LABEL org.label-schema.vcs-url="https://github.com/andre-st/goodreads-toolbox/" 27 | LABEL org.opencontainers.image.source="https://github.com/andre-st/goodreads-toolbox/" 28 | 29 | 30 | # ---------------------------------------------------------------------------- 31 | # Building the image: 32 | 33 | # Use .dockerignore to exclude everything but the minimum necessary set of files. 34 | COPY . 
$PROGDIR 35 | 36 | WORKDIR $PROGDIR/ 37 | 38 | RUN apk add --no-cache \ 39 | build-base \ 40 | zlib-dev \ 41 | bash \ 42 | openssl \ 43 | openssl-dev \ 44 | perl-dev \ 45 | perl-doc \ 46 | thttpd \ 47 | && make \ 48 | && apk del --purge build-base openssl-dev zlib-dev \ 49 | ; rm -rf \ 50 | /usr/share/{man,doc,info,groff}/* \ 51 | $HOME/.cpan/build/* \ 52 | $HOME/.cpan/sources/authors/id \ 53 | $HOME/.cpan/cpan_sqlite_log.* \ 54 | /tmp/cpan_install_*.txt \ 55 | ; echo $'\ 56 | echo "*******************************************"\n\ 57 | echo "*** WELCOME TO ANDRES GOODREADS TOOLBOX ***"\n\ 58 | echo "*******************************************"\n\ 59 | echo "Available Tools:"\n\ 60 | ls -1 *.pl | nl -bn \n\ 61 | ' > $HOME/.bashrc 62 | 63 | 64 | # ---------------------------------------------------------------------------- 65 | # Running the container: 66 | 67 | # bash already in WORKDIR: 68 | ENTRYPOINT thttpd -h 0.0.0.0 -p $HTPORT -d $HTDOCS -l /dev/null && bash 69 | 70 | 71 | -------------------------------------------------------------------------------- /GITHUB.txt: -------------------------------------------------------------------------------- 1 | Repository Name: 2 | 3 | goodreads 4 | 5 | 6 | Description: 7 | 8 | Tools for Goodreads.com, such as an Amazon buyback price monitor to discover 9 | sales opportunities, or a "follow book" implementation to discover quality 10 | users and libraries 11 | 12 | 13 | Website: 14 | 15 | - 16 | 17 | 18 | Topics: 19 | 20 | goodreads 21 | goodreads-api 22 | goodreads-shelves 23 | statistics 24 | rating 25 | recommender 26 | recommendation 27 | reviews 28 | monitor 29 | monitoring 30 | notification 31 | similarity 32 | similar-users 33 | taste 34 | followers 35 | friend-matching 36 | ratings 37 | like-minded 38 | discoverability 39 | likeminded 40 | -------------------------------------------------------------------------------- /INSTALL.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/INSTALL.txt -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Andre's Goodreads Toolbox Makefile 2 | 3 | 4 | # Configure Make: 5 | # https://tech.davis-hansson.com/p/make/ 6 | SHELL := bash 7 | .ONESHELL: 8 | .SHELLFLAGS := -eu -o pipefail -c 9 | .DELETE_ON_ERROR: 10 | MAKEFLAGS += --warn-undefined-variables 11 | MAKEFLAGS += --no-builtin-rules 12 | #.DEFAULT_GOAL := help 13 | 14 | 15 | # Configure Make rules: 16 | PROJECT_VERSION = 1.25.1 17 | CACHE_DIR = /tmp/FileCache/Goodscrapes 18 | BUILD_DIR = .build 19 | PACKAGE = goodreads-toolbox 20 | 21 | DOCKER_BUILD_DATE = $(shell date -u +'%Y-%m-%dT%H:%M:%SZ') 22 | DOCKER_IMG_VER = ${PROJECT_VERSION} 23 | DOCKER_IMG_NAME = ${PACKAGE} 24 | DOCKER_CON_NAME = ${PACKAGE} 25 | DOCKER_DIR = . 26 | DOCKER_HTPORT = 8080 27 | 28 | GITHUB_USER = andre-st 29 | GITHUB_REPONAME = ${PACKAGE} 30 | RELEASE = $(PACKAGE)-$(PROJECT_VERSION) 31 | GITDIR = $(wildcard .git) 32 | 33 | IS_ROOT := $(shell test $(shell id -u) = 0 && echo 1) 34 | IS_LOCAL_LIB := $(shell perldoc -l local::lib 2> /dev/null ) 35 | 36 | 37 | # ---------------------------------------------------------------------------- 38 | ## make all : Installs programs and dependencies from CPAN (default) 39 | # 40 | all: deps installdirs 41 | 42 | 43 | # ---------------------------------------------------------------------------- 44 | ## make installdirs : Creates needed directories, adds symlinks etc 45 | # 46 | .PHONY: installdirs $(GITDIR) 47 | installdirs: | $(GITDIR) 48 | chmod +x *.pl 49 | chmod +x t/*.t 50 | ln -sf word-en-l.lst ./list-in/dict.lst 51 | ln -sf dict.lst ./list-in/test.lst 52 | # recentrated.pl: 53 | mkdir -p ./list-out/recentrated 54 | 55 | # Developers: 56 | $(GITDIR): 57 | # TODO: Since Git 2.9 there is `git config 
core.hooksPath .git-hooks` 58 | chmod +x git-hooks/* 59 | ln -sf ../../git-hooks/pre-commit ./.git/hooks/pre-commit 60 | ln -sf ../../git-hooks/pre-push ./.git/hooks/pre-push 61 | 62 | 63 | # ---------------------------------------------------------------------------- 64 | ## make uninstall : Deletes files created outside the project directory 65 | # 66 | .PHONY: uninstall 67 | uninstall: 68 | rm -rf "${CACHE_DIR}" 69 | 70 | 71 | # ---------------------------------------------------------------------------- 72 | ## make deps : Downloads and installs dependencies from CPAN. 73 | ## Files go to the project's ./lib/local/ dir to ease software removal. 74 | ## It does not install modules system-wide. 75 | ## Doesn't require root too if local::lib module is already installed. 76 | # 77 | # CPAN complains without YAML::Any (warning not error) 78 | # We install without testing modules (significantly faster) 79 | # 80 | .PHONY: deps 81 | deps: 82 | ifndef IS_LOCAL_LIB 83 | ifndef IS_ROOT 84 | $(error "Please run as root -or- install Perl module local::lib first (apt-get install liblocal-lib-perl)") 85 | endif 86 | PERL_MM_USE_DEFAULT=1 perl -MCPAN -e 'CPAN::Shell->notest( "install", "local::lib" )' 87 | endif 88 | mkdir -p ./lib/local 89 | PERL_MM_USE_DEFAULT=1 perl -MCPAN -Mlocal::lib=./lib/local -e 'CPAN::Shell->notest( "install", "Term::ReadKey", "YAML::Any", "List::MoreUtils", "HTML::Entities", "URI::Escape", "Cache::FileCache", "IO::Socket::SSL", "Net::SSLeay", "HTTP::Tiny", "Text::CSV", "Log::Any", "IO::Prompter", "Test::More", "Test::Exception" )' 90 | 91 | 92 | 93 | # ---------------------------------------------------------------------------- 94 | ## make check : Runs unit tests 95 | # 96 | .PHONY: check 97 | check: 98 | prove 99 | 100 | 101 | # ---------------------------------------------------------------------------- 102 | ## make docker-image : Builds a Docker image from the dirty working copy 103 | ## make docker-run : Runs Docker image, optionally: 104 | ## 
make docker-run DOCKER_HTPORT=8080 105 | ## make docker-run DOCKER_CON_NAME=goodreads-toolbox 106 | ## make github-package : Builds a Docker image from the official repo and pushes it to GitHub Packages 107 | ## Expects a PAT from GitHub > Account > Settings > Developer Settings > Personal access tokens 108 | ## in local file .github-packages.secret 109 | ## See packages: https://github.com/users/andre-st/packages 110 | # 111 | .PHONY: docker-image 112 | docker-image: Dockerfile 113 | docker build \ 114 | --build-arg BUILD_DATE="${DOCKER_BUILD_DATE}" \ 115 | --build-arg PROJECT_VERSION="${PROJECT_VERSION}" \ 116 | --tag "${DOCKER_IMG_NAME}:${DOCKER_IMG_VER}" \ 117 | ${DOCKER_DIR} 118 | @echo "[NEXT] You might like to start the new Docker image with 'make docker-run'" 119 | 120 | 121 | .PHONY: docker-run 122 | docker-run: 123 | docker stop ${DOCKER_CON_NAME} || true 124 | docker container rm ${DOCKER_CON_NAME} || true 125 | @echo "[NOTE] Goodreads results are written to 'list-out/', accessible via web-browser at localhost:${DOCKER_HTPORT}" 126 | @docker run \ 127 | --name=${DOCKER_CON_NAME} \ 128 | --publish=${DOCKER_HTPORT}:80 \ 129 | --interactive \ 130 | --tty \ 131 | "${DOCKER_IMG_NAME}:${DOCKER_IMG_VER}" || true 132 | 133 | 134 | .PHONY: github-package 135 | github-package: .github-packages.secret 136 | rm -rf "${BUILD_DIR}/official-latest/" 137 | mkdir -p "${BUILD_DIR}/official-latest/" 138 | pushd "${BUILD_DIR}/official-latest/" 139 | git clone "https://github.com/${GITHUB_USER}/${GITHUB_REPONAME}/" . 
140 | make docker-image DOCKER_IMG_NAME=ghcr.io/${GITHUB_USER}/${GITHUB_REPONAME} DOCKER_IMG_VER=latest 141 | popd 142 | cat .github-packages.secret | docker login ghcr.io -u ${GITHUB_USER} --password-stdin 143 | docker push "ghcr.io/${GITHUB_USER}/${GITHUB_REPONAME}" 144 | 145 | 146 | # ---------------------------------------------------------------------------- 147 | ## make docs : Updates documentation, optionally: 148 | ## make docs PROJECT_VERSION=1.22 149 | .PHONY: docs 150 | docs: 151 | # vX.X, vX.XX.X, image:X.XX.X 152 | sed -i -E "s/([v])[0-9\.]+/\1${PROJECT_VERSION}/" README.md INSTALL.txt 153 | 154 | 155 | # ---------------------------------------------------------------------------- 156 | ## make help : Prints this help screen 157 | # 158 | # Prints all comments with two leading # characters in this Makefile 159 | # 160 | .PHONY: help 161 | help: Makefile 162 | @sed -n 's/^## //p' $< 163 | 164 | # Debugging info: 165 | ifdef IS_ROOT 166 | @echo IS_ROOT=yes 167 | else 168 | @echo IS_ROOT=no 169 | endif 170 | ifdef IS_LOCAL_LIB 171 | @echo IS_LOCAL_LIB=yes 172 | else 173 | @echo IS_LOCAL_LIB=no 174 | endif 175 | 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :books: Andre's Goodreads Toolbox, v1.25.1 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 9 Perl-scripts for Goodreads.com—the world's largest book (cataloging) community. [What's new?](CHANGELOG.md) 6 | 7 | 8 | ## [recentrated.pl](./help/recentrated.md) 9 | 10 | Checks all the books in your shelf for new ratings and notifies you 11 | via periodical e-mail. It helps discover new criticisms and users with interesting 12 | libraries. You can [try this online](https://andre-st.github.io/goodreads/) if you 13 | don't want to install anything. 14 | It implements the "follow book" feature that was requested in the Goodreads forums. 
15 | [Usage+Screenshot](./help/recentrated.md) 16 | 17 | 18 | ## [friendrated.pl](./help/friendrated.md) 19 | 20 | Prints all books rated 4 or 5 stars by 3 or more persons you follow (including friends). 21 | It implements the "books common among the people I follow" feature that was requested 22 | in the Goodreads forums. It also lists the most read authors, the most wished-for 23 | and hated books. 24 | [Usage+Screenshot](./help/friendrated.md) 25 | 26 | 27 | ## [friendnet.pl](./help/friendnet.md) 28 | 29 | Spiders your social network and creates files with edges and nodes which can be 30 | easily processed with social network analysis software. It answers questions 31 | like: Which members are popular among your friends? 32 | [Usage+Screenshot](./help/friendnet.md) 33 | 34 | 35 | ## [friendgroup.pl](./help/friendgroup.md) 36 | 37 | Prints discussion groups common among the persons you follow (including friends). 38 | Searching groups on Goodreads is a PITA, and sometimes you don't know what you can have 39 | and wouldn't search for it. [Usage+Screenshot](./help/friendgroup.md) 40 | 41 | 42 | ## [likeminded.pl](./help/likeminded.md) 43 | 44 | Prints Goodreads members who are interested in the same books as you. 45 | It implements the "Finding people based on the books they've read" 46 | feature that was requested in the Goodreads forums. 47 | [Usage+Screenshot](./help/likeminded.md) 48 | 49 | 50 | ## [similarauth.pl](./help/similarauth.md) 51 | 52 | Prints authors who Goodreads thinks are similar to all the authors you're reading. 53 | It implements the "Finding [all] similar authors" feature that was requested in the 54 | Goodreads forums. 55 | [Usage+Screenshot](./help/similarauth.md) 56 | 57 | 58 | ## [search.pl](./help/search.md) 59 | 60 | Prints a book search result, ordered by average rating and number of ratings 61 | (most popular books), or date published, optionally with exact title matches. 62 | The Goodreads website doesn't offer it for some reason. 
63 | It implements the "Sort search results by rating" feature that was requested 64 | in the Goodreads forums. 65 | [Usage+Screenshot](./help/search.md) 66 | 67 | 68 | ## [savreviews.pl](./help/savreviews.md) 69 | 70 | Saves text-reviews for a book to a text-file. It implements the "Extract all 71 | reviews for a specific book" feature that was requested in the Goodreads forums. 72 | [Usage+Screenshot](./help/savreviews.md) 73 | 74 | 75 | ## ~~[amz-tradein.pl](./help/amz-tradein.md)~~ 76 | 77 | This script fetched Amazon Trade-In prices for all books in a Goodreads.com 78 | shelf ("resales" or "donations"). It automated regular manual bid-checking for 79 | hundreds of books, discovering sales opportunities. Amazon stopped its buyback 80 | program in 2015. 81 | [Usage+Screenshot](./help/amz-tradein.md) 82 | 83 | 84 | 85 | ## Getting started 86 | 87 | 1a\. [Docker](https://opensource.com/resources/what-docker) users can run the Toolbox in its own 88 | container([?](https://www.docker.com/resources/what-container)), 89 | and view the results via web-browser at _localhost:8080_: 90 | 91 | ```console 92 | $ docker run -it --publish=8080:80 ghcr.io/andre-st/goodreads-toolbox 93 | ``` 94 | 95 | 1b\. users without Docker can try to install the Toolbox directly on their systems: 96 | 97 | ```console 98 | $ git clone https://github.com/andre-st/goodreads-toolbox.git 99 | $ cd goodreads-toolbox 100 | $ sudo make # Gets required Perl modules from CPAN 101 | ``` 102 | 103 | 2\. at the prompt, try out the Toolbox programs: 104 | 105 | ```console 106 | $ ./example-script.pl --help 107 | ``` 108 | 109 | Before [Docker for Windows or Mac](https://github.com/docker/toolbox/releases) 110 | and the project's Docker-images became available, 111 | a Windows user wrote me that he ran the Toolbox on the [Windows 10 Subsystem for Linux](https://linuxhint.com/install_ubuntu_windows_10_wsl/) (WSL). 
112 | 113 | 114 | Long program runtimes: Goodreads slows down all requests and we have to load a lot of data. 115 | Start one program and do other things in the meantime. 116 | You can break any program with CTRL-C and continue later (reloads from a file-cache). 117 | 118 | 119 | 120 | ## Contributing 121 | 122 | - Reporting bugs / feature requests 123 | - add a new issue via [Github's issue tracker](https://github.com/andre-st/goodreads-toolbox/issues/new) 124 | - [alternative contact options](AUTHORS.md) 125 | - thank you all who wrote me mails in the past or otherwise reported bugs and ideas :thumbsup: 126 | - Writing your own scripts 127 | - see the [tests directory](./t/) for examples on how to use the toolbox library 128 | - see the [toolbox library documentation](./lib/Goodscrapes.pod) 129 | - [non-functional considerations](./t/README.md) 130 | - the [less complex issues](https://github.com/andre-st/goodreads-toolbox/labels/freshmen) 131 | would be good first issues to work on for users who want to contribute to this project 132 | 133 | 134 | 135 | ## Further readings 136 | 137 | - About Goodreads 138 | - [GR developers group](https://www.goodreads.com/group/show/8095-goodreads-developers) 139 | - [GR technology stack](https://www.goodreads.com/jobs?id=597248#openPositions) 140 | or [here](https://www.glasswaves.co/selected_projects.txt) 141 | or [here](https://builtwith.com/goodreads.com) 142 | or [DynamoDB+S3+Athena](https://aws.amazon.com/blogs/big-data/how-goodreads-offloads-amazon-dynamodb-tables-to-amazon-s3-and-queries-them-using-amazon-athena/) 143 | - [GR workplace reviews](https://www.glassdoor.com/Reviews/Goodreads-Reviews-E684833.htm), 144 | anonymously about being acquired by Amazon, bureaucracy etc. 
145 | - [GR on Crunchbase](https://www.crunchbase.com/organization/goodreads), 146 | people, recent news & activity 147 | - [GR members stats](https://www.statista.com/search/?q=goodreads&qKat=search) 148 | or [here](https://qz.com/1106341/most-women-reading-self-help-books-are-getting-advice-from-men/) 149 | or [here](https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.23733)+[Sci-Hub](https://twitter.com/scihub_love) 150 | or [here](https://book.pressbooks.com/chapter/goodreads-otis-chandler) 151 | or [here](https://www.buzzfeednews.com/article/annanorth/what-amazon-is-getting-from-goodreads), 152 | source probably [Goodreads](https://www.goodreads.com/about/us) 153 | - [GR on Slideshare](https://www.slideshare.net/GoodreadsPresentations/presentations), 154 | presenting GR book marketing to authors, see also [Author Feedback Group](https://www.goodreads.com/group/show/31471) 155 | - [GR subreddit](https://www.reddit.com/r/goodreads/) 156 | - Further software 157 | - I leave statistics about your own reading habits to the following tools; 158 | my toolbox, in contrast, focuses on the social periphery, with Goodreads providing the largest user base 159 | - Paul Klinger's [Bookstats](https://github.com/PaulKlinger/Bookstats) or [here](https://almoturg.com/bookstats/) 160 | - untested: John Smith's [GoodreadsAnalysis](https://github.com/JohnSmithDev/GoodreadsAnalysis/blob/master/REPORTS.md) 161 | - untested: Petr's [CompareBooks](https://github.com/vatioz/GoodreadsUserCompare) 162 | browser [extension](https://chrome.google.com/webstore/detail/goodreads-compare-books/jcbnjaifalpejkcgfbpjbcmkfdildgpi) 163 | adds "compare" info next to usernames 164 | - untested: Andrea Samorini's [SamoGoodreadsUtility](https://github.com/asamorini/goodreads.utility) 165 | adds language filters to GR 166 | - untested: Danish Prakash's [goodreadsh](https://github.com/danishprakash/goodreadsh) 167 | is a command line interface for Goodreads (off. 
API) 168 | - untested: [Greasyfork Browser-Scripts](https://greasyfork.org/en/scripts/by-site/goodreads.com) 169 | - untested: the [Bookar Android app](https://github.com/intmainreturn00/Bookar) visualizes your books in augmented reality 170 | - untested: save your shelves and reviews [Goodreads data to SQLite](https://github.com/rixx/goodreads-to-sqlite) 171 | - Amazon: [export and filter long wishlists](https://github.com/andre-st/amazon-wishless) by priority and price (bargains) 172 | - Other 173 | - Data: thousands of books and authors (not GR) https://openlibrary.org/developers/dumps 174 | - Personal 175 | - [a list of things](./help/GOODTIPS.md) that improved my Goodreads experience: settings, browser extensions etc. 176 | - [Andre at Goodreads](https://www.goodreads.com/user/show/18418712-andr) 177 | 178 | 179 | -------------------------------------------------------------------------------- /amz-tradein.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- 79 chars --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | amz-tradein.pl 10 | 11 | 12 | =head1 VERSION 13 | 14 | 2015-08-31 (Since 2014-11-05) 15 | 16 | 17 | =head1 WARNING 18 | 19 | Amazon stopped its Trade-In program on 31st August, 2015. 20 | This script is no longer of any use. 
21 | 22 | 23 | =head1 PURPOSE 24 | 25 | =over 26 | 27 | =item * fetches Amazon Trade-In prices for all books in a Goodreads-shelf, 28 | e.g., 'books-for-sale' 29 | 30 | =item * spares you checking each book by hand every time you want to sell 31 | books to Amazon 32 | 33 | =item * might reveal good buyback prices for books you hadn't yet considered 34 | for sales (run this script against a Goodreads "#ALL#" shelf) 35 | 36 | =back 37 | 38 | 39 | =head1 OUTPUT EXAMPLE 40 | 41 | EUR 5,30 Book title found at Amazon with Trade-In price 42 | EUR -,-- Book title either without Trade-In or not found by ISBN 43 | 44 | 45 | =head1 USAGE EXAMPLE 46 | 47 | =over 48 | 49 | =item Check all books of a specific Goodreads user: 50 | 51 | $ amz-tradein.pl 18418712 52 | 53 | =item Check all books in a specific Goodreads shelf only: 54 | 55 | $ amz-tradein.pl 18418712 books-for-sale 56 | 57 | =item Sort by highest price and save output to a textfile: 58 | 59 | $ amz-tradein.pl 18418712 books-for-sale | sort --key 2n | tac > books-for-sale-w-prices.out 60 | 61 | =back 62 | 63 | 64 | =head1 OBSERVATIONS 65 | 66 | =over 67 | 68 | =item * process is slow, 123 books need ~2 minutes 69 | 70 | =back 71 | 72 | 73 | =head1 REQUIRES 74 | 75 | =over 76 | 77 | =item * a Goodreads account (number), your # is contained in each Goodreads-shelf-URL 78 | 79 | =item * no API key 80 | 81 | =item * $ perl -MCPAN -e 'install WWW::Curl::Easy, Cache::FileCache' 82 | 83 | =back 84 | 85 | 86 | =head1 KNOWN LIMITATIONS AND BUGS 87 | 88 | =over 89 | 90 | =item * German Amazon only (contact me if you need support for other countries) 91 | 92 | =back 93 | 94 | =cut 95 | 96 | #<--------------------------------- 79 chars --------------------------------->| 97 | 98 | 99 | 100 | use strict; 101 | use warnings; 102 | 103 | # Perl core: 104 | use FindBin; 105 | use local::lib "$FindBin::Bin/lib/local/"; 106 | use lib "$FindBin::Bin/lib/"; 107 | # Third party: 108 | # Ours: 109 | use Goodscrapes; 110 | 111 | 112 | # 
Program synopsis: 113 | say STDERR "Usage: $0 GOODUSERNUMBER [SHELFNAME]\nSee source code for more info." and exit if $#ARGV < 0; 114 | 115 | 116 | # Program configuration: 117 | our $USERID = gverifyuser ( $ARGV[0] ); 118 | our $SHELF = gverifyshelf( $ARGV[1] ); 119 | 120 | 121 | sub extract_amz_price 122 | { 123 | my $article_page_html = shift; 124 | return $article_page_html =~ /(EUR [0-9,]+)<\/span> Gutschein erhalten/ ? $1 : 'EUR -,--'; 125 | } 126 | 127 | 128 | my %books; 129 | greadshelf( from_user_id => $USERID, 130 | ra_from_shelves => [ $SHELF ], 131 | rh_into => \%books ); 132 | 133 | for my $b (values %books) 134 | { 135 | my $price = extract_amz_price( amz_book_html( $b ) ); 136 | say STDOUT $price . "\t" . $b->{title}; 137 | } 138 | 139 | -------------------------------------------------------------------------------- /cron.daily/goodratings.example: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Reports recent book ratings to some Goodreads members 4 | # 5 | # Put me to: /etc/cron.daily 6 | # 7 | # NOTE: 8 | # Cron- and/or run-parts limitations: 9 | # 1. This file must not have a dot in its filename 10 | # 2. This file must be owned by root and by a distinctive group of $SCRIPTUSER 11 | # 3. This file must be executable (chmod ug=rwx,o= ) 12 | # 13 | # Depends: 14 | # 1. "ifne" is part of the "moreutils" package. 
15 | # 16 | 17 | readonly SCRIPTUSER=root 18 | readonly MAILFROM="GOODREADS@EXAMPLE.COM" 19 | readonly GOODMAIL="GOODREADSPASSWORD" 20 | readonly GOODPASS="GOODREADSPASSWORD" 21 | readonly SCRIPT="/path/to/goodreads/recentrated.pl" 22 | readonly DB_DIR="/path/to/goodreads/list-out/recentrated" 23 | readonly MAILERS=( 24 | [0]="ifne /usr/sbin/sendmail -t" 25 | [1]="ifne ip netns exec NS_PRIVATE /usr/sbin/sendmail -t" 26 | [9]="cat" ) 27 | # 9 for debugging, CSV restored 28 | 29 | 30 | # Re-run as another user 31 | if [ $( id -u ) = 0 ] 32 | then 33 | su ${SCRIPTUSER} --command "$0" 34 | exit; 35 | fi 36 | 37 | 38 | # Wait for Internet connection: 39 | # TODO 40 | 41 | 42 | chk() 43 | { 44 | gooduser="${1}" 45 | goodshelf="${2}" 46 | mailto="${3}" 47 | mailerid="${4:-0}" 48 | opts=${5} 49 | mailer="${MAILERS[$mailerid]}" 50 | csvname="${gooduser}-${goodshelf}.csv" 51 | csvpath="${DB_DIR}/${csvname}" 52 | csvbakpath="${DB_DIR}/${csvname}.recover" 53 | 54 | if [ -e "${csvbakpath}" ] 55 | then 56 | # Batch, script or mail auth failed last time. Recover and retry this time. 
57 | # Mailtext in ~/dead.letter 58 | cp --preserve --force "${csvbakpath}" "${csvpath}" || exit 1 59 | else 60 | if [ -e "${csvpath}" ] 61 | then 62 | cp --preserve --force "${csvpath}" "${csvbakpath}" || exit 1 63 | fi 64 | fi 65 | ( "${SCRIPT}" ${opts} \ 66 | "--userid=${gooduser}" \ 67 | "--shelf=${goodshelf}" \ 68 | "--from=${MAILFROM}" \ 69 | "--to=${mailto}" \ 70 | "${GOODMAIL}" \ 71 | "${GOODPASS}" | ${mailer} ) \ 72 | && [ "$mailerid" != "9" ] \ 73 | && rm --force "${csvbakpath}" 74 | } 75 | 76 | 77 | 78 | #========================================================================================================================== 79 | # USER SHELF MAILTO MAILER OPTS REALNAME SINCE BOOKS@04/10 80 | #========================================================================================================================== 81 | chk 12345678 "%E3%85%A1watch-ratings" "${MAILFROM}" 0 # me 123 82 | chk 345678 "watch-ratings" example1@gmail.com 0 # Example 1 18/01/12 3 83 | chk 2345678 "de-mooisten" example2@gmx.net 0 # Example 2 18/11/14 23 84 | chk 5678 "wishlist-to-buy" example3@yahoo.com 0 -q # Example 3 18/06/11 123 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /friendgroup.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- MAN PAGE --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | friendgroup - groups common among the members I follow 10 | 11 | 12 | =head1 SYNOPSIS 13 | 14 | B 15 | [B<-c> F] 16 | [B<-o> F] 17 | [B<-u> F] 18 | [B<-i>] 19 | F [F] 20 | 21 | 22 | =head1 OPTIONS 23 | 24 | Mandatory arguments to long options are mandatory for short options too. 25 | 26 | =over 4 27 | 28 | =item B<-c, --cache>=F 29 | 30 | number of days to store and reuse downloaded data in F, 31 | default is 31 days. 
This helps with cheap recovery on a crash, power blackout 32 | or pause, and when experimenting with parameters. Loading data from Goodreads 33 | is a very time consuming process. 34 | 35 | 36 | =item B<-u, --userid>=F 37 | 38 | check another member instead of the one identified by the login-mail 39 | and password arguments. You find the ID by looking at the shelf URLs. 40 | 41 | 42 | =item B<-o, --outfile>=F 43 | 44 | name of the HTML file where we write results to, 45 | default see section FILES 46 | 47 | 48 | =item B<-i, --ignore-errors> 49 | 50 | Don't retry on errors, just keep going. 51 | Sometimes useful if a single Goodreads resource hangs over long periods 52 | and you're okay with some values missing in your result. 53 | This option is not recommended when you run the program unattended. 54 | 55 | 56 | =item B<-?, --help> 57 | 58 | show full man page 59 | 60 | =back 61 | 62 | 63 | =head1 FILES 64 | 65 | F<./list-out/friendgroup-$GOODUSERID.html> 66 | 67 | F 68 | 69 | 70 | =head1 EXAMPLES 71 | 72 | $ ./friendgroup.pl login@gmail.com MyPASSword 73 | 74 | $ ./friendgroup.pl --outfile=./sub/myfile.html login@gmail.com 75 | 76 | 77 | =head1 REPORTING BUGS 78 | 79 | Report bugs to or use Github's issue tracker 80 | 81 | 82 | 83 | =head1 COPYRIGHT 84 | 85 | This is free software. You may redistribute copies of it under the terms of 86 | the GNU General Public License . 87 | There is NO WARRANTY, to the extent permitted by law. 
88 | 89 | 90 | =head1 SEE ALSO 91 | 92 | More info in ./help/friendgroup.md 93 | 94 | 95 | =head1 VERSION 96 | 97 | 2022-03-10 (Since 2018-09-26) 98 | 99 | =cut 100 | 101 | #<--------------------------------- 79 chars --------------------------------->| 102 | 103 | 104 | use strict; 105 | use warnings; 106 | use locale; 107 | use 5.18.0; 108 | 109 | # Perl core: 110 | use FindBin; 111 | use local::lib "$FindBin::Bin/lib/local/"; 112 | use lib "$FindBin::Bin/lib/"; 113 | use Time::HiRes qw( time tv_interval ); 114 | use POSIX qw( strftime locale_h ); 115 | use File::Spec; # Platform indep. directory separator 116 | use IO::File; 117 | use Getopt::Long; 118 | use Pod::Usage; 119 | # Third party: 120 | # Ours: 121 | use Goodscrapes; 122 | 123 | 124 | 125 | # ---------------------------------------------------------------------------- 126 | # Program configuration: 127 | # 128 | setlocale( LC_CTYPE, "en_US" ); # GR dates all en_US 129 | STDOUT->autoflush( 1 ); 130 | gsetopt( cache_days => 31 ); 131 | 132 | our $TSTART = time(); 133 | our $OUTPATH; 134 | our $USERID; 135 | 136 | GetOptions( 'outfile|o=s' => \$OUTPATH, 137 | 'userid|u=s' => \$USERID, 138 | 'ignore-errors|i' => sub{ gsetopt( ignore_errors => 1 ); }, 139 | 'cache|c=i' => sub{ gsetopt( cache_days => $_[1] ); }, 140 | 'help|?' 
=> sub{ pod2usage( -verbose => 2 ); }) 141 | or pod2usage( 1 ); 142 | 143 | pod2usage( 1 ) if !$ARGV[0]; 144 | 145 | glogin( usermail => $ARGV[0], # Login required: Followee/friend/groups list are private 146 | userpass => $ARGV[1], # Asks pw if omitted 147 | r_userid => \$USERID ); 148 | 149 | $OUTPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', "friendgroup-${USERID}.html" ) 150 | if !$OUTPATH; 151 | 152 | 153 | 154 | #----------------------------------------------------------------------------- 155 | # Primary data structures: 156 | # 157 | my %members; # {user_id} 158 | my %joins; # {group_id}{user_id} 159 | my %groups; # {group_id} 160 | 161 | 162 | 163 | #----------------------------------------------------------------------------- 164 | # Collect friends and followees data. Include normal users only (no authors): 165 | # 166 | print( "Getting list of members known to #${USERID}..." ); 167 | 168 | my $t0 = time(); 169 | greadfolls( from_user_id => $USERID, 170 | rh_into => \%members, 171 | incl_authors => 0, 172 | on_progress => gmeter( 'members' )); 173 | 174 | printf( " (%.2fs)\n", time()-$t0 ); 175 | 176 | 177 | 178 | #----------------------------------------------------------------------------- 179 | # Load group memberships of each member 180 | # 181 | my $memdone = 0; 182 | my $memcount = scalar keys %members; 183 | 184 | die( $GOOD_ERRMSG_NOMEMBERS ) unless $memcount; 185 | 186 | for my $mid (keys %members) 187 | { 188 | printf( "[%3d%%] %-25s #%-10s\t", ++$memdone/$memcount*100, $members{$mid}->{name}, $mid ); 189 | 190 | my $t0 = time(); 191 | my $trackjoinsfn = sub{ $joins{ $_[0]->{id} }{ $mid } = 1; }; 192 | 193 | greadusergp( from_user_id => $mid, 194 | rh_into => \%groups, 195 | on_group => $trackjoinsfn, 196 | on_progress => gmeter( 'groups' )); 197 | 198 | printf( "\t%6.2fs\n", time()-$t0 ); 199 | } 200 | 201 | say "\nPerfect! 
Got groups of ${memdone} users."; 202 | 203 | 204 | 205 | #----------------------------------------------------------------------------- 206 | # Write results to HTML file: 207 | # 208 | print "Writing results to \"$OUTPATH\"... "; 209 | 210 | my $fh = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)"; 211 | my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime ); 212 | 213 | print $fh ghtmlhead( "Groups joined by friends or followees of member $USERID, on $now", 214 | [ '!Logo', 'Group', 'Members', '>Joined:', '!Joined by' ]); 215 | 216 | my $num_finds = 0; 217 | for my $gid (keys %joins) 218 | { 219 | my @joiner_ids = keys %{$joins{$gid}}; 220 | my $num_joiners = scalar @joiner_ids; 221 | 222 | $num_finds++; 223 | 224 | print $fh qq{ 225 | 226 | 227 | 228 | ${\ghtmlsafe( $groups{$gid}->{name} )} 229 | $groups{$gid}->{num_members} 230 | ${num_joiners} 231 | 232 | }; 233 | 234 | print $fh qq{ 235 | 236 | 238 | 239 | } foreach (@joiner_ids); 240 | 241 | print $fh qq{ 242 | 243 | 244 | }; 245 | } 246 | 247 | print $fh ghtmlfoot(); 248 | undef $fh; 249 | 250 | printf "%d groups\n", $num_finds; 251 | 252 | 253 | 254 | #----------------------------------------------------------------------------- 255 | # Done: 256 | # 257 | printf "Total time: %.0f minutes\n", (time()-$TSTART)/60; 258 | 259 | 260 | -------------------------------------------------------------------------------- /friendnet.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- MAN PAGE --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | friendnet - Spiders one's social network and saves vertices/edges to CSV-files 10 | 11 | 12 | =head1 SYNOPSIS 13 | 14 | B 15 | [B<-u> F] 16 | [B<-d> F] 17 | [B<-c> F] 18 | [B<-o> F] 19 | [B<-i>] 20 | F [F] 21 | 22 | 23 | =head1 OPTIONS 24 | 25 | Mandatory arguments to long options are mandatory for short options too. 
26 | 27 | =over 4 28 | 29 | =item B<-u, --userid>=F 30 | 31 | check another member instead of the one identified by the login-mail 32 | and password arguments. You find the ID by looking at the shelf URLs. 33 | You still need to log in with your credentials because only authenticated 34 | members can access the member-lists of other members. 35 | 36 | 37 | =item B<-d, --depth>=F 38 | 39 | examine network to N levels. 40 | Runtime and data size increase exponentially with every level. 41 | Depth 0 is useless, 1 equals exporting your friends/followees list, 42 | 2 allows first useful social network analysis. 43 | There is the idea that all seven billion earthlings are 6 or fewer 44 | social connections away from each other 45 | ("Six degrees of separation")--don't try to prove it here. 46 | Default is 2. 47 | 48 | depth 0: YOU [] 49 | depth 1: YOU --> friends [] 50 | depth 2: YOU <-> FRIENDS --> friends [100%] 51 | depth 3: YOU <-> FRIENDS <-> FRIENDS --> friends [100%, 100%] 52 | depth 4: YOU <-> FRIENDS <-> FRIENDS <-> FRIENDS --> friends [100%, 100%, 100%] 53 | depth n: ... 54 | 55 | Note: Friends with more than 1000 friends or followees are dropped, 56 | because the data of such accounts is likely not meaningful anymore and 57 | would just waste your (computing) time. 58 | 59 | 60 | =item B<-c, --cache>=F 61 | 62 | number of days to store and reuse downloaded data in F, 63 | default is 31 days. This helps with cheap recovery on a crash, power blackout 64 | or pause, and when experimenting with parameters. Loading data from Goodreads 65 | is a very time consuming process. 66 | 67 | 68 | =item B<-o, --outdir>=F 69 | 70 | write CSV-files to this directory, 71 | default see section FILES 72 | 73 | 74 | =item B<-i, --ignore-errors> 75 | 76 | Don't retry on errors, just keep going. 77 | Sometimes useful if a single Goodreads resource hangs over long periods 78 | and you're okay with some values missing in your result.
79 | This option is not recommended when you run the program unattended. 80 | 81 | 82 | =item B<-?, --help> 83 | 84 | show full man page 85 | 86 | =back 87 | 88 | 89 | =head1 FILES 90 | 91 | F 92 | 93 | F<./list-out/friendnet-$GOODUSERID-edges.csv> 94 | 95 | F<./list-out/friendnet-$GOODUSERID-nodes.csv> 96 | 97 | 98 | =head1 EXAMPLES 99 | 100 | $ ./friendnet.pl login@gmail.com MyPASSword 101 | 102 | $ ./friendnet.pl --depth=3 --outdir=/tmp/ login@gmail.com 103 | 104 | 105 | =head1 REPORTING BUGS 106 | 107 | Send an email to or use Github's issue tracker 108 | 109 | 110 | 111 | =head1 COPYRIGHT 112 | 113 | This is free software. You may redistribute copies of it under the terms of 114 | the GNU General Public License . 115 | There is NO WARRANTY, to the extent permitted by law. 116 | 117 | 118 | =head1 SEE ALSO 119 | 120 | More info in ./help/friendnet.md 121 | 122 | 123 | =head1 VERSION 124 | 125 | 2022-03-10 (Since 2019-06-14) 126 | 127 | =cut 128 | 129 | #<--------------------------------- 79 chars --------------------------------->| 130 | 131 | 132 | use strict; 133 | use warnings; 134 | use locale; 135 | use 5.18.0; 136 | 137 | # Perl core: 138 | use FindBin; 139 | use local::lib "$FindBin::Bin/lib/local/"; 140 | use lib "$FindBin::Bin/lib/"; 141 | use Time::HiRes qw( time tv_interval ); 142 | use POSIX qw( strftime locale_h ); 143 | use File::Spec; # Platform indep. 
directory separator 144 | use IO::File; 145 | use Getopt::Long; 146 | use Pod::Usage; 147 | # Third party: 148 | use Text::CSV qw( csv ); 149 | # Ours: 150 | use Goodscrapes; 151 | 152 | 153 | 154 | # ---------------------------------------------------------------------------- 155 | # Program configuration: 156 | # Defaults first, then command-line options, then login (first argument = login mail) 157 | setlocale( LC_CTYPE, "en_US" ); # GR dates all en_US 158 | STDOUT->autoflush( 1 ); # Progress output has no trailing newline; show it immediately 159 | gsetopt( cache_days => 31 ); # Default, --cache overrides it below 160 | 161 | our $TSTART = time(); # For the total-time message at the end 162 | our $DEPTH = 2; # Default, --depth overrides it 163 | our $MAXNHOOD = 1000; # Ignore users with more than N friends 164 | our $OUTDIR = File::Spec->catdir( $FindBin::Bin, 'list-out' ); # A directory, hence catdir (not catfile); --outdir overrides it 165 | our $USERID; 166 | 167 | GetOptions( 'userid|u=s' => \$USERID, 168 | 'depth|d=i' => \$DEPTH, 169 | 'outdir|o=s' => \$OUTDIR, 170 | 'ignore-errors|i' => sub{ gsetopt( ignore_errors => 1 ); }, 171 | 'cache|c=i' => sub{ gsetopt( cache_days => $_[1] ); }, 172 | 'help|?' => sub{ pod2usage( -verbose => 2 ); }) 173 | or pod2usage( 1 ); 174 | 175 | pod2usage( 1 ) if !$ARGV[0]; 176 | 177 | glogin( usermail => $ARGV[0], # Login required: Followee/friend list are private 178 | userpass => $ARGV[1], # Asks pw if omitted 179 | r_userid => \$USERID ); 180 | 181 | our $OUTPATH_EDG = File::Spec->catfile( $OUTDIR, "friendnet-$USERID-edges.csv" ); # Built after glogin(): presumably glogin() fills $USERID if --userid was omitted - verify in Goodscrapes 182 | our $OUTPATH_NOD = File::Spec->catfile( $OUTDIR, "friendnet-$USERID-nodes.csv" ); 183 | 184 | 185 | 186 | #----------------------------------------------------------------------------- 187 | # Primary data structures: 188 | # Both are handed to gsocialnet() by reference and written to CSV afterwards 189 | my %nodes; 190 | my @edges; 191 | 192 | 193 | 194 | #----------------------------------------------------------------------------- 195 | # Traverse social network: 196 | # 197 | printf( "Traversing #%s's social network (depth=%d)...\n", $USERID, $DEPTH ); 198 | 199 | 200 | # Displays sth.
like "Covered: [ 14%, 55%]" for depth = 3 201 | my $progress_indicator_fn = sub 202 | { 203 | my (%args) = @_; # Named arguments; this sub reads 'depth' and 'perc' 204 | my $dr = $args{depth}; # Presumably the remaining depth (1 = leaves) - confirm against gsocialnet() 205 | my $d = $DEPTH - $dr; # Number of tab stops to skip before printing 206 | 207 | return if $dr == 1; # We get leaves as whole; percent-progress would be 0 to 100% in 1 step 208 | print ( "\r[" ); # Move cursor to column 0 209 | print ( "\t" x $d ); # Move cursor to column for depth d (tab doesn't del prev. chars) 210 | printf( "%3d%%", $args{perc} ); # Percent-progress for current network depth 211 | print ( ",\t 0%" x ($dr-2) ); # Fill empty columns with "0%" 212 | print ( ']' ); 213 | }; 214 | 215 | 216 | # Displays sth. like: 217 | # [ 1%] #1234567 218 | # [ 1%] #76543 219 | # ... 220 | # [100%] #432123 221 | my $progress_indicator_fn2 = sub 222 | { 223 | # TODO: Not implemented; this stub is not passed to gsocialnet() below 224 | }; 225 | 226 | 227 | gsocialnet( from_user_id => $USERID, 228 | rh_into_nodes => \%nodes, 229 | ra_into_edges => \@edges, 230 | ignore_nhood_gt => $MAXNHOOD, 231 | depth => $DEPTH, 232 | on_progress => $progress_indicator_fn ); 233 | 234 | 235 | 236 | #----------------------------------------------------------------------------- 237 | # Write CSV-files: 238 | # One file for the nodes, one for the edges; 'headers' names the columns 239 | my @nodeslines = values %nodes; 240 | 241 | printf( "\nWriting network data to: \n%s (N=%d)\n%s (N=%d)", 242 | $OUTPATH_NOD, scalar @nodeslines, 243 | $OUTPATH_EDG, scalar @edges ); 244 | 245 | csv( in => \@nodeslines, 246 | out => $OUTPATH_NOD, 247 | headers => [qw( id name img_url )] ); 248 | 249 | csv( in => \@edges, 250 | out => $OUTPATH_EDG, 251 | headers => [qw( from to )] ); 252 | 253 | 254 | 255 | #----------------------------------------------------------------------------- 256 | # Done: 257 | # 258 | printf( "\n\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 ); 259 | 260 | 261 | 262 | -------------------------------------------------------------------------------- /git-hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | # Exit on first error 5 | set
-e 6 | 7 | 8 | # Static code checks: 9 | for f in `git ls-files --modified *.pl` 10 | do 11 | perl -c "${f}" 12 | done 13 | 14 | 15 | # Generate documentation, but only if Goodscrapes.pm is listed as modified (git ls-files exits 0 even when it lists nothing, so test its output): 16 | if [ -n "$(git ls-files --modified ./lib/Goodscrapes.pm)" ] 17 | then 18 | echo "Generating documentation: Goodscrapes.html" 19 | pod2html ./lib/Goodscrapes.pm > ./lib/Goodscrapes.html \ 20 | && git add ./lib/Goodscrapes.html 21 | fi 22 | 23 | 24 | -------------------------------------------------------------------------------- /git-hooks/pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | # Exit on first error 5 | set -e 6 | 7 | 8 | # Unit-tests the libraries. 9 | # (Static checks already took place during commits) 10 | # 11 | # Don't run if there are just changes to the documentation 12 | # (match the extensions .pm and .t only, not .txt, .tmp, .token, .tar.gz etc.) 13 | if git diff --stat --cached origin/master | grep -E "\.(pm|t)([^[:alnum:]]|$)" 14 | then 15 | prove 16 | fi 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /help/GOODTIPS.md: -------------------------------------------------------------------------------- 1 | # Tips on Goodreads, i.a. 2 | 3 | ## Table of Contents 4 | - [Things That Improved My Goodreads.com Experience](#things-that-improved-my-goodreadscom-experience) 5 | - [Discovering Non-Fiction Books](#discovering-non-fiction-books) 6 | - [Annotating Books](#annotating-books) 7 | - [Feedback](#feedback) 8 | 9 | 10 | ## Things That Improved My Goodreads.com Experience 11 | 12 | - **Group shelves** with a prefix, e.g., _"region-usa"_, 13 | _"region-..."_. Goodreads sorts shelf lists in alphabetical order. 14 | Related but scattered shelves impair findability.
15 | - I moved shelves that are useful to me alone to the _end_ of the list by prefixing them with "z\_" or Unicode 0x3161: ㅡ 16 | - next to pseudo sub-shelves _"computer-history"_, _"computer-networks"_ 17 | and so on I'm using a separate _"computer"_ pseudo super-shelf which 18 | contains _all_ books from the sub-shelves 19 | (useful for [shelf-intersection](https://www.secondrunreviews.com/2016/03/selecting-multiple-shelves-goodreads.html)) 20 | 21 | - **Create a "more-urgent" shelf** from unread books, also create a _"more-tempting"_ shelf with books not urgent but probably more fun, 22 | then intersect both shelves ("select multiple") and copy the URL in your Goodreads profile text as _"[Likely next reads]"_ 23 | (regularly update the shelves) 24 | 25 | - **Create an "abandoned" shelf** to compensate for the missing reading-status. 26 | Have the exclusive-checkbox [activated](https://www.goodreads.com/shelf/edit) 27 | 28 | - **Track physical book location** with shelves such as _"shelf-kitchen"_ or 29 | _"shelf-berlin"_ or _"shelf-office"_ if the number of books exceeds memory (Future me) 30 | 31 | - **Limit the number of shelves** to max. 1 page. 32 | A few coarse-grained shelves are better than 100+ fine-grained shelves: faster to navigate and more likely to keep up-to-date for every book. 33 | Anemic shelves also render functions such as "[select multiple shelves](https://www.secondrunreviews.com/2016/03/selecting-multiple-shelves-goodreads.html)" (intersection ∩) useless.
34 | - avoid shelves that will likely never contain more than 3 books 35 | - try to minimize difference within a shelf and maximize difference between shelves (similar to cluster analysis) 36 | - merge strongly overlapping shelves, e.g., _"politics-economy-history"_ or _"software-testing-infosec"_ 37 | - remove shelves only good in theory but never used practically 38 | 39 | - **Add unread books to custom shelves too.** This works 40 | well with Goodreads own _"[select multiple](https://www.secondrunreviews.com/2016/03/selecting-multiple-shelves-goodreads.html)"_ feature beneath your 41 | shelf list. It's clearer than having hundreds of books in _"want-to-read"_ over time, 42 | and helps others discovering new books more easily. Pick your next book by intersection ∩, e.g., 43 | - _"want-to-read" + "non-fiction" + "lang-german"_ 44 | - _"want-to-read" + "fiction" + "politics"_ 45 | 46 | ![Intersection](https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Set_intersection.svg/320px-Set_intersection.svg.png) 47 | 48 | - **Negative shelves**, or [non-shelves](https://www.goodreads.com/topic/show/19369665-reverse-results-on-my-shelf#comment_id_181173145): 49 | e.g., _"fiction" + "lang-de" + "non-computing"_ would exclude nerd fiction; 50 | also useful for friends who are interested in everything but computers; 51 | most common negative shelf is _"non-fiction"_; 52 | limit to few but big shelves 53 | 54 | - **Declutter the library** with cardboxes and GR-shelves labeled 55 | _"donations"_ and _"resales"_. My city library took 30 books 56 | after receiving a link to my donations shelf. Such link may also appear in 57 | your email signature: "I give away books: ...". 58 | PS: There is a "book condition" column (shelf settings: table view, [x] condition). 
59 | 60 | - **Batch edit** shelf feature ([tutorial](https://www.soobsessedwith.com/2014/01/get-organized-on-goodreads.html)) 61 | 62 | - **Filter reviews by language** °~~by selecting a book edition in your language and "Filter: this edition" ([described here](https://www.goodreads.com/topic/show/19528032#comment_184069651))~~. 63 | See [new filter by language feature](https://www.goodreads.com/topic/show/19545889-filter-reviews-using-language#comment_192500388) 64 | 65 | - **Become a Goodreads librarian** by applying 66 | [there](https://www.goodreads.com/about/apply_librarian). Quickly 67 | edit wrong or missing book/author info and add cover images by yourself, 68 | combine stray book editions (take over reviews etc.) 69 | 70 | - [Goodreads Ratings for Amazon](https://chrome.google.com/webstore/detail/goodreads-ratings-for-ama/fkkcefhhadenobhjnngfdahhlodolkjg) – a Chrome-browser extension by Rubén Martínez; 71 | also reminds you of GR reviews when you're shopping on Amazon (alternatively, try my 72 | [Tiny JS Injector](https://github.com/andre-st/chrome-injectjs)) 73 | 74 | - **Photos in reviews**: 75 | Add photos to your reviews by uploading them to your Goodreads _user profile_ 76 | photos. So you don't have to find and rely on external web space, e.g. paid or 77 | shady, short-lived, free image hosts. 78 | Such photos can be snapshots of individual book pages to give an 79 | impression of the inside of the book, but also diagrams or photos of 80 | events and lectures. 81 | Use the caption "for Reviews > BOOK-SHORT-TITLE #PHOTO-NUMBER" 82 | so that Goodreads later displays it like "User > Photos > for Reviews > Nice Book #1". 83 | Add a link to your review to the photo description box: "Review: https://...". 84 | After uploading, simply copy the image URL ("largest") into your 85 | review (``). 86 | 87 | - **Check out users who rate good books**. 88 | [This service](https://andre-st.github.io/goodreads/) notifies you of new ratings for specific books. 
89 | Be picky, create a special-purpose shelf with good but rare books, don't submit your whole _"read"_ shelf to this service. 90 | 91 | - **Force view settings**, e.g., unify the quasi-random view settings when browsing (other people's) 92 | shelves, by rewriting Goodreads URLs via Einar Egilsson's 93 | [Redirector](https://chrome.google.com/webstore/detail/redirector/ocgpenflpmgnfapjedencafcfakcekcd) 94 | Chrome browser extension (or my [Tiny JS Injector](https://github.com/andre-st/chrome-injectjs)). 95 | Once you are familiar with the Redirector user interface, you can simply copy/paste these values 96 | into the appropriate fields: 97 | ``` 98 | Description: Goodreads Shelves: 100 books per page, sort by user-rating (highest first), covers-view 99 | Example : https://www.goodreads.com/review/list/13055874?per_page=20&sort=reviews&view=table&shelf=ㅡxx-xx&page=2 100 | Pattern : (https://www\.goodreads\.com/review/list/[^?]+)(?=(?:.*[?&](page=\d+))?)(?=(?:.*[?&](shelf=[^&]+))?) 101 | Redirect : $1?per_page=100&sort=rating&order=d&view=covers&$2&$3 102 | Type : Regular Expression 103 | ``` 104 | ``` 105 | Description: Goodreads "All Editions": Expanded details (language etc), 100 per page 106 | Example : https://www.goodreads.com/work/editions/80128-silence-on-the-wire?expanded=false&utf8=✓&sort=num_ratings&filter_by_format=Nook 107 | Pattern : (https://www\.goodreads\.com/work/editions/[^\?]*)\?*(.*) 108 | Redirect : $1?expanded=true&$2&per_page=100 109 | Type : Regular Expression 110 | ``` 111 | All expressions take inexact matches like "page" ∈ "per\_page", randomly ordered or missing 112 | parameters and Unicode values into account. Given duplicate query arguments, the last one applies.
113 | 114 | 115 | ## Discovering Non-Fiction Books 116 | 117 | - checkout the bibliography section of a good book (best signal-to-noise ratio); I use a separate _"bibliogr-to-check"_ Goodreads shelf to keep track of unchecked books 118 | - notice books mentioned in the _footnotes_ and literature sections of Wikipedia articles 119 | - notice books mentioned in magazine articles 120 | - notice alternative books mentioned in book reviews 121 | - notice names dropped in magazine articles and check them against Amazon 122 | - scan interesting websites/blogs for books 123 | - internal search or google for `book site:anygoodblog.com` 124 | - [HackerNewsBooks.com](https://hackernewsbooks.com/) 125 | - [top books on Reddit](http://booksreddit.com/) 126 | - [RedditFavorites.com](https://redditfavorites.com/books) 127 | - [BooksChatter.com](https://bookschatter.com) (scans Twitter) 128 | - search [books.google.com](https://www.google.com/search?tbm=bks&q=specific+interest) for "specific interest"; try Google's [Talk to Books](https://books.google.com/talktobooks/) (since April 2018) 129 | - search [Google scholar profiles](https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=label%3A&btnG=) for `label:MY_AREA_OF_INTEREST` and check profile names against Amazon's book search 130 | - [Google Alerts](https://www.google.com/alerts): "new book" + "specific interest" 131 | - follow [Goodreads users](https://www.goodreads.com/user/18418712-andr/following) with interesting libraries 132 | - find Goodreads members [with similar taste](./likeminded.md) _(my GR toolbox)_ 133 | - investigate a list of [authors similar to the authors in your shelves](./similarauth.md) on Goodreads _(my GR toolbox)_ 134 | - inspect Goodreads books [common among members you follow](./friendrated.md) _(my GR toolbox)_ 135 | - check the Amazon and Goodreads profiles of users who comment good books 136 | - [get notified](https://andre-st.github.io/goodreads/) of new reviewers for your 
favourite books _(my GR toolbox)_ 137 | - follow small or specialized publishers through a Twitter list, RSS-feed or newsletter (works so lala) 138 | - reddit ([r/booksuggestions](https://www.reddit.com/r/booksuggestions/), [r/suggestmeabook](https://www.reddit.com/r/suggestmeabook/), ...) , quora, ... 139 | - the better book sites: 140 | - [NewBooksNetwork.com](http://newbooksnetwork.com/) 141 | - [perlentaucher.de](https://www.perlentaucher.de/teaserliste/2_Buecher.html) (German) 142 | - [Hotlist](https://www.hotlist-online.com/) (German) 143 | - recommendation engines hardly work for me: Goodreads never, Amazon sometimes 144 | - [Bookstagram](https://www.instagram.com/explore/tags/bookstagram/) does not work for me 145 | - [BookTube](https://en.wikipedia.org/wiki/BookTube) does not work for me, girls club & primarily fiction 146 | - common bestseller lists do not work for me 147 | - Parakweet's BookVibe closed in 2016, they sent you a list of books that your friends are talking about on Twitter 148 | - ... 149 | - get your keywords right: you have to know the right technical terms before learning about them; try "science books" or "textbook" over "nonfiction", all not necessarily scientific or even academic but nonfiction is very broad; check non-English books too if you speak another language (no-brainer but st. I forgot) 150 | - bookmark interesting titles now and inspect them later, books must arouse interest also after one month; I use multiple Amazon wishlists, which also show current 2nd hand prices, my comments and prioritization; I have a separate "(lost interest)" wishlist as an alternative to deletion; my [Amazon Wishlist-Exporter](https://github.com/andre-st/amazon-wishless) helps keep the overview by filtering ~60 wishlists by price and priority. 151 | 152 | 153 | 154 | ## Annotating Books 155 | 156 | I try to develop my system for markings, notes etc in physical books, which is still in its infancy.
157 | 158 | ### Tools: 159 | - [Stabilo Boss text marker](https://www.amazon.com/-/de/dp/B01LXOQ1KJ) 160 | (primary color: yellow, looks cleaner than other colors) 161 | - sharp pencil 162 | - [Mont Marte electric eraser](https://www.amazon.com/Mont-Marte-Electric-Refills-Suitable/dp/B0791BP2PX) 163 | (I do not like rubbing around on the book pages with classic erasers 164 | because the notes can get smeared, pages can crease and tear, 165 | or notes remain slightly readable due to caution. 166 | An electric eraser allows much more controlled erasing and brings enough 167 | abrasion power to the sheet with the rotating head, 168 | and that comfortably from any sitting or lying position) 169 | 170 | ### Symbols: 171 | - !: important 172 | - X: strongly disagree, faulty reasoning, ... 173 | - ?: don't get it, fishy, not backed up well, hard to believe but don't know counter arguments yet 174 | - 1, 2, 3: restructure text - these blocks are self-contained 175 | - Ex: examples 176 | - Lit: literature references 177 | - URL: web addresses 178 | - circle around words: keywords in this important sentence, emphasis 179 | 180 | ### Other: 181 | - use margin to explain unknown words ("prebendalism: ...") 182 | 183 | 184 | 185 | 186 | 187 | ## Feedback 188 | 189 | Use [GitHub](https://github.com/andre-st/goodreads-toolbox/issues) or see [AUTHORS.md](../AUTHORS.md) file 190 | 191 | 192 | -------------------------------------------------------------------------------- /help/amz-tradein.md: -------------------------------------------------------------------------------- 1 | # amz-tradein.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/yes/2015.svg) 4 | 5 | 6 | ## Autom. Amazon-Trade-In-Preisliste für Goodreads-Bücher 7 | ```console 8 | $ ./gr-tradein.pl 18418712 books-for-sale 9 | EUR 8,50 Schneekreuzer.
Alle drei Teile in einem Band 10 | EUR 3,37 Exit Wounds 11 | EUR 2,45 Software Factories: Assembling Applications with Patterns, Models, Frameworks and Tools 12 | EUR 0,15 Death March 13 | EUR 0,15 Bellum Gallicum. Text 14 | EUR 0,10 Wien wartet auf Dich. Der Faktor Mensch im DV Management 15 | EUR 0,10 Produkt ist Kommunikation - Integration von Branding und Usability 16 | EUR 0,10 Politik als Beruf 17 | ``` 18 | 19 | ## Amazon kauft gebrauchte Bücher zurück 20 | - kein Warten auf Käufer, kein Werben nötig = schneller Verkauf alter Bücher 21 | - finanz. immer Verlust, aber Buchfehlkäufe verstauben sonst bzw. ärgern mit ihrer Gegenwart 22 | - Fachbücher erlösten manchmal 10-25 EUR (50% vom Einkaufspreis) 23 | - Erlöse z.B. für den Kauf anderer Gebrauchtbücher 24 | - _zeitaufwendig_ (a) immer wieder und (b) genug höherpreisige Bücher per Hand zu finden 25 | - vertane Zeit, wenn sich nichts findet; Preise ändern sich regelm. 26 | - lohnende Bücher übersehen, falsch beurteilt 27 | - Gr-tradein.pl ermittelt _automatisch_ alle Angebote für ein gesamtes Goodreads-Regal 28 | - Goodreads.com: weltgrößte Lesegemeinde + Tools zur Bücherverwaltung 29 | 30 | 31 | ## Installation unter GNU/Linux 32 | 1. Keine Installation nötig! Amazon kauft nichts mehr ([seit 31.08.15](https://www.amazon.de/gp/browse/ref=trdrt_conf_exodus?ie=UTF8&node=4455884031)) 33 | 2. Perl ist oft vorinstalliert 34 | 3. amz-tradein.pl ausführbar machen (chmod +x) und starten, Hilfe erscheint 35 | 4. bei Startfehler evtl. das Perl-Modul WWW::Curl::Easy z.B. 
über [cpan](http://perl.about.com/od/packagesmodules/qt/perlcpan.htm) installieren 36 | -------------------------------------------------------------------------------- /help/friendgroup.md: -------------------------------------------------------------------------------- 1 | # friendgroup.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Discussion groups common among the people you follow 7 | 8 | From the _Goodreads Feedback_ forum, [Carlissa (2018)](https://web.archive.org/web/20190525013220/https://www.goodreads.com/topic/show/19548229-finding-a-particular-type-of-group): 9 | > ... The best way to find a group is by word of mouth from friends ... 10 | 11 | or Faith (ibidem): 12 | > Look at the lists of groups to which your friends or people you follow belong. 13 | 14 | 15 | ## This 16 | 17 | ![Screenshot](img/friendgroup.png?raw=true "Screenshot") 18 | 19 | 20 | 21 | ## How to generate this on a GNU/Linux operating system 22 | 23 | 1. [Install the toolbox](../README.md#Getting-started) 24 | 2. at the prompt, enter: 25 | 26 | ```console 27 | $ ./friendgroup.pl --help 28 | $ ./friendgroup.pl goodlogin@example.com 29 | 30 | Enter GR password for goodlogin@example.com: ***************** 31 | Signing in to Goodreads... OK 32 | Getting list of members known to #18418712... 141 members (0.18s) 33 | [ 0%] Aron Mellendar #21254511 0 groups 0.41s 34 | [ 1%] Moshe Fiono #3932835 0 groups 0.80s 35 | [ 2%] Peter Glowwa #18936366 2 groups 0.58s 36 | [ 3%] DuyGeboad #73957929 9 groups 0.05s 37 | [ 3%] Michael #9482539 0 groups 0.15s 38 | [ 5%] Peter Prischl #17272051 0 groups 1.47s 39 | [ 6%] Steven Shoffork #51011129 0 groups 0.15s 40 | [ 7%] 2mo #32504210 12 groups 0.07s 41 | ... 42 | [ 99%] Charlene #2442665 0 groups 2.41s 43 | [100%] David #7634567 0 groups 0.01s 44 | 45 | Perfect! Got groups of 141 users. 46 | Writing results to "./list-out/friendgroup-1234567.html"... 
245 groups (0.31s) 47 | Total time: 2 minutes 48 | ``` 49 | 50 | **Note:** 51 | 52 | You can break the process with CTRL-C and continue later 53 | without having to re-read all online sources again, as reading from 54 | Goodreads.com is very time consuming. The script internally uses a 55 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/. 56 | 57 | 58 | 59 | ## Observations and limitations 60 | 61 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 62 | - sometimes you don't know what you can have and you wouldn't search for it, but it might show up here 63 | 64 | 65 | ## Feedback 66 | 67 | If you like this project, give it a star on GitHub. 68 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 69 | or see the [AUTHORS.md](../AUTHORS.md) file. 70 | 71 | 72 | ## See also 73 | 74 | - "[Groups With My Books](https://www.goodreads.com/group/my_books)" - Groups that have added a book in your shelves (Goodreads feature) 75 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 76 | - [friendnet.pl](friendnet.md) - Social network analysis 77 | - [likeminded.pl](likeminded.md) - Find Goodreads members with similar book taste 78 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 79 | - [similarauth.pl](similarauth.md) - Find all similar authors 80 | - [search.pl](search.md) - Sort books-search results by popularity or date published 81 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 82 | -------------------------------------------------------------------------------- /help/friendnet.md: -------------------------------------------------------------------------------- 1 | # friendnet.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Analyze your Goodreads.com social network 7 | 8 | Spiders your social network and creates files with edges and nodes which 
can be 9 | easily processed with social network analysis software. 10 | 11 | 12 | ## Output 13 | 14 | ```console 15 | $ head friendnet-nodes.csv friendnet-edges.csv 16 | ==> friendnet-nodes.csv <== 17 | id,name,img_url 18 | 50965461,"Peter Hesar",https://images.gr-assets.com/users/1514444137p2/50911111.jpg 19 | 15232357,"Carole Arsifeult",https://images.gr-assets.com/users/139552226262/15222217.jpg 20 | 41256336,"Jordan Teller",https://images.gr-assets.com/users/1427180778p2/41444336.jpg 21 | 4112343,Tim,https://images.gr-assets.com/users/1432411115p2/4114553.jpg 22 | 23 | ==> friendnet-edges.csv <== 24 | from,to 25 | 15234712,18525218 26 | 15234712,8251216 27 | 15234712,13152689 28 | 15234712,9362611 29 | ``` 30 | 31 | Comma-separated values (CSV) files can be easily processed with any social network 32 | analysis (SNA) software such as `R` with the `igraph` package or similar. 33 | You can run other statistics software or query languages against CSV-files too, 34 | e.g. `q` is SQL for CSV. 35 | A user sent me a screenshot with Excel processing these data, which looked good too. 36 | 37 | 38 | ## Social network analysis (SNA) 39 | 40 | Generated network type: 41 | - Egocentric (not sociocentric/complete), 42 | - Directed (not undirected), 43 | - Binary (not valued), 44 | - One-Mode (not bipartite/multi-mode), 45 | - Connected (not disconnected) 46 | 47 | 48 | ![Network](img/friendnet.png?raw=true "Network") 49 | 50 | 51 | ```R 52 | TODO: R/igraph-examples: 53 | - direct influence on neighbours (degree centrality) 54 | - brokerage or gatekeeping potential (betweenness centrality) 55 | - influence entire network most quickly or: who hears news first (closeness centrality) 56 | - influence over whole network, not just neighbours (eigen centrality) 57 | - probability that any message will arrive (page rank) 58 | - linked by many nodes that are linking many other nodes (Kleinberg authority score) 59 | - community detection 60 | - ...
61 | ``` 62 | 63 | ```console 64 | TODO: q-example "Members popular among your friends" 65 | ``` 66 | 67 | 68 | ## How to generate this on a GNU/Linux operating system 69 | 70 | 1. [Install the toolbox](../README.md#Getting-started) 71 | 2. at the prompt, enter: 72 | 73 | ```console 74 | $ ./friendnet.pl --help 75 | $ ./friendnet.pl goodlogin@example.com 76 | 77 | Enter GR password for goodlogin@example.com: ****************** 78 | Signing in to Goodreads... OK 79 | Traversing #18418712's social network (depth=2)... 80 | Covered: [100%] 81 | Writing network data to: 82 | ./list-out/friendnet-5685856-nodes.csv (N=76622) 83 | ./list-out/friendnet-5685856-edges.csv (N=106974) 84 | 85 | Total time: 195 minutes 86 | ``` 87 | 88 | **Note:** 89 | 90 | You can break the process with CTRL-C and continue later 91 | without having to re-read all online sources again, as reading from 92 | Goodreads.com is very time consuming. The script internally uses a 93 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/. 94 | 95 | 96 | 97 | ## Observations and limitations 98 | 99 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 100 | 101 | 102 | 103 | ## Feedback 104 | 105 | If you like this project, give it a star on GitHub. 106 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 107 | or see the [AUTHORS.md](../AUTHORS.md) file. 
108 | 109 | 110 | ## See also 111 | 112 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 113 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 114 | - [likeminded.pl](likeminded.md) - Find Goodreads members with similar book taste 115 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 116 | - [similarauth.pl](similarauth.md) - Find all similar authors 117 | - [search.pl](search.md) - Sort books-search results by popularity or date published 118 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 119 | 120 | 121 | -------------------------------------------------------------------------------- /help/friendrated.md: -------------------------------------------------------------------------------- 1 | # friendrated.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Books common among the people you follow 7 | 8 | From the _Goodreads Feedback_ forum, 9 | [Sophie (2013)](https://web.archive.org/web/20190525013028/https://www.goodreads.com/topic/show/1573755-most-popular-books-among-friends) or 10 | [Madharper (2019)](https://help.goodreads.com/s/question/0D51H00004RMzDLSA1/is-there-any-way-to-list-all-the-books-that-my-friends-have-given-five-star-ratings-to?t=1566829298278) or 11 | [Anne (2018)](https://web.archive.org/web/20190525012925/https://www.goodreads.com/topic/show/19320371-recommendations): 12 | > I often choose a book to read if many of the people I follow have read it 13 | > (and rated it high). Anyway, to find these kind of books isn't always easy 14 | > especially if they are published many years ago and do not pop up in my news 15 | > feed daily. 16 | > Could Goodreads develop a feature which recommends a book because it is 17 | > common among the people I follow? 
18 | 19 | 20 | ## This 21 | 22 | ![Screenshot](img/friendrated2.png?raw=true "Screenshot") 23 | 24 | The report also includes a table with the most liked authors among the friends and followees: 25 | 26 | ![Screenshot](img/friendrated3.png?raw=true "Screenshot") 27 | 28 | 29 | ## How to generate this on a GNU/Linux operating system 30 | 31 | 1. [Install the toolbox](../README.md#Getting-started) 32 | 2. at the prompt, enter: 33 | 34 | ```console 35 | $ ./friendrated.pl --help 36 | $ ./friendrated.pl goodlogin@example.com 37 | 38 | Enter GR password for goodlogin@example.com: ************** 39 | Signing in to Goodreads... OK 40 | Getting list of members known to #18418712... 164 members (0.18s) 41 | [ 0%] Aron Mellendar #21254511 247 read 94 hits 0.41s 42 | [ 1%] Moshe Fiono #3932835 520 read 126 hits 0.80s 43 | [ 2%] Peter Glowwa #18936366 392 read 148 hits 0.58s 44 | [ 3%] DuyGeboad #73957929 9 read 0 hits 0.05s 45 | [ 3%] Michael #9482539 88 read 61 hits 0.15s 46 | [ 5%] Peter Prischl #17272051 1034 read 913 hits 1.47s 47 | [ 6%] Steven Shoffork #51011129 69 read 50 hits 0.15s 48 | [ 7%] 2mo #32504210 12 read 6 hits 0.07s 49 | ... 50 | [ 99%] Charlene #2442665 1172 read 732 hits 2.41s 51 | [100%] David #7634567 142 read 58 hits 0.01s 52 | 53 | Perfect! Got favourites of 164 users. 54 | Writing results to: 55 | ./list-out/friendrated-1234567-read.html (271 books) 56 | ./list-out/friendrated-1234567-read-authors.html (210 authors) 57 | 58 | Total time: 18 minutes 59 | ``` 60 | 61 | **Note:** 62 | 63 | You can break the process with CTRL-C and continue later 64 | without having to re-read all online sources again, as reading from 65 | Goodreads.com is very time consuming. The script internally uses a 66 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/. 67 | 68 | "0 read 0 hits" is either an empty shelf or a shelf accessible only to friends 69 | of that person (depends on your login). 
70 | 71 | 72 | ## Alternative reports 73 | 74 | - most _wished-for_ books among the members you follow: use `--toread` option 75 | - most _hated_ books among the members you follow: use `--hated` option 76 | - there are options (`--help`) to fine-tune the reports, e.g., 77 | only include books published in a specific time-range, 78 | exclude bestsellers etc. 79 | 80 | 81 | ## Observations and limitations 82 | 83 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 84 | - books in the upper value range are usually well-known titles, fiction, classics, no surprises 85 | - female GR members mainly read fiction, tend to give 4 and 5 stars pretty generously, 86 | and their networks are female 87 | - start with harsh program settings: min rating of 5 and rated by min 5 followees 88 | - "common authors" tables can be misleading, at the moment: 89 | it just counts the frequency of a name but does not take into account 90 | the aggregated ratings of a member for a specific author, example: 91 | 20 members hate 10 books of an author except 1 book. 92 | the program would count 20x a love relationship for this author, 93 | although the books in general of this author are more often hated 94 | 95 | 96 | ## Feedback 97 | 98 | If you like this project, give it a star on GitHub. 99 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 100 | or see the [AUTHORS.md](../AUTHORS.md) file. 
101 | 102 | 103 | ## See also 104 | 105 | - ~~[Popular books](https://www.goodreads.com/friend/popular_books) among my friends _this month_ (Goodreads feature)~~ 106 | - [Most read by pub-year](https://www.goodreads.com/book/popular_by_date/1919/) on all of Goodreads 107 | - [likeminded.pl](likeminded.md) - Find Goodreads members with similar book taste 108 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 109 | - [friendnet.pl](friendnet.md) - Social network analysis 110 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 111 | - [similarauth.pl](similarauth.md) - Find all similar authors 112 | - [search.pl](search.md) - Sort books-search results by popularity or date published 113 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 114 | 115 | -------------------------------------------------------------------------------- /help/img/friendgroup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendgroup.png -------------------------------------------------------------------------------- /help/img/friendrated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendrated.png -------------------------------------------------------------------------------- /help/img/friendrated2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendrated2.png -------------------------------------------------------------------------------- /help/img/friendrated3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendrated3.png -------------------------------------------------------------------------------- /help/img/likeminded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/likeminded.png -------------------------------------------------------------------------------- /help/img/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/search.png -------------------------------------------------------------------------------- /help/img/similarauth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/similarauth.png -------------------------------------------------------------------------------- /help/likeminded.md: -------------------------------------------------------------------------------- 1 | # likeminded.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Finding people based on the books they've read 7 | 8 | From the _Goodreads Feedback_ forum, 9 | [Linda (2010)](https://web.archive.org/web/20170427072407/http://www.goodreads.com/topic/show/298531-is-there-an-option-to-do-a-general-search-for-people-with-similar-readin) 10 | or [Michael (2013)](https://web.archive.org/web/20190525014022/https://www.goodreads.com/topic/show/1619830-finding-friends-using-compare-books) 11 | or [Wren (2014)](https://web.archive.org/web/20190525013926/https://www.goodreads.com/topic/show/1790589-what-if-there-was-a-recommended-friends-feature) 12 | or [Kara 
(2015)](https://web.archive.org/web/20190525013835/https://www.goodreads.com/topic/show/17019858-compare-books-suggestion) 13 | or [Samantha (2016)](https://web.archive.org/web/20190525013741/https://www.goodreads.com/topic/show/18167287-users-like-you-feature-suggestion) 14 | or [Jacob (2017)](https://web.archive.org/web/20190525013655/https://www.goodreads.com/topic/show/18433578-find-me-a-friend-with-same-taste-for-books) 15 | or [Superbunny (2018)](https://web.archive.org/web/20190525013600/https://www.goodreads.com/topic/show/19361289-searching-others-with-similar-taste-to-mine) 16 | or [Marc (2018)](https://web.archive.org/web/20190525013501/https://www.goodreads.com/topic/show/19252693-new-suggestion-to-find-like-minded-people) 17 | or [Anna (2019)](https://help.goodreads.com/s/question/0D51H00004AEWwNSAX/is-there-a-way-to-find-people-who-have-similar-tastes-in-books-i-like-so-i-can-follow-them) 18 | or [Mehran](https://web.archive.org/web/20190525013406/https://www.goodreads.com/topic/show/19397936-finding-people-based-on-the-books-they-ve-read): 19 | > Is there a way to search for people who have read books X, Y, and Z? Or maybe 20 | > a way for you to find people who have many books in common with you, without 21 | > going through people manually? If such features don't exist, Goodreads should 22 | > definitely add them. They can provoke many conversations among people who have 23 | > similar tastes in books. 24 | 25 | 26 | 27 | ## This 28 | 29 | ![Screenshot](img/likeminded.png?raw=true "Screenshot") 30 | 31 | 32 | 33 | ## How to generate this on a GNU/Linux operating system 34 | 35 | 1. [Install the toolbox](../README.md#Getting-started) 36 | 2. at the prompt, enter: 37 | ```console 38 | $ ./likeminded.pl --help 39 | $ ./likeminded.pl goodlogin@example.com 40 | 41 | Enter GR password for goodlogin@example.com: ****************** 42 | Signing in to Goodreads... OK 43 | Loading authors from "ALL" may take a while... 
95 authors 44 | Loading books of 95 authors: 45 | [ 1%] Schuberth, Richard #2793763 6 books 1.03s 46 | [ 2%] Lohoff, Ernst #1339033 4 books 1.05s 47 | [ 3%] Huang, Andrew "bunnie" #2949412 6 books 1.04s 48 | [ 4%] Pullum, Laura L. #476506 2 books 1.05s 49 | [ 5%] Patri, Giacomo #379757 3 books 1.04s 50 | ... 51 | [100%] Fertl, Herbert L. #16159494 1 books 1.03s 52 | Done. 53 | Loading readers of 1625 author books: 54 | [ 0%] First as Tragedy, Then as Farce #6636487 2278 memb 134.20s 55 | [ 0%] Descriptive Check List: Together With #6517166 0 memb 1.41s 56 | [ 0%] Little Brother (Little Brother, #1) #25547383 5885 memb 324.83s 57 | [ 0%] The Hardware Hacker: Adventures in Ma #30804383 219 memb 11.25s 58 | [ 1%] Hacking the Xbox: An Introduction to #984394 206 memb 10.26s 59 | ... 60 | [100%] Maker Pro Essays on Making a Living a #24214717 33 memb 1.09s 61 | Done. 62 | Dropping who read less than 5% of your authors... -20205 memb (99.998%) 63 | Loading profiles of the remaining 420 members: 64 | [ 0%] goodreads.com/user/show/120456 1.21s * 65 | [ 1%] goodreads.com/user/show/65482 2.10s **** 66 | [ 1%] goodreads.com/user/show/45763483 0.90s 67 | [ 2%] goodreads.com/user/show/773911 2.23s private account 68 | [ 2%] goodreads.com/user/show/1031286 3.01s 69 | ... 70 | [100%] goodreads.com/user/show/818022 1.01s ** 71 | Done. 72 | Writing report (N=399) to "./list-out/likeminded-18418712.html"... 73 | Total time: 294 minutes 74 | ``` 75 | 76 | **Note:** 77 | 78 | You can break the process with CTRL-C and continue later 79 | without having to re-read all online sources again, as reading from 80 | Goodreads.com is very time consuming. The script internally uses a 81 | file-cache which saves to /tmp/FileCache/. 82 | The program is designed to run unattended, outwaits connection issues etc. 
83 | 84 | 85 | ## Observations and limitations 86 | 87 | #### Latest version: 88 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 89 | - loading data could take a month given too many books 90 | - prefer loading from a separate _"best-of"_ shelf: 91 | - use the program's `--shelf` option, avoid _"All"_ or _"Read"_ shelves 92 | - add _100_ good but rare books (<5000 ratings) 93 | - the more popular your literature, the longer the program's runtime 94 | - the more popular your lit, the more generic the results (500 million sales of Harry Potter) 95 | - the more popular your lit, the less likely we detect recurring members (we cannot see all readers) 96 | - you can add books to your shelf more quickly in Goodreads' [batch edit](https://2.bp.blogspot.com/-MBcqYj2mK_I/UsyW06AX43I/AAAAAAAAEdE/5V5z2_XJaCI/s1600/Step+1&2.jpg) mode 97 | - alternatively load from multiple smaller shelves: 98 | `./likeminded.pl --shelf=nonfiction --shelf=poetry ...` 99 | - make sure you have some _Gigabytes_ of free diskspace in `/tmp/`: 100 | - my last test run with 356 books filled 11 GB in ~24 hours (many small files) 101 | - there's no way to get _all_ readers of a book: 102 | - the program tries different things to get as many as possible 103 | - you can tune this with the `--rigor` program option (increases runtime) 104 | - there is a number of readers not considered in our statistics 105 | - we cannot randomize in a way which would produce samples of similar size 106 | - although we don't get _all_ readers (for books with tens of thousands of readers), 107 | the final report still contains _enough_ members who read the same N authors 108 | - does _not_ list members with private accounts anymore 109 | - slow but good enough; you won't run it more often than 4x a year 110 | - _"...most number of shared books would be a list of children's books"_ 111 | - exclude them by passing one or many `--shelf` arguments to the program 112 | 113 | 114 | #### Library 
sizes as ranking factor: 115 | - there are members with many common authors just because they have huge libraries 116 | - there are members with 94.857 ratings, likely bots 117 | - in a previous program version, 118 | a member with 11 common authors and 3000 books in total was shown earlier than 119 | a member with 10 common authors and 300 books in total, 120 | although the latter one is probably more "like-minded" 121 | - getting the library sizes requires an additional profiles loading stage 122 | - increased runtime is accepted as weeding out members 123 | by hand takes even longer 124 | 125 | 126 | #### First version compared books, not authors: 127 | - turned out to be too narrow in order to produce satisfying results 128 | - given 299 books and a minimum of 9 _common_ books (3% similarity), 129 | I've got 10 of 31.398 members, 130 | with 5 members actually worth investigating, 131 | and only 1 member already on my hand-curated list of 137 followees 132 | - a minimum of 6 common books (2%) listed 43 members, more or less interesting 133 | - we learn: book combinations tend to become unique quickly 134 | - combinations of same books are more rare than combinations of same authors, 135 | while latter still satisfies the 'same taste' condition 136 | (the assumption with 'same books' is that likeminded people had the same exposure to the exact same books, but that's questionable - and comparing the _authors_ relaxes this assumption) 137 | - the new authors-version takes longer but yields better results, e.g., 138 | more matches with my hand-curated followees list 139 | - try program argument `--maxauthorbooks=50` to reduce runtime (checks max. 50 books per author) 140 | 141 | 142 | #### Alternatives to consider: 143 | - _"I look for people who __dislike__ the same books that I do. I don't have a problem finding books to read. 
What I need is someone who can warn me about the books that everyone else seems to love."_ 144 | - current results based on stuff you already liked, model the past as identical to the future, based on who you are/were, not who you want or could become (stuff that's out of your current wheelhouse but still has likeable features) 145 | - maybe recommendations from our to-read lists are more interesting/up-to-date than our read-lists 146 | - _"People *near me* with the same taste in books."_, additional filter by city 147 | 148 | 149 | ## Feedback 150 | 151 | If you like this project, give it a star on GitHub. 152 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 153 | or see the [AUTHORS.md](../AUTHORS.md) file. 154 | 155 | 156 | ## See also 157 | 158 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 159 | - [friendnet.pl](friendnet.md) - Social network analysis 160 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 161 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 162 | - [similarauth.pl](similarauth.md) - Find all similar authors 163 | - [search.pl](search.md) - Sort books-search results by popularity or date published 164 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 165 | 166 | -------------------------------------------------------------------------------- /help/recentrated.md: -------------------------------------------------------------------------------- 1 | # recentrated.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Know when people rate or write reviews about a book 7 | 8 | From the _Goodreads Feedback_ forum, 9 | [Scribble (2010)](https://web.archive.org/web/20190525012742/https://www.goodreads.com/topic/show/440170-follow-user-reviews----follow-the-book) 10 | or [Jimmy 
(2011)](https://web.archive.org/web/20190525012635/https://www.goodreads.com/topic/show/563115-follow-a-book) 11 | or [PetraX (2014)](https://web.archive.org/web/20190525012443/https://www.goodreads.com/topic/show/2136206-following-books) 12 | or [Lucas (2018)](https://web.archive.org/web/20190525012344/https://www.goodreads.com/topic/show/19212816-follow-all-reviews-of-a-book) 13 | or [Jason (2018)](https://web.archive.org/web/20190525012148/https://www.goodreads.com/topic/show/19540183-subscribe-to-book-reviews-of-certain-books) 14 | or [Elizabeth (2016):](https://web.archive.org/web/20190525012253/https://www.goodreads.com/topic/show/18060629-follow-book) 15 | > I know this has been requested before, but I'd really like the opportunity to 16 | > follow a book. I'd like to know when people rate or write reviews about a 17 | > book and to be notified of such. I have some favorites that are not 18 | > particularly well known or often read, and I'd like to know about who chooses 19 | > to read them. 20 | 21 | 22 | **Receive notification e-mails:** 23 | ``` 24 | From: yourmail@example.com 25 | To: yourmail@example.com 26 | Subject: New ratings on Goodreads.com 27 | Date: Wed, 10 Jan 2018 21:10:50 +0100 28 | 29 | Recently rated books in your "watch-ratings" shelf: 30 | 31 | "The Machine Question" 32 | www.goodreads.com/user/show/54336239 [*****] 33 | 34 | "Spam: A Shadow History of the Internet" 35 | www.goodreads.com/book/show/16718273 [9 new] 36 | 37 | "Understanding Beliefs" 38 | www.goodreads.com/review/show/22346637 [TTTT ] 39 | www.goodreads.com/user/show/24850532 [** ] 40 | 41 | 42 | -- 43 | [*** ] 3/5 stars rating without text 44 | [ttt ] 3/5 stars rating with tweet-size text 45 | [TTT ] 3/5 stars rating with text 46 | [9 new] ratings better viewed on book page 47 | ... 
48 | ``` 49 | - low-bandwidth, distraction-free plaintext mail; HTML mail appeals to marketers because it's another place to stick their logo, nobody else needs it 50 | - most mail-clients recognize the signature and the links and make the latter clickable 51 | - changes are collected in periodic mails; individual mails would be annoying 52 | - text-reviews in the mail are bloat, a click on a review-link is bearable - I would have checked the reviewer on the GR website anyway 53 | - usernames in the mail are bloat - 99% are unknown/random letters to me and I would see it on the GR website anyway 54 | 55 | 56 | ## How to "follow books" 57 | 58 | ### Installation-free: 59 | 60 | 1. visit [https://andre-st.github.io/goodreads/](https://andre-st.github.io/goodreads/) 61 | 2. enter your e-mail and shelf address 62 | 63 | 64 | ### Dos and don'ts: 65 | 66 | - don't use the "All" or "Read" shelves; be picky, use a separate single purpose shelf 67 | - don't run this on more than one of your shelves; it's feasible but better use a single purpose shelf 68 | - don't use this program with well known fiction books that get a lot of reviews; 69 | some books receive 300 ratings every day = no insights, readers too random; a separate single 70 | purpose shelf allows fine-tuning by dropping such books over time without affecting other shelves 71 | - create and [populate](http://i0.wp.com/theeverscholar.com/wp-content/uploads/2015/03/goodreads3.jpg) 72 | a Goodreads shelf, e.g., "watch-ratings": You can add and remove books at any time. 73 | New books will be checked automatically. 74 | Such a shelf prevents unnecessary mails and eases manual checks if this system is discontinued someday 75 | 76 | 77 | ### Installation on a server: 78 | 79 | 1. open a GNU/Linux terminal and install the Goodreads Toolbox: 80 | ```console 81 | $ git clone https://github.com/andre-st/goodreads-toolbox.git 82 | $ cd goodreads-toolbox 83 | $ sudo make # Required Perl modules from CPAN etc. 84 | ``` 85 | 2. 
have a sendmail MTA set up. 86 | The simplest option is 87 | [ssmtp](https://wiki.debian.org/sSMTP) (deprecated) or 88 | [nullmailer](http://untroubled.org/nullmailer/) or 89 | [msmtp-mta](http://msmtp.sourceforge.net), 90 | with your original sendmail being renamed and symlinked to one of them: 91 | ```sh 92 | $ vi ~/.msmtprc 93 | # All accounts: 94 | defaults 95 | auth on 96 | tls on 97 | tls_starttls on 98 | tls_trust_file /etc/ssl/certs/ca-certificates.crt # or .../ca-bundle.crt 99 | logfile ~/.msmtp.log 100 | 101 | # Gmail account: 102 | account gmail 103 | host smtp.gmail.com # smtp-relay.gmail.com for G Suite users 104 | port 587 105 | from XXXXXXXX@gmail.com 106 | user XXXXXXXX@gmail.com 107 | password XXXXXXXXXXXXXXXXXX 108 | 109 | # Default account: 110 | account default : gmail 111 | 112 | $ chmod u=rw,go= ~/.msmtprc 113 | $ echo "Test message" | mail -s "Mailer Test" XXXXXXXX@gmail.com 114 | $ echo "Subject: Mailer Test" | sendmail -v XXXXXXXX@gmail.com 115 | ``` 116 | 117 | 3. add a cron-job (I prefer 118 | [anacrony](https://en.wikipedia.org/wiki/Anacron "performs pending jobs if the computer was previously shut down") 119 | daemons such as 120 | [dcron](https://github.com/dubiousjim/dcron) or 121 | [fcron](https://en.wikipedia.org/wiki/Fcron)): 122 | edit `/etc/cron.daily/goodratings` and replace ARGUMENTS: 123 | ```sh 124 | #!/usr/bin/env sh 125 | # `ifne` is part of `moreutils` 126 | /path/to/recentrated.pl GOODUSERID SHELFNAME YOURMAIL@EXAMPLE.COM | ifne /usr/sbin/sendmail -t 127 | 128 | # Provide this self-hosted service to your Goodreads friends too! 129 | # ... HERUSERID HERSHELF HERMAIL@EXAMPLE.COM ADMIN@EXAMPLE.COM | ... 130 | # ... 131 | ``` 132 | ```sh 133 | $ sudo chmod +x /etc/cron.daily/goodratings 134 | ``` 135 | See also [cron.daily/goodratings.example](../cron.daily/goodratings.example) 136 | 137 | 138 | ## Feedback 139 | 140 | If you like this project, you can "star" it on GitHub. 
141 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 142 | or see the [AUTHORS.md](../AUTHORS.md) file. 143 | 144 | 145 | ## See also 146 | 147 | - [likeminded.pl](likeminded.md) - Find Goodreads members with similar book taste 148 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 149 | - [friendnet.pl](friendnet.md) - Social network analysis 150 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 151 | - [similarauth.pl](similarauth.md) - Find all similar authors 152 | - [search.pl](search.md) - Sort books-search results by popularity or date published 153 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 154 | 155 | -------------------------------------------------------------------------------- /help/savreviews.md: -------------------------------------------------------------------------------- 1 | # savreviews.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Download all reviews for a book, e.g., for sentiment analysis 7 | 8 | From [r/goodreads (2018)](https://www.reddit.com/r/goodreads/comments/aail3f/is_there_any_way_website_or_api_to_see_all/) or the _Goodreads Developers_ forum, 9 | [Breslin (2018)](https://web.archive.org/web/20190525014427/https://www.goodreads.com/topic/show/19484417-increase-the-visible-number-of-ratings-of-a-book) 10 | or [Giulia (2018)](https://web.archive.org/web/20190525014339/https://www.goodreads.com/topic/show/19477061-how-can-i-extract-all-reviews-full-text-for-a-specific-book): 11 | 12 | > I simply need to obtain all (or as many) reviews for two books, namely 13 | > Woolf's To the Lighthouse and Mrs Dalloway, so that i can then analyse 14 | > the corpus obtained from them and see if readers define the two novels 15 | > as "difficult". 
16 | 17 | 18 | ## Output format 19 | ```console 20 | $ cat savreviews-book12345-stars2.txt 21 | 2018/12/29 #1234567 22 | 23 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do 24 | eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad 25 | minim veniam, quis nostrud exercitation ullamco laboris nisi ut 26 | aliquip ex ea commodo consequat. 27 | 28 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum 29 | dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non 30 | proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 31 | 32 | ------------------------------------------------------------------------------- 33 | 2018/10/21 #7654321 34 | 35 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi 36 | ut aliquip ex ea commodo consequat: example.com 37 | 38 | ------------------------------------------------------------------------------- 39 | 2018/04/01 #918273 40 | 41 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do 42 | eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad 43 | minim veniam, quis nostrud exercitation ullamco laboris nisi 44 | ``` 45 | 46 | **Note:** 47 | 48 | The generated files (one per star-rating) contain review-texts, dates and the review-ID only. 49 | They do not contain any other information, e.g., user names. 50 | If there is interest in these details or other output formats, just contact 51 | me or [add an issue](https://github.com/andre-st/goodreads-toolbox/issues). 52 | 53 | 54 | 55 | ## How to generate this on a GNU/Linux operating system 56 | 57 | 1. [Install the toolbox](../README.md#Getting-started) 58 | 2. at the prompt, enter: 59 | 60 | ```console 61 | $ ./savreviews.pl --help 62 | $ ./savreviews.pl 59716 # Goodreads Book-ID in URL 63 | 64 | Loading reviews for "To the Lighthouse"... 
5271 of 5860 [searching] 65 | 66 | Number of reviews per year: 67 | 2007 ################ 263 68 | 2008 ##################### 343 69 | 2009 ################ 266 70 | 2010 ################# 276 71 | 2011 ###################### 357 72 | 2012 ############################# 473 73 | 2013 ################################## 565 74 | 2014 ############################ 456 75 | 2015 ########################### 440 76 | 2016 ############################# 474 77 | 2017 #################################### 599 78 | 2018 ######################################## 648 79 | 2019 ###### 111 80 | 81 | Writing reviews to: 82 | ./list-out/savreviews-book59716-stars0.txt 83 | ./list-out/savreviews-book59716-stars1.txt 84 | ./list-out/savreviews-book59716-stars2.txt 85 | ./list-out/savreviews-book59716-stars3.txt 86 | ./list-out/savreviews-book59716-stars4.txt 87 | ./list-out/savreviews-book59716-stars5.txt 88 | 89 | Total time: 36 minutes 90 | ``` 91 | 92 | 93 | ## Observations and limitations 94 | 95 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 96 | - there's no way to load _all_ reviews of a book, but the program 97 | tries different things to get as many fulltext reviews as 98 | possible -- this can take very long (see `--rigor` parameter and [this](../list-in/)) 99 | - needs data cleansing on your side 100 | - review text might include user-entered (broken) HTML code and URLs 101 | - review text can be in any language, e.g., German or Russian 102 | - review text might include non-latin characters, e.g., Cyrillic 103 | - no duplicate reviewers, but could theoretically contain duplicate 104 | reviews posted by different members (statistically negligible?) 105 | 106 | 107 | ## Feedback 108 | 109 | If you like this project, give it a star on GitHub. 110 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 111 | or see the [AUTHORS.md](../AUTHORS.md) file. 
112 | 113 | 114 | ## See also 115 | 116 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 117 | - [friendnet.pl](friendnet.md) - Social network analysis 118 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 119 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 120 | - [similarauth.pl](similarauth.md) - Find all similar authors 121 | - [likeminded.pl](likeminded.md) - Finding people based on the books they've read 122 | 123 | 124 | -------------------------------------------------------------------------------- /help/search.md: -------------------------------------------------------------------------------- 1 | # search.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Sort Goodreads search results by popularity or date published 7 | 8 | From the _Goodreads Feedback_ forum, 9 | [Pawel (2010)](https://web.archive.org/web/20190525015116/https://www.goodreads.com/topic/show/423469-sorting-search-results) 10 | or [obsessedwithbooks (2013)](https://web.archive.org/web/20190525015022/https://www.goodreads.com/topic/show/1188302-sort-search-results) 11 | or [Sonja (2016)](https://web.archive.org/web/20190525014930/https://www.goodreads.com/topic/show/18177911-advanced-search-for-books) 12 | or [Ferouk (2016)](https://web.archive.org/web/20190525014842/https://www.goodreads.com/topic/show/18084428-we-want-to-find-good-books-fast) 13 | or [David-Emmanuel (2017)](https://web.archive.org/web/20190525014755/https://www.goodreads.com/topic/show/18541118-better-search) 14 | or [Halordain (2017)](https://web.archive.org/web/20190525014643/https://www.goodreads.com/topic/show/18496984-sorting-by-average-rating) 15 | or [Kevin (2018)](https://web.archive.org/web/20190525014542/https://www.goodreads.com/topic/show/19464605-sort-search-results-by-rating): 16 | 17 | > I am trying to explore and discover the *best* books. 
I am not looking 18 | > for the most relevant book. Probably all the books that contain 19 | > "Linux" in the title are relevant to what I'm looking for. I am not 20 | > interested in a particular book's algorithmically-determined 21 | > "relevance score" to my search query. I'm strictly interested in star 22 | > ratings. 23 | 24 | In addition to [Em__Jay (2015)](https://web.archive.org/web/20190525015950/https://www.goodreads.com/topic/show/2279173-search-results) 25 | or [Carri (2016)](https://web.archive.org/web/20190525015857/https://www.goodreads.com/topic/show/18123885-search-functionality) 26 | or [G.H. (2016)](https://web.archive.org/web/20190525015818/https://www.goodreads.com/topic/show/18034964-search-results) 27 | or [Epper (2016)](https://web.archive.org/web/20190525015727/https://www.goodreads.com/topic/show/18223264-search-books-filter-results) 28 | or [Shanna_redwind (2016)](https://web.archive.org/web/20190525015634/https://www.goodreads.com/topic/show/18208444-search-very-frustrating) 29 | or [Lisa (2017)](https://web.archive.org/web/20190525015546/https://www.goodreads.com/topic/show/19114134-search-fundction-when-looking-for-books) 30 | or [Jenna (2017)](https://web.archive.org/web/20190525015501/https://www.goodreads.com/topic/show/18901296-please-improve-search-function) 31 | or [SL (2018)](https://web.archive.org/web/20190525020028/https://www.goodreads.com/topic/show/19387052-search-needs-improvement) 32 | or [Mimi (2018)](https://web.archive.org/web/20190525015405/https://www.goodreads.com/topic/show/19272652-refined-search) 33 | or [Ian (2016)](https://web.archive.org/web/20190525015312/https://www.goodreads.com/topic/show/18115612-search-prioritise-exact-matches): 34 | 35 | >I kind of wonder if I'm the only one who finds this annoying. If you search 36 | >for a book and type in the title of the book, exact matches to what you type 37 | >are rarely the first listed. 
38 | 39 | 40 | ## This 41 | 42 | [![Screenshot](img/search.png?raw=true "Search result for 'Linux'")](https://andre-st.github.io/search-linux.html) 43 | 44 | 45 | ## How to generate this on a GNU/Linux operating system 46 | 47 | 1. [Install the toolbox](../README.md#Getting-started) 48 | 2. at the prompt, enter: 49 | 50 | ```console 51 | $ ./search.pl --help 52 | $ ./search.pl YOURKEYWORD 53 | 54 | Searching books: 55 | 56 | about..... YOURKEYWORD 57 | rated by.. 5 members or more 58 | order by.. stars, num_ratings, year 59 | progress.. 100% 60 | 61 | Writing search result (N=275) to "./list-out/search-YOURKEYWORD.html"... 62 | Total time: 3 minutes 63 | ``` 64 | 65 | 66 | ## Observations and limitations 67 | 68 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 69 | - start the program with defaults and re-run to fine-tune with parameters later (previously downloaded resources are reused so it's faster than the first run); you might not know how many ratings actually exist, and if `--ratings` is too high you will not get any results (`N=0`) 70 | - [garbage in, garbage out](https://en.wikipedia.org/wiki/Garbage_in,_garbage_out) 71 | 72 | 73 | ## Feedback 74 | 75 | If you like this project, give it a star on GitHub. 76 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 77 | or see the [AUTHORS.md](../AUTHORS.md) file. 
78 | 79 | 80 | ## See also 81 | 82 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 83 | - [friendnet.pl](friendnet.md) - Social network analysis 84 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 85 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 86 | - [similarauth.pl](similarauth.md) - Find all similar authors 87 | - [likeminded.pl](likeminded.md) - Finding people based on the books they've read 88 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 89 | 90 | 91 | -------------------------------------------------------------------------------- /help/similarauth.md: -------------------------------------------------------------------------------- 1 | # similarauth.pl 2 | 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg) 4 | 5 | 6 | ## Finding all similar authors 7 | 8 | From the _Goodreads Feedback_ forum, 9 | [Anne (2018)](https://web.archive.org/web/20190525014222/https://www.goodreads.com/topic/show/19438988-finding-similar-authors): 10 | > I like Laura Kinsale and Loretta Chase. If I do some digging, I discover that 11 | > I might like Judith Ivory too, because she is on the similar authors list of 12 | > both authors. And if I like Judith Ivory, too, I certainly should try Sherry 13 | > Thomas, because she is on all lists of those three authors 14 | 15 | 16 | 17 | ## This 18 | 19 | ![Screenshot](img/similarauth.png?raw=true "Screenshot") 20 | 21 | 22 | 23 | ## How to generate this on a GNU/Linux operating system 24 | 25 | 1. [Install the toolbox](../README.md#Getting-started) 26 | 2. at the prompt, enter: 27 | 28 | ```console 29 | $ ./similarauth.pl --help 30 | $ ./similarauth.pl goodlogin@example.com 31 | 32 | Enter GR password for goodlogin@example.com: **************** 33 | Signing in to Goodreads... OK 34 | Loading books from "ALL" may take a while... 
108 books 35 | Loading similar authors for 96 authors: 36 | [ 0%] Huhn, Willy #17326001 0 similar 2.56s 37 | [ 1%] Gse, Don Murdoch #8506208 24 similar 2.13s 38 | [ 2%] Foucault, Michel #1260 19 similar 2.41s 39 | [ 3%] Siedersleben, Johannes #1878894 0 similar 1.11s 40 | [ 4%] Mattheck, Claus #1960 0 similar 3.27s 41 | [ 5%] Dillmann, Renate #9835498 0 similar 1.51s 42 | [ 6%] Decker, Peter #361391 0 similar 2.42s 43 | [ 7%] Bockelmann, Eske #6219827 0 similar 2.20s 44 | ... 45 | [100%] O'Neill, Ryan "Elfmaster" #15065556 0 similar 2.43s 46 | Done. 47 | Writing authors (N=360) to "./list-out/similarauth-18418712.html"... 48 | Total time: 8 minutes 49 | ``` 50 | 51 | 52 | **Note:** 53 | 54 | You can break the process with CTRL-C and continue later 55 | without having to re-read all online sources again, as reading from 56 | Goodreads.com is very time consuming. The script internally uses a 57 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/. 58 | 59 | 60 | 61 | ## Observations and limitations 62 | 63 | - long runtime: Goodreads slows down all requests and we have to load a lot of data 64 | - many authors (in my shelves) have no "similar authors" data on Goodreads 65 | - actual value of this isn't the 'seen' part but just having a long list with 66 | similar but yet unknown authors 67 | 68 | 69 | 70 | ## Feedback 71 | 72 | If you like this project, give it a star on GitHub. 73 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 74 | or see the [AUTHORS.md](../AUTHORS.md) file. 
75 | 76 | 77 | ## See also 78 | 79 | - [friendrated.pl](friendrated.md) - Books common among the people you follow 80 | - [friendnet.pl](friendnet.md) - Social network analysis 81 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow 82 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book 83 | - [likeminded.pl](likeminded.md) - Finding people based on the books they've read 84 | - [search.pl](search.md) - Sort books-search result by popularity or date published 85 | - [savreviews.pl](savreviews.md) - Get all reviews of a book 86 | 87 | -------------------------------------------------------------------------------- /list-in/README.md: -------------------------------------------------------------------------------- 1 | # Dictionaries 2 | 3 | ## Purpose 4 | 5 | As far as the reviews are concerned, the official Goodreads API typically gets you a maximum of 300 6 | short _excerpts_ ([here](https://www.goodreads.com/topic/show/19512142-how-to-get-whole-body-of-book-review), 7 | [here](https://www.goodreads.com/topic/show/12070102-review-is-truncated?comment=130838734#comment_130838734) 8 | or [here](https://www.goodreads.com/topic/show/19455087-unable-to-get-book-reviews-by-book-id?comment=182375978#comment_182375978)). Goodreads does not use this API on its own website; it is a side project. 9 | They use other mechanisms to display reviews on their website, mechanisms that 10 | are used by the _Toolbox_ programs too ([AJAX](https://en.wikipedia.org/wiki/Ajax_(programming)) endpoints in this case). 11 | These mechanisms have their own limitations: you cannot see all reviews, 12 | but you can search a book's reviews by a keyword and/or filter by the number of stars, age etc. 13 | Toolbox programs such as [savreviews.pl](../help/savreviews.md) or [likeminded.pl](../help/likeminded.md) 14 | use filters and also run a dictionary against this search in order to collect reviews. 
15 | 16 | 17 | ## Results 18 | 19 | | Dictionary | Lines | Minutes | "To the Lighthouse"
5514 text reviews | "Mrs Dalloway"
7376 text reviews | 20 | |:-------------------------|------:|--------:|-------------:|--------------:| 21 | | _none (filters only)_ | - | | 948 or 17% | _untested_ 22 | | gram-en-l.lst | 3349 | 111 | 3057 or 55% | _untested_ 23 | | gram-en-s.lst | 390 | | _untested_ | _untested_ 24 | | word-en-1k.lst | 1000 | 33 | 4962 or 90% | 6413 or 87% 25 | | word-en-s.lst | 114 | | _untested_ | _untested_ 26 | | gram-en-s,word-en-1k.lst | 1390 | | _untested_ | _untested_ 27 | | gram-en-l,word-en-1k.lst | 4349 | 144 | 5127 or 93% | 6715 or 91% 28 | 29 | No duplicate reviewers, but could theoretically contain duplicate reviews 30 | posted by different members, which would be counted by Goodreads too. 31 | 32 | 33 | ## Naming Conventions 34 | 35 | File names: `${TYPE4LETTERCODE}-${LANGUAGE2LETTERCODE}-${SIZE}.lst` with 36 | size `l` meaning large dictionaries, `s` meaning small dictionaries, 37 | 1k meaning 1000 lines, 3k meaning 3000 lines, 38 | extension `lst` meaning "list". Lists are ASCII files with one word per line. 39 | Comma denotes combined dictionaries, e.g., `gram-en-l,word-en-1k.lst`. 40 | 41 | Smaller dictionaries are usually a subset of the larger ones, so you should 42 | start with the smaller ones to test. Since all Toolbox programs cache their 43 | results for some days, switching to the larger dictionaries in addition 44 | will not waste time with downloading already present results. 45 | 46 | 47 | ## File: gram-en-l.lst 48 | 49 | most frequent English n-grams first 50 | 51 | 52 | ## File: gram-en-s.lst 53 | 54 | most frequent English trigrams from `gram-en-l.lst` tested against 55 | Harry Potter reviews: I only saved trigrams which led to 10-30 unique(!) hits, 56 | best first. Appended most frequent English trigrams which are not already 57 | present in the Harry Potter set. Works better with a larger set of available 58 | reviews. Randomization yielded no improvements (rather the opposite). 59 | Often seems as good as the whole `gram-en-l.lst`. 
60 | 61 | 62 | ## File: word-en-1k.lst 63 | 64 | most frequent English words first. 65 | Performed better than the n-gram-based dictionaries 66 | 67 | 68 | ## File: word-en-s.lst 69 | 70 | [Parts of speech](https://en.wikipedia.org/wiki/Most_common_words_in_English#Parts_of_speech) 71 | 72 | 73 | ## File: gram-en-l,word-en-1k.lst 74 | 75 | little more results than just word-en-1k.lst 76 | but way more search time (1000 vs 4349) 77 | 78 | ## File: dict.lst 79 | 80 | A symlink to any of the other dictionary files. Toolbox programs default to this dictionary-symlink, so you can change it for all programs at once. 81 | 82 | 83 | -------------------------------------------------------------------------------- /list-in/dict.lst: -------------------------------------------------------------------------------- 1 | word-en-1k.lst -------------------------------------------------------------------------------- /list-in/gram-en-s.lst: -------------------------------------------------------------------------------- 1 | 3 2 | 4 3 | 5 4 | let 5 | wit 6 | ing 7 | put 8 | ten 9 | met 10 | ass 11 | ini 12 | bit 13 | lit 14 | men 15 | job 16 | get 17 | rat 18 | cut 19 | mix 20 | our 21 | are 22 | owe 23 | win 24 | all 25 | con 26 | hit 27 | the 28 | use 29 | pre 30 | ran 31 | ist 32 | ate 33 | you 34 | art 35 | per 36 | era 37 | ton 38 | her 39 | end 40 | ter 41 | lot 42 | old 43 | one 44 | and 45 | low 46 | fit 47 | was 48 | fan 49 | too 50 | ill 51 | dec 52 | add 53 | tho 54 | pay 55 | row 56 | tra 57 | ver 58 | act 59 | mad 60 | sat 61 | awe 62 | nor 63 | ive 64 | can 65 | new 66 | car 67 | had 68 | ish 69 | for 70 | tan 71 | pro 72 | she 73 | lea 74 | ice 75 | not 76 | age 77 | two 78 | cat 79 | got 80 | off 81 | far 82 | lay 83 | wee 84 | tea 85 | try 86 | day 87 | kid 88 | est 89 | sin 90 | way 91 | red 92 | etc 93 | par 94 | sit 95 | ser 96 | com 97 | cos 98 | led 99 | sum 100 | fed 101 | see 102 | own 103 | son 104 | mum 105 | por 106 | out 107 | via 108 | saw 109 | fun 110 | 
rid 111 | ear 112 | ink 113 | now 114 | eat 115 | his 116 | hes 117 | mid 118 | but 119 | eye 120 | han 121 | ugh 122 | ron 123 | bar 124 | who 125 | ask 126 | dit 127 | yea 128 | fav 129 | how 130 | pop 131 | bad 132 | due 133 | bug 134 | don 135 | sci 136 | sad 137 | set 138 | ame 139 | hot 140 | man 141 | dry 142 | ago 143 | air 144 | lie 145 | fly 146 | run 147 | did 148 | bat 149 | law 150 | bed 151 | tip 152 | leg 153 | cry 154 | has 155 | mom 156 | tie 157 | bag 158 | yes 159 | boy 160 | top 161 | ese 162 | gem 163 | him 164 | bus 165 | map 166 | war 167 | fix 168 | amo 169 | odd 170 | wat 171 | its 172 | app 173 | tal 174 | owl 175 | mil 176 | dog 177 | las 178 | pun 179 | arc 180 | nth 181 | che 182 | buy 183 | egg 184 | fat 185 | der 186 | dia 187 | ler 188 | mal 189 | pig 190 | key 191 | tom 192 | mis 193 | pet 194 | sun 195 | beg 196 | big 197 | alt 198 | hid 199 | que 200 | dat 201 | any 202 | box 203 | eso 204 | sex 205 | del 206 | rip 207 | nos 208 | sea 209 | sky 210 | ama 211 | leo 212 | hog 213 | und 214 | ban 215 | sus 216 | lee 217 | aug 218 | mon 219 | mas 220 | til 221 | den 222 | ans 223 | hut 224 | yer 225 | aka 226 | itu 227 | bet 228 | pen 229 | dig 230 | net 231 | nov 232 | asi 233 | boa 234 | ele 235 | los 236 | eve 237 | lei 238 | dio 239 | una 240 | vas 241 | tak 242 | gap 243 | ale 244 | ont 245 | fue 246 | min 247 | tag 248 | les 249 | bow 250 | non 251 | hal 252 | sem 253 | imo 254 | rob 255 | uni 256 | sue 257 | ein 258 | ook 259 | dan 260 | aun 261 | boo 262 | fin 263 | tem 264 | qui 265 | ins 266 | arm 267 | nel 268 | ora 269 | ref 270 | tim 271 | ani 272 | hop 273 | pan 274 | sam 275 | chi 276 | hat 277 | ada 278 | lil 279 | esa 280 | nut 281 | poi 282 | inc 283 | sub 284 | api 285 | pat 286 | aid 287 | umm 288 | bin 289 | lad 290 | def 291 | uno 292 | doo 293 | oli 294 | oct 295 | nit 296 | mes 297 | vol 298 | lap 299 | bir 300 | din 301 | pra 302 | pie 303 | tha 304 | mit 305 | dis 306 | sis 307 | uit 308 | ect 309 | sur 310 | 
cap 311 | ben 312 | mai 313 | int 314 | ali 315 | ilk 316 | pub 317 | max 318 | dos 319 | mia 320 | eva 321 | dal 322 | raw 323 | flu 324 | wer 325 | ile 326 | des 327 | gue 328 | dar 329 | pot 330 | bon 331 | elf 332 | har 333 | ven 334 | dip 335 | log 336 | ide 337 | apa 338 | mud 339 | wel 340 | bom 341 | woo 342 | ray 343 | cup 344 | toe 345 | ant 346 | aim 347 | gar 348 | ero 349 | ion 350 | tio 351 | ati 352 | ent 353 | ess 354 | ine 355 | nce 356 | res 357 | cti 358 | tic 359 | sth 360 | sta 361 | ste 362 | ica 363 | tin 364 | str 365 | tor 366 | 367 | rea 368 | ite 369 | lin 370 | ble 371 | rin 372 | cal 373 | nte 374 | anc 375 | ity 376 | ure 377 | oun 378 | eri 379 | ain 380 | ers 381 | nal 382 | iti 383 | ted 384 | tur 385 | sti 386 | ons 387 | ort 388 | lan 389 | lat 390 | ell 391 | igh 392 | tri 393 | nes 394 | ial 395 | ous 396 | gra 397 | -------------------------------------------------------------------------------- /list-in/test.lst: -------------------------------------------------------------------------------- 1 | dict.lst -------------------------------------------------------------------------------- /list-in/word-en-1k.lst: -------------------------------------------------------------------------------- 1 | a 2 | ability 3 | able 4 | about 5 | above 6 | accept 7 | according 8 | account 9 | across 10 | act 11 | action 12 | activity 13 | actually 14 | add 15 | address 16 | administration 17 | admit 18 | adult 19 | affect 20 | after 21 | again 22 | against 23 | age 24 | agency 25 | agent 26 | ago 27 | agree 28 | agreement 29 | ahead 30 | air 31 | all 32 | allow 33 | almost 34 | alone 35 | along 36 | already 37 | also 38 | although 39 | always 40 | American 41 | among 42 | amount 43 | analysis 44 | and 45 | animal 46 | another 47 | answer 48 | any 49 | anyone 50 | anything 51 | appear 52 | apply 53 | approach 54 | area 55 | argue 56 | arm 57 | around 58 | arrive 59 | art 60 | article 61 | artist 62 | as 63 | ask 64 | assume 65 | at 66 | attack 
67 | attention 68 | attorney 69 | audience 70 | author 71 | authority 72 | available 73 | avoid 74 | away 75 | baby 76 | back 77 | bad 78 | bag 79 | ball 80 | bank 81 | bar 82 | base 83 | be 84 | beat 85 | beautiful 86 | because 87 | become 88 | bed 89 | before 90 | begin 91 | behavior 92 | behind 93 | believe 94 | benefit 95 | best 96 | better 97 | between 98 | beyond 99 | big 100 | bill 101 | billion 102 | bit 103 | black 104 | blood 105 | blue 106 | board 107 | body 108 | book 109 | born 110 | both 111 | box 112 | boy 113 | break 114 | bring 115 | brother 116 | budget 117 | build 118 | building 119 | business 120 | but 121 | buy 122 | by 123 | call 124 | camera 125 | campaign 126 | can 127 | cancer 128 | candidate 129 | capital 130 | car 131 | card 132 | care 133 | career 134 | carry 135 | case 136 | catch 137 | cause 138 | cell 139 | center 140 | central 141 | century 142 | certain 143 | certainly 144 | chair 145 | challenge 146 | chance 147 | change 148 | character 149 | charge 150 | check 151 | child 152 | choice 153 | choose 154 | church 155 | citizen 156 | city 157 | civil 158 | claim 159 | class 160 | clear 161 | clearly 162 | close 163 | coach 164 | cold 165 | collection 166 | college 167 | color 168 | come 169 | commercial 170 | common 171 | community 172 | company 173 | compare 174 | computer 175 | concern 176 | condition 177 | conference 178 | Congress 179 | consider 180 | consumer 181 | contain 182 | continue 183 | control 184 | cost 185 | could 186 | country 187 | couple 188 | course 189 | court 190 | cover 191 | create 192 | crime 193 | cultural 194 | culture 195 | cup 196 | current 197 | customer 198 | cut 199 | dark 200 | data 201 | daughter 202 | day 203 | dead 204 | deal 205 | death 206 | debate 207 | decade 208 | decide 209 | decision 210 | deep 211 | defense 212 | degree 213 | Democrat 214 | democratic 215 | describe 216 | design 217 | despite 218 | detail 219 | determine 220 | develop 221 | development 222 | die 223 | difference 224 | 
different 225 | difficult 226 | dinner 227 | direction 228 | director 229 | discover 230 | discuss 231 | discussion 232 | disease 233 | do 234 | doctor 235 | dog 236 | door 237 | down 238 | draw 239 | dream 240 | drive 241 | drop 242 | drug 243 | during 244 | each 245 | early 246 | east 247 | easy 248 | eat 249 | economic 250 | economy 251 | edge 252 | education 253 | effect 254 | effort 255 | eight 256 | either 257 | election 258 | else 259 | employee 260 | end 261 | energy 262 | enjoy 263 | enough 264 | enter 265 | entire 266 | environment 267 | environmental 268 | especially 269 | establish 270 | even 271 | evening 272 | event 273 | ever 274 | every 275 | everybody 276 | everyone 277 | everything 278 | evidence 279 | exactly 280 | example 281 | executive 282 | exist 283 | expect 284 | experience 285 | expert 286 | explain 287 | eye 288 | face 289 | fact 290 | factor 291 | fail 292 | fall 293 | family 294 | far 295 | fast 296 | father 297 | fear 298 | federal 299 | feel 300 | feeling 301 | few 302 | field 303 | fight 304 | figure 305 | fill 306 | film 307 | final 308 | finally 309 | financial 310 | find 311 | fine 312 | finger 313 | finish 314 | fire 315 | firm 316 | first 317 | fish 318 | five 319 | floor 320 | fly 321 | focus 322 | follow 323 | food 324 | foot 325 | for 326 | force 327 | foreign 328 | forget 329 | form 330 | former 331 | forward 332 | four 333 | free 334 | friend 335 | from 336 | front 337 | full 338 | fund 339 | future 340 | game 341 | garden 342 | gas 343 | general 344 | generation 345 | get 346 | girl 347 | give 348 | glass 349 | go 350 | goal 351 | good 352 | government 353 | great 354 | green 355 | ground 356 | group 357 | grow 358 | growth 359 | guess 360 | gun 361 | guy 362 | hair 363 | half 364 | hand 365 | hang 366 | happen 367 | happy 368 | hard 369 | have 370 | he 371 | head 372 | health 373 | hear 374 | heart 375 | heat 376 | heavy 377 | help 378 | her 379 | here 380 | herself 381 | high 382 | him 383 | himself 384 | his 385 | 
history 386 | hit 387 | hold 388 | home 389 | hope 390 | hospital 391 | hot 392 | hotel 393 | hour 394 | house 395 | how 396 | however 397 | huge 398 | human 399 | hundred 400 | husband 401 | I 402 | idea 403 | identify 404 | if 405 | image 406 | imagine 407 | impact 408 | important 409 | improve 410 | in 411 | include 412 | including 413 | increase 414 | indeed 415 | indicate 416 | individual 417 | industry 418 | information 419 | inside 420 | instead 421 | institution 422 | interest 423 | interesting 424 | international 425 | interview 426 | into 427 | investment 428 | involve 429 | issue 430 | it 431 | item 432 | its 433 | itself 434 | job 435 | join 436 | just 437 | keep 438 | key 439 | kid 440 | kill 441 | kind 442 | kitchen 443 | know 444 | knowledge 445 | land 446 | language 447 | large 448 | last 449 | late 450 | later 451 | laugh 452 | law 453 | lawyer 454 | lay 455 | lead 456 | leader 457 | learn 458 | least 459 | leave 460 | left 461 | leg 462 | legal 463 | less 464 | let 465 | letter 466 | level 467 | lie 468 | life 469 | light 470 | like 471 | likely 472 | line 473 | list 474 | listen 475 | little 476 | live 477 | local 478 | long 479 | look 480 | lose 481 | loss 482 | lot 483 | love 484 | low 485 | machine 486 | magazine 487 | main 488 | maintain 489 | major 490 | majority 491 | make 492 | man 493 | manage 494 | management 495 | manager 496 | many 497 | market 498 | marriage 499 | material 500 | matter 501 | may 502 | maybe 503 | me 504 | mean 505 | measure 506 | media 507 | medical 508 | meet 509 | meeting 510 | member 511 | memory 512 | mention 513 | message 514 | method 515 | middle 516 | might 517 | military 518 | million 519 | mind 520 | minute 521 | miss 522 | mission 523 | model 524 | modern 525 | moment 526 | money 527 | month 528 | more 529 | morning 530 | most 531 | mother 532 | mouth 533 | move 534 | movement 535 | movie 536 | Mr 537 | Mrs 538 | much 539 | music 540 | must 541 | my 542 | myself 543 | name 544 | nation 545 | national 546 | 
natural 547 | nature 548 | near 549 | nearly 550 | necessary 551 | need 552 | network 553 | never 554 | new 555 | news 556 | newspaper 557 | next 558 | nice 559 | night 560 | no 561 | none 562 | nor 563 | north 564 | not 565 | note 566 | nothing 567 | notice 568 | now 569 | n't 570 | number 571 | occur 572 | of 573 | off 574 | offer 575 | office 576 | officer 577 | official 578 | often 579 | oh 580 | oil 581 | ok 582 | old 583 | on 584 | once 585 | one 586 | only 587 | onto 588 | open 589 | operation 590 | opportunity 591 | option 592 | or 593 | order 594 | organization 595 | other 596 | others 597 | our 598 | out 599 | outside 600 | over 601 | own 602 | owner 603 | page 604 | pain 605 | painting 606 | paper 607 | parent 608 | part 609 | participant 610 | particular 611 | particularly 612 | partner 613 | party 614 | pass 615 | past 616 | patient 617 | pattern 618 | pay 619 | peace 620 | people 621 | per 622 | perform 623 | performance 624 | perhaps 625 | period 626 | person 627 | personal 628 | phone 629 | physical 630 | pick 631 | picture 632 | piece 633 | place 634 | plan 635 | plant 636 | play 637 | player 638 | PM 639 | point 640 | police 641 | policy 642 | political 643 | politics 644 | poor 645 | popular 646 | population 647 | position 648 | positive 649 | possible 650 | power 651 | practice 652 | prepare 653 | present 654 | president 655 | pressure 656 | pretty 657 | prevent 658 | price 659 | private 660 | probably 661 | problem 662 | process 663 | produce 664 | product 665 | production 666 | professional 667 | professor 668 | program 669 | project 670 | property 671 | protect 672 | prove 673 | provide 674 | public 675 | pull 676 | purpose 677 | push 678 | put 679 | quality 680 | question 681 | quickly 682 | quite 683 | race 684 | radio 685 | raise 686 | range 687 | rate 688 | rather 689 | reach 690 | read 691 | ready 692 | real 693 | reality 694 | realize 695 | really 696 | reason 697 | receive 698 | recent 699 | recently 700 | recognize 701 | record 702 | 
red 703 | reduce 704 | reflect 705 | region 706 | relate 707 | relationship 708 | religious 709 | remain 710 | remember 711 | remove 712 | report 713 | represent 714 | Republican 715 | require 716 | research 717 | resource 718 | respond 719 | response 720 | responsibility 721 | rest 722 | result 723 | return 724 | reveal 725 | rich 726 | right 727 | rise 728 | risk 729 | road 730 | rock 731 | role 732 | room 733 | rule 734 | run 735 | safe 736 | same 737 | save 738 | say 739 | scene 740 | school 741 | science 742 | scientist 743 | score 744 | sea 745 | season 746 | seat 747 | second 748 | section 749 | security 750 | see 751 | seek 752 | seem 753 | sell 754 | send 755 | senior 756 | sense 757 | series 758 | serious 759 | serve 760 | service 761 | set 762 | seven 763 | several 764 | sex 765 | sexual 766 | shake 767 | share 768 | she 769 | shoot 770 | short 771 | shot 772 | should 773 | shoulder 774 | show 775 | side 776 | sign 777 | significant 778 | similar 779 | simple 780 | simply 781 | since 782 | sing 783 | single 784 | sister 785 | sit 786 | site 787 | situation 788 | six 789 | size 790 | skill 791 | skin 792 | small 793 | smile 794 | so 795 | social 796 | society 797 | soldier 798 | some 799 | somebody 800 | someone 801 | something 802 | sometimes 803 | son 804 | song 805 | soon 806 | sort 807 | sound 808 | source 809 | south 810 | southern 811 | space 812 | speak 813 | special 814 | specific 815 | speech 816 | spend 817 | sport 818 | spring 819 | staff 820 | stage 821 | stand 822 | standard 823 | star 824 | start 825 | state 826 | statement 827 | station 828 | stay 829 | step 830 | still 831 | stock 832 | stop 833 | store 834 | story 835 | strategy 836 | street 837 | strong 838 | structure 839 | student 840 | study 841 | stuff 842 | style 843 | subject 844 | success 845 | successful 846 | such 847 | suddenly 848 | suffer 849 | suggest 850 | summer 851 | support 852 | sure 853 | surface 854 | system 855 | table 856 | take 857 | talk 858 | task 859 | tax 860 | 
teach 861 | teacher 862 | team 863 | technology 864 | television 865 | tell 866 | ten 867 | tend 868 | term 869 | test 870 | than 871 | thank 872 | that 873 | the 874 | their 875 | them 876 | themselves 877 | then 878 | theory 879 | there 880 | these 881 | they 882 | thing 883 | think 884 | third 885 | this 886 | those 887 | though 888 | thought 889 | thousand 890 | threat 891 | three 892 | through 893 | throughout 894 | throw 895 | thus 896 | time 897 | to 898 | today 899 | together 900 | tonight 901 | too 902 | top 903 | total 904 | tough 905 | toward 906 | town 907 | trade 908 | traditional 909 | training 910 | travel 911 | treat 912 | treatment 913 | tree 914 | trial 915 | trip 916 | trouble 917 | true 918 | truth 919 | try 920 | turn 921 | TV 922 | two 923 | type 924 | under 925 | understand 926 | unit 927 | until 928 | up 929 | upon 930 | us 931 | use 932 | usually 933 | value 934 | various 935 | very 936 | victim 937 | view 938 | violence 939 | visit 940 | voice 941 | vote 942 | wait 943 | walk 944 | wall 945 | want 946 | war 947 | watch 948 | water 949 | way 950 | we 951 | weapon 952 | wear 953 | week 954 | weight 955 | well 956 | west 957 | western 958 | what 959 | whatever 960 | when 961 | where 962 | whether 963 | which 964 | while 965 | white 966 | who 967 | whole 968 | whom 969 | whose 970 | why 971 | wide 972 | wife 973 | will 974 | win 975 | wind 976 | window 977 | wish 978 | with 979 | within 980 | without 981 | woman 982 | wonder 983 | word 984 | work 985 | worker 986 | world 987 | worry 988 | would 989 | write 990 | writer 991 | wrong 992 | yard 993 | yeah 994 | year 995 | yes 996 | yet 997 | you 998 | young 999 | your 1000 | yourself 1001 | 1002 | -------------------------------------------------------------------------------- /list-in/word-en-s.lst: -------------------------------------------------------------------------------- 1 | time 2 | person 3 | year 4 | way 5 | day 6 | thing 7 | man 8 | world 9 | life 10 | hand 11 | part 12 | child 13 | 
eye 14 | woman 15 | place 16 | work 17 | week 18 | case 19 | point 20 | government 21 | company 22 | number 23 | group 24 | problem 25 | fact 26 | be 27 | have 28 | do 29 | say 30 | get 31 | make 32 | go 33 | know 34 | take 35 | see 36 | come 37 | think 38 | look 39 | want 40 | give 41 | use 42 | find 43 | tell 44 | ask 45 | work 46 | seem 47 | feel 48 | try 49 | leave 50 | call 51 | good 52 | new 53 | first 54 | last 55 | long 56 | great 57 | little 58 | own 59 | other 60 | old 61 | right 62 | big 63 | high 64 | different 65 | small 66 | large 67 | next 68 | early 69 | young 70 | important 71 | few 72 | public 73 | bad 74 | same 75 | able 76 | to 77 | of 78 | in 79 | for 80 | on 81 | with 82 | at 83 | by 84 | from 85 | up 86 | about 87 | into 88 | over 89 | after 90 | the 91 | and 92 | a 93 | that 94 | I 95 | it 96 | not 97 | he 98 | as 99 | you 100 | this 101 | but 102 | his 103 | they 104 | her 105 | she 106 | or 107 | an 108 | will 109 | my 110 | one 111 | all 112 | would 113 | there 114 | their 115 | -------------------------------------------------------------------------------- /list-out/README.md: -------------------------------------------------------------------------------- 1 | # List-out 2 | 3 | ## Purpose 4 | 5 | This directory stores all results that were generated by the Toolbox programs. 
6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /recentrated.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- MAN PAGE --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | recentrated - know when people rate or write reviews about a book 10 | 11 | 12 | =head1 SYNOPSIS 13 | 14 | B 15 | [B<-t> F] 16 | [B<-f> F] 17 | [B<-u> F] 18 | [B<-s> F] 19 | [B<-q>] 20 | [I] 21 | [F] 22 | 23 | 24 | =head1 OPTIONS 25 | 26 | =over 4 27 | 28 | 29 | =item I 30 | 31 | add an unsubscribe email header and a contact address for 32 | administrative issues to the program output. 33 | This also appends a helpful email signature. 34 | It limits the number of books in the mail, with the rest to be 35 | mailed the next time (if I does not equal I). 36 | Fewer books mean shorter program runtimes for each receiver 37 | (GitHub #23). 38 | 39 | 40 | =item I 41 | 42 | the password that is required for the Goodreads website login 43 | 44 | 45 | =item B<-t, --to>=F 46 | 47 | by default "TO:" mail header is set to I. 48 | This tool does not send mails on its own. 49 | You would have to pipe its output into a C program. 50 | 51 | 52 | =item B<-f, --from>=F 53 | 54 | by default "FROM:" mail header is set to I. 55 | This tool does not send mails on its own. 56 | You would have to pipe its output into a C program. 57 | 58 | 59 | =item B<-u, --userid>=F 60 | 61 | check another member instead of the one identified by the login-mail 62 | and password arguments. You find the ID by looking at the shelf URLs. 63 | 64 | 65 | =item B<-s, --shelf>=F 66 | 67 | name of the shelf with a selection of books, default is "#ALL#". 68 | If the name contains special characters use a URL-encoded name. 
69 | You can use this parameter multiple times if there is more than 1 shelf to 70 | include (boolean OR operation), see the examples section of this man page. 71 | Use B<--shelf>=shelf1,shelf2,shelf3 to intersect shelves (Intersection 72 | requires password). 73 | 74 | 75 | =item B<-q, --textonly> 76 | 77 | output links to text-reviews only. This drops all non-text 78 | ratings (stars only) and the smaller text-reviews too ("Loved it so much!"). 79 | This option is useful if you have many books which get many ratings every day. 80 | But it shifts the use case from finding new people to merely reading 81 | new ideas about a book. 82 | 83 | 84 | =item B<-?, --help> 85 | 86 | show full man page 87 | 88 | 89 | =back 90 | 91 | 92 | =head1 EXAMPLES 93 | 94 | $ ./recentrated.pl my@mail.com 95 | 96 | $ ./recentrated.pl --shelf=read my@mail.com 97 | 98 | $ ./recentrated.pl --userid=55554444 --shelf=read --to=my@mail.com 99 | 100 | $ ./recentrated.pl -u 55554444 -s read -t friend@mail.com admin@mail.com 101 | 102 | 103 | =head1 FILES 104 | 105 | Log written to F<./list-out/recentrated/log.txt> 106 | 107 | Database stored in F<./list-out/recentrated/> 108 | 109 | 110 | =head1 REPORTING BUGS 111 | 112 | Report bugs to or use Github's issue tracker 113 | 114 | 115 | 116 | =head1 COPYRIGHT 117 | 118 | This is free software. You may redistribute copies of it under the terms of 119 | the GNU General Public License . 120 | There is NO WARRANTY, to the extent permitted by law. 
121 | 122 | 123 | =head1 SEE ALSO 124 | 125 | More info in ./help/recentrated.md 126 | 127 | 128 | =head1 VERSION 129 | 130 | 2022-09-25 (Since 2018-01-09) 131 | 132 | =cut 133 | 134 | #<--------------------------------- 79 chars --------------------------------->| 135 | 136 | 137 | use strict; 138 | use warnings; 139 | use locale; 140 | use 5.18.0; 141 | 142 | # Perl core: 143 | use FindBin; 144 | use local::lib "$FindBin::Bin/lib/local/"; 145 | use lib "$FindBin::Bin/lib/"; 146 | use POSIX qw( locale_h ); 147 | use List::Util qw( max ); 148 | use File::Spec; # Platform indep. directory separator 149 | use Time::Piece; 150 | use Getopt::Long; 151 | use Pod::Usage; 152 | # Third party: 153 | use Log::Any '$_log', default_adapter => [ 'File' => File::Spec->catfile( $FindBin::Bin, 'list-out', 'recentrated', 'log.txt' )]; 154 | use Text::CSV qw( csv ); 155 | use List::MoreUtils qw( any ); 156 | # Ours: 157 | use Goodscrapes; 158 | 159 | 160 | 161 | # ---------------------------------------------------------------------------- 162 | # Program configuration: 163 | # 164 | pod2usage( -verbose => 2 ) if $#ARGV < 0; 165 | setlocale( LC_CTYPE, "en_US" ); # GR dates all en_US 166 | 167 | our $TEXTONLY = 0; 168 | our @SHELVES; 169 | our $USERID; 170 | our $MAILTO; 171 | our $MAILFROM; 172 | 173 | GetOptions( 'userid|u=s' => \$USERID, 174 | 'shelf|s=s' => \@SHELVES, 175 | 'to|t=s' => \$MAILTO, 176 | 'from|f=s' => \$MAILFROM, 177 | 'textonly|q' => \$TEXTONLY, 178 | 'help|?' 
=> sub{ pod2usage( -verbose => 2 ) }); 179 | 180 | gsetopt( ignore_errors => 1 ); # Don't get stuck, may get book or review next time 181 | 182 | our $LOGINMAIL = $ARGV[0]; 183 | our $LOGINPASS = $ARGV[1]; 184 | $MAILFROM = $LOGINMAIL if !$MAILFROM; 185 | $MAILTO = $LOGINMAIL if !$MAILTO; 186 | 187 | glogin( usermail => $LOGINMAIL, # Login required for reading private members 188 | userpass => $LOGINPASS, # Asks pw if omitted 189 | r_userid => \$USERID ) 190 | if $LOGINPASS; 191 | 192 | 193 | say( "[CRIT ] Missing --userid option or goodloginmail argument." ) 194 | if !$USERID; 195 | 196 | 197 | # Path to the database files which contain last check states 198 | my $dbname = sprintf( "%s-%s.csv", $USERID, join( '-', @SHELVES )); 199 | our $DBPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', 'recentrated', $dbname ); 200 | 201 | 202 | # The more URLs, the longer and untempting the mail. 203 | # If number exceeded, we link to the book page with *all* reviews. 204 | our $MAX_REVURLS_PER_BOOK = 3; 205 | 206 | 207 | # Limit number of books in the mail and limit the program runtime for non-admins 208 | our $MAX_BOOKS_TO_CHECK = ($MAILFROM && $MAILTO && $MAILFROM ne $MAILTO) ? 50 : 999999; 209 | 210 | 211 | # GR-URLs in mail padded to average length, with "https://" stripped 212 | sub prettyurl{ return sprintf '%-36s', substr( shift, 8 ); } 213 | 214 | 215 | 216 | # ---------------------------------------------------------------------------- 217 | # Looking just at the shelves, we can already see the number of current 218 | # ratings for each individual book. We compare them with the numbers from the 219 | # last check (stored in a CSV-file $db). Only for those books whose numbers 220 | # differ, we actually load the most recent ratings, which gets us info about the 221 | # members who rated the books, how they rated it, and whether they added text. 222 | # 223 | my $db = ( -e $DBPATH ? 
csv( in => $DBPATH, key => 'id' ) : {} ); 224 | my $num_hits = 0; 225 | my %books; 226 | 227 | 228 | greadshelf( from_user_id => $USERID, 229 | ra_from_shelves => \@SHELVES, 230 | rh_into => \%books ); 231 | 232 | 233 | my @added = grep{ !exists $db->{$_} } keys %books; 234 | my @removed = grep{ !exists $books{$_} } keys %{$db}; 235 | 236 | delete $db->{$_} for( @removed ); 237 | 238 | my @oldest_ids = sort{ $db->{$a}->{checked} <=> 239 | $db->{$b}->{checked} } keys %{$db}; # Oldest first 240 | 241 | my $limit = $MAX_BOOKS_TO_CHECK; 242 | for my $id (@oldest_ids) 243 | { 244 | last unless $limit--; # Mail other books the next time 245 | 246 | my $num_new_rat = $books{$id}->{num_ratings} - $db->{$id}->{num_ratings}; 247 | 248 | next unless $num_new_rat > 0; 249 | 250 | my %revs; 251 | my $lastcheck = Time::Piece->strptime( $db->{$id}->{checked} +(60*60*12), '%s' ); 252 | 253 | greadreviews( rh_for_book => $books{$id}, 254 | since => $lastcheck, 255 | rh_into => \%revs, 256 | text_minlen => $TEXTONLY * $GOOD_USEFUL_REVIEW_LEN, 257 | rigor => 0 ); 258 | 259 | $db->{$id}->{num_ratings} = $books{$id}->{num_ratings}; 260 | $db->{$id}->{checked } = time; # GR locale 261 | 262 | next unless %revs; 263 | 264 | my $revcount = scalar keys %revs; 265 | 266 | $num_hits++; 267 | 268 | # E-Mail header and first body line: 269 | if( $MAILTO && $num_hits == 1 ) 270 | { 271 | print ( "To: ${MAILTO}\n" ); 272 | print ( "From: ${MAILFROM}\n" ) if $MAILFROM; 273 | print ( "List-Unsubscribe: \n" ) if $MAILFROM; 274 | print ( "Content-Type: text/plain; charset=utf-8\n" ); 275 | print ( "Subject: New ratings on Goodreads.com\n\n" ); # 2x \n hdr end 276 | printf( "Recently rated books in your \"%s\" shelf:\n", join( '" and "', @SHELVES )); 277 | } 278 | 279 | 280 | # ASCII design isn't responsive, and the GMail web client neither uses fixed 281 | # width fonts nor treats multiple space characters as defined, even on large 282 | # screens. It treats plain text mails as HTML text. 
I don't do HTML mails, 283 | # so mobile GMail web users will have the disadvantage. 284 | # 285 | #<-------------------- 78 chars per line i.a.w. RFC 2822 ---------------------> 286 | # 287 | # "Book Title1" 288 | # www.goodreads.com/book/show/609606 [9 new] 289 | # 290 | # "Book Title2" 291 | # www.goodreads.com/review/show/1234567 [TTT ] 292 | # www.goodreads.com/user/show/2345 [*****] 293 | # 294 | printf( "\n \"%s\"\n", $books{$id}->{title} ); 295 | 296 | if( $revcount > $MAX_REVURLS_PER_BOOK ) 297 | { 298 | printf( " %s [%d new]\n", prettyurl( $books{$id}->{url} ), $revcount ); 299 | } 300 | else 301 | { 302 | printf( " %s %s\n", prettyurl( $_->{text} ? $_->{url} : $_->{rh_user}->{url} ), $_->{rating_str} ) 303 | foreach (values %revs); 304 | } 305 | } 306 | 307 | 308 | # Help user to help himself. 309 | # 310 | # Experience has shown 311 | # that users cannot be dissuaded from their shelf choice by giving advice. 312 | # Therefore, the admin should use the program option `--textonly` 313 | # for large shelves - especially "ALL", "read", "to-read" - by default 314 | # to keep the mails small. 315 | # 316 | print "\n\n\nToo many ratings?\n" 317 | . ">> Create a shelf \"watch-ratings\" or similar on Goodreads.com " 318 | . "with 50-150 special but lesser-known books, " 319 | . "and drop or add books from time to time. " 320 | . "Reply \"shelf watch-ratings\" when ready. " 321 | . "You can also reply \"textonly\" to skip the ratings without text. " 322 | if $MAILFROM && $num_hits > 20; 323 | 324 | 325 | # Without a hint, the user doesn't know whether there are simply no 326 | # stars-only ratings or whether they were intentionally ignored: 327 | # 328 | print "\n\n\nRatings without text were ignored (Reply 'all' otherwise)." 329 | if $TEXTONLY; 330 | 331 | 332 | # E-mail signature block if run for other users: 333 | if( $MAILFROM && $num_hits > 0 ) 334 | { 335 | print "\n\n-- \n" # RFC 3676 sig delimiter (has space char) 336 | . 
" [*** ] 3/5 stars rating without text \n" 337 | . " [ttt ] 3/5 stars rating with tweet-size text \n" 338 | . " [TTT ] 3/5 stars rating with text \n" 339 | . " [9 new] ratings better viewed on the book page \n" 340 | . " \n"; 341 | print " Reply 'textonly' to skip ratings w/o text \n" if !$TEXTONLY; 342 | print " Reply 'shelf NAME' to check alternative shelf \n" 343 | # . " Reply 'hateonly' to see negative rat. only \n" 344 | # . " Reply 'weekly' to avoid daily mails \n" 345 | . " Reply 'unsubscribe' to unsubscribe \n" 346 | . " Via https://andre-st.github.io/goodreads/ \n\n"; 347 | } 348 | 349 | 350 | # Add new books: 351 | $db->{$_} = { 'id' => $_, 352 | 'num_ratings' => $books{$_}->{num_ratings}, 353 | 'checked' => time } for( @added ); 354 | 355 | 356 | # Cronjob audits: 357 | $_log->infof( 'Recently rated: %d of %d books in %s\'s shelf "%s" (check limit %d)', 358 | $num_hits, 359 | scalar keys %books, 360 | $USERID, 361 | join( '" and "', @SHELVES ), 362 | $MAX_BOOKS_TO_CHECK ); 363 | 364 | 365 | # Update database: 366 | my @lines = values %{$db}; 367 | csv( in => \@lines, 368 | out => $DBPATH, 369 | headers => [qw( id num_ratings checked )] ); 370 | 371 | 372 | # Done. 373 | 374 | -------------------------------------------------------------------------------- /savreviews.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- MAN PAGE --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | savreviews - Download reviews for a book 10 | 11 | 12 | =head1 SYNOPSIS 13 | 14 | B 15 | [B<-x> F] 16 | [B<-d> F] 17 | [B<-c> F] 18 | [B<-o> F] 19 | [B<-i>] 20 | F 21 | 22 | You find the F by looking at the book URL. 23 | 24 | 25 | =head1 OPTIONS 26 | 27 | Mandatory arguments to long options are mandatory for short options too. 
28 | 29 | =over 4 30 | 31 | =item B<-x, --rigor>=F 32 | 33 | level 0 = search newest reviews only (max 300 ratings) 34 | level 1 = search with a combination of filters (max 5400 ratings) 35 | level 2 = like 1 plus dict-search if more than 3000 ratings with stall-time of 2 minutes 36 | level n = like 1 plus dict-search with stall-time of n minutes - default is 10 37 | 38 | 39 | =item B<-d, --dict>=F 40 | 41 | default see section FILES 42 | 43 | 44 | =item B<-c, --cache>=F 45 | 46 | number of days to store and reuse downloaded data in F, 47 | default is 7 days. This helps on experimenting with parameters. 48 | Loading data from Goodreads is a time consuming process. 49 | 50 | 51 | =item B<-o, --outdir>=F 52 | 53 | directory path where the final reports will be saved, 54 | default see section FILES 55 | 56 | 57 | =item B<-i, --ignore-errors> 58 | 59 | Don't retry on errors, just keep going. 60 | Sometimes useful if a single Goodreads resource hangs over long periods 61 | and you're okay with some values missing in your result. 62 | This option is not recommended when you run the program unattended. 63 | 64 | 65 | =item B<-?, --help> 66 | 67 | show full man page 68 | 69 | =back 70 | 71 | 72 | =head1 FILES 73 | 74 | F<./list-in/dict.lst> 75 | 76 | F<./list-out/savreviews-book*-stars{0..5}.txt> 77 | 78 | F 79 | 80 | 81 | =head1 EXAMPLES 82 | 83 | $ ./savreviews.pl 333222 84 | 85 | 86 | =head1 REPORTING BUGS 87 | 88 | Report bugs to or use Github's issue tracker 89 | L 90 | 91 | 92 | =head1 COPYRIGHT 93 | 94 | This is free software. You may redistribute copies of it under the terms of 95 | the GNU General Public License L. 96 | There is NO WARRANTY, to the extent permitted by law. 
97 | 98 | 99 | =head1 SEE ALSO 100 | 101 | More info in ./help/savreviews.md 102 | 103 | 104 | =head1 VERSION 105 | 106 | 2022-03-10 (Since 2018-08-13) 107 | 108 | =cut 109 | 110 | #<--------------------------------- 79 chars --------------------------------->| 111 | 112 | 113 | use strict; 114 | use warnings; 115 | use locale; 116 | use 5.18.0; 117 | 118 | # Perl core: 119 | use FindBin; 120 | use local::lib "$FindBin::Bin/lib/local/"; 121 | use lib "$FindBin::Bin/lib/"; 122 | use Time::HiRes qw( time tv_interval ); 123 | use POSIX qw( locale_h ); 124 | use File::Spec; # Platform indep. directory separator 125 | use IO::File; 126 | use Getopt::Long; 127 | use Pod::Usage; 128 | # Third party: 129 | # Ours: 130 | use Goodscrapes; 131 | 132 | 133 | 134 | # ---------------------------------------------------------------------------- 135 | # Program configuration: 136 | # 137 | setlocale( LC_CTYPE, 'en_US' ); # GR dates all en_US 138 | STDOUT->autoflush( 1 ); 139 | gsetopt( cache_days => 7 ); 140 | 141 | our $TSTART = time(); 142 | our $RIGOR = 10; 143 | our $DICTPATH = File::Spec->catfile( $FindBin::Bin, 'list-in', 'dict.lst' ); 144 | our $OUTDIR = File::Spec->catfile( $FindBin::Bin, 'list-out' ); 145 | our $OUTNAMEFMT = 'savreviews-book%s-stars%d.txt'; 146 | our $OUTDATEFMT = "%Y/%m/%d"; # man strptime 147 | our $BOOKID; 148 | our $REVIEWSEPARATOR = "\n\n".( '-' x 79 )."\n"; # long line 149 | our $MAXPOSSIBLESTARS = 5; 150 | 151 | GetOptions( 'rigor|x=i' => \$RIGOR, 152 | 'dict|d=s' => \$DICTPATH, 153 | 'outdir|o=s' => \$OUTDIR, 154 | 'ignore-errors|i' => sub{ gsetopt( ignore_errors => 1 ); }, 155 | 'cache|c=i' => sub{ gsetopt( cache_days => $_[1] ); }, 156 | 'help|?' 
=> sub{ pod2usage( -verbose => 2 ); }) 157 | or pod2usage( 1 ); 158 | 159 | $BOOKID = $ARGV[0] or pod2usage( 1 ); 160 | 161 | 162 | 163 | # ---------------------------------------------------------------------------- 164 | print( 'Loading reviews ' ); 165 | 166 | my %reviews; 167 | 168 | my %book = greadbook( $BOOKID ); 169 | 170 | printf( 'for "%s"...', $book{title} ); 171 | 172 | greadreviews( rh_for_book => \%book, 173 | rigor => $RIGOR, 174 | rh_into => \%reviews, 175 | dict_path => $DICTPATH, 176 | text_minlen => 1, 177 | on_progress => gmeter( "of $book{num_reviews} [\033[38;5msearching\033[0m]" )); 178 | 179 | ghistogram( rh_from => \%reviews ); 180 | 181 | 182 | 183 | # ---------------------------------------------------------------------------- 184 | print( "\n\nWriting reviews to:" ); 185 | 186 | my @files; 187 | 188 | for my $n (0..$MAXPOSSIBLESTARS) 189 | { 190 | my $fpath = File::Spec->catfile( $OUTDIR, sprintf( $OUTNAMEFMT, $BOOKID, $n )); 191 | 192 | print( "\n$fpath" ); 193 | 194 | my $fh = IO::File->new( $fpath, '>:utf8' ) or die( "[FATAL] Cannot write to $fpath ($!)" ); 195 | push @files, $fh; # Check open first: push() returns the new count (always true), so "push ... or die" never dies 196 | } 197 | 198 | 199 | print {$files[$_->{rating}]} 200 | $_->{date}->strftime( $OUTDATEFMT ) . " #" . 201 | $_->{id } . "\n\n" . 202 | $_->{text} . 
203 | $REVIEWSEPARATOR 204 | for (values %reviews); 205 | 206 | 207 | 208 | # ---------------------------------------------------------------------------- 209 | printf( "\n\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 ); 210 | 211 | 212 | -------------------------------------------------------------------------------- /search.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- MAN PAGE --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | search - Search a book and sort result by popularity 10 | 11 | 12 | =head1 SYNOPSIS 13 | 14 | B 15 | [B<-z> F] 16 | [B<-r> F] 17 | [B<-c> F] 18 | [B<-o> F] 19 | [B<-i>] 20 | F... 21 | 22 | Use quotes if you want exact matches (see examples section) 23 | 24 | =head1 OPTIONS 25 | 26 | Mandatory arguments to long options are mandatory for short options too. 27 | 28 | =over 4 29 | 30 | =item B<-z, --order>=F 31 | 32 | sort order, all descending, comma-separated column names, 33 | default is "stars,num_ratings,year" 34 | (you're free to change the order but not the names) 35 | 36 | 37 | =item B<-r, --ratings>=F 38 | 39 | only include books with N or more ratings: 40 | a 4-stars book rated by 30 readers might be "better" than a 5-stars book rated 41 | by 1 reader (perhaps the author). This also declutters our F. 42 | Use low values to cut away the nonsense, use high values only if you know 43 | the available range otherwise you might get zero results. 44 | Default is 5 or 0 if exact match. 45 | 46 | 47 | =item B<-c, --cache>=F 48 | 49 | number of days to store and reuse downloaded data in F, 50 | default is 7 days. This helps on experimenting with parameters. 51 | Loading data from Goodreads is a time consuming process. 
52 | 53 | 54 | =item B<-o, --outfile>=F 55 | 56 | name of the HTML file where we write results to, 57 | default see section FILES 58 | 59 | 60 | =item B<-i, --ignore-errors> 61 | 62 | Don't retry on errors, just keep going. 63 | Sometimes useful if a single Goodreads resource hangs over long periods 64 | and you're okay with some values missing in your result. 65 | This option is not recommended when you run the program unattended. 66 | 67 | 68 | =item B<-?, --help> 69 | 70 | show full man page 71 | 72 | =back 73 | 74 | 75 | =head1 FILES 76 | 77 | F<./list-out/search-$KEYWORD.html> 78 | 79 | F 80 | 81 | 82 | =head1 EXAMPLES 83 | 84 | $ ./search.pl linux 85 | 86 | $ ./search.pl --ratings=10 --order=stars,num_ratings linux kernel 87 | 88 | $ ./search.pl --order=year,num_ratings linux kernel 89 | 90 | $ ./search.pl -r 10 -z year "linux kernel" 91 | 92 | 93 | =head1 REPORTING BUGS 94 | 95 | Report bugs to or use Github's issue tracker 96 | L 97 | 98 | 99 | =head1 COPYRIGHT 100 | 101 | This is free software. You may redistribute copies of it under the terms of 102 | the GNU General Public License L. 103 | There is NO WARRANTY, to the extent permitted by law. 104 | 105 | 106 | =head1 SEE ALSO 107 | 108 | More info in ./help/search.md 109 | 110 | 111 | =head1 VERSION 112 | 113 | 2022-03-10 (Since 2018-07-29) 114 | 115 | =cut 116 | 117 | #<--------------------------------- 79 chars --------------------------------->| 118 | 119 | 120 | use strict; 121 | use warnings; 122 | use locale; 123 | use 5.18.0; 124 | 125 | # Perl core: 126 | use FindBin; 127 | use local::lib "$FindBin::Bin/lib/local/"; 128 | use lib "$FindBin::Bin/lib/"; 129 | use Time::HiRes qw( time tv_interval ); 130 | use POSIX qw( strftime locale_h ); 131 | use File::Spec; # Platform indep. 
directory separator 132 | use IO::File; 133 | use Getopt::Long; 134 | use Pod::Usage; 135 | # Third party: 136 | use List::MoreUtils qw( uniq ); 137 | # Ours: 138 | use Goodscrapes; 139 | 140 | 141 | 142 | # ---------------------------------------------------------------------------- 143 | # Program configuration: 144 | # 145 | setlocale( LC_CTYPE, "en_US" ); # GR dates all en_US 146 | STDOUT->autoflush( 1 ); 147 | gsetopt( cache_days => 7 ); 148 | 149 | our $TSTART = time(); 150 | our @ORDER; 151 | our $NUMRATINGS; 152 | our $PHRASE; 153 | our $OUTPATH; 154 | our $ISEXACT; 155 | my $ordercsv = ''; 156 | 157 | GetOptions( 'ratings|r=i' => \$NUMRATINGS, 158 | 'order|z=s' => \$ordercsv, 159 | 'outfile|o=s' => \$OUTPATH, 160 | 'ignore-errors|i' => sub{ gsetopt( ignore_errors => 1 ); }, 161 | 'cache|c=i' => sub{ gsetopt( cache_days => $_[1] ); }, 162 | 'help|?' => sub{ pod2usage( -verbose => 2 ); }) 163 | or pod2usage( 1 ); 164 | 165 | $PHRASE = join( ' ', @ARGV ) or pod2usage( 1 ); 166 | $OUTPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', "search-${PHRASE}.html" ) if !$OUTPATH; 167 | $ISEXACT = index( $ARGV[0], ' ' ) > -1; # Quoted "aaa bbb" as single argument, otherwise 2 args 168 | $NUMRATINGS = $ISEXACT ? 0 : 5 if !defined $NUMRATINGS; 169 | $ordercsv =~ s/\s+//g; # Mistakenly added spaces 170 | @ORDER = uniq(( split( ',', lc $ordercsv ), qw( stars num_ratings year ))); # Adds missing 171 | 172 | 173 | 174 | # ---------------------------------------------------------------------------- 175 | # Primary data structures: 176 | # 177 | my @books; 178 | 179 | 180 | 181 | # ---------------------------------------------------------------------------- 182 | # Load basic data: 183 | # 184 | printf( "Searching books:\n\n about..... %s\n rated by.. %d members or more\n order by.. %s\n progress.. ", 185 | $ISEXACT ? 
"$PHRASE (exact)" : $PHRASE, $NUMRATINGS, join( ', ', @ORDER ) ); 186 | 187 | gsearch( phrase => $PHRASE, 188 | ra_into => \@books, 189 | is_exact => $ISEXACT, 190 | ra_order_by => \@ORDER, 191 | num_ratings => $NUMRATINGS, 192 | on_progress => gmeter() ); 193 | 194 | 195 | 196 | # ---------------------------------------------------------------------------- 197 | # Write results to HTML file 198 | # 199 | printf( "\n\nWriting search result (N=%d) to \"%s\"... ", scalar @books, $OUTPATH ); 200 | 201 | my $fh = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)"; 202 | my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime ); 203 | 204 | print $fh ghtmlhead( "Query: \"$PHRASE\", $now", [ '!Cover', 'Title', 'Author', ">$ORDER[0]:", "$ORDER[1]:", "$ORDER[2]:" ]); 205 | 206 | my $line; 207 | for my $b (@books) 208 | { 209 | $line++; 210 | print $fh qq{ 211 | 212 | 213 | 214 | ${\ghtmlsafe( $b->{title } )} 215 | 216 | 217 | ${\ghtmlsafe( $b->{rh_author}->{name} )} 218 | 219 | ${\ghtmlsafe( $b->{$ORDER[0]} )} 220 | ${\ghtmlsafe( $b->{$ORDER[1]} )} 221 | ${\ghtmlsafe( $b->{$ORDER[2]} )} 222 | 223 | }; 224 | } 225 | 226 | print $fh ghtmlfoot(); 227 | undef $fh; 228 | 229 | 230 | 231 | # ---------------------------------------------------------------------------- 232 | # Done: 233 | # 234 | printf( "\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 ); 235 | 236 | -------------------------------------------------------------------------------- /similarauth.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #<--------------------------------- MAN PAGE --------------------------------->| 4 | 5 | =pod 6 | 7 | =head1 NAME 8 | 9 | similarauth - Finding all similar authors 10 | 11 | 12 | =head1 SYNOPSIS 13 | 14 | B 15 | [B<-u> F] 16 | [B<-c> F] 17 | [B<-o> F] 18 | [B<-s> F ...] 
19 | [B<-i>] 20 | F [F] 21 | 22 | 23 | =head1 OPTIONS 24 | 25 | Mandatory arguments to long options are mandatory for short options too. 26 | 27 | =over 4 28 | 29 | =item B<-u, --userid>=F 30 | 31 | check another member instead of the one identified by the login-mail 32 | and password arguments. You find the ID by looking at the shelf URLs. 33 | 34 | 35 | =item B<-m, --minseen>=F 36 | 37 | only report authors that were similar to N other authors; default is 1 38 | 39 | 40 | =item B<-c, --cache>=F 41 | 42 | number of days to store and reuse downloaded data in F, 43 | default is 31 days. This helps with cheap recovery on a crash, power blackout 44 | or pause, and when experimenting with parameters. Loading data from Goodreads 45 | is a very time consuming process. 46 | 47 | 48 | =item B<-o, --outfile>=F 49 | 50 | name of the HTML file where we write results to, 51 | default see section FILES 52 | 53 | 54 | =item B<-s, --shelf>=F 55 | 56 | name of the shelf with a selection of books to be considered, default is 57 | "#ALL#". If the name contains special characters use an URL-encoded name. 58 | You can use this parameter multiple times if there is more than 1 shelf to 59 | include (boolean OR operation), see the examples section of this man page. 60 | Use B<--shelf>=shelf1,shelf2,shelf3 to intersect shelves (Intersection 61 | requires password). 62 | 63 | 64 | =item B<-i, --ignore-errors> 65 | 66 | Don't retry on errors, just keep going. 67 | Sometimes useful if a single Goodreads resource hangs over long periods 68 | and you're okay with some values missing in your result. 69 | This option is not recommended when you run the program unattended. 
70 | 71 | 72 | =item B<-?, --help> 73 | 74 | show full man page 75 | 76 | =back 77 | 78 | 79 | =head1 FILES 80 | 81 | F<./list-out/similarauth-$USER-$SHELF.html> 82 | 83 | F 84 | 85 | 86 | =head1 EXAMPLES 87 | 88 | $ ./similarauth.pl login@gmail.com MyPASSword 89 | 90 | $ ./similarauth.pl --shelf=science --shelf=music login@gmail.com 91 | 92 | $ ./similarauth.pl --shelf=read --outfile=./sub/myfile.html login@gmail.com 93 | 94 | $ ./similarauth.pl -c 31 -s science -s music -o myfile.html login@gmail.com 95 | 96 | 97 | =head1 REPORTING BUGS 98 | 99 | Report bugs to or use Github's issue tracker 100 | L 101 | 102 | 103 | =head1 COPYRIGHT 104 | 105 | This is free software. You may redistribute copies of it under the terms of 106 | the GNU General Public License L. 107 | There is NO WARRANTY, to the extent permitted by law. 108 | 109 | 110 | =head1 SEE ALSO 111 | 112 | More info in ./help/similarauth.md 113 | 114 | 115 | =head1 VERSION 116 | 117 | 2022-03-08 (Since 2018-07-05) 118 | 119 | =cut 120 | 121 | #<--------------------------------- 79 chars --------------------------------->| 122 | 123 | 124 | use strict; 125 | use warnings; 126 | use 5.18.0; 127 | 128 | # Perl core: 129 | use FindBin; 130 | use local::lib "$FindBin::Bin/lib/local/"; 131 | use lib "$FindBin::Bin/lib/"; 132 | use Time::HiRes qw( time tv_interval ); 133 | use POSIX qw( strftime ); 134 | use File::Spec; # Platform indep. 
directory separator 135 | use IO::File; 136 | use Getopt::Long; 137 | use Pod::Usage; 138 | # Third party: 139 | # Ours: 140 | use Goodscrapes; 141 | 142 | 143 | 144 | # ---------------------------------------------------------------------------- 145 | # Program configuration: 146 | # 147 | STDOUT->autoflush( 1 ); 148 | gsetopt( cache_days => 31 ); 149 | 150 | our $TSTART = time(); 151 | our @SHELVES; 152 | our $OUTPATH; 153 | our $USERID; 154 | our $MINSEEN = 1; 155 | 156 | GetOptions( 'shelf|s=s' => \@SHELVES, 157 | 'minseen|m=i' => \$MINSEEN, 158 | 'userid|u=s' => \$USERID, 159 | 'outfile|o=s' => \$OUTPATH, 160 | 'cache|c=i' => sub{ gsetopt( cache_days => $_[1] ); }, 161 | 'ignore-errors|i' => sub{ gsetopt( ignore_errors => 1 ); }, 162 | 'help|?' => sub{ pod2usage( -verbose => 2 ); }) 163 | or pod2usage( 1 ); 164 | 165 | pod2usage( 1 ) if !$ARGV[0]; 166 | 167 | glogin( usermail => $ARGV[0], # Login not really required at the moment 168 | userpass => $ARGV[1], # Asks pw if omitted 169 | r_userid => \$USERID ); 170 | 171 | @SHELVES = qw( %23ALL%23 ) 172 | if !@SHELVES; 173 | 174 | $OUTPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', sprintf( 'similarauth-%s-%s.html', $USERID, join( '-', @SHELVES ))) 175 | if !$OUTPATH; 176 | 177 | 178 | 179 | # ---------------------------------------------------------------------------- 180 | # Primary data structures: 181 | # 182 | our %auknown; # {$auid => %author} 183 | our %aufound; # {$auid => %author} 184 | 185 | 186 | 187 | # ---------------------------------------------------------------------------- 188 | # Load basic data: 189 | # 190 | printf( "Loading authors from \"%s\"...", join( '" and "', @SHELVES ) ); 191 | 192 | greadauthors( from_user_id => $USERID, 193 | ra_from_shelves => \@SHELVES, 194 | rh_into => \%auknown, 195 | on_progress => gmeter( 'authors' )); 196 | 197 | 198 | 199 | # ---------------------------------------------------------------------------- 200 | # Query similar authors: 201 | # TODO 
recurs_depth = n 202 | # 203 | my $aucount = scalar keys %auknown; 204 | my $audone = 0; 205 | 206 | die( $GOOD_ERRMSG_NOBOOKS ) if $aucount == 0; 207 | 208 | printf( "\nLoading similar authors for %d authors:\n", $aucount ); 209 | 210 | for my $auid (keys %auknown) 211 | { 212 | my $t0 = time(); 213 | printf( "[%3d%%] %-25s #%-8s\t", ++$audone/$aucount*100, $auknown{$auid}->{name}, $auid ); 214 | 215 | # Also increments each author's '_seen' counter if already in %aufound 216 | greadsimilaraut( author_id => $auid, 217 | rh_into => \%aufound, 218 | on_progress => gmeter( 'similar' )); 219 | 220 | printf( "\t%6.2fs\n", time()-$t0 ); 221 | } 222 | say( "Done." ); 223 | 224 | 225 | 226 | # ---------------------------------------------------------------------------- 227 | # Write results to HTML file 228 | # 229 | printf( "Writing authors (N=%d) to \"%s\"... ", scalar keys %aufound, $OUTPATH ); 230 | 231 | my $fh = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)"; 232 | my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime ); 233 | 234 | print $fh ghtmlhead( "Similar Authors, $now", [ 'Author', '>Seen:' ]); 235 | 236 | my $line; 237 | for my $auid (keys %aufound) 238 | { 239 | next if exists $auknown{$auid}; 240 | next if $aufound{$auid}->{_seen} < $MINSEEN; 241 | 242 | $line++; 243 | print $fh qq{ 244 | 245 | 246 | 247 | 248 | ${\ghtmlsafe( $aufound{$auid}->{name} )} 249 | 250 | $aufound{$auid}->{_seen} 251 | 252 | }; 253 | } 254 | 255 | print $fh ghtmlfoot(); 256 | undef $fh; 257 | 258 | 259 | # ---------------------------------------------------------------------------- 260 | # Done: 261 | # 262 | printf( "\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 ); 263 | 264 | -------------------------------------------------------------------------------- /t/README.md: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/t/README.md -------------------------------------------------------------------------------- /t/config.pl-example: -------------------------------------------------------------------------------- 1 | use 5.18.0; 2 | use warnings; 3 | 4 | sub get_gooduser_mail { return 'yourgoodreadslogin@gmail.com'; } 5 | sub get_gooduser_pass { return 'YOUR-GOODREADS-PASSWORD'; } 6 | sub get_gooduser_id { return '123456789'; } # Use number in your shelf-URLs 7 | 8 | 1; 9 | 10 | -------------------------------------------------------------------------------- /t/ghtmlxxx.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] HTML safe strings 5 | # [ ] 6 | # [ ] 7 | # [ ] 8 | 9 | use diagnostics; # More debugging info 10 | use warnings; 11 | use strict; 12 | use FindBin; 13 | use local::lib "$FindBin::Bin/../lib/local"; 14 | use lib "$FindBin::Bin/../lib/"; 15 | use Test::More qw( no_plan ); 16 | 17 | 18 | use_ok( 'Goodscrapes' ); 19 | 20 | 21 | my %user; 22 | my %book; 23 | 24 | $user{name} = ''; 25 | $user{num_books} = 100; 26 | $user{url} = '">'; 27 | $book{title} = ''; 28 | $book{stars} = 4; 29 | $book{url} = '">'; 30 | $book{rh_author} = \%user; 31 | 32 | 33 | # Also example of functions inside string interpolations: 34 | my $test = qq{ 35 | 36 | 37 | ${\ghtmlsafe( $book{title} )} 38 | ${\ghtmlsafe( $book{stars} )} 39 | ${\ghtmlsafe( $book{url} )} 40 | ${\ghtmlsafe( $book{rh_author}->{name} )} 41 | ${\ghtmlsafe( $book{rh_author}->{num_books} )} 42 | ${\ghtmlsafe( $book{rh_author}->{url} )} 43 | 44 | 45 | }; 46 | 47 | my $expected = qq{ 48 | 49 | 50 | <script>alert("Book Title XSS");</script> 51 | 4 52 | "><script>alert("Book URL XSS");</script> 53 | <script>alert("User Name XSS");</script> 54 | 100 55 | "><script>alert("User URL XSS");</script> 56 | 57 | 58 | }; 59 | 60 | 61 | is( 
$test, $expected ); 62 | 63 | -------------------------------------------------------------------------------- /t/gisxxx.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] Good profiles, bad profiles 5 | # [x] unexpected values (undef etc) 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | 18 | 19 | use_ok( 'Goodscrapes' ); 20 | 21 | 22 | ok( !gisbaduser( '1' ), 'Otis Chandler (GR founder)' ); 23 | ok( !gisbaduser( '2' ), 'Goodreads employee' ); 24 | ok( gisbaduser( '1000834' ), '"NOT A BOOK" author' ); 25 | ok( gisbaduser( '5158478' ), '"Anonymous" author' ); 26 | ok( gisbaduser( undef ), 'Invalid value is bad' ); 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /t/glogin.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] login and get correct user-id 5 | # [ ] 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use lib "$FindBin::Bin/../t/"; 17 | use Test::More qw( no_plan ); 18 | use List::MoreUtils qw( any firstval ); 19 | 20 | 21 | use_ok( 'Goodscrapes' ); 22 | require( 'config.pl' ); 23 | 24 | 25 | my $userid_extracted; 26 | my $userid_expected = get_gooduser_id(); 27 | 28 | glogin( usermail => get_gooduser_mail(), 29 | userpass => get_gooduser_pass(), 30 | r_userid => \$userid_extracted ); 31 | 32 | is( $userid_extracted, $userid_expected, 'Got correct user ID after login' ); 33 | 34 | 35 | 
-------------------------------------------------------------------------------- /t/gmeter.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] additive absolute progress with custom unit 5 | # [x] additive percent progress 6 | # [ ] invalid arguments 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | use List::MoreUtils qw( any firstval ); 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | my $stdout; 24 | { 25 | local *STDOUT; # Capture meter output in $stdout for this block only 26 | open( STDOUT, ">", \$stdout ) or die( "Cannot redirect STDOUT to in-memory buffer: $!" ); 27 | my $meter; 28 | 29 | 30 | # Absolute progress with custom unit: 31 | $meter = gmeter( 'test unit' ); 32 | 33 | $stdout = ''; 34 | $meter->( 1 ); 35 | like( $stdout, qr/1 test unit/, 'Prints number with custom unit' ); 36 | 37 | $stdout = ''; 38 | $meter->( 20 ); # Adds 20 to previous value 1 39 | like( $stdout, qr/21 test unit/, 'Prints sum with custom unit' ); 40 | 41 | 42 | # Percent progress is enabled by using a second number with a known maximum 43 | # Any custom unit is ignored. 
44 | $meter = gmeter(); 45 | 46 | $stdout = ''; 47 | $meter->( 1, 10 ); 48 | like( $stdout, qr/10%/, 'Prints percent number' ); 49 | 50 | $stdout = ''; 51 | $meter->( 5, 10 ); # Adds another 5 to prev value 1; You must not read this as "5 of 10" or 50% 52 | like( $stdout, qr/60%/, 'Prints percent number for sum' ); 53 | } 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /t/greadauthorbk.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] read books and check attributes (detects changed markup) 5 | # [ ] invalid arguments 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | 11 | use diagnostics; # More debugging info 12 | use warnings; 13 | use strict; 14 | use FindBin; 15 | use local::lib "$FindBin::Bin/../lib/local/"; 16 | use lib "$FindBin::Bin/../lib/"; 17 | use Test::More qw( no_plan ); 18 | use List::MoreUtils qw( any firstval ); 19 | 20 | 21 | use_ok( 'Goodscrapes' ); 22 | 23 | 24 | # We should never use caching during real tests: 25 | # We need to test against the most up-to-date markup from Goodreads.com 26 | # Having no cache during development is annoying, tho. 27 | # So we leave a small window: 28 | gsetopt( cache_days => 1 ); 29 | 30 | 31 | print( 'Reading books of author...' 
); 32 | 33 | my %books; 34 | my $LIMIT = 10; 35 | my $AUTID = '2546'; # Palahniuk, Chuck (Fight Club) 36 | 37 | greadauthorbk( author_id => $AUTID, 38 | limit => $LIMIT, 39 | rh_into => \%books, 40 | #on_book => sub{} 41 | on_progress => gmeter( 'books' )); 42 | 43 | print( "\n" ); 44 | 45 | 46 | ok( scalar( keys( %books )) == $LIMIT, "$LIMIT books read from author" ); 47 | 48 | 49 | map { 50 | ok ( $_->{title}, 'Book has title' ); 51 | like( $_->{url}, qr/^https:\/\/www\.goodreads\.com\/book\/show\//, 'Book has URL' ); 52 | like( $_->{img_url}, qr/^https:\/\/[a-z0-9]+\.gr-assets\.com/, 'Book has image URL' ); 53 | like( $_->{id}, qr/^\d+$/, 'Book has Goodreads ID' ); 54 | ok ( $_->{num_ratings} > 0, 'Book has number of ratings' ); 55 | ok ( $_->{avg_rating} > 0, 'Book has average rating' ); 56 | ok ( $_->{rh_author}->{name}, 'Book author has name' ); 57 | ok ( $_->{rh_author}->{name_lf}, 'Book author has name' ); 58 | is ( $_->{rh_author}->{id}, $AUTID, 'Book author has Goodreads ID' ); 59 | like( $_->{rh_author}->{img_url}, qr/^https:\/\/images\.gr-assets\.com/, 'Book author has image URL' ); 60 | like( $_->{rh_author}->{url}, qr/^https:\/\/www\.goodreads\.com/, 'Book author has URL' ); 61 | like( $_->{rh_author}->{works_url}, qr/^https:\/\/www\.goodreads\.com/, 'Book author has works URL' ); 62 | is ( $_->{rh_author}->{is_author}, 1, 'Book author is author' ); 63 | is ( $_->{rh_author}->{is_private}, 0, 'Book author not private' ); 64 | is ( $_->{rh_author}->{is_mainstream}, 1, 'Is a mainstream author' ); 65 | 66 | 67 | # Not available or scraped yet, otherwise one of the following 68 | # tests will fail and remind me of implementing a correct test: 69 | is ( $_->{isbn}, undef, 'N/A: Book ISBN' ); 70 | is ( $_->{isbn13}, undef, 'N/A: Book ISBN13' ); 71 | is ( $_->{format}, undef, 'N/A: Book format' ); 72 | is ( $_->{user_rating}, undef, 'N/A: User rating' ); 73 | is ( $_->{user_read_count}, undef, 'N/A: User read count' ); 74 | is ( $_->{user_num_owned}, undef, 
'N/A: Number user-owned books' ); 75 | is ( $_->{user_date_read}, undef, 'N/A: User reading-date' ); 76 | is ( $_->{user_date_added}, undef, 'N/A: User addition-date' ); 77 | is ( $_->{ra_user_shelves}, undef, 'N/A: User shelves' ); 78 | is ( $_->{stars}, undef, 'N/A: Book average rating' ); 79 | is ( $_->{num_pages}, undef, 'N/A: Book number of pages' ); 80 | is ( $_->{num_reviews}, undef, 'N/A: Book number of reviews' ); 81 | is ( $_->{review_id}, undef, 'N/A: User review id' ); 82 | is ( $_->{year}, undef, 'N/A: Book pub-year' ); 83 | is ( $_->{year_edit}, undef, 'N/A: Book edition pub-year' ); 84 | is ( $_->{rh_author}->{residence}, undef, 'N/A: Author residence' ); 85 | is ( $_->{rh_author}->{age}, undef, 'N/A: Author age' ); 86 | is ( $_->{rh_author}->{is_staff}, undef, 'N/A: Is Goodreads author' ); 87 | is ( $_->{rh_author}->{is_female}, undef, 'N/A: Author gender' ); 88 | is ( $_->{rh_author}->{is_friend}, undef, 'N/A: Author friend status' ); 89 | is ( $_->{rh_author}->{num_books}, undef, 'N/A: Number of author books' ); 90 | 91 | } values( %books ) 92 | 93 | -------------------------------------------------------------------------------- /t/greadauthors.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] Read authors and check attributes (detects changed markup) 5 | # [ ] Invalid arguments 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | 11 | use diagnostics; # More debugging info 12 | use warnings; 13 | use strict; 14 | use FindBin; 15 | use local::lib "$FindBin::Bin/../lib/local/"; 16 | use lib "$FindBin::Bin/../lib/"; 17 | use Test::More qw( no_plan ); 18 | use List::MoreUtils qw( any firstval ); 19 | 20 | 21 | use_ok( 'Goodscrapes' ); 22 | 23 | 24 | # We should never use caching during real tests: 25 | # We need to test against the most up-to-date markup from Goodreads.com 26 | # Having no cache during development is annoying, tho. 
27 | # So we leave a small window: 28 | gsetopt( cache_days => 1 ); 29 | 30 | 31 | print( 'Reading authors from book shelf... '); 32 | 33 | my %authors; 34 | 35 | greadauthors( from_user_id => 2, 36 | ra_from_shelves => [ 'read' ], 37 | rh_into => \%authors, 38 | on_progress => gmeter( 'authors' )); 39 | 40 | print( "\n" ); 41 | 42 | 43 | ok( scalar( keys( %authors )) > 30, 'At least 30 authors read from shelf' ); 44 | 45 | ok( exists( $authors{2546} ), 'Expected author found via hash-key = Goodreads author ID' ) 46 | or BAIL_OUT( "Cannot test author attributes when expected author is missing." ); 47 | 48 | 49 | my $a = $authors{2546}; 50 | 51 | isa_ok( $a, 'HASH', 'Author datatype' ); 52 | is ( $a->{id}, '2546', 'Author has ID' ); 53 | is ( $a->{name_lf}, 'Palahniuk, Chuck', 'Author has name' ); 54 | is ( $a->{url}, 'https://www.goodreads.com/author/show/2546', 'Author has URL' ); 55 | like( $a->{works_url}, qr/^https:\/\/www\.goodreads\.com\/author\/list\/2546/, 'Author has works URL' ); 56 | is ( $a->{is_author}, 1, 'Author has author flag' ); 57 | is ( $a->{is_private}, 0, 'Author not private' ); 58 | is ( $a->{is_mainstream}, 1, 'Author is mainstream' ); 59 | 60 | 61 | # Not available or scraped yet, otherwise one of the following 62 | # tests will fail and remind me of implementing a correct test: 63 | is ( $a->{name}, $a->{name_lf}, 'N/A: author name != name_lf' ); # "Chuck Palahniuk" 64 | is ( $a->{residence}, undef, 'N/A: author residence' ); 65 | is ( $a->{img_url}, undef, 'N/A: author image URL' ); 66 | is ( $a->{age}, undef, 'N/A: author age' ); 67 | is ( $a->{num_books}, undef, 'N/A: number of author books' ); 68 | is ( $a->{is_friend}, undef, 'N/A: author friend status' ); 69 | is ( $a->{is_female}, undef, 'N/A: author gender status' ); 70 | is ( $a->{user_avg_rating}, undef, 'N/A: user avg rating' ); 71 | is ( $a->{user_min_rating}, undef, 'N/A: user min rating' ); 72 | is ( $a->{user_max_rating}, undef, 'N/A: user max rating' ); 73 | 74 | 75 | 
-------------------------------------------------------------------------------- /t/greadbook.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] read book and check attributes (detects changed markup) 5 | # [x] wrong book ID 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | use List::MoreUtils qw( any firstval ); 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | # We should never use caching during real tests: 24 | # We need to test against the most up-to-date markup from Goodreads.com 25 | # Having no cache during development is annoying, tho. 26 | # So we leave a small window: 27 | gsetopt( cache_days => 1 ); 28 | 29 | 30 | my %nob = greadbook( 'TEST_INVALID_BOOK_ID' ); 31 | 32 | ok( !%nob, 'Book not found' ); 33 | 34 | 35 | my $test_book_id = 5759; # 5759 legacy id, 36236124 new id 36 | my %b = greadbook( $test_book_id ); 37 | 38 | ok( %b, 'Book read' ) 39 | or BAIL_OUT( "Cannot test book attributes when expected book is missing." 
); 40 | 41 | 42 | is ( $b{id}, $test_book_id, 'Book has Goodreads ID' ); 43 | is ( $b{isbn}, '0393327345', 'Book has ISBN' ); 44 | is ( $b{isbn13}, '9780393327342', 'Book has ISBN13' ); 45 | is ( $b{num_pages}, 218, 'Book has number of pages' ); 46 | ok ( $b{num_ratings} > 190000 && $b{num_ratings} < 1000000, 'Book has number of ratings' ); 47 | ok ( $b{num_reviews} > 18000 && $b{num_reviews} < 50000, 'Book has number of reviews' ); 48 | is ( $b{title}, 'Fight Club', 'Book has title' ); 49 | ok ( $b{avg_rating} >= 4 && $b{avg_rating} < 5, 'Book has average rating' ); 50 | ok ( $b{stars} >= 4 && $b{stars} < 5, 'Book has stars rating' ); 51 | like( $b{img_url}, qr/\.jpg$/, 'Book has image URL' ); 52 | is ( $b{url}, 'https://www.goodreads.com/book/show/5759', 'Book has URL' ); 53 | is ( $b{format}, 'Paperback', 'Book has format' ); 54 | 55 | 56 | # Not available or scraped yet, otherwise one of the following 57 | # tests will fail and remind me of implementing a correct test: 58 | # is( $b{year}, 1996, 'Book has pub-year' ); 59 | # is( $b{year_edit}, 2005, 'Book edition has pub-year' ); 60 | # user_xxx 61 | # ra_user_shelves 62 | # review_id 63 | # is ( $b{rh_author}->{id}, '2546', 'Book has author ID' ); 64 | # is ( $b{rh_author}->{name_lf}, 'Palahniuk, Chuck', 'Book has author name' ); 65 | # is ( $b{rh_author}->{url}, 'https://www.goodreads.com/author/show/2546', 'Book has author URL' ); 66 | # like( $b{rh_author}->{works_url}, qr/https:\/\/www.goodreads.com\/author\/list\/2546.*/, 'Book has author works URL' ); 67 | # is ( $b{rh_author}->{residence}, 68 | # is ( $b{rh_author}->{num_books}, 69 | # is ( $b{rh_author}->{age}, 70 | # is ( $b{rh_author}->{is_author}, 1, 'Book author has author flag' ); 71 | # is ( $b{rh_author}->{is_private}, 0, 'Book author not private' ); 72 | # is ( $b{rh_author}->{is_staff}, 73 | # is ( $b{rh_author}->{is_female}, 74 | # is ( $b{rh_author}->{is_friend}, 75 | # is ( $b{rh_author}->{is_mainstream}, 76 | 77 | 78 | 79 | 
-------------------------------------------------------------------------------- /t/greadcomments.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [ ] ... 5 | 6 | 7 | use diagnostics; # More debugging info 8 | use warnings; 9 | use strict; 10 | use FindBin; 11 | use local::lib "$FindBin::Bin/../lib/local/"; 12 | use lib "$FindBin::Bin/../lib/"; 13 | use lib "$FindBin::Bin/../t/"; 14 | use Test::More qw( no_plan ); 15 | 16 | 17 | use_ok( 'Goodscrapes' ); 18 | require( 'config.pl' ); 19 | 20 | 21 | # We should never use caching during real tests: 22 | # We need to test against the most up-to-date markup from Goodreads.com 23 | # Having no cache during development is annoying, tho. 24 | # So we leave a small window: 25 | gsetopt( cache_days => 1 ); 26 | 27 | 28 | # Info is only available to authenticated users: 29 | glogin( usermail => get_gooduser_mail(), 30 | userpass => get_gooduser_pass() ); 31 | 32 | 33 | print( 'Reading comments... 
'); 34 | 35 | 36 | my @comments; 37 | 38 | greadcomments( from_user_id => 18418712, # 1036726, 39 | ra_into => \@comments, 40 | on_progress => gmeter( 'comments' )); 41 | 42 | 43 | print( "\n" ); 44 | 45 | 46 | 47 | ok( scalar( @comments ) >= 10, 'At least 10 comments read' ); 48 | 49 | for my $c ( @comments ) 50 | { 51 | ok( $c->{text}, 'Comment has text' ); 52 | 53 | if( $c->{rh_to_user} ) # No user info if comment on a group 54 | { 55 | ok ( $c->{rh_to_user}, 'Comment has an addressee' ); 56 | ok ( $c->{rh_to_user}->{name}, 'Addressee of comment has name' ); 57 | } 58 | 59 | if( $c->{rh_book} ) # No book info if comment on a group or a quote or a user status 60 | { 61 | ok ( $c->{rh_book}->{title}, 'Commented book has title' ); 62 | like( $c->{rh_book}->{img_url}, qr/^https:.*\.(jpg|png)$/, 'Commented book has image URL' ); 63 | like( $c->{rh_book}->{url}, qr/^https:\/\/www.goodreads.com\//, 'Commented book has an URL' ); # Not real URL but search-URL due to missing book ID 64 | 65 | ok ( $c->{rh_review}, 
'Comment addressed a review' ); 66 | ok ( $c->{rh_review}->{id}, 'Commented review has an ID' ); 67 | like( $c->{rh_review}->{url}, qr/^https:\/\/www.goodreads.com\//, 'Commented review has an URL' ); 68 | ok ( $c->{rh_review}->{rh_user}, 'Commented review has an author' ); 69 | ok ( $c->{rh_review}->{rh_user}->{name}, 'Author of commented review has a name' ); 70 | 71 | 72 | # Not available or scraped yet, otherwise one of the following 73 | # tests will fail and remind me of implementing a correct test: 74 | 75 | is ( $c->{rh_book}->{id}, undef, 'N/A: Book ID' ); 76 | } 77 | 78 | } 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /t/greadfolls.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] get friends and followees 5 | # [x] get friends only 6 | # [x] get followees only 7 | # [ ] get only friends who are authors 8 | # [ ] get only followees who are authors 9 | # [x] discard threshold 10 | # [ ] check member attributees 11 | 12 | 13 | 14 | use diagnostics; # More debugging info 15 | use warnings; 16 | use strict; 17 | use FindBin; 18 | use local::lib "$FindBin::Bin/../lib/local/"; 19 | use lib "$FindBin::Bin/../lib/"; 20 | use lib "$FindBin::Bin/../t/"; 21 | use Test::More qw( no_plan ); 22 | use List::MoreUtils qw( duplicates ); 23 | 24 | 25 | use_ok( 'Goodscrapes' ); 26 | require( 'config.pl' ); 27 | 28 | 29 | # We should never use caching during real tests: 30 | # We need to test against the most up-to-date markup from Goodreads.com 31 | # Having no cache during development is annoying, tho. 
32 | # So we leave a small window: 33 | gsetopt( cache_days => 1 ); 34 | 35 | 36 | # Access to member lists needs some privileges: 37 | glogin( usermail => get_gooduser_mail(), 38 | userpass => get_gooduser_pass() ); 39 | 40 | 41 | my $userid = '2'; 42 | my $discard_threshold = 3; 43 | my %friends; 44 | my %followees; 45 | my %all; 46 | my %discarded_friends; 47 | my %discarded_followees; 48 | 49 | 50 | greadfolls( from_user_id => $userid, 51 | rh_into => \%friends, 52 | incl_followees => 0, 53 | incl_friends => 1, 54 | incl_authors => 1 ); 55 | 56 | greadfolls( from_user_id => $userid, 57 | rh_into => \%followees, 58 | incl_followees => 1, 59 | incl_friends => 0, 60 | incl_authors => 1 ); 61 | 62 | greadfolls( from_user_id => $userid, 63 | rh_into => \%all, 64 | incl_followees => 1, 65 | incl_friends => 1, 66 | incl_authors => 1 ); 67 | 68 | greadfolls( from_user_id => $userid, 69 | rh_into => \%discarded_friends, 70 | discard_threshold => $discard_threshold, 71 | incl_followees => 0, 72 | incl_friends => 1, 73 | incl_authors => 1 ); 74 | 75 | greadfolls( from_user_id => $userid, 76 | rh_into => \%discarded_followees, 77 | discard_threshold => $discard_threshold, 78 | incl_followees => 1, 79 | incl_friends => 0, 80 | incl_authors => 1 ); 81 | 82 | 83 | ok( exists $friends{1}, "Member $userid and Otis Chandler are friends" ); 84 | ok( exists $followees{21269}, "Member $userid is following Guy Kawasaki (author)" ); 85 | ok( exists $all{1} && exists $all{21269}, "Member $userid is friends with Otis Chandler and is following Guy Kawasaki (author)" ); # Combined query (friends + followees) must contain both 86 | ok( !%discarded_friends, "No friends returned if there are more than $discard_threshold" ); 87 | ok( !%discarded_followees, "No followees returned if there are more than $discard_threshold" ); 88 | 89 | 90 | my @kfriends = keys %friends; 91 | my @kfollowees = keys %followees; 92 | my @kall = keys %all; 93 | 94 | ok( !duplicates(( @kfriends, @kfollowees )), 'Friends and followees lists expected to be 
exclusive' ); 95 | 96 | is( scalar(@kall), scalar(duplicates(( @kfriends, @kfollowees, @kall ))), 'Friends and followees in all-list expected' ); 97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /t/greadreviews.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] latest and check attributes (detects changed markup) 5 | # [x] text only 6 | # [x] date range 7 | # [x] dict 8 | # [ ] 9 | # [ ] invalid arguments 10 | 11 | use diagnostics; # More debugging info 12 | use warnings; 13 | use strict; 14 | use FindBin; 15 | use local::lib "$FindBin::Bin/../lib/local/"; 16 | use lib "$FindBin::Bin/../lib/"; 17 | use Time::Piece; 18 | use Test::More qw( no_plan ); 19 | use List::MoreUtils qw( any all firstval ); 20 | 21 | 22 | use_ok( 'Goodscrapes' ); 23 | 24 | 25 | # We should never use caching during real tests: 26 | # We need to test against the most up-to-date markup from Goodreads.com 27 | # Having no cache during development is annoying, tho. 28 | # So we leave a small window: 29 | gsetopt( cache_days => 1 ); 30 | gsetopt( ignore_errors => 1 ); 31 | gsetopt( maxretries => 0 ); 32 | 33 | 34 | diag( 'takes ~1 minute' ); 35 | 36 | 37 | print( 'Loading reviews...' 
); 38 | 39 | my %reviews; 40 | my %reviews_textonly; 41 | my %reviews_by_dict; 42 | 43 | my %book; 44 | $book{id} = '984394'; # "Hacking the Xbox" 45 | $book{num_ratings} = 253; # This value can be obtained using greadbook() or ignored, it helps optimizing; TODO: constant might break test 46 | $book{num_reviews} = 28; # " " TODO: constant might break test 47 | my $since = Time::Piece->strptime( '2016-01-01', '%Y-%m-%d' ); 48 | 49 | 50 | greadreviews( rh_for_book => \%book, 51 | rigor => 0, # 0 = 300 reviews only (latest) 52 | rh_into => \%reviews, 53 | text_minlen => 0, 54 | since => $since, 55 | on_progress => gmeter()); 56 | 57 | greadreviews( rh_for_book => \%book, # Uses some cached values from query above, which is fine for this test 58 | rigor => 0, # 0 = 300 reviews only (latest) 59 | rh_into => \%reviews_textonly, 60 | text_minlen => 1, 61 | on_progress => gmeter()); 62 | 63 | greadreviews( rh_for_book => \%book, # Uses some cached values from query above, which is fine for this test 64 | rigor => 3, # Include dict in every case 65 | rh_into => \%reviews_by_dict, 66 | dict_path => "$FindBin::Bin/../list-in/test.lst", 67 | text_minlen => 1, 68 | on_progress => gmeter()); 69 | 70 | print( "\n" ); 71 | 72 | 73 | # Check numbers: 74 | my $num_reviews = scalar( keys( %reviews )); 75 | my $num_reviews_textonly = scalar( keys( %reviews_textonly )); 76 | my $num_reviews_by_dict = scalar( keys( %reviews_by_dict )); 77 | 78 | ok( $num_reviews > 0, 'Load some reviews' ) 79 | or BAIL_OUT( "Cannot test review attributes when there are no reviews." ); 80 | 81 | ok( $num_reviews_textonly > 0, 'Load some text reviews' ) 82 | or BAIL_OUT( "Cannot test text reviews when there are no text reviews." ); 83 | 84 | ok( $num_reviews_by_dict >= $num_reviews_textonly, 'Load more or equal number of reviews compared to rigor-level 0' ) 85 | or BAIL_OUT( "Book specimen might not sufficient for this test anymore or adjust book's num_reviews constant in this testfile. 
Expected #reviews from dict ($num_reviews_by_dict) >= #reviews from latest ($num_reviews_textonly)" ); 86 | 87 | 88 | # Check contents: 89 | ok(( !all { $_->{text} } values( %reviews )), 'Reviews include text and non-text ratings'); 90 | ok(( all { $_->{text} } values( %reviews_textonly )), 'All reviews include text'); 91 | ok(( all { $_->{text} } values( %reviews_by_dict )), 'All dict-searched reviews include text'); 92 | 93 | 94 | # Check contents in detail: 95 | map { 96 | ok ( $_->{rating} >= 0, "Review $_->{id} has rating" ); 97 | ok ( $_->{rating_str}, "Review $_->{id} has rating code" ); 98 | #ok ( $_->{text}, "Review $_->{id} has text" ); # Often no text but just stars 99 | #ok ( $_->{date}->year > 2005, "Review $_->{id} has date > 2006 (got date: '$_->{date}')" ); # GR was founded 2007, but there are reviews from 2006, e.g., #454926175 100 | ok ( $_->{date} >= $since, "Review $_->{id} isn't older than ".$since->strftime( "%Y-%m-%d" )); 101 | is ( $_->{book_id}, $book{id}, "Review $_->{id} has Goodreads book ID" ); 102 | like( $_->{id}, qr/^\d+$/, "Review $_->{id} has ID" ); 103 | like( $_->{url}, qr/^https:\/\/www\.goodreads\.com\/review\/show\//, "Review $_->{id} has URL" ); 104 | like( $_->{rh_user}->{url}, qr/^https:\/\/www\.goodreads\.com\/user\/show\//, "Review $_->{id} has author URL" ); 105 | like( $_->{rh_user}->{id}, qr/^\d+$/, "Review $_->{id} has author ID" ); 106 | like( $_->{rh_user}->{img_url}, qr/^https:\/\/[a-z0-9]+\.gr-assets\.com\//, "Review $_->{id} has author image URL" ); 107 | ok ( $_->{rh_user}->{name}, "Review $_->{id} has author name: $_->{rh_user}->{name}" ); 108 | ok ( $_->{rh_user}->{name_lf}, "Review $_->{id} has author lastname, firstname" ); 109 | 110 | 111 | # Not available or scraped yet, otherwise one of the following 112 | # tests will fail and remind me of implementing a correct test: 113 | is ( $_->{rh_user}->{is_private}, undef, 'N/A: User is private' ); 114 | is ( $_->{rh_user}->{is_female}, undef, 'N/A: User gender' 
); 115 | is ( $_->{rh_user}->{is_author}, undef, 'N/A: User is author' ); 116 | is ( $_->{rh_user}->{is_staff}, undef, 'N/A: User is Goodreads employee' ); 117 | is ( $_->{rh_user}->{is_friend}, undef, 'N/A: User friend status' ); 118 | is ( $_->{rh_user}->{is_mainstream}, undef, 'N/A: User mainstream status' ); 119 | is ( $_->{rh_user}->{residence}, undef, 'N/A: User residence' ); 120 | is ( $_->{rh_user}->{age}, undef, 'N/A: User age' ); 121 | is ( $_->{rh_user}->{num_books}, undef, 'N/A: Number of books' ); # Works or books read? 122 | is ( $_->{rh_user}->{works_url}, undef, 'N/A: Works URL if author' ); 123 | } values( %reviews ); 124 | 125 | 126 | -------------------------------------------------------------------------------- /t/greadshelf.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] Read shelf, find specific book, check all attributes (detects changed markup) 5 | # [ ] Reading from multiple shelves 6 | # [ ] invalid arguments 7 | # [ ] 8 | 9 | 10 | 11 | use diagnostics; # More debugging info 12 | use warnings; 13 | use strict; 14 | use FindBin; 15 | use local::lib "$FindBin::Bin/../lib/local/"; 16 | use lib "$FindBin::Bin/../lib/"; 17 | use Test::More qw( no_plan ); 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | # We should never use caching during real tests: 24 | # We need to test against the most up-to-date markup from Goodreads.com 25 | # Having no cache during development is annoying, tho. 26 | # So we leave a small window: 27 | gsetopt( cache_days => 1 ); 28 | 29 | 30 | print( 'Reading book shelf... 
'); 31 | 32 | my %books; 33 | 34 | greadshelf( from_user_id => 2, # "Odawg" (GR employee; 1 is GR founder Otis Chandler, but too many books = test too long) 35 | ra_from_shelves => [ 'read' ], 36 | rh_into => \%books, 37 | # on_book => sub{}, 38 | on_progress => gmeter( 'books' ) ); 39 | 40 | print( "\n" ); 41 | 42 | 43 | ok( scalar( keys( %books )) > 50, 'At least 50 books read from shelf' ); 44 | 45 | ok( exists( $books{5759} ), 'Expected book found via hash-key = Goodreads book ID' ) 46 | or BAIL_OUT( "Cannot test book attributes when expected book is missing." ); 47 | 48 | 49 | my $b = $books{5759}; 50 | 51 | isa_ok( $b, 'HASH', 'Book datatype' ); 52 | 53 | is ( $b->{id}, '5759', 'Book has Goodreads ID' ); 54 | is ( $b->{year}, 1996, 'Book has pub-year' ); 55 | is ( $b->{year_edit}, 2005, 'Book edition has pub-year' ); 56 | is ( $b->{isbn}, '0393327345', 'Book has ISBN' ); 57 | is ( $b->{isbn13}, '9780393327342', 'Book has ISBN13' ); 58 | ok ( $b->{avg_rating} > 2, 'Book has average rating' ); 59 | is ( $b->{num_pages}, 218, 'Book has number of pages' ); 60 | ok ( $b->{num_ratings} > 190000, 'Book has number of ratings' ); 61 | is ( $b->{format}, 'Paperback', 'Book has format' ); 62 | is ( $b->{title}, 'Fight Club', 'Book has title' ); 63 | ok ( $b->{stars} > 2, 'Book has stars rating' ); 64 | is ( $b->{url}, 'https://www.goodreads.com/book/show/5759', 'Book has URL' ); 65 | like( $b->{img_url}, qr/^https:.*\.jpg$/, 'Book has image URL' ); 66 | like( $b->{review_id}, qr/^\d+$/, 'Book has user review ID' ); 67 | ok ( $b->{user_rating} > 2, 'User rating' ); 68 | ok ( $b->{user_read_count} > 0, 'User read count' ); 69 | ok ( $b->{user_date_added}->year > 2006, 'User addition-date > 2006' ); # GR was founded in 2007 70 | is ( $b->{user_num_owned}, 0, 'Number of user-owned books' ); 71 | 72 | is ( $b->{rh_author}->{id}, '2546', 'Book has author ID' ); 73 | is ( $b->{rh_author}->{name_lf}, 'Palahniuk, Chuck', 'Book has author name' ); 74 | is ( 
$b->{rh_author}->{url}, 'https://www.goodreads.com/author/show/2546', 'Book has author URL' ); 75 | like( $b->{rh_author}->{works_url}, qr/^https:\/\/www\.goodreads\.com\/author\/list\/2546/, 'Book has author works URL' ); 76 | is ( $b->{rh_author}->{is_author}, 1, 'Book author has author flag' ); 77 | is ( $b->{rh_author}->{is_private}, 0, 'Book author not private' ); 78 | is ( $b->{rh_author}->{is_mainstream}, 1, 'Author is mainstream' ); 79 | is ( $b->{rh_author}->{user_avg_rating}, 5, 'user avg rating' ); 80 | is ( $b->{rh_author}->{user_min_rating}, 5, 'user min rating' ); 81 | is ( $b->{rh_author}->{user_max_rating}, 5, 'user max rating' ); 82 | 83 | 84 | # Not available or scraped yet, otherwise one of the following 85 | # tests will fail and remind me of implementing a correct test: 86 | is ( $b->{rh_author}->{residence}, undef, 'N/A: Author residence' ); 87 | is ( $b->{rh_author}->{img_url}, undef, 'N/A: Author image URL' ); 88 | is ( $b->{rh_author}->{is_staff}, undef, 'N/A: Is Goodreads author' ); 89 | is ( $b->{rh_author}->{is_female}, undef, 'N/A: Author gender' ); 90 | is ( $b->{rh_author}->{is_friend}, undef, 'N/A: Author friend status' ); 91 | is ( $b->{rh_author}->{num_books}, undef, 'N/A: Number of author books' ); 92 | is ( $b->{rh_author}->{num_reviews}, undef, 'N/A: Number of book reviews' ); 93 | #is ( $b->{user_date_read}, undef, 'N/A: User reading-date' ); # TODO 94 | is ( scalar( @{$b->{ra_user_shelves}} ), 0, 'N/A: User shelves for book' ); 95 | 96 | -------------------------------------------------------------------------------- /t/greadshelfnames.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] read all shelf names of another Goodreads member, exclude some shelves 5 | 6 | 7 | use diagnostics; # More debugging info 8 | use warnings; 9 | use strict; 10 | use FindBin; 11 | use local::lib "$FindBin::Bin/../lib/local/"; 12 | use lib 
"$FindBin::Bin/../lib/"; 13 | use lib "$FindBin::Bin/../t/"; 14 | use Test::More qw( no_plan ); 15 | use List::MoreUtils qw( any none ); 16 | 17 | 18 | use_ok( 'Goodscrapes' ); 19 | require( 'config.pl' ); 20 | 21 | 22 | # We should never use caching during real tests: 23 | # We need to test against the most up-to-date markup from Goodreads.com 24 | # Having no cache during development is annoying, tho. 25 | # So we leave a small window: 26 | gsetopt( cache_days => 1 ); 27 | 28 | 29 | # At the moment, functionality is just available to signed-in users: 30 | glogin( usermail => get_gooduser_mail(), 31 | userpass => get_gooduser_pass() ); 32 | 33 | 34 | # Because scraping *all* shelf names is more nasty than you would expect, 35 | # it got its own command (more commentary see function in Goodscrapes.pm): 36 | my @shelfnames; 37 | 38 | greadshelfnames( from_user_id => '1', # Otis Chandler 39 | ra_into => \@shelfnames, 40 | ra_exclude => [ 'to-read', 'nonfiction' ]); 41 | 42 | 43 | # Otis Chandler has so many shelves that they are paginated. 
44 | # This test includes some shelves from page 2 too: 45 | ok( (any { $_ eq 'read' } @shelfnames), 'User has shelf' ); 46 | ok( (any { $_ eq 'currently-reading' } @shelfnames), 'User has shelf' ); 47 | ok( (any { $_ eq 'health' } @shelfnames), 'User has shelf' ); 48 | ok( (any { $_ eq 'submarine' } @shelfnames), 'User has shelf' ); 49 | ok( (any { $_ eq 'travel' } @shelfnames), 'User has shelf' ); 50 | ok( (none{ $_ eq 'to-read' } @shelfnames), 'User shelf was excluded'); 51 | ok( (none{ $_ eq 'nonfiction' } @shelfnames), 'User shelf was excluded' ); 52 | 53 | -------------------------------------------------------------------------------- /t/greadsimilaraut.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] 5 | # [ ] 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | use List::MoreUtils qw( any firstval ); 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | # We should never use caching during real tests: 24 | # We need to test against the most up-to-date markup from Goodreads.com 25 | # Having no cache during development is annoying, tho. 26 | # So we leave a small window: 27 | gsetopt( cache_days => 1 ); 28 | 29 | 30 | print( 'Reading book shelf... 
'); 31 | 32 | my %authors; 33 | my $SIMILAR_AUTHOR_ID = '9876'; # John Milton 34 | 35 | greadsimilaraut( author_id => '1734373', # Karl Held 36 | rh_into => \%authors, 37 | on_progress => gmeter( 'similar' )); 38 | 39 | 40 | 41 | print( "\n" ); 42 | 43 | 44 | ok( scalar( keys( %authors )) >= 4, 'At least 4 similar authors' ); 45 | 46 | ok( exists( $authors{$SIMILAR_AUTHOR_ID} ), 'Expected author found via hash-key = Goodreads author ID' ) 47 | or BAIL_OUT( "Cannot test author attributes when expected author is missing." ); 48 | 49 | 50 | my $author = $authors{$SIMILAR_AUTHOR_ID}; # Not named $a: a lexical $a would hide the special sort variable 51 | 52 | isa_ok( $author, 'HASH', 'Author datatype' ); 53 | is ( $author->{id}, $SIMILAR_AUTHOR_ID, 'Author has ID' ); 54 | is ( $author->{name}, 'John Milton', 'Author has name' ); 55 | is ( $author->{url}, "https://www.goodreads.com/author/show/$SIMILAR_AUTHOR_ID", 'Author has URL' ); 56 | like( $author->{works_url}, qr/^https:\/\/www\.goodreads\.com\/author\/list\/$SIMILAR_AUTHOR_ID/, 'Author has works URL' ); 57 | like( $author->{img_url}, qr/\.jpg$/, 'Author has image URL' ); 58 | is ( $author->{is_author}, 1, 'Author has author flag' ); 59 | is ( $author->{is_private}, 0, 'Author not private' ); 60 | ok ( $author->{is_mainstream}, 'is mainstream author' ); 61 | 62 | # Not available or scraped yet, otherwise one of the following 63 | # tests will fail and remind me of implementing a correct test: 64 | is ( $author->{name_lf}, $author->{name}, 'N/A: author name != name_lf' ); # "Dick, Philip K." 
65 | is ( $author->{residence}, undef, 'N/A: author residence' ); 66 | is ( $author->{age}, undef, 'N/A: author age' ); 67 | is ( $author->{num_books}, undef, 'N/A: number of author books' ); 68 | is ( $author->{is_friend}, undef, 'N/A: author friend status' ); 69 | is ( $author->{is_female}, undef, 'N/A: author gender status' ); 70 | is ( $author->{is_staff}, undef, 'N/A: is Goodreads author' ); 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /t/greaduser.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] read normal user info and check attributes (detects changed markup) 5 | # [x] read author user info and check attributes (detects changed markup) 6 | # [ ] private users 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use lib "$FindBin::Bin/../t/"; 17 | use Test::More qw( no_plan ); 18 | use List::MoreUtils qw( any firstval ); 19 | 20 | 21 | use_ok( 'Goodscrapes' ); 22 | require( 'config.pl' ); 23 | 24 | 25 | # We should never use caching during real tests: 26 | # We need to test against the most up-to-date markup from Goodreads.com 27 | # Having no cache during development is annoying, tho. 
28 | # So we leave a small window: 29 | gsetopt( cache_days => 1 ); 30 | 31 | 32 | # Some info is only available to authenticated users: 33 | glogin( usermail => get_gooduser_mail(), 34 | userpass => get_gooduser_pass() ); 35 | 36 | 37 | # Normal user: 38 | my $user_id = '62730330'; 39 | my %u = greaduser( $user_id ); 40 | 41 | is ( $u{id}, $user_id, 'User has Goodreads ID' ); 42 | is ( $u{name}, 'Paola Quiros (pbarrant)', 'User has name' ); 43 | is ( $u{is_female}, 1, 'User is female' ); 44 | ok ( $u{num_books} > 10, 'User has number of books' ); 45 | ok ( $u{age} >= 38, 'User has age' ); # login 46 | #is ( $u{residence}, '', 'User has residence' ); # login 47 | #is ( $u{is_private}, 0, 'User is not private' ); # login 48 | is ( $u{is_author}, 0, 'User not an author' ); 49 | is ( $u{is_staff}, 1, 'User is GR employee' ); 50 | is ( $u{url}, 'https://www.goodreads.com/user/show/' . $user_id, 'User has URL' ); 51 | like( $u{img_url}, qr/^https:\/\/[a-z0-9]+\.gr-assets\.com\//, 'User has image URL' ); 52 | is ( $u{works_url}, undef, 'User has no works URL (not an author) ' ); 53 | 54 | # Not available or scraped yet, otherwise one of the following 55 | # tests will fail and remind me of implementing a correct test: 56 | is ( $u{is_friend}, undef, 'Not avail: user friend status' ); 57 | 58 | 59 | # Author user (second argument 1 as in the original call): 60 | 61 | my %au = greaduser( '2546', 1 ); 62 | 63 | is ( $au{id}, '2546', 'Author has ID' ); 64 | is ( $au{name}, 'Chuck Palahniuk', 'Author has name' ); 65 | is ( $au{url}, 'https://www.goodreads.com/author/show/2546', 'Author has URL' ); 66 | like( $au{works_url}, qr/^https:\/\/www\.goodreads\.com\/author\/list\/2546/, 'Author has works URL' ); 67 | like( $au{img_url}, qr/^https:\/\/images\.gr-assets\.com/, 'Author has image URL' ); 68 | is ( $au{is_author}, 1, 'Author has author flag' ); 69 | is ( $au{is_private}, 0, 'Author not private' ); 70 | is ( $au{is_staff}, 1, 'Goodreads author' ); 71 | ok ( $au{num_books} > 10, 'Author > 10 books' ); 72 | ok 
(!$au{is_mainstream}, 'Author is not mainstream' ); 73 | 74 | # Not available or scraped yet, otherwise one of the following 75 | # tests will fail and remind me of implementing a correct test: 76 | is ( $au{is_friend}, undef, 'N/A: author friend status' ); 77 | is ( $au{is_female}, undef, 'N/A: author gender status' ); 78 | is ( $au{residence}, undef, 'N/A: author residence' ); 79 | 80 | 81 | #use Data::Dumper; 82 | #print Dumper(%u); 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /t/greadusergrp.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] Read groups and check attributes (detects changed markup) 5 | # [ ] 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | use List::MoreUtils qw( any firstval ); 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | # We should never use caching during real tests: 24 | # We need to test against the most up-to-date markup from Goodreads.com 25 | # Having no cache during development is annoying, tho. 26 | # So we leave a small window: 27 | gsetopt( cache_days => 1 ); 28 | 29 | 30 | print( "Getting groups... " ); 31 | 32 | my %groups; 33 | 34 | greadusergp( from_user_id => '1', # "Otis Chandler" (GR founder) 35 | rh_into => \%groups, 36 | # on_group => sub{}, 37 | on_progress => gmeter( 'groups' )); 38 | 39 | print( "\n" ); 40 | 41 | ok( scalar( keys( %groups )) >= 70, 'At least 70 groups (3 pages)' ); # Chandler had 127; >= matches the "at least" wording 42 | 43 | ok( exists( $groups{8095} ), 'Expected group found via hash-key = Goodreads group ID' ) 44 | or BAIL_OUT( "Cannot test group attributes when expected group is missing." 
); 45 | 46 | my $g = $groups{8095}; 47 | 48 | is( $g->{id}, '8095', 'Group has Goodreads ID' ); 49 | is( $g->{name}, 'Goodreads Developers', 'Group has name' ); 50 | ok( $g->{num_members} > 1000, 'Group has number of members' ); 51 | is( $g->{url}, 'https://www.goodreads.com/group/show/8095', 'Group has URL' ); 52 | is( $g->{img_url}, 'https://images.gr-assets.com/groups/1220414390p2/8095.jpg', 'Group has image URL' ); 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /t/gsearch.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] getting books with expected attributes (detects changes in markup) 5 | # [ ] order 6 | # [ ] num_ratings 7 | # [ ] exact matches 8 | # [ ] invalid arguments 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | use List::MoreUtils qw( firstval ); 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | # We should never use caching during real tests: 24 | # We need to test against the most up-to-date markup from Goodreads.com 25 | # Having no cache during development is annoying, tho. 26 | # So we leave a small window: 27 | gsetopt( cache_days => 1 ); 28 | gsetopt( ignore_errors => 1 ); 29 | gsetopt( maxretries => 0 ); 30 | 31 | 32 | diag( 'takes ~8 minutes' ); 33 | 34 | 35 | print( 'Searching books... 
' ); 36 | 37 | my @books; 38 | gsearch( phrase => 'Linux', 39 | ra_into => \@books, 40 | is_exact => 0, 41 | ra_order_by => [ 'stars', 'num_ratings', 'year' ], 42 | num_ratings => 5, 43 | on_progress => gmeter()); 44 | 45 | print( "\n" ); 46 | 47 | my $numbooks = scalar( @books ); 48 | ok( $numbooks > 450, "More than 450 results, got $numbooks" ); # threshold was 500, later 480 49 | 50 | my $BOOK_ID = '8474434'; 51 | 52 | my $book = firstval{ $_->{id} eq $BOOK_ID } @books; # Not named $b: a lexical $b would hide the special sort variable 53 | 54 | isa_ok( $book, 'HASH', 'Book datatype' ) 55 | or BAIL_OUT( "Cannot test book attributes when expected book is missing." ); 56 | 57 | 58 | is ( $book->{id}, $BOOK_ID, 'Book has Goodreads ID' ); 59 | is ( $book->{title}, 'Linux Kernel Development', 'Book has title' ); 60 | is ( $book->{url}, 'https://www.goodreads.com/book/show/'.$BOOK_ID, 'Book has URL' ); 61 | like( $book->{img_url}, qr/^https:.*\.(jpg|png)$/, 'Book has image URL' ); 62 | ok ( $book->{stars} > 0, 'Book has stars rating' ); 63 | ok ( $book->{avg_rating} > 0, 'Book has average rating' ); 64 | ok ( $book->{num_ratings} > 0, 'Book has number of ratings' ); 65 | is ( $book->{year}, 2003, 'Book has year published' ); 66 | is ( $book->{rh_author}->{id}, '13609144', 'Book has author ID' ); 67 | is ( $book->{rh_author}->{name}, 'Robert Love', 'Book has author name' ); 68 | is ( $book->{rh_author}->{url}, 'https://www.goodreads.com/author/show/13609144', 'Book has author URL' ); 69 | like( $book->{rh_author}->{works_url}, qr/^https:\/\/www\.goodreads\.com\/author\/list\/13609144/, 'Book has author works URL' ); 70 | is ( $book->{rh_author}->{is_author}, 1, 'Book author has author flag' ); 71 | is ( $book->{rh_author}->{is_private}, 0, 'Book author not private' ); 72 | ok (!$book->{rh_author}->{is_mainstream}, 'Book author not mainstream author' ); 73 | 74 | 75 | 76 | # Not available or scraped yet, otherwise one of the following 77 | # tests will fail and remind me of implementing a correct test: 78 | is ( $book->{rh_author}->{name_lf}, $book->{rh_author}->{name}, 'N/A: Author name_lf != name' ); 
79 | is ( $book->{rh_author}->{residence}, undef, 'N/A: Author residence' ); 80 | like( $book->{rh_author}->{img_url}, qr/nophoto/, 'N/A: Author real image URL' ); 81 | is ( $book->{rh_author}->{is_staff}, undef, 'N/A: Is Goodreads author' ); 82 | is ( $book->{rh_author}->{is_female}, undef, 'N/A: Author gender' ); 83 | is ( $book->{rh_author}->{is_friend}, undef, 'N/A: Author friend status' ); 84 | is ( $book->{rh_author}->{num_books}, undef, 'N/A: Number of author books' ); 85 | is ( $book->{year_edit}, undef, 'N/A: Book edition pub-year' ); 86 | is ( $book->{isbn}, undef, 'N/A: Book ISBN' ); 87 | is ( $book->{isbn13}, undef, 'N/A: Book ISBN13' ); 88 | is ( $book->{num_pages}, undef, 'N/A: Book number of pages' ); 89 | is ( $book->{format}, undef, 'N/A: Book format' ); 90 | is ( $book->{review_id}, undef, 'N/A: User book review ID' ); 91 | is ( $book->{user_rating}, undef, 'N/A: User book rating' ); 92 | is ( $book->{user_read_count}, undef, 'N/A: User read count' ); 93 | is ( $book->{user_date_added}, undef, 'N/A: User addition-date' ); 94 | is ( $book->{num_reviews}, undef, 'N/A: Number of book reviews' ); 95 | is ( $book->{user_num_owned}, undef, 'N/A: Number user-owned books' ); 96 | is ( $book->{user_date_read}, undef, 'N/A: User reading-date' ); 97 | is ( scalar( @{$book->{ra_user_shelves}} ), 0, 'N/A: User shelves for book' ); 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /t/gsocialnet.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] module loads (real test cases are still TODO) 5 | # [ ] 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | 11 | use diagnostics; # More debugging info 12 | use warnings; 13 | use strict; 14 | use FindBin; 15 | use local::lib "$FindBin::Bin/../lib/local/"; 16 | use lib "$FindBin::Bin/../lib/"; 17 | use Test::More qw( no_plan ); 18 | use List::MoreUtils qw( any firstval ); 19 | 20 | 21 | use_ok( 'Goodscrapes' ); 22 | 23 | 24 | diag( "Tests TODO" ); 25 | ok( 1 ); 26 | 27 | 28 | 
-------------------------------------------------------------------------------- /t/gverifyshelf.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] shelf name corrections (displayed name vs real ids) 5 | # [ ] invalid shelves 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | 18 | 19 | use_ok( 'Goodscrapes' ); 20 | 21 | # Internal vs displayed shelf names for default GR shelves; look-alike names with an extra prefix/suffix must not be mapped onto them: 22 | 23 | is ( gverifyshelf( '#ALL#' ), '#ALL#', 'Shelf valid' ); 24 | is ( gverifyshelf( 'AlL' ), '#ALL#', 'Shelf corrected' ); 25 | is ( gverifyshelf( '#AlL' ), '#ALL#', 'Shelf corrected' ); 26 | isnt( gverifyshelf( 'all-x' ), '#ALL#', 'Shelf not corrected' ); 27 | isnt( gverifyshelf( 'x-all' ), '#ALL#', 'Shelf not corrected' ); 28 | isnt( gverifyshelf( 'x-all-x' ), '#ALL#', 'Shelf not corrected' ); 29 | 30 | is ( gverifyshelf( 'read' ), 'read', 'Shelf valid' ); 31 | is ( gverifyshelf( 'ReAd' ), 'read', 'Shelf corrected' ); 32 | isnt( gverifyshelf( 'x-read' ), 'read', 'Shelf not corrected' ); 33 | isnt( gverifyshelf( 'read-x' ), 'read', 'Shelf not corrected' ); 34 | isnt( gverifyshelf( 'x-read-x' ), 'read', 'Shelf not corrected' ); 35 | 36 | is ( gverifyshelf( 'currently-reading' ), 'currently-reading', 'Shelf valid' ); 37 | is ( gverifyshelf( 'CurrEntly_ReAding' ), 'currently-reading', 'Shelf corrected' ); 38 | isnt( gverifyshelf( 'x-currently-reading' ), 'currently-reading', 'Shelf not corrected' ); 39 | isnt( gverifyshelf( 'currently-reading-x' ), 'currently-reading', 'Shelf not corrected' ); 40 | isnt( gverifyshelf( 'x-currently-reading-x' ), 'currently-reading', 'Shelf not corrected' ); 41 | 42 | is ( gverifyshelf( 'to-read' ), 'to-read', 'Shelf valid' ); 43 | is ( gverifyshelf( 'tO_ReaD' 
), 'to-read', 'Shelf corrected' ); 44 | is ( gverifyshelf( 'Want-To_ReAd' ), 'to-read', 'Shelf corrected' ); # A user could really have such a shelf, but a misspelling of the default shelf is more likely 45 | isnt( gverifyshelf( 'x-to-read' ), 'to-read', 'Shelf not corrected' ); 46 | isnt( gverifyshelf( 'to-read-x' ), 'to-read', 'Shelf not corrected' ); 47 | isnt( gverifyshelf( 'x-to-read-x' ), 'to-read', 'Shelf not corrected' ); 48 | 49 | 50 | # User created shelves: 51 | 52 | is( gverifyshelf( 'UsEr_CreaTed-shElf' ), 'user_created-shelf', 'Shelf "UsEr_CreaTed-shElf" corrected to lowercase' ); 53 | 54 | 55 | # Invalid shelves: 56 | 57 | # @TODO: not tested here yet; t/gverifyxxx.t already checks that '^@#', '' and undef make gverifyshelf die 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /t/gverifyxxx.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Test cases realized: 4 | # [x] sanitization 5 | # [x] invalid/empty/missing argument -> die 6 | # [ ] 7 | # [ ] 8 | 9 | 10 | use diagnostics; # More debugging info 11 | use warnings; 12 | use strict; 13 | use FindBin; 14 | use local::lib "$FindBin::Bin/../lib/local/"; 15 | use lib "$FindBin::Bin/../lib/"; 16 | use Test::More qw( no_plan ); 17 | use Test::Exception; 18 | 19 | 20 | use_ok( 'Goodscrapes' ); 21 | 22 | 23 | is( gverifyuser( '123' ), '123', 'Valid user ID' ); 24 | is( gverifyuser( '123-username' ), '123', 'Sanitized user ID' ); 25 | 26 | dies_ok( sub{ gverifyuser( 'username' ); }, 'Invalid user ID' ); 27 | dies_ok( sub{ gverifyuser( '' ); }, 'Empty user ID' ); 28 | dies_ok( sub{ gverifyuser( undef ); }, 'Missing user ID' ); 29 | 30 | 31 | is( gverifyshelf( 'myshelf' ), 'myshelf', 'Valid shelf name' ); 32 | 33 | dies_ok( sub{ gverifyshelf( '^@#' ); }, 'Invalid shelf name' ); 34 | dies_ok( sub{ gverifyshelf( '' ); }, 'Empty shelf name' ); 35 | dies_ok( sub{ gverifyshelf( undef ); }, 'Missing shelf name' ); 36 | 37 | 38 | --------------------------------------------------------------------------------