├── .dockerignore
├── .gitignore
├── .gitmodules
├── AUTHORS.md
├── CHANGELOG.md
├── Dockerfile
├── GITHUB.txt
├── INSTALL.txt
├── LICENSE
├── Makefile
├── README.md
├── amz-tradein.pl
├── cron.daily
    └── goodratings.example
├── friendgroup.pl
├── friendnet.pl
├── friendrated.pl
├── git-hooks
    ├── pre-commit
    └── pre-push
├── help
    ├── GOODTIPS.md
    ├── amz-tradein.md
    ├── friendgroup.md
    ├── friendnet.md
    ├── friendrated.md
    ├── img
    │   ├── friendgroup.png
    │   ├── friendrated.png
    │   ├── friendrated2.png
    │   ├── friendrated3.png
    │   ├── likeminded.png
    │   ├── search.png
    │   └── similarauth.png
    ├── likeminded.md
    ├── recentrated.md
    ├── savreviews.md
    ├── search.md
    └── similarauth.md
├── lib
    ├── Goodscrapes.html
    ├── Goodscrapes.pm
    └── Goodscrapes.pod
├── likeminded.pl
├── list-in
    ├── README.md
    ├── dict.lst
    ├── gram-en-l,word-en-1k.lst
    ├── gram-en-l.lst
    ├── gram-en-s.lst
    ├── test.lst
    ├── word-en-1k.lst
    ├── word-en-3k.lst
    └── word-en-s.lst
├── list-out
    └── README.md
├── recentrated.pl
├── savreviews.pl
├── search.pl
├── similarauth.pl
└── t
    ├── README.md
    ├── config.pl-example
    ├── ghtmlxxx.t
    ├── gisxxx.t
    ├── glogin.t
    ├── gmeter.t
    ├── greadauthorbk.t
    ├── greadauthors.t
    ├── greadbook.t
    ├── greadcomments.t
    ├── greadfolls.t
    ├── greadreviews.t
    ├── greadshelf.t
    ├── greadshelfnames.t
    ├── greadsimilaraut.t
    ├── greaduser.t
    ├── greadusergrp.t
    ├── gsearch.t
    ├── gsocialnet.t
    ├── gverifyshelf.t
    └── gverifyxxx.t


/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Ignore everything:
 2 | **
 3 | 
 4 | # Allow files and directories:
 5 | !/LICENSE
 6 | !/Dockerfile
 7 | 
 8 | !/*.md
 9 | 
10 | !/Makefile
11 | 
12 | !/friendgroup.pl
13 | !/friendnet.pl
14 | !/friendrated.pl
15 | !/likeminded.pl
16 | !/recentrated.pl
17 | !/savreviews.pl
18 | !/search.pl
19 | !/similarauth.pl
20 | 
21 | !/lib/*.pm
22 | !/lib/*.pl
23 | !/lib/*.pod
24 | 
25 | !/list-in/*.lst
26 | # "list-out" will be created empty
27 | 
28 | !/help/*.md
29 | !/help/img/**
30 | 
31 | !/t/*.t
32 | !/t/*.md
33 | !/t/config.pl-example
34 | 
35 | 
36 | # REMOVE LATER AGAIN!
37 | #!/t/config.pl
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | *.bak
 3 | *.tmp
 4 | *.csv
 5 | *.swp
 6 | *.xml
 7 | *.html
 8 | !lib/Goodscrapes.html
 9 | *.log
10 | *.tar.gz
11 | *.zip
12 | *.sh
13 | *.xcf
14 | *.secret
15 | *.token
16 | lib/local/*
17 | t/config.pl
18 | .build
19 | .obsolete
20 | TODO.txt
21 | cve
22 | lib/Goodgrief*
23 | staff*
24 | savreviews-*.txt
25 | revs-*.txt
26 | q
27 | export.pl
28 | cron.daily/goodratings.sh
29 | list-out/recentrated*
30 | 
31 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "www"]
2 | 	path = www
3 | 	url = https://github.com/andre-st/goodreads-www.git
4 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | # Authors/Contributors
2 | 
3 | | Name          | Contact                      | Dev |  i18n  | Test | Doc | Release |
4 | |---------------|------------------------------|:---:|:------:|:----:|:---:|:-------:|
5 | | André St.     | <datakadabra@gmail.com>      |  X  |    /   |   X  |  X  |    X    |
6 | | ...           |                              |     |        |      |     |         |
7 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # The final image is around 138 MB,
 2 | # Build time is around 7 minutes
 3 | #
 4 | FROM alpine:latest
 5 | 
 6 | # ----------------------------------------------------------------------------
 7 | # Configuring the image:
 8 | 
 9 | ENV PROGDIR=/root
10 | ENV HTPORT=80
11 | ENV HTDOCS=$PROGDIR/list-out
12 | ARG BUILD_DATE
13 | ARG PROJECT_VERSION
14 | VOLUME /tmp/FileCache
15 | EXPOSE $HTPORT
16 | 
17 | # About:
18 | # http://label-schema.org/rc1/
19 | LABEL org.label-schema.schema-version="1.0"
20 | LABEL org.label-schema.name="Andre's Goodreads Toolbox"
21 | LABEL org.label-schema.description="Tools for Goodreads.com, for finding people based on the books they've read, finding books popular among the people you follow, following new book reviews, etc"
22 | LABEL org.label-schema.maintainer="datakadabra@gmail.com"
23 | LABEL org.label-schema.build-date=$BUILD_DATE
24 | LABEL org.label-schema.version=$PROJECT_VERSION
25 | LABEL org.label-schema.url="https://github.com/andre-st/goodreads-toolbox/blob/master/README.md"
26 | LABEL org.label-schema.vcs-url="https://github.com/andre-st/goodreads-toolbox/"
27 | LABEL org.opencontainers.image.source="https://github.com/andre-st/goodreads-toolbox/"
28 | 
29 | 
30 | # ----------------------------------------------------------------------------
31 | # Building the image:
32 | 
33 | # Use .dockerignore to exclude everything but the minimum necessary set of files.
34 | COPY . $PROGDIR
35 | 
36 | WORKDIR $PROGDIR/
37 | # NOTE(review): the ';' separators below drop the exit status of `make`/`apk del`, so a failed dependency install still produces an image; `{man,doc,info,groff}` and `$'...'` rely on bash-style expansion in the RUN shell — confirm both take effect on Alpine's /bin/sh.
38 | RUN apk add --no-cache      \
39 | 			build-base   \
40 | 			zlib-dev     \
41 | 			bash         \
42 | 			openssl      \
43 | 			openssl-dev  \
44 | 			perl-dev     \
45 | 			perl-doc     \
46 | 			thttpd       \
47 | 	&& make                                             \
48 | 	&& apk del --purge build-base openssl-dev zlib-dev  \
49 | 	;  rm -rf                                     \
50 | 			/usr/share/{man,doc,info,groff}/*   \
51 | 			$HOME/.cpan/build/*                 \
52 | 			$HOME/.cpan/sources/authors/id      \
53 | 			$HOME/.cpan/cpan_sqlite_log.*       \
54 | 			/tmp/cpan_install_*.txt             \
55 | 	; echo $'\
56 | echo "*******************************************"\n\
57 | echo "*** WELCOME TO ANDRES GOODREADS TOOLBOX ***"\n\
58 | echo "*******************************************"\n\
59 | echo "Available Tools:"\n\
60 | ls -1 *.pl | nl -bn \n\
61 | ' > $HOME/.bashrc
62 | 
63 | 
64 | # ----------------------------------------------------------------------------
65 | # Running the container:
66 | 
67 | # bash already in WORKDIR:
68 | ENTRYPOINT  thttpd -h 0.0.0.0 -p $HTPORT -d $HTDOCS -l /dev/null  &&  bash
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/GITHUB.txt:
--------------------------------------------------------------------------------
 1 | Repository Name:
 2 | 
 3 | goodreads
 4 | 
 5 | 
 6 | Description:
 7 | 
 8 | Tools for Goodreads.com, such as an Amazon buyback price monitor to discover
 9 | sales opportunities, or a "follow book" implementation to discover quality
10 | users and libraries
11 | 
12 | 
13 | Website:
14 | 
15 | -
16 | 
17 | 
18 | Topics:
19 | 
20 | goodreads
21 | goodreads-api
22 | goodreads-shelves
23 | statistics
24 | rating
25 | recommender
26 | recommendation
27 | reviews
28 | monitor
29 | monitoring
30 | notification
31 | similarity
32 | similar-users
33 | taste
34 | followers
35 | friend-matching
36 | ratings
37 | like-minded
38 | discoverability
39 | likeminded
40 | 


--------------------------------------------------------------------------------
/INSTALL.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/INSTALL.txt


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Andre's Goodreads Toolbox Makefile
  2 | 
  3 | 
  4 | # Configure Make:
  5 | # https://tech.davis-hansson.com/p/make/
  6 | SHELL := bash
  7 | .ONESHELL:
  8 | .SHELLFLAGS := -eu -o pipefail -c
  9 | .DELETE_ON_ERROR:
 10 | MAKEFLAGS += --warn-undefined-variables
 11 | MAKEFLAGS += --no-builtin-rules
 12 | #.DEFAULT_GOAL := help
 13 | 
 14 | 
 15 | # Configure Make rules:
 16 | PROJECT_VERSION   = 1.25.1
 17 | CACHE_DIR         = /tmp/FileCache/Goodscrapes
 18 | BUILD_DIR         = .build
 19 | PACKAGE           = goodreads-toolbox
 20 | 
 21 | DOCKER_BUILD_DATE = $(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
 22 | DOCKER_IMG_VER    = ${PROJECT_VERSION}
 23 | DOCKER_IMG_NAME   = ${PACKAGE}
 24 | DOCKER_CON_NAME   = ${PACKAGE}
 25 | DOCKER_DIR        = .
 26 | DOCKER_HTPORT     = 8080
 27 | 
 28 | GITHUB_USER       = andre-st
 29 | GITHUB_REPONAME   = ${PACKAGE}
 30 | RELEASE           = $(PACKAGE)-$(PROJECT_VERSION)
 31 | GITDIR            = $(wildcard .git)
 32 | 
 33 | IS_ROOT           := $(shell test $(shell id -u) = 0 && echo 1)
 34 | IS_LOCAL_LIB      := $(shell perldoc -l local::lib 2> /dev/null )
 35 | 
 36 | 
 37 | # ----------------------------------------------------------------------------
 38 | ## make all            :  Installs programs and dependencies from CPAN (default)
 39 | #
 40 | all: deps installdirs
 41 | 
 42 | 
 43 | # ----------------------------------------------------------------------------
 44 | ## make installdirs    :  Creates needed directories, adds symlinks etc
 45 | # NOTE(review): no word-en-l.lst is shipped in list-in/ (only word-en-1k/3k/s.lst), so dict.lst and test.lst would dangle — confirm the intended word list.
 46 | .PHONY: installdirs $(GITDIR)
 47 | installdirs: | $(GITDIR)
 48 | 	chmod +x *.pl
 49 | 	chmod +x t/*.t
 50 | 	ln -sf word-en-l.lst ./list-in/dict.lst
 51 | 	ln -sf dict.lst      ./list-in/test.lst
 52 | 	# recentrated.pl:
 53 | 	mkdir -p ./list-out/recentrated
 54 | 
 55 | # Developers:
 56 | $(GITDIR):
 57 | 	# TODO: Since Git 2.9 there is `git config core.hooksPath .git-hooks`
 58 | 	chmod +x git-hooks/*
 59 | 	ln -sf ../../git-hooks/pre-commit ./.git/hooks/pre-commit
 60 | 	ln -sf ../../git-hooks/pre-push   ./.git/hooks/pre-push
 61 | 
 62 | 
 63 | # ----------------------------------------------------------------------------
 64 | ## make uninstall      :  Deletes files created outside the project directory
 65 | #
 66 | .PHONY: uninstall
 67 | uninstall:
 68 | 	rm -rf "${CACHE_DIR}"
 69 | 
 70 | 
 71 | # ----------------------------------------------------------------------------
 72 | ## make deps           :  Downloads and installs dependencies from CPAN.
 73 | ##                        Files go to the project's ./lib/local/ dir to ease software removal.
 74 | ##                        It does not install modules system-wide.
 75 | ##                        It doesn't require root either if the local::lib module is already installed.
 76 | #
 77 | # CPAN complains without YAML::Any (warning not error)
 78 | # We install without testing modules (significantly faster)
 79 | #
 80 | .PHONY: deps
 81 | deps:
 82 | ifndef IS_LOCAL_LIB
 83 | ifndef IS_ROOT
 84 | 	$(error "Please run as root -or- install Perl module local::lib first (apt-get install liblocal-lib-perl)")
 85 | endif
 86 | 	PERL_MM_USE_DEFAULT=1 perl -MCPAN -e 'CPAN::Shell->notest( "install", "local::lib" )'
 87 | endif
 88 | 	mkdir -p ./lib/local
 89 | 	PERL_MM_USE_DEFAULT=1 perl -MCPAN -Mlocal::lib=./lib/local -e 'CPAN::Shell->notest( "install", "Term::ReadKey", "YAML::Any", "List::MoreUtils", "HTML::Entities", "URI::Escape", "Cache::FileCache", "IO::Socket::SSL", "Net::SSLeay", "HTTP::Tiny", "Text::CSV", "Log::Any", "IO::Prompter", "Test::More", "Test::Exception" )'
 90 | 
 91 | 
 92 | 
 93 | # ----------------------------------------------------------------------------
 94 | ## make check          :  Runs unit tests
 95 | #
 96 | .PHONY: check
 97 | check:
 98 | 	prove
 99 | 
100 | 
101 | # ----------------------------------------------------------------------------
102 | ## make docker-image   :  Builds a Docker image from the dirty working copy
103 | ## make docker-run     :  Runs Docker image, optionally:
104 | ##                        make docker-run DOCKER_HTPORT=8080
105 | ##                        make docker-run DOCKER_CON_NAME=goodreads-toolbox
106 | ## make github-package :  Builds a Docker image from the official repo and pushes it to GitHub Packages
107 | ##                        Expects a PAT from GitHub > Account > Settings > Developer Settings > Personal access tokens
108 | ##                        in local file .github-packages.secret
109 | ##                        See packages: https://github.com/users/andre-st/packages
110 | #
111 | .PHONY: docker-image
112 | docker-image: Dockerfile
113 | 	docker build                                                 \
114 | 			--build-arg BUILD_DATE="${DOCKER_BUILD_DATE}"      \
115 | 			--build-arg PROJECT_VERSION="${PROJECT_VERSION}"   \
116 | 			--tag "${DOCKER_IMG_NAME}:${DOCKER_IMG_VER}"       \
117 | 			${DOCKER_DIR}
118 | 	@echo "[NEXT] You might like to start the new Docker image with 'make docker-run'"
119 | 
120 | 
121 | .PHONY: docker-run
122 | docker-run:
123 | 	docker stop         ${DOCKER_CON_NAME} || true
124 | 	docker container rm ${DOCKER_CON_NAME} || true
125 | 	@echo "[NOTE] Goodreads results are written to 'list-out/', accessible via web-browser at localhost:${DOCKER_HTPORT}"
126 | 	@docker run                               \
127 | 			--name=${DOCKER_CON_NAME}       \
128 | 			--publish=${DOCKER_HTPORT}:80   \
129 | 			--interactive                   \
130 | 			--tty                           \
131 | 			"${DOCKER_IMG_NAME}:${DOCKER_IMG_VER}" || true
132 | 
133 | 
134 | .PHONY: github-package
135 | github-package: .github-packages.secret
136 | 	rm    -rf  "${BUILD_DIR}/official-latest/"
137 | 	mkdir -p   "${BUILD_DIR}/official-latest/"
138 | 	pushd      "${BUILD_DIR}/official-latest/"
139 | 	git   clone  "https://github.com/${GITHUB_USER}/${GITHUB_REPONAME}/"  .
140 | 	make  docker-image  DOCKER_IMG_NAME=ghcr.io/${GITHUB_USER}/${GITHUB_REPONAME}  DOCKER_IMG_VER=latest
141 | 	popd
142 | 	cat .github-packages.secret | docker login ghcr.io -u ${GITHUB_USER} --password-stdin
143 | 	docker push "ghcr.io/${GITHUB_USER}/${GITHUB_REPONAME}"
144 | 
145 | 
146 | # ----------------------------------------------------------------------------
147 | ## make docs           :  Updates documentation, optionally:
148 | ##                        make docs PROJECT_VERSION=1.22
149 | .PHONY: docs
150 | docs:
151 | 	# vX.X, vX.XX.X, image:X.XX.X
152 | 	sed -i -E "s/([v])[0-9\.]+/\1${PROJECT_VERSION}/"  README.md INSTALL.txt
153 | 
154 | 
155 | # ----------------------------------------------------------------------------
156 | ## make help           :  Prints this help screen
157 | #
158 | # Prints all comments with two leading # characters in this Makefile
159 | #
160 | .PHONY: help
161 | help: Makefile
162 | 	@sed -n 's/^## //p' $<
163 | 	
164 | 	# Debugging info:
165 | ifdef IS_ROOT
166 | 	@echo IS_ROOT=yes
167 | else
168 | 	@echo IS_ROOT=no
169 | endif
170 | ifdef IS_LOCAL_LIB
171 | 	@echo IS_LOCAL_LIB=yes
172 | else
173 | 	@echo IS_LOCAL_LIB=no
174 | endif
175 | 
176 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # :books: Andre's Goodreads Toolbox, v1.25.1
  2 | 
  3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
  4 | 
  5 | 9 Perl scripts for Goodreads.com—the world's largest book (cataloging) community. [What's new?](CHANGELOG.md)
  6 | 
  7 | 
  8 | ## [recentrated.pl](./help/recentrated.md)
  9 | 
 10 | Checks all the books in your shelf for new ratings and notifies you
 11 | via periodical e-mail. It helps discover new criticisms and users with interesting 
 12 | libraries. You can [try this online](https://andre-st.github.io/goodreads/) if you 
 13 | don't want to install anything.
 14 | It implements the "follow book" feature that was requested in the Goodreads forums. 
 15 | [Usage+Screenshot](./help/recentrated.md)
 16 | 
 17 | 
 18 | ## [friendrated.pl](./help/friendrated.md)
 19 | 
 20 | Prints all books rated 4 or 5 stars by 3 or more persons you follow (including friends). 
 21 | It implements the "books common among the people I follow" feature that was requested 
 22 | in the Goodreads forums. It also lists the most read authors, the most wished-for 
 23 | and hated books.
 24 | [Usage+Screenshot](./help/friendrated.md)
 25 | 
 26 | 
 27 | ## [friendnet.pl](./help/friendnet.md)
 28 | 
 29 | Spiders your social network and creates files with edges and nodes which can be
 30 | easily processed with social network analysis software. It answers questions
 31 | like: Which members are popular among your friends?
 32 | [Usage+Screenshot](./help/friendnet.md)
 33 | 
 34 | 
 35 | ## [friendgroup.pl](./help/friendgroup.md)
 36 | 
 37 | Prints discussion groups common among the persons you follow (including friends).
 38 | Searching groups on Goodreads is a PITA, and sometimes you don't know what you can have 
 39 | and wouldn't search for it. [Usage+Screenshot](./help/friendgroup.md)
 40 | 
 41 | 
 42 | ## [likeminded.pl](./help/likeminded.md)
 43 | 
 44 | Prints Goodreads members who are interested in the same books as you.
 45 | It implements the "Finding people based on the books they've read"
 46 | feature that was requested in the Goodreads forums.
 47 | [Usage+Screenshot](./help/likeminded.md)
 48 | 
 49 | 
 50 | ## [similarauth.pl](./help/similarauth.md)
 51 | 
 52 | Prints authors who Goodreads thinks are similar to all the authors you're reading.
 53 | It implements the "Finding [all] similar authors" feature that was requested in the 
 54 | Goodreads forums.
 55 | [Usage+Screenshot](./help/similarauth.md)
 56 | 
 57 | 
 58 | ## [search.pl](./help/search.md)
 59 | 
 60 | Prints book search results, ordered by average rating and number of ratings
 61 | (most popular books), or date published, optionally with exact title matches. 
 62 | The Goodreads website doesn't offer it for some reason.
 63 | It implements the "Sort search results by rating" feature that was requested 
 64 | in the Goodreads forums.
 65 | [Usage+Screenshot](./help/search.md)
 66 | 
 67 | 
 68 | ## [savreviews.pl](./help/savreviews.md)
 69 | 
 70 | Saves text-reviews for a book to a text-file. It implements the "Extract all 
 71 | reviews for a specific book" feature that was requested in the Goodreads forums.
 72 | [Usage+Screenshot](./help/savreviews.md)
 73 | 
 74 | 
 75 | ## ~~[amz-tradein.pl](./help/amz-tradein.md)~~
 76 | 
 77 | This script fetched Amazon Trade-In prices for all books in a Goodreads.com
 78 | shelf ("resales" or "donations"). It automated regular manual bid-checking for 
 79 | hundreds of books, discovering sales opportunities. Amazon stopped its buyback 
 80 | program in 2015.
 81 | [Usage+Screenshot](./help/amz-tradein.md)
 82 | 
 83 | 
 84 | 
 85 | ## Getting started
 86 | 
 87 | 1a\.  [Docker](https://opensource.com/resources/what-docker) users can run the Toolbox in its own 
 88 |    container([?](https://www.docker.com/resources/what-container)),
 89 |    and view the results via web-browser at _localhost:8080_:
 90 | 
 91 | ```console
 92 | $ docker run -it --publish=8080:80 ghcr.io/andre-st/goodreads-toolbox
 93 | ```
 94 | 
 95 | 1b\.  users without Docker can try to install the Toolbox directly on their systems:
 96 | 
 97 | ```console
 98 | $ git clone https://github.com/andre-st/goodreads-toolbox.git
 99 | $ cd goodreads-toolbox
100 | $ sudo make          # Gets required Perl modules from CPAN
101 | ```
102 | 
103 | 2\.  at the prompt, try out the Toolbox programs:
104 | 
105 | ```console
106 | $ ./example-script.pl --help
107 | ```
108 | 
109 | Before [Docker for Windows or Mac](https://github.com/docker/toolbox/releases) 
110 | and the project's Docker-images became available,
111 | a Windows user wrote me that he ran the Toolbox on the [Windows 10 Subsystem for Linux](https://linuxhint.com/install_ubuntu_windows_10_wsl/) (WSL).
112 | 
113 | 
114 | Long program runtimes: Goodreads slows down all requests and we have to load a lot of data.
115 |   Start one program and do other things in the meantime.
116 |   You can break any program with <kbd>CTRL</kbd>-<kbd>C</kbd> and continue later (reloads from a file-cache).
117 | 
118 | 
119 | 
120 | ## Contributing
121 | 
122 | - Reporting bugs / feature requests
123 |   - add a new issue via [Github's issue tracker](https://github.com/andre-st/goodreads-toolbox/issues/new)
124 |   - [alternative contact options](AUTHORS.md)
125 |   - thank you all who wrote me mails in the past or otherwise reported bugs and ideas :thumbsup:
126 | - Writing your own scripts
127 |   - see the [tests directory](./t/) for examples on how to use the toolbox library
128 |   - see the [toolbox library documentation](./lib/Goodscrapes.pod)
129 |   - [non-functional considerations](./t/README.md)
130 |   - the [less complex issues](https://github.com/andre-st/goodreads-toolbox/labels/freshmen)
131 |     would be good first issues to work on for users who want to contribute to this project
132 | 
133 | 
134 | 
135 | ## Further readings
136 | 
137 | - About Goodreads
138 |   - [GR developers group](https://www.goodreads.com/group/show/8095-goodreads-developers)
139 |   - [GR technology stack](https://www.goodreads.com/jobs?id=597248#openPositions) 
140 | 		or [here](https://www.glasswaves.co/selected_projects.txt) 
141 | 		or [here](https://builtwith.com/goodreads.com) 
142 | 		or [DynamoDB+S3+Athena](https://aws.amazon.com/blogs/big-data/how-goodreads-offloads-amazon-dynamodb-tables-to-amazon-s3-and-queries-them-using-amazon-athena/)
143 |   - [GR workplace reviews](https://www.glassdoor.com/Reviews/Goodreads-Reviews-E684833.htm), 
144 | 		anonymously about being acquired by Amazon, bureaucracy etc.
145 |   - [GR on Crunchbase](https://www.crunchbase.com/organization/goodreads), 
146 | 		people, recent news & activity 
147 |   - [GR members stats](https://www.statista.com/search/?q=goodreads&qKat=search) 
148 | 		or [here](https://qz.com/1106341/most-women-reading-self-help-books-are-getting-advice-from-men/) 
149 | 		or [here](https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.23733)+[Sci-Hub](https://twitter.com/scihub_love) 
150 | 		or [here](https://book.pressbooks.com/chapter/goodreads-otis-chandler) 
151 | 		or [here](https://www.buzzfeednews.com/article/annanorth/what-amazon-is-getting-from-goodreads),
152 | 		source probably [Goodreads](https://www.goodreads.com/about/us)
153 |   - [GR on Slideshare](https://www.slideshare.net/GoodreadsPresentations/presentations), 
154 | 		presenting GR book marketing to authors, see also [Author Feedback Group](https://www.goodreads.com/group/show/31471) 
155 |   - [GR subreddit](https://www.reddit.com/r/goodreads/)
156 | - Further software 
157 |   - I leave statistics about your own reading habits to the following tools; 
158 | 		my toolbox, in contrast, focuses on the social periphery, with Goodreads providing the largest user base
159 |   - Paul Klinger's [Bookstats](https://github.com/PaulKlinger/Bookstats) or [here](https://almoturg.com/bookstats/)
160 |   - untested: John Smith's [GoodreadsAnalysis](https://github.com/JohnSmithDev/GoodreadsAnalysis/blob/master/REPORTS.md)
161 |   - untested: Petr's [CompareBooks](https://github.com/vatioz/GoodreadsUserCompare) 
162 | 		browser [extension](https://chrome.google.com/webstore/detail/goodreads-compare-books/jcbnjaifalpejkcgfbpjbcmkfdildgpi) 
163 | 		adds "compare" info next to usernames
164 |   - untested: Andrea Samorini's [SamoGoodreadsUtility](https://github.com/asamorini/goodreads.utility) 
165 | 		adds language filters to GR 
166 |   - untested: Danish Prakash's [goodreadsh](https://github.com/danishprakash/goodreadsh) 
167 | 		is a command line interface for Goodreads (off. API)
168 |   - untested: [Greasyfork Browser-Scripts](https://greasyfork.org/en/scripts/by-site/goodreads.com)
169 |   - untested: the [Bookar Android app](https://github.com/intmainreturn00/Bookar) visualizes your books in augmented reality
170 |   - untested: save your shelves and reviews [Goodreads data to SQLite](https://github.com/rixx/goodreads-to-sqlite)
171 |   - Amazon: [export and filter long wishlists](https://github.com/andre-st/amazon-wishless) by priority and price (bargains)
172 | - Other
173 |   - Data: thousands of books and authors (not GR) https://openlibrary.org/developers/dumps
174 | - Personal
175 |   - [a list of things](./help/GOODTIPS.md) that improved my Goodreads experience: settings, browser extensions etc.
176 |   - [Andre at Goodreads](https://www.goodreads.com/user/show/18418712-andr)
177 | 
178 | 
179 | 


--------------------------------------------------------------------------------
/amz-tradein.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- 79 chars --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | amz-tradein.pl
 10 | 
 11 | 
 12 | =head1 VERSION
 13 | 
 14 | 2015-08-31 (Since 2014-11-05)
 15 | 
 16 | 	
 17 | =head1 WARNING
 18 | 
 19 | Amazon stopped its Trade-In program on 31st August, 2015.
 20 | This script is no longer of any use.
 21 | 
 22 | 
 23 | =head1 PURPOSE
 24 | 
 25 | =over
 26 | 
 27 | =item * fetches Amazon Trade-In prices for all books in a Goodreads-shelf, 
 28 |         e.g., 'books-for-sale'
 29 | 
 30 | =item * spares you checking each book by hand every time you want to sell 
 31 |         books to Amazon
 32 | 
 33 | =item * might reveal good buyback prices for books you hadn't yet considered 
 34 |         for sales (run this script against a Goodreads "#ALL#" shelf)
 35 | 
 36 | =back
 37 | 
 38 | 
 39 | =head1 OUTPUT EXAMPLE
 40 | 
 41 |   EUR 5,30  Book title found at Amazon with Trade-In price
 42 |   EUR -,--  Book title either without Trade-In or not found by ISBN
 43 | 
 44 | 
 45 | =head1 USAGE EXAMPLE
 46 | 
 47 | =over
 48 | 
 49 | =item Check all books of a specific Goodreads user:
 50 | 
 51 | $ amz-tradein.pl 18418712
 52 | 
 53 | =item Check all books in a specific Goodreads shelf only:
 54 | 
 55 | $ amz-tradein.pl 18418712 books-for-sale
 56 | 
 57 | =item Sort by highest price and save output to a text file:
 58 | 
 59 | $ amz-tradein.pl 18418712 books-for-sale | sort --key 2n | tac > books-for-sale-w-prices.out
 60 | 
 61 | =back
 62 | 
 63 | 
 64 | =head1 OBSERVATIONS
 65 | 
 66 | =over
 67 | 
 68 | =item * process is slow, 123 books need ~2 minutes
 69 | 
 70 | =back
 71 | 
 72 | 	
 73 | =head1 REQUIRES
 74 | 
 75 | =over
 76 | 
 77 | =item * a Goodreads account (number), your # is contained in each Goodreads-shelf-URL
 78 | 
 79 | =item * no API key
 80 | 
 81 | =item * $ perl -MCPAN -e 'install WWW::Curl::Easy, Cache::FileCache'
 82 | 
 83 | =back
 84 | 
 85 | 	
 86 | =head1 KNOWN LIMITATIONS AND BUGS
 87 | 
 88 | =over
 89 | 
 90 | =item * German Amazon only (contact me if you need support for other countries)
 91 | 
 92 | =back
 93 | 	
 94 | =cut
 95 | 
 96 | #<--------------------------------- 79 chars --------------------------------->|
 97 | 
 98 | 
 99 | 
100 | use strict;
101 | use warnings;
102 | use feature 'say';  # say() is used below and is not enabled by strict/warnings alone
103 | # Perl core:
104 | use FindBin;
105 | use local::lib "$FindBin::Bin/lib/local/";
106 | use        lib "$FindBin::Bin/lib/";
107 | # Third party:
108 | # Ours:
109 | use Goodscrapes;
110 | 
111 | 
112 | # Program synopsis (printed when no argument is given at all):
113 | say STDERR "Usage: $0 GOODUSERNUMBER [SHELFNAME]\nSee source code for more info." and exit if $#ARGV < 0;
114 | 
115 | 
116 | # Program configuration (both values are validated by the Goodscrapes library):
117 | our $USERID = gverifyuser ( $ARGV[0] );
118 | our $SHELF  = gverifyshelf( $ARGV[1] );
119 | 
120 | # Returns the "EUR n,nn" Trade-In price found in the given article page HTML, or 'EUR -,--' if there is none.
121 | sub extract_amz_price
122 | {
123 | 	my $article_page_html = shift;
124 | 	return $article_page_html =~ /(EUR [0-9,]+)<\/span> Gutschein erhalten/  ? $1 : 'EUR -,--';
125 | }
126 | 
127 | # Load the shelf into %books, then print one "PRICE<TAB>TITLE" line per book:
128 | my %books;
129 | greadshelf( from_user_id    => $USERID,
130 |             ra_from_shelves => [ $SHELF ],
131 |             rh_into         => \%books );
132 | 
133 | for my $book (values %books)  # not "$b": that name is sort's special variable
134 | {
135 | 	my $price = extract_amz_price( amz_book_html( $book ) );
136 | 	say STDOUT $price . "\t" . $book->{title};
137 | }
138 | 
139 | 


--------------------------------------------------------------------------------
/cron.daily/goodratings.example:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Reports recent book ratings to some Goodreads members
 4 | #
 5 | # Put me in: /etc/cron.daily
 6 | #
 7 | # NOTE:
 8 | #   Cron- and/or run-parts limitations:
 9 | #     1. This file must not have a dot in its filename
10 | #     2. This file must be owned by root and by a distinctive group of $SCRIPTUSER
11 | #     3. This file must be executable (chmod ug=rwx,o= <file>)
12 | #
13 | #   Depends:
14 | #     1. "ifne" is part of the "moreutils" package.
15 | #
16 | 
17 | readonly SCRIPTUSER=root                     # user the script is re-run as when started by root
18 | readonly MAILFROM="GOODREADS@EXAMPLE.COM"    # passed to the script as --from
19 | readonly GOODMAIL="GOODREADSLOGINMAIL"       # placeholder; presumably the Goodreads login e-mail (first positional argument)
20 | readonly GOODPASS="GOODREADSPASSWORD"        # placeholder; presumably the Goodreads login password (second positional argument)
21 | readonly SCRIPT="/path/to/goodreads/recentrated.pl"
22 | readonly DB_DIR="/path/to/goodreads/list-out/recentrated"
23 | readonly MAILERS=(
24 | 		[0]="ifne /usr/sbin/sendmail -t"
25 | 		[1]="ifne ip netns exec NS_PRIVATE /usr/sbin/sendmail -t"
26 | 		[9]="cat" )
27 | 		# 9 for debugging, CSV restored
28 | 
29 | 
30 | # Re-run as ${SCRIPTUSER} when started by root; skipped if that user is root itself, which would re-invoke this file forever
31 | if [ "$( id -u )" = 0 ] && [ "$( id -u "${SCRIPTUSER}" )" != 0 ]
32 | then
33 | 	su "${SCRIPTUSER}" --command "$0"
34 | 	exit
35 | fi
36 | 
37 | 
38 | # Wait for Internet connection:
39 | # TODO
40 | 
41 | # chk USER SHELF MAILTO [MAILERID [OPTS]] -- runs ${SCRIPT} for one shelf and pipes its output into the selected mailer
42 | chk()
43 | {
44 | 	local   gooduser="${1}"
45 | 	local  goodshelf="${2}"
46 | 	local     mailto="${3}"
47 | 	local   mailerid="${4:-0}"
48 | 	local       opts="${5}"
49 | 	local     mailer="${MAILERS[$mailerid]}"
50 | 	local    csvname="${gooduser}-${goodshelf}.csv"
51 | 	local    csvpath="${DB_DIR}/${csvname}"
52 | 	local csvbakpath="${DB_DIR}/${csvname}.recover"
53 | 	# A leftover .recover file means the previous run did not complete:
54 | 	if [ -e "${csvbakpath}" ]
55 | 	then
56 | 		# Batch, script or mail auth failed last time. Recover and retry this time.
57 | 		# Mailtext in ~/dead.letter
58 | 		cp --preserve --force "${csvbakpath}" "${csvpath}" || exit 1
59 | 	else
60 | 		if [ -e "${csvpath}" ]
61 | 		then
62 | 			cp --preserve --force "${csvpath}" "${csvbakpath}" || exit 1
63 | 		fi
64 | 	fi
65 | 	( set -o pipefail; "${SCRIPT}" ${opts}     \
66 | 	              "--userid=${gooduser}"       \
67 | 	              "--shelf=${goodshelf}"       \
68 | 	              "--from=${MAILFROM}"         \
69 | 	              "--to=${mailto}"             \
70 | 	              "${GOODMAIL}"                \
71 | 	              "${GOODPASS}" | ${mailer} )  \
72 | 		&& [ "$mailerid" != "9" ]             \
73 | 		&& rm --force "${csvbakpath}"  # backup is dropped only if script AND mailer succeeded (pipefail) and this is not debug mailer 9
74 | }
75 | 
76 | 
77 | 
78 | #==========================================================================================================================
79 | #    USER      SHELF                     MAILTO                  MAILER  OPTS  REALNAME              SINCE     BOOKS@04/10
80 | #==========================================================================================================================
81 | chk  12345678  "%E3%85%A1watch-ratings"  "${MAILFROM}"           0             # me                            123
82 | chk    345678  "watch-ratings"           example1@gmail.com      0             # Example 1           18/01/12    3
83 | chk   2345678  "de-mooisten"             example2@gmx.net        0             # Example 2           18/11/14   23
84 | chk      5678  "wishlist-to-buy"         example3@yahoo.com      0       -q    # Example 3           18/06/11  123
85 | 
86 | 
87 | 
88 | 


--------------------------------------------------------------------------------
/friendgroup.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- MAN PAGE --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | friendgroup - groups common among the members I follow
 10 | 
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 | B<friendgroup.pl> 
 15 | [B<-c> F<numdays>] 
 16 | [B<-o> F<filename>] 
 17 | [B<-u> F<number>]
 18 | [B<-i>]
 19 | F<goodloginmail> [F<goodloginpass>]
 20 | 
 21 | 
 22 | =head1 OPTIONS
 23 | 
 24 | Mandatory arguments to long options are mandatory for short options too.
 25 | 
 26 | =over 4
 27 | 
 28 | =item B<-c, --cache>=F<numdays>
 29 | 
 30 | number of days to store and reuse downloaded data in F</tmp/FileCache/>,
 31 | default is 31 days. This helps with cheap recovery on a crash, power blackout 
 32 | or pause, and when experimenting with parameters. Loading data from Goodreads
 33 | is a very time consuming process.
 34 | 
 35 | 
 36 | =item B<-u, --userid>=F<number>
 37 | 
 38 | check another member instead of the one identified by the login-mail 
 39 | and password arguments. You find the ID by looking at the shelf URLs.
 40 | 
 41 | 
 42 | =item B<-o, --outfile>=F<filename>
 43 | 
 44 | name of the HTML file where we write results to, 
 45 | default see section FILES
 46 | 
 47 | 
 48 | =item B<-i, --ignore-errors>
 49 | 
 50 | Don't retry on errors, just keep going. 
 51 | Sometimes useful if a single Goodreads resource hangs over long periods 
 52 | and you're okay with some values missing in your result.
 53 | This option is not recommended when you run the program unattended.
 54 | 
 55 | 
 56 | =item B<-?, --help>
 57 | 
 58 | show full man page
 59 | 
 60 | =back
 61 | 
 62 | 
 63 | =head1 FILES
 64 | 
 65 | F<./list-out/friendgroup-$GOODUSERID.html>
 66 | 
 67 | F</tmp/FileCache/>
 68 | 
 69 | 
 70 | =head1 EXAMPLES
 71 | 
 72 | $ ./friendgroup.pl login@gmail.com MyPASSword
 73 | 
 74 | $ ./friendgroup.pl --outfile=./sub/myfile.html  login@gmail.com
 75 | 
 76 | 
 77 | =head1 REPORTING BUGS
 78 | 
 79 | Report bugs to <datakadabra@gmail.com> or use Github's issue tracker
 80 | <https://github.com/andre-st/goodreads-toolbox/issues>
 81 | 
 82 | 
 83 | =head1 COPYRIGHT
 84 | 
 85 | This is free software. You may redistribute copies of it under the terms of
 86 | the GNU General Public License <https://www.gnu.org/licenses/gpl.html>.
 87 | There is NO WARRANTY, to the extent permitted by law.
 88 | 
 89 | 
 90 | =head1 SEE ALSO
 91 | 
 92 | More info in ./help/friendgroup.md
 93 | 
 94 | 
 95 | =head1 VERSION
 96 | 
 97 | 2022-03-10 (Since 2018-09-26)
 98 | 
 99 | =cut
100 | 
101 | #<--------------------------------- 79 chars --------------------------------->|
102 | 
103 | 
104 | use strict;
105 | use warnings;
106 | use locale;
107 | use 5.18.0;
108 | 
109 | # Perl core:
110 | use FindBin;
111 | use local::lib "$FindBin::Bin/lib/local/";
112 | use        lib "$FindBin::Bin/lib/";
113 | use Time::HiRes qw( time tv_interval );
114 | use POSIX       qw( strftime locale_h );
115 | use File::Spec; # Platform indep. directory separator
116 | use IO::File;
117 | use Getopt::Long;
118 | use Pod::Usage;
119 | # Third party:
120 | # Ours:
121 | use Goodscrapes;
122 | 
123 | 
124 | 
125 | # ----------------------------------------------------------------------------
126 | # Program configuration:
127 | # 
128 | setlocale( LC_CTYPE, "en_US" );  # GR dates all en_US
129 | STDOUT->autoflush( 1 );
130 | gsetopt( cache_days => 31 );
131 | 
132 | our $TSTART = time();
133 | our $OUTPATH;
134 | our $USERID;
135 | 
136 | GetOptions( 'outfile|o=s'     => \$OUTPATH,
137 |             'userid|u=s'      => \$USERID,
138 |             'ignore-errors|i' => sub{  gsetopt( ignore_errors => 1 );   },
139 |             'cache|c=i'       => sub{  gsetopt( cache_days => $_[1] );  },
140 |             'help|?'          => sub{  pod2usage( -verbose => 2 );      })
141 | 	or pod2usage( 1 );
142 | 
143 | pod2usage( 1 ) if !$ARGV[0];
144 | 
145 | glogin( usermail => $ARGV[0],  # Login required: Followee/friend/groups list are private
146 |         userpass => $ARGV[1],  # Asks pw if omitted
147 |         r_userid => \$USERID );
148 | 
149 | $OUTPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', "friendgroup-${USERID}.html" ) 
150 | 	if !$OUTPATH;
151 | 
152 | 
153 | 
154 | #-----------------------------------------------------------------------------
155 | # Primary data structures:
156 | #
157 | my %members;  # {user_id}
158 | my %joins;    # {group_id}{user_id}
159 | my %groups;   # {group_id}
160 | 
161 | 
162 | 
163 | #-----------------------------------------------------------------------------
164 | # Collect friends and followees data. Include normal users only (no authors):
165 | #
166 | print( "Getting list of members known to #${USERID}..." );
167 | 
168 | my $t0 = time();
169 | greadfolls( from_user_id => $USERID,
170 |             rh_into      => \%members, 
171 |             incl_authors => 0,
172 |             on_progress  => gmeter( 'members' ));
173 | 
174 | printf( " (%.2fs)\n", time()-$t0 );
175 | 
176 | 
177 | 
178 | #-----------------------------------------------------------------------------
179 | # Load group memberships of each member
180 | # 
181 | my $memcount = scalar keys %members;  # Number of entries in %members
182 | my $memdone  = 0;                     # Members processed so far (drives the percentage)
183 | 
184 | die( $GOOD_ERRMSG_NOMEMBERS ) if !$memcount;
185 | 
186 | foreach my $member_id (keys %members)
187 | {
188 | 	$memdone++;
189 | 	printf( "[%3d%%] %-25s #%-10s\t", $memdone/$memcount*100, $members{$member_id}{name}, $member_id );
190 | 	
191 | 	my $started = time();
192 | 	# The on_group callback records in %joins which member belongs to which group:
193 | 	greadusergp( from_user_id => $member_id,
194 | 	             rh_into      => \%groups,
195 | 	             on_group     => sub{  $joins{ $_[0]{id} }{ $member_id } = 1;  },
196 | 	             on_progress  => gmeter( 'groups' ));
197 | 	
198 | 	printf( "\t%6.2fs\n", time()-$started );
199 | }
200 | 
201 | say "\nPerfect! Got groups of ${memdone} users.";
202 | 
203 | 
204 | 
205 | #-----------------------------------------------------------------------------
206 | # Write results to HTML file:
207 | # 
208 | print "Writing results to \"$OUTPATH\"... ";
209 | 
210 | my $fh  = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)";
211 | my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime );
212 | 
213 | print $fh ghtmlhead( "Groups joined by friends or followees of member $USERID, on $now",
214 | 		[ '!Logo', 'Group', 'Members', '>Joined:', '!Joined by' ]);
215 | 
216 | my $num_finds = 0;
217 | for my $gid (keys %joins)
218 | {
219 | 	my @joiner_ids  = keys %{$joins{$gid}};
220 | 	my $num_joiners = scalar @joiner_ids;
221 | 	
222 | 	$num_finds++;
223 | 	
224 | 	print $fh qq{
225 | 			<tr>
226 | 			<td><img src="${\ghtmlsafe( $groups{$gid}->{img_url} )}"></td>
227 | 			<td><a  href="${\ghtmlsafe( $groups{$gid}->{url}     )}" target="_blank">
228 | 			              ${\ghtmlsafe( $groups{$gid}->{name}    )}</a></td>
229 | 			<td>$groups{$gid}->{num_members}</td>
230 | 			<td>${num_joiners}</td>
231 | 			<td>
232 | 			};
233 | 	
234 | 	print $fh qq{
235 | 			<a  href="${\ghtmlsafe( $members{$_}->{url}     )}" target="_blank" class="gr-user">
236 | 			<img src="${\ghtmlsafe( $members{$_}->{img_url} )}" 
237 | 			   title="${\ghtmlsafe( $members{$_}->{name}    )}">
238 | 			</a>
239 | 			} foreach (@joiner_ids);
240 | 	
241 | 	print $fh qq{
242 | 			</td>
243 | 			</tr> 
244 | 			};
245 | }
246 | 
247 | print $fh ghtmlfoot();
248 | undef $fh;
249 | 
250 | printf "%d groups\n", $num_finds;
251 | 
252 | 
253 | 
254 | #-----------------------------------------------------------------------------
255 | # Done:
256 | #
257 | printf "Total time: %.0f minutes\n", (time()-$TSTART)/60;
258 | 
259 | 
260 | 


--------------------------------------------------------------------------------
/friendnet.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- MAN PAGE --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | friendnet - Spiders one's social network and saves vertices/edges to CSV-files
 10 | 
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 | B<friendnet.pl> 
 15 | [B<-u> F<number>] 
 16 | [B<-d> F<number>] 
 17 | [B<-c> F<numdays>] 
 18 | [B<-o> F<dirpath>] 
 19 | [B<-i>]
 20 | F<goodloginmail> [F<goodloginpass>]
 21 | 
 22 | 
 23 | =head1 OPTIONS
 24 | 
 25 | Mandatory arguments to long options are mandatory for short options too.
 26 | 
 27 | =over 4
 28 | 
 29 | =item B<-u, --userid>=F<number>
 30 | 
 31 | check another member instead of the one identified by the login-mail 
 32 | and password arguments. You find the ID by looking at the shelf URLs.
 33 | You still need to login with your credentials because authenticated 
 34 | members only can access the member-lists of other members.
 35 | 
 36 | 
 37 | =item B<-d, --depth>=F<number>
 38 | 
 39 | examine network to N levels. 
 40 | Runtime and datasize increases exponentially with every level.
 41 | Depth 0 is useless, 1 equals exporting your friends/followees list, 
 42 | 2 allows first useful social network analysis. 
 43 | There is the idea that all seven billion earthlings are 6 or fewer
 44 | social connections away from each other 
 45 | ("Six degrees of separation")--don't try to prove it here.
 46 | Default is 2.
 47 | 
 48 |  depth 0:  YOU                                                  []
 49 |  depth 1:  YOU --> friends                                      []
 50 |  depth 2:  YOU <-> FRIENDS --> friends                          [100%]
 51 |  depth 3:  YOU <-> FRIENDS <-> FRIENDS --> friends              [100%, 100%]
 52 |  depth 4:  YOU <-> FRIENDS <-> FRIENDS <-> FRIENDS --> friends  [100%, 100%, 100%]
 53 |  depth n:  ...
 54 | 
 55 | Note: Friends with more than 1000 friends or followees are dropped, 
 56 | because the data of such accounts is likely not meaningful anymore and 
 57 | just wastes your (computing) time.
 58 |  
 59 |  
 60 | =item B<-c, --cache>=F<numdays>
 61 | 
 62 | number of days to store and reuse downloaded data in F</tmp/FileCache/>,
 63 | default is 31 days. This helps with cheap recovery on a crash, power blackout 
 64 | or pause, and when experimenting with parameters. Loading data from Goodreads
 65 | is a very time consuming process.
 66 | 
 67 | 
 68 | =item B<-o, --outdir>=F<dirpath>
 69 | 
 70 | write CSV-files to this directory, 
 71 | default see section FILES
 72 | 
 73 | 
 74 | =item B<-i, --ignore-errors>
 75 | 
 76 | Don't retry on errors, just keep going. 
 77 | Sometimes useful if a single Goodreads resource hangs over long periods 
 78 | and you're okay with some values missing in your result.
 79 | This option is not recommended when you run the program unattended.
 80 | 
 81 | 
 82 | =item B<-?, --help>
 83 | 
 84 | show full man page
 85 | 
 86 | =back
 87 | 
 88 | 
 89 | =head1 FILES
 90 | 
 91 | F</tmp/FileCache/>
 92 | 
 93 | F<./list-out/friendnet-$GOODUSERID-edges.csv>
 94 | 
 95 | F<./list-out/friendnet-$GOODUSERID-nodes.csv>
 96 | 
 97 | 
 98 | =head1 EXAMPLES
 99 | 
100 | $ ./friendnet.pl login@gmail.com MyPASSword
101 | 
102 | $ ./friendnet.pl --depth=3 --outdir=/tmp/  login@gmail.com
103 | 
104 | 
105 | =head1 REPORTING BUGS
106 | 
107 | Send an email to <datakadabra@gmail.com> or use Github's issue tracker
108 | <https://github.com/andre-st/goodreads-toolbox/issues>
109 | 
110 | 
111 | =head1 COPYRIGHT
112 | 
113 | This is free software. You may redistribute copies of it under the terms of
114 | the GNU General Public License <https://www.gnu.org/licenses/gpl.html>.
115 | There is NO WARRANTY, to the extent permitted by law.
116 | 
117 | 
118 | =head1 SEE ALSO
119 | 
120 | More info in ./help/friendnet.md
121 | 
122 | 
123 | =head1 VERSION
124 | 
125 | 2022-03-10 (Since 2019-06-14)
126 | 
127 | =cut
128 | 
129 | #<--------------------------------- 79 chars --------------------------------->|
130 | 
131 | 
132 | use strict;
133 | use warnings;
134 | use locale;
135 | use 5.18.0;
136 | 
137 | # Perl core:
138 | use FindBin;
139 | use local::lib "$FindBin::Bin/lib/local/";
140 | use        lib "$FindBin::Bin/lib/";
141 | use Time::HiRes qw( time tv_interval );
142 | use POSIX       qw( strftime locale_h );
143 | use File::Spec; # Platform indep. directory separator
144 | use IO::File;
145 | use Getopt::Long;
146 | use Pod::Usage;
147 | # Third party:
148 | use Text::CSV   qw( csv );
149 | # Ours:
150 | use Goodscrapes;
151 | 
152 | 
153 | 
154 | # ----------------------------------------------------------------------------
155 | # Program configuration:
156 | # 
157 | setlocale( LC_CTYPE, "en_US" );  # GR dates all en_US
158 | STDOUT->autoflush( 1 );
159 | gsetopt( cache_days => 31 );
160 | 
161 | our $TSTART   = time();  # Start time, used for the "Total time" report at the end
162 | our $DEPTH    = 2;       # Default network depth, overridable with --depth
163 | our $MAXNHOOD = 1000;    # Ignore users with more than N friends
164 | our $OUTDIR   = File::Spec->catdir( $FindBin::Bin, 'list-out' );  # A directory, hence catdir; overridable with --outdir
165 | our $USERID;             # Set by --userid or filled in by glogin()
166 | 
167 | GetOptions( 'userid|u=s'      => \$USERID,
168 |             'depth|d=i'       => \$DEPTH,
169 |             'outdir|o=s'      => \$OUTDIR,
170 |             'ignore-errors|i' => sub{  gsetopt( ignore_errors => 1 );   },
171 |             'cache|c=i'       => sub{  gsetopt( cache_days => $_[1] );  },
172 |             'help|?'          => sub{  pod2usage( -verbose => 2 );      })
173 | 	or pod2usage( 1 );
174 | 
175 | pod2usage( 1 ) if !$ARGV[0];
176 | 
177 | glogin( usermail => $ARGV[0],  # Login required: Followee/friend list are private
178 |         userpass => $ARGV[1],  # Asks pw if omitted
179 |         r_userid => \$USERID );
180 | 
181 | our $OUTPATH_EDG = File::Spec->catfile( $OUTDIR, "friendnet-$USERID-edges.csv" );
182 | our $OUTPATH_NOD = File::Spec->catfile( $OUTDIR, "friendnet-$USERID-nodes.csv" );
183 | 
184 | 
185 | 
186 | #-----------------------------------------------------------------------------
187 | # Primary data structures:
188 | #
189 | my %nodes;
190 | my @edges;
191 | 
192 | 
193 | 
194 | #-----------------------------------------------------------------------------
195 | # Traverse social network:
196 | #
197 | printf( "Traversing #%s's social network (depth=%d)...\n", $USERID, $DEPTH );
198 | 
199 | 
200 | # Displays sth. like "Covered: [ 14%, 55%]" for depth = 3
201 | my $progress_indicator_fn = sub
202 | {
203 | 	my %opt       = @_;
204 | 	my $remaining = $opt{depth};          # Presumably the depth still ahead of the reported level -- confirm against gsocialnet
205 | 	my $level     = $DEPTH - $remaining;  # Number of tab stops to skip for this level's column
206 | 	
207 | 	# We get leaves as whole; percent-progress would be 0 to 100% in 1 step:
208 | 	return if $remaining == 1;
209 | 	
210 | 	print( "\r[" . ( "\t" x $level )                # Column 0, then tab to this level's column (tab doesn't del prev. chars)
211 | 	     . sprintf( "%3d%%", $opt{perc} )           # Percent-progress for the current network depth
212 | 	     . ( ",\t  0%" x ($remaining-2) ) . ']' );  # Fill the remaining columns with "0%"
213 | };
214 | 
215 | 
216 | # Displays sth. like:
217 | # [  1%] #1234567
218 | # [  1%] #76543
219 | # ...
220 | # [100%] #432123
221 | my $progress_indicator_fn2 = sub
222 | {
223 | 	# NOTE(review): empty stub -- gsocialnet() below is given $progress_indicator_fn, and nothing visible uses this one; confirm before removing
224 | };
225 | 
226 | 
227 | gsocialnet( from_user_id    => $USERID,
228 |             rh_into_nodes   => \%nodes,
229 |             ra_into_edges   => \@edges,
230 |             ignore_nhood_gt => $MAXNHOOD,
231 |             depth           => $DEPTH,
232 |             on_progress     => $progress_indicator_fn );
233 | 
234 | 
235 | 
236 | #-----------------------------------------------------------------------------
237 | # Write CSV-files:
238 | # 
239 | my @nodeslines = values %nodes;
240 | 
241 | printf( "\nWriting network data to: \n%s  (N=%d)\n%s  (N=%d)", 
242 | 		$OUTPATH_NOD, scalar @nodeslines,
243 | 		$OUTPATH_EDG, scalar @edges );
244 | 
245 | csv( in      => \@nodeslines,
246 |      out     => $OUTPATH_NOD,
247 |      headers => [qw( id name img_url )] );
248 | 
249 | csv( in      => \@edges,
250 |      out     => $OUTPATH_EDG,
251 |      headers => [qw( from to )] );
252 | 
253 | 
254 | 
255 | #-----------------------------------------------------------------------------
256 | # Done:
257 | # 
258 | printf( "\n\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 );
259 | 
260 | 
261 | 
262 | 


--------------------------------------------------------------------------------
/git-hooks/pre-commit:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | 
 4 | # Exit on first error
 5 | set -e
 6 | 
 7 | 
 8 | # Static code checks on the staged Perl scripts (added, copied or modified ones only):
 9 | for f in $(git diff --cached --name-only --diff-filter=ACM -- '*.pl')
10 | do
11 | 	perl -c "${f}"
12 | done
13 | 
14 | 
15 | # Regenerate the HTML documentation only if the library is part of this commit:
16 | if ! git diff --cached --quiet -- ./lib/Goodscrapes.pm  # exit status 1 = staged changes exist
17 | then
18 | 	echo "Generating documentation: Goodscrapes.html"
19 | 	pod2html ./lib/Goodscrapes.pm > ./lib/Goodscrapes.html \
20 | 		&& git add ./lib/Goodscrapes.html
21 | fi
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/git-hooks/pre-push:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | 
 4 | # Exit on first error
 5 | set -e
 6 | 
 7 | 
 8 | # Unit-tests the libraries.
 9 | # (Static checks already took place during commits)
10 | # 
11 | # Don't run if there are just changes to the documentation
12 | # 
13 | if git diff --name-only --cached origin/master | grep -E '\.(pm|t)$'  # anchored: ".txt" etc. must not trigger the tests
14 | then
15 | 	prove
16 | fi
17 | 
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/help/GOODTIPS.md:
--------------------------------------------------------------------------------
  1 | # Tips on Goodreads, i.a.
  2 | 
  3 | ## Table of Contents
  4 | - [Things That Improved My Goodreads.com Experience](#things-that-improved-my-goodreadscom-experience)
  5 | - [Discovering Non-Fiction Books](#discovering-non-fiction-books)
  6 | - [Annotating Books](#annotating-books)
  7 | - [Feedback](#feedback)
  8 | 
  9 | 
 10 | ## Things That Improved My Goodreads.com Experience
 11 | 
 12 | - **Group shelves** with a prefix, e.g., _"region-usa"_,
 13 |   _"region-..."_. Goodreads sorts shelf lists in alphabetical order.
 14 |   Related but scattered shelves impair findability.  
 15 |   - I moved shelves that are useful to me alone to the _end_ of the list by prefixing them with "z\_" or Unicode 0x3161: ㅡ
 16 |   - next to pseudo sub-shelves _"computer-history"_, _"computer-networks"_ 
 17 |     and so on I'm using a separate _"computer"_ pseudo super-shelf which 
 18 |     contains _all_ books from the sub-shelves 
 19 |     (useful for [shelf-intersection](https://www.secondrunreviews.com/2016/03/selecting-multiple-shelves-goodreads.html))
 20 | 
 21 | - **Create a "more-urgent" shelf** from unread books, also create a _"more-tempting"_ shelf with books not urgent but probably more fun,
 22 |   then intersect both shelves ("select multiple") and copy the URL in your Goodreads profile text as _"[Likely next reads]"_
 23 |   (regularly update the shelves)
 24 | 
 25 | - **Create an "abandoned" shelf** to compensate for the missing reading-status. 
 26 |   Have the exclusive-checkbox [activated](https://www.goodreads.com/shelf/edit)
 27 | 
 28 | - **Track physical book location** with shelves such as _"shelf-kitchen"_ or 
 29 |   _"shelf-berlin"_ or _"shelf-office"_ if the amount of books exceeds memory (Future me)
 30 | 
 31 | - **Limit the number of shelves** to max. 1 page. 
 32 |   Few coarse-grained shelves better than 100+ fine-grained shelves: faster to navigate and more likely to keep up-to-date for every book.
 33 |   Anemic shelves also render functions such as "[select multiple shelves](https://www.secondrunreviews.com/2016/03/selecting-multiple-shelves-goodreads.html)" (intersection ∩) useless.
 34 |   - avoid shelves that will likely never contain more than 3 books
 35 |   - try to minimize difference within a shelf and maximize difference between shelves (similar to cluster analysis)
 36 |   - merge strongly overlapping shelves, e.g., _"politics-economy-history"_ or _"software-testing-infosec"_
 37 |   - remove shelves only good in theory but never used practically
 38 | 
 39 | - **Add unread books to custom shelves too.** This works
 40 |   well with Goodreads own _"[select multiple](https://www.secondrunreviews.com/2016/03/selecting-multiple-shelves-goodreads.html)"_ feature beneath your
 41 |   shelf list. It's clearer than having hundreds of books in _"want-to-read"_ over time,
 42 |   and helps others discovering new books more easily. Pick your next book by intersection ∩, e.g.,
 43 |   - _"want-to-read" + "non-fiction" + "lang-german"_
 44 |   - _"want-to-read" + "fiction" + "politics"_
 45 | 
 46 |   ![Intersection](https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Set_intersection.svg/320px-Set_intersection.svg.png)
 47 | 
 48 | - **Negative shelves**, or [non-shelves](https://www.goodreads.com/topic/show/19369665-reverse-results-on-my-shelf#comment_id_181173145): 
 49 |   e.g., _"fiction" + "lang-de" + "non-computing"_ would exclude nerd fiction; 
 50 |   also useful for friends who are interested in everything but computers; 
 51 |   most common negative shelf is _"non-fiction"_; 
 52 |   limit to few but big shelves
 53 | 
 54 | - **Declutter the library** with cardboxes and GR-shelves labeled
 55 |   _"donations"_ and _"resales"_. My city library took 30 books
 56 |   after receiving a link to my donations shelf.  Such link may also appear in
 57 |   your email signature: "I give away books: ...". 
 58 |   PS: There is a "book condition" column (shelf settings: table view, [x] condition).
 59 | 
 60 | - **Batch edit** shelf feature ([tutorial](https://www.soobsessedwith.com/2014/01/get-organized-on-goodreads.html))
 61 | 
 62 | - **Filter reviews by language** °~~by selecting a book edition in your language and "Filter: this edition" ([described here](https://www.goodreads.com/topic/show/19528032#comment_184069651))~~. 
 63 |   See [new filter by language feature](https://www.goodreads.com/topic/show/19545889-filter-reviews-using-language#comment_192500388)
 64 | 
 65 | - **Become a Goodreads librarian** by applying 
 66 |   [there](https://www.goodreads.com/about/apply_librarian). Quickly
 67 |   edit wrong or missing book/author info and add cover images by yourself,
 68 |   combine stray book editions (take over reviews etc.)
 69 | 
 70 | - [Goodreads Ratings for Amazon](https://chrome.google.com/webstore/detail/goodreads-ratings-for-ama/fkkcefhhadenobhjnngfdahhlodolkjg) – a Chrome-browser extension by Rubén Martínez; 
 71 |   also reminds you of GR reviews when you're shopping on Amazon (alternatively, try my
 72 |   [Tiny JS Injector](https://github.com/andre-st/chrome-injectjs))
 73 | 
 74 | - **Photos in reviews**: 
 75 |   Add photos to your reviews by uploading them to your Goodreads _user profile_
 76 |   photos. So you don't have to find and rely on external web space, e.g. paid or
 77 |   shady, short-lived, free image hosts.  
 78 |   Such photos can be snapshots of individual book pages to give an
 79 |   impression of the inside of the book, but also diagrams or photos of
 80 |   events and lectures.  
 81 |   Use the caption "for Reviews &gt; BOOK-SHORT-TITLE #PHOTO-NUMBER"
 82 |   so that Goodreads later displays it like "User &gt; Photos &gt; for Reviews &gt; Nice Book #1".  
 83 |   Add a link to your review to the photo description box: "Review: https://...".
 84 |   After uploading, simply copy the image URL ("largest") into your
 85 |   review (`<img src="URL">`).
 86 | 
 87 | - **Check out users who rate good books**. 
 88 |   [This service](https://andre-st.github.io/goodreads/) notifies you of new ratings for specific books.
 89 |   Be picky, create a special-purpose shelf with good but rare books, don't submit your whole _"read"_ shelf to this service.
 90 | 
 91 | - **Force view settings**, e.g., unify the quasi-random view settings when browsing (other people's)
 92 |   shelves, by rewriting Goodreads URLs via Einar Egilsson's 
 93 |   [Redirector](https://chrome.google.com/webstore/detail/redirector/ocgpenflpmgnfapjedencafcfakcekcd)
 94 |   Chrome browser extension (or my [Tiny JS Injector](https://github.com/andre-st/chrome-injectjs)). 
 95 |   Once you are familiar with the Redirector user interface, you can simply copy/paste these values 
 96 |   into the appropriate fields: 
 97 |   ```
 98 |   Description: Goodreads Shelves: 100 books per page, sort by user-rating (highest first), covers-view
 99 |   Example    : https://www.goodreads.com/review/list/13055874?per_page=20&sort=reviews&view=table&shelf=ㅡxx-xx&page=2
100 |   Pattern    : (https://www\.goodreads\.com/review/list/[^?]+)(?=(?:.*[?&](page=\d+))?)(?=(?:.*[?&](shelf=[^&]+))?)
101 |   Redirect   : $1?per_page=100&sort=rating&order=d&view=covers&$2&$3
102 |   Type       : Regular Expression
103 |   ```
104 |   ```
105 |   Description: Goodreads "All Editions": Expanded details (language etc), 100 per page
106 |   Example    : https://www.goodreads.com/work/editions/80128-silence-on-the-wire?expanded=false&utf8=✓&sort=num_ratings&filter_by_format=Nook
107 |   Pattern    : (https://www\.goodreads\.com/work/editions/[^\?]*)\?*(.*)
108 |   Redirect   : $1?expanded=true&$2&per_page=100
109 |   Type       : Regular Expression
110 |   ```
111 |   All expressions take inexact matches like "page" ∈ "per\_page", randomly ordered or missing 
112 |   parameters and Unicode values into account. Given duplicate query arguments, the last one applies.
113 | 
114 | 
115 | ## Discovering Non-Fiction Books
116 | 
117 | - check out the bibliography section of a good book (best signal-to-noise ratio); I use a separate _"bibliogr-to-check"_ Goodreads shelf to keep track of unchecked books
118 | - notice books mentioned in the _footnotes_ and literature sections of Wikipedia articles
119 | - notice books mentioned in magazine articles
120 | - notice alternative books mentioned in book reviews
121 | - notice names dropped in magazine articles and check them against Amazon
122 | - scan interesting websites/blogs for books 
123 |   - internal search or google for `book site:anygoodblog.com`
124 |   - [HackerNewsBooks.com](https://hackernewsbooks.com/)
125 |   - [top books on Reddit](http://booksreddit.com/)
126 |   - [RedditFavorites.com](https://redditfavorites.com/books)
127 |   - [BooksChatter.com](https://bookschatter.com) (scans Twitter)
128 | - search [books.google.com](https://www.google.com/search?tbm=bks&q=specific+interest) for "specific interest"; try Google's [Talk to Books](https://books.google.com/talktobooks/) (since April 2018)
129 | - search [Google scholar profiles](https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=label%3A&btnG=) for `label:MY_AREA_OF_INTEREST` and check profile names against Amazon's book search
130 | - [Google Alerts](https://www.google.com/alerts): "new book" + "specific interest"
131 | - follow [Goodreads users](https://www.goodreads.com/user/18418712-andr/following) with interesting libraries
132 |   - find Goodreads members [with similar taste](./likeminded.md) _(my GR toolbox)_
133 | - investigate a list of [authors similar to the authors in your shelves](./similarauth.md) on Goodreads _(my GR toolbox)_
134 | - inspect Goodreads books [common among members you follow](./friendrated.md) _(my GR toolbox)_
135 | - check the Amazon and Goodreads profiles of users who comment on good books
136 |   - [get notified](https://andre-st.github.io/goodreads/) of new reviewers for your favourite books _(my GR toolbox)_
137 | - follow small or specialized publishers through a Twitter list, RSS-feed or newsletter (works so lala)
138 | - reddit ([r/booksuggestions](https://www.reddit.com/r/booksuggestions/), [r/suggestmeabook](https://www.reddit.com/r/suggestmeabook/), ...) , quora, ...
139 | - the better book sites:
140 |   - [NewBooksNetwork.com](http://newbooksnetwork.com/)
141 |   - [perlentaucher.de](https://www.perlentaucher.de/teaserliste/2_Buecher.html) (German)
142 |   - [Hotlist](https://www.hotlist-online.com/) (German)
143 | - recommendation engines hardly work for me: Goodreads never, Amazon sometimes
144 | - [Bookstagram](https://www.instagram.com/explore/tags/bookstagram/) does not work for me
145 | - [BookTube](https://en.wikipedia.org/wiki/BookTube) does not work for me, girls club & primarily fiction
146 | - common bestseller lists do not work for me
147 | - Parakweet's BookVibe closed in 2016, they sent you a list of books that your friends are talking about on Twitter
148 | - ...
149 | - get your keywords right: you have to know the right technical terms before learning about them; try "science books" or "textbook" over "nonfiction", all not necessarily scientific or even academic but nonfiction is very broad; check non-English books too if you speak another language (no-brainer but st. I forgot)
150 | - bookmark interesting titles now and inspect them later, books must arouse interest also after one month; I use multiple Amazon wishlists, which also show current 2nd hand prices, my comments and prioritization; I have a separate "(lost interest)" wishlist as an alternative to deletion; my [Amazon Wishlist-Exporter](https://github.com/andre-st/amazon-wishless) helps keep the overview by filtering ~60 wishlists by price and priority.
151 | 
152 | 
153 | 
154 | ## Annotating Books
155 | 
156 | I try to develop my system for markings, notes etc in physical books, which is still in its infancy.
157 | 
158 | ### Tools:
159 | - [Stabilo Boss text marker](https://www.amazon.com/-/de/dp/B01LXOQ1KJ)
160 |   (primary color: yellow, looks cleaner than other colors)
161 | - sharp pencil
162 | - [Mont Marte electric eraser](https://www.amazon.com/Mont-Marte-Electric-Refills-Suitable/dp/B0791BP2PX)
163 |   (I do not like rubbing around on the book pages with classic erasers 
164 |   because the notes can get smeared, pages can crease and tear, 
165 |   or notes remain slightly readable due to caution. 
166 |   An electric eraser allows much more controlled erasing and brings enough 
167 |   abrasion power to the sheet with the rotating head, 
168 |   and that comfortably from any sitting or lying position)
169 | 
170 | ### Symbols:
171 | - !: important
172 | - X: strongly disagree, faulty reasoning, ...
173 | - ?: don't get it, fishy, not backed up well, hard to believe but don't know counter arguments yet
174 | - 1, 2, 3: restructure text - these blocks are self-contained
175 | - Ex: examples
176 | - Lit: literature references
177 | - URL: web addresses
178 | - circle around words: keywords in this important sentence, emphasis
179 | 
180 | ### Other:
181 | - use margin to explain unknown words ("prebendalism: ...")
182 | 
183 | 
184 | 
185 | 
186 | 
187 | ## Feedback
188 | 
189 | Use [GitHub](https://github.com/andre-st/goodreads-toolbox/issues) or see [AUTHORS.md](AUTHORS.md) file
190 | 
191 | 
192 | 


--------------------------------------------------------------------------------
/help/amz-tradein.md:
--------------------------------------------------------------------------------
 1 | # amz-tradein.pl
 2 | 
 3 | ![Maintenance](https://img.shields.io/maintenance/yes/2015.svg)
 4 | 
 5 | 
 6 | ## Autom. Amazon-Trade-In-Preisliste für Goodreads-Bücher
 7 | ```console
 8 | $ ./amz-tradein.pl 18418712 books-for-sale
 9 | EUR 8,50   Schneekreuzer. Alle drei Teile in einem Band
10 | EUR 3,37   Exit Wounds
11 | EUR 2,45   Software Factories: Assembling Applications with Patterns, Models, Frameworks and Tools
12 | EUR 0,15   Death March
13 | EUR 0,15   Bellum Gallicum. Text
14 | EUR 0,10   Wien wartet auf Dich. Der Faktor Mensch im DV Management
15 | EUR 0,10   Produkt ist Kommunikation - Integration von Branding und Usability
16 | EUR 0,10   Politik als Beruf
17 | ```
18 | 
19 | ## Amazon kauft gebrauchte Bücher zurück
20 | - kein Warten auf Käufer, kein Werben nötig = schneller Verkauf alter Bücher
21 | - finanz. immer Verlust, aber Buchfehlkäufe verstauben sonst bzw. ärgern mit ihrer Gegenwart
22 | - Fachbücher erlösten manchmal 10-25 EUR (50% vom Einkaufspreis)
23 | - Erlöse z.B. für den Kauf anderer Gebrauchtbücher
24 | - _zeitaufwendig_ (a) immer wieder und (b) genug höherpreisige Bücher per Hand zu finden
25 |   - vertane Zeit, wenn sich nichts findet; Preise ändern sich regelm.
26 |   - lohnende Bücher übersehen, falsch beurteilt
27 | - amz-tradein.pl ermittelt _automatisch_ alle Angebote für ein gesamtes Goodreads-Regal
28 | - Goodreads.com: weltgrößte Lesegemeinde + Tools zur Bücherverwaltung
29 | 
30 | 
31 | ## Installation unter GNU/Linux
32 | 1. Keine Installation nötig! Amazon kauft nichts mehr ([seit 31.08.15](https://www.amazon.de/gp/browse/ref=trdrt_conf_exodus?ie=UTF8&node=4455884031))
33 | 2. Perl ist oft vorinstalliert
34 | 3. amz-tradein.pl ausführbar machen (chmod +x) und starten, Hilfe erscheint
35 | 4. bei Startfehler evtl. das Perl-Modul WWW::Curl::Easy z.B. über [cpan](http://perl.about.com/od/packagesmodules/qt/perlcpan.htm) installieren
36 | 


--------------------------------------------------------------------------------
/help/friendgroup.md:
--------------------------------------------------------------------------------
 1 | # friendgroup.pl
 2 | 
 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
 4 | 
 5 | 
 6 | ## Discussion groups common among the people you follow
 7 | 
 8 | From the _Goodreads Feedback_ forum, [Carlissa (2018)](https://web.archive.org/web/20190525013220/https://www.goodreads.com/topic/show/19548229-finding-a-particular-type-of-group):
 9 | > ... The best way to find a group is by word of mouth from friends ...
10 | 
11 | or Faith (ibidem):
12 | > Look at the lists of groups to which your friends or people you follow belong. 
13 | 
14 | 
15 | ## This
16 |  
17 | ![Screenshot](img/friendgroup.png?raw=true "Screenshot")
18 | 
19 | 
20 | 
21 | ## How to generate this on a GNU/Linux operating system
22 | 
23 | 1. [Install the toolbox](../README.md#Getting-started)
24 | 2. at the prompt, enter:
25 | 
26 | ```console
27 | $ ./friendgroup.pl --help
28 | $ ./friendgroup.pl goodlogin@example.com
29 | 
30 | Enter GR password for goodlogin@example.com: *****************
31 | Signing in to Goodreads... OK
32 | Getting list of members known to #18418712... 141 members (0.18s)
33 | [  0%] Aron Mellendar            #21254511      0 groups     0.41s
34 | [  1%] Moshe Fiono               #3932835       0 groups     0.80s
35 | [  2%] Peter Glowwa              #18936366      2 groups     0.58s
36 | [  3%] DuyGeboad                 #73957929      9 groups     0.05s
37 | [  3%] Michael                   #9482539       0 groups     0.15s
38 | [  5%] Peter Prischl             #17272051      0 groups     1.47s
39 | [  6%] Steven Shoffork           #51011129      0 groups     0.15s
40 | [  7%] 2mo                       #32504210     12 groups     0.07s
41 | ...
42 | [ 99%] Charlene                  #2442665       0 groups     2.41s
43 | [100%] David                     #7634567       0 groups     0.01s
44 | 
45 | Perfect! Got groups of 141 users.
46 | Writing results to "./list-out/friendgroup-1234567.html"... 245 groups (0.31s)
47 | Total time: 2 minutes
48 | ```
49 | 
50 | **Note:**
51 | 
52 | You can break the process with <kbd>CTRL</kbd>-<kbd>C</kbd> and continue later
53 | without having to re-read all online sources again, as reading from
54 | Goodreads.com is very time consuming.  The script internally uses a
55 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/.
56 | 
57 | 
58 | 
59 | ## Observations and limitations
60 | 
61 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
62 | - sometimes you don't know what you can have and you wouldn't search for it, but it might show up here
63 | 
64 | 
65 | ## Feedback
66 | 
67 | If you like this project, give it a star on GitHub.
68 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
69 | or see the [AUTHORS.md](../AUTHORS.md) file.
70 | 
71 | 
72 | ## See also
73 | 
74 | - "[Groups With My Books](https://www.goodreads.com/group/my_books)" - Groups that have added a book in your shelves (Goodreads feature)
75 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
76 | - [friendnet.pl](friendnet.md)     - Social network analysis
77 | - [likeminded.pl](likeminded.md)   - Find Goodreads members with similar book taste
78 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
79 | - [similarauth.pl](similarauth.md) - Find all similar authors
80 | - [search.pl](search.md)           - Sort books-search results by popularity or date published
81 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
82 | 


--------------------------------------------------------------------------------
/help/friendnet.md:
--------------------------------------------------------------------------------
  1 | # friendnet.pl
  2 | 
  3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
  4 | 
  5 | 
  6 | ## Analyze your Goodreads.com social network
  7 | 
  8 | Spiders your social network and creates files with edges and nodes which can be
  9 | easily processed with social network analysis software.
 10 | 
 11 | 
 12 | ## Output
 13 | 
 14 | ```console
 15 | $ head friendnet-nodes.csv friendnet-edges.csv
 16 | ==> friendnet-nodes.csv <==
 17 | id,name,img_url
 18 | 50965461,"Peter Hesar",https://images.gr-assets.com/users/1514444137p2/50911111.jpg
 19 | 15232357,"Carole Arsifeult",https://images.gr-assets.com/users/139552226262/15222217.jpg
 20 | 41256336,"Jordan Teller",https://images.gr-assets.com/users/1427180778p2/41444336.jpg
 21 | 4112343,Tim,https://images.gr-assets.com/users/1432411115p2/4114553.jpg
 22 | 
 23 | ==> friendnet-edges.csv <==
 24 | from,to
 25 | 15234712,18525218
 26 | 15234712,8251216
 27 | 15234712,13152689
 28 | 15234712,9362611
 29 | ```
 30 | 
 31 | Comma-separated values (CSV) files can be easily processed with any social network 
 32 | analysis (SNA) software such as `R` with the `igraph` package or similar.
 33 | You can run other statistics software or query languages against CSV files too, 
 34 | e.g. `q` is SQL for CSV. 
 35 | A user sent me a screenshot with Excel processing these data, which looked good too.
 36 | 
 37 | 
 38 | ## Social network analysis (SNA)
 39 | 
 40 | Generated network type: 
 41 | - Egocentric (not sociocentric/complete), 
 42 | - Directed   (not undirected), 
 43 | - Binary     (not valued), 
 44 | - One-Mode   (not bipartite/multi-mode), 
 45 | - Connected  (not disconnected)
 46 | 
 47 | 
 48 | ![Network](img/friendnet.png?raw=true "Network")
 49 | 
 50 | 
 51 | ```R
 52 | TODO: R/igraph-examples:
 53 | - direct influence on neighbours (degree centrality)
 54 | - brokerage or gatekeeping potential (betweenness centrality)
 55 | - influence entire network most quickly or: who hears news first (closeness centrality)
 56 | - influence over whole network, not just neighbours (eigen centrality)
 57 | - probability that any message will arrive (page rank)
 58 | - linked by many nodes that are linking many other nodes (Kleinberg authority score)
 59 | - community detection
 60 | - ...
 61 | ```
 62 | 
 63 | ```console
 64 | TODO: q-example "Members popular among your friends"
 65 | ```
 66 | 
 67 | 
 68 | ## How to generate this on a GNU/Linux operating system
 69 | 
 70 | 1. [Install the toolbox](../README.md#Getting-started)
 71 | 2. at the prompt, enter:
 72 | 
 73 | ```console
 74 | $ ./friendnet.pl --help
 75 | $ ./friendnet.pl goodlogin@example.com
 76 | 
 77 | Enter GR password for goodlogin@example.com: ******************
 78 | Signing in to Goodreads... OK
 79 | Traversing #18418712's social network (depth=2)...
 80 | Covered: [100%]
 81 | Writing network data to: 
 82 | ./list-out/friendnet-5685856-nodes.csv  (N=76622)
 83 | ./list-out/friendnet-5685856-edges.csv  (N=106974)
 84 | 
 85 | Total time: 195 minutes
 86 | ```
 87 | 
 88 | **Note:**
 89 | 
 90 | You can break the process with <kbd>CTRL</kbd>-<kbd>C</kbd> and continue later
 91 | without having to re-read all online sources again, as reading from
 92 | Goodreads.com is very time consuming.  The script internally uses a
 93 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/.
 94 | 
 95 | 
 96 | 
 97 | ## Observations and limitations
 98 | 
 99 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
100 | 
101 | 
102 | 
103 | ## Feedback
104 | 
105 | If you like this project, give it a star on GitHub.
106 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
107 | or see the [AUTHORS.md](../AUTHORS.md) file.
108 | 
109 | 
110 | ## See also
111 | 
112 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
113 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
114 | - [likeminded.pl](likeminded.md)   - Find Goodreads members with similar book taste
115 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
116 | - [similarauth.pl](similarauth.md) - Find all similar authors
117 | - [search.pl](search.md)           - Sort books-search results by popularity or date published
118 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------
/help/friendrated.md:
--------------------------------------------------------------------------------
  1 | # friendrated.pl
  2 | 
  3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
  4 | 
  5 | 
  6 | ## Books common among the people you follow
  7 | 
  8 | From the _Goodreads Feedback_ forum, 
  9 | [Sophie (2013)](https://web.archive.org/web/20190525013028/https://www.goodreads.com/topic/show/1573755-most-popular-books-among-friends) or 
 10 | [Madharper (2019)](https://help.goodreads.com/s/question/0D51H00004RMzDLSA1/is-there-any-way-to-list-all-the-books-that-my-friends-have-given-five-star-ratings-to?t=1566829298278) or
 11 | [Anne (2018)](https://web.archive.org/web/20190525012925/https://www.goodreads.com/topic/show/19320371-recommendations):
 12 | > I often choose a book to read if many of the people I follow have read it
 13 | > (and rated it high). Anyway, to find these kind of books isn't always easy
 14 | > especially if they are published many years ago and do not pop up in my news
 15 | > feed daily.
 16 | > Could Goodreads develop a feature which recommends a book because it is
 17 | > common among the people I follow?
 18 | 
 19 | 
 20 | ## This
 21 | 
 22 | ![Screenshot](img/friendrated2.png?raw=true "Screenshot")
 23 | 
 24 | The report also includes a table with the most liked authors among the friends and followees:
 25 | 
 26 | ![Screenshot](img/friendrated3.png?raw=true "Screenshot")
 27 | 
 28 | 
 29 | ## How to generate this on a GNU/Linux operating system
 30 | 
 31 | 1. [Install the toolbox](../README.md#Getting-started)
 32 | 2. at the prompt, enter:
 33 | 
 34 | ```console
 35 | $ ./friendrated.pl --help
 36 | $ ./friendrated.pl goodlogin@example.com
 37 | 
 38 | Enter GR password for goodlogin@example.com: **************
 39 | Signing in to Goodreads... OK
 40 | Getting list of members known to #18418712... 164 members (0.18s)
 41 | [  0%] Aron Mellendar            #21254511    247 read      94 hits     0.41s
 42 | [  1%] Moshe Fiono               #3932835     520 read     126 hits     0.80s
 43 | [  2%] Peter Glowwa              #18936366    392 read     148 hits     0.58s
 44 | [  3%] DuyGeboad                 #73957929      9 read       0 hits     0.05s
 45 | [  3%] Michael                   #9482539      88 read      61 hits     0.15s
 46 | [  5%] Peter Prischl             #17272051   1034 read     913 hits     1.47s
 47 | [  6%] Steven Shoffork           #51011129     69 read      50 hits     0.15s
 48 | [  7%] 2mo                       #32504210     12 read       6 hits     0.07s
 49 | ...
 50 | [ 99%] Charlene                  #2442665    1172 read     732 hits     2.41s
 51 | [100%] David                     #7634567     142 read      58 hits     0.01s
 52 | 
 53 | Perfect! Got favourites of 164 users.
 54 | Writing results to:
 55 | ./list-out/friendrated-1234567-read.html           (271 books)
 56 | ./list-out/friendrated-1234567-read-authors.html   (210 authors)
 57 | 
 58 | Total time: 18 minutes
 59 | ```
 60 | 
 61 | **Note:**
 62 | 
 63 | You can break the process with <kbd>CTRL</kbd>-<kbd>C</kbd> and continue later
 64 | without having to re-read all online sources again, as reading from
 65 | Goodreads.com is very time consuming.  The script internally uses a
 66 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/.
 67 | 
 68 | "0 read 0 hits" is either an empty shelf or a shelf accessible only to friends
 69 | of that person (depends on your login).
 70 | 
 71 | 
 72 | ## Alternative reports
 73 | 
 74 | - most _wished-for_ books among the members you follow: use `--toread` option
 75 | - most _hated_ books among the members you follow: use `--hated` option
 76 | - there are options (`--help`) to fine-tune the reports, e.g., 
 77 | 	only include books published in a specific time-range, 
 78 | 	exclude bestsellers etc.
 79 | 
 80 | 
 81 | ## Observations and limitations
 82 | 
 83 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
 84 | - books in the upper value range are usually well-known titles, fiction, classics, no surprises
 85 | - female GR members mainly read fiction, tend to give 4 and 5 stars pretty generously, 
 86 |   and their networks are female
 87 |   - start with harsh program settings: min rating of 5 and rated by min 5 followees
 88 | - "common authors" tables can be misleading, at the moment:
 89 |   it just counts the frequency of a name but does not take into account
 90 |   the aggregated ratings of a member for a specific author, example:
 91 |   20 members hate 10 books of an author except 1 book.
 92 |   the program would count 20x a love relationship for this author,
 93 |   although the books in general of this author are more often hated
 94 | 
 95 | 
 96 | ## Feedback
 97 | 
 98 | If you like this project, give it a star on GitHub.
 99 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
100 | or see the [AUTHORS.md](../AUTHORS.md) file.
101 | 
102 | 
103 | ## See also
104 | 
105 | - ~~[Popular books](https://www.goodreads.com/friend/popular_books) among my friends _this month_ (Goodreads feature)~~
106 | - [Most read by pub-year](https://www.goodreads.com/book/popular_by_date/1919/) on all of Goodreads
107 | - [likeminded.pl](likeminded.md)   - Find Goodreads members with similar book taste
108 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
109 | - [friendnet.pl](friendnet.md)     - Social network analysis
110 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
111 | - [similarauth.pl](similarauth.md) - Find all similar authors
112 | - [search.pl](search.md)           - Sort books-search results by popularity or date published
113 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
114 | 
115 | 


--------------------------------------------------------------------------------
/help/img/friendgroup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendgroup.png


--------------------------------------------------------------------------------
/help/img/friendrated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendrated.png


--------------------------------------------------------------------------------
/help/img/friendrated2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendrated2.png


--------------------------------------------------------------------------------
/help/img/friendrated3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/friendrated3.png


--------------------------------------------------------------------------------
/help/img/likeminded.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/likeminded.png


--------------------------------------------------------------------------------
/help/img/search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/search.png


--------------------------------------------------------------------------------
/help/img/similarauth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/help/img/similarauth.png


--------------------------------------------------------------------------------
/help/likeminded.md:
--------------------------------------------------------------------------------
  1 | # likeminded.pl
  2 | 
  3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
  4 | 
  5 | 
  6 | ## Finding people based on the books they've read
  7 | 
  8 | From the _Goodreads Feedback_ forum, 
  9 | [Linda (2010)](https://web.archive.org/web/20170427072407/http://www.goodreads.com/topic/show/298531-is-there-an-option-to-do-a-general-search-for-people-with-similar-readin)
 10 | or [Michael (2013)](https://web.archive.org/web/20190525014022/https://www.goodreads.com/topic/show/1619830-finding-friends-using-compare-books)
 11 | or [Wren (2014)](https://web.archive.org/web/20190525013926/https://www.goodreads.com/topic/show/1790589-what-if-there-was-a-recommended-friends-feature) 
 12 | or [Kara (2015)](https://web.archive.org/web/20190525013835/https://www.goodreads.com/topic/show/17019858-compare-books-suggestion)
 13 | or [Samantha (2016)](https://web.archive.org/web/20190525013741/https://www.goodreads.com/topic/show/18167287-users-like-you-feature-suggestion)
 14 | or [Jacob (2017)](https://web.archive.org/web/20190525013655/https://www.goodreads.com/topic/show/18433578-find-me-a-friend-with-same-taste-for-books)
 15 | or [Superbunny (2018)](https://web.archive.org/web/20190525013600/https://www.goodreads.com/topic/show/19361289-searching-others-with-similar-taste-to-mine)
 16 | or [Marc (2018)](https://web.archive.org/web/20190525013501/https://www.goodreads.com/topic/show/19252693-new-suggestion-to-find-like-minded-people)
 17 | or [Anna (2019)](https://help.goodreads.com/s/question/0D51H00004AEWwNSAX/is-there-a-way-to-find-people-who-have-similar-tastes-in-books-i-like-so-i-can-follow-them)
 18 | or [Mehran](https://web.archive.org/web/20190525013406/https://www.goodreads.com/topic/show/19397936-finding-people-based-on-the-books-they-ve-read):
 19 | > Is there a way to search for people who have read books X, Y, and Z? Or maybe
 20 | > a way for you to find people who have many books in common with you, without
 21 | > going through people manually? If such features don't exist, Goodreads should
 22 | > definitely add them. They can provoke many conversations among people who have
 23 | > similar tastes in books. 
 24 | 
 25 | 
 26 | 
 27 | ## This
 28 | 
 29 | ![Screenshot](img/likeminded.png?raw=true "Screenshot")
 30 | 
 31 | 
 32 | 
 33 | ## How to generate this on a GNU/Linux operating system
 34 | 
 35 | 1. [Install the toolbox](../README.md#Getting-started)
 36 | 2. at the prompt, enter:
 37 | ```console
 38 | $ ./likeminded.pl --help
 39 | $ ./likeminded.pl goodlogin@example.com
 40 | 
 41 | Enter GR password for goodlogin@example.com: ******************
 42 | Signing in to Goodreads... OK
 43 | Loading authors from "ALL" may take a while... 95 authors
 44 | Loading books of 95 authors:
 45 | [  1%] Schuberth, Richard         #2793763    6 books    1.03s
 46 | [  2%] Lohoff, Ernst              #1339033    4 books    1.05s
 47 | [  3%] Huang, Andrew "bunnie"     #2949412    6 books    1.04s
 48 | [  4%] Pullum, Laura L.           #476506     2 books    1.05s
 49 | [  5%] Patri, Giacomo             #379757     3 books    1.04s
 50 | ...
 51 | [100%] Fertl, Herbert L.          #16159494   1 books    1.03s
 52 | Done.
 53 | Loading readers of 1625 author books:
 54 | [  0%] First as Tragedy, Then as Farce           #6636487    2278 memb    134.20s
 55 | [  0%] Descriptive Check List: Together With     #6517166       0 memb      1.41s
 56 | [  0%] Little Brother (Little Brother, #1)       #25547383   5885 memb    324.83s
 57 | [  0%] The Hardware Hacker: Adventures in Ma     #30804383    219 memb     11.25s
 58 | [  1%] Hacking the Xbox: An Introduction to      #984394      206 memb     10.26s
 59 | ...
 60 | [100%] Maker Pro Essays on Making a Living a     #24214717     33 memb      1.09s
 61 | Done.
 62 | Dropping who read less than 5% of your authors... -20205 memb (99.998%)
 63 | Loading profiles of the remaining 420 members:
 64 | [  0%] goodreads.com/user/show/120456      1.21s     *
 65 | [  1%] goodreads.com/user/show/65482       2.10s     ****
 66 | [  1%] goodreads.com/user/show/45763483    0.90s
 67 | [  2%] goodreads.com/user/show/773911      2.23s     private account
 68 | [  2%] goodreads.com/user/show/1031286     3.01s
 69 | ...
 70 | [100%] goodreads.com/user/show/818022      1.01s     **
 71 | Done.
 72 | Writing report (N=399) to "./list-out/likeminded-18418712.html"...
 73 | Total time: 294 minutes
 74 | ```
 75 | 
 76 | **Note:**
 77 | 
 78 | You can break the process with <kbd>CTRL</kbd>-<kbd>C</kbd> and continue later
 79 | without having to re-read all online sources again, as reading from
 80 | Goodreads.com is very time consuming.  The script internally uses a
 81 | file-cache which saves to /tmp/FileCache/.
 82 | The program is designed to run unattended and waits out connection issues etc.
 83 | 
 84 | 
 85 | ## Observations and limitations
 86 | 
 87 | #### Latest version:
 88 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
 89 | - loading data could take a month given too many books
 90 | - prefer loading from a separate _"best-of"_ shelf:
 91 | 	- use the program's `--shelf` option, avoid _"All"_ or _"Read"_ shelves
 92 | 	- add _100_ good but rare books (&lt;5000 ratings)
 93 | 	- the more popular your literature, the longer the program's runtime
 94 | 	- the more popular your lit, the more generic the results (500 million sales of Harry Potter)
 95 | 	- the more popular your lit, the less likely we detect recurring members (we cannot see all readers)
 96 | 	- you can add books to your shelf more quickly in Goodreads' [batch edit](https://2.bp.blogspot.com/-MBcqYj2mK_I/UsyW06AX43I/AAAAAAAAEdE/5V5z2_XJaCI/s1600/Step+1&2.jpg) mode   
 97 | 	- alternatively load from multiple smaller shelves: 
 98 | 	  `./likeminded.pl --shelf=nonfiction --shelf=poetry ...`
 99 | - make sure you have some _Gigabytes_ of free diskspace in `/tmp/`: 
100 | 	- my last test run with 356 books filled 11 GB in ~24 hours (many small files)
101 | - there's no way to get _all_ readers of a book:
102 | 	- the program tries different things to get as many as possible
103 | 	- you can tune this with the `--rigor` program option (increases runtime)
104 | 	- there is a number of readers not considered in our statistics
105 | 	- we cannot randomize in a way which would produce samples of similar size
106 | 	- although we don't get _all_ readers (for books with tens of thousands of readers), 
107 | 	  the final report still contains _enough_ members who read the same N authors
108 | - does _not_ list members with private accounts anymore
109 | - slow but good enough; you won't run it more often than 4x a year
110 | - _"...most number of shared books would be a list of children's books"_ 
111 | 	- exclude them by passing one or many `--shelf` arguments to the program
112 | 
113 | 
114 | #### Library sizes as ranking factor:
115 | - there are members with many common authors just because they have huge libraries
116 | - there are members with 94,857 ratings, likely bots
117 | - in a previous program version,
118 |   a member with 11 common authors and 3000 books in total was shown earlier than
119 |   a member with 10 common authors and  300 books in total, 
120 |   although the latter one is probably more "like-minded"
121 | - getting the library sizes requires an additional profiles loading stage
122 | 	- increased runtime is accepted as weeding out members
123 | 	  by hand takes even longer
124 | 
125 | 
126 | #### First version compared books, not authors:
127 | - turned out to be too narrow in order to produce satisfying results
128 | - given 299 books and a minimum of 9 _common_ books (3% similarity), 
129 |   I've got 10 of 31,398 members,
130 |   with 5 members actually worth investigating, 
131 |   and only 1 member already on my hand-curated list of 137 followees
132 | - a minimum of 6 common books (2%) listed 43 members, more or less interesting
133 | - we learn: book combinations tend to become unique quickly
134 | - combinations of same books are more rare than combinations of same authors, 
135 |   while latter still satisfies the 'same taste' condition 
136 |   (the assumption with 'same books' is that likeminded people had the same exposure to the exact same books, but that's questionable - and comparing the _authors_ relaxes this assumption)
137 | - the new authors-version takes longer but yields better results, e.g.,
138 |   more matches with my hand-curated followees list
139 | 	- try program argument `--maxauthorbooks=50` to reduce runtime (checks max. 50 books per author)
140 |   
141 | 
142 | #### Alternatives to consider:
143 | - _"I look for people who __dislike__ the same books that I do. I don't have a problem finding books to read. What I need is someone who can warn me about the books that everyone else seems to love."_
144 | - current results based on stuff you already liked, model the past as identical to the future, based on who you are/were not who you want or could become (stuff that's out of your current wheelhouse but still has likeable features)
145 | - maybe recommendations from our to-read lists are more interesting/up-to-date than our read-lists
146 | - _"People *near me* with the same taste in books."_, additional filter by city
147 | 
148 | 
149 | ## Feedback
150 | 
151 | If you like this project, give it a star on GitHub.
152 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
153 | or see the [AUTHORS.md](../AUTHORS.md) file.
154 | 
155 | 
156 | ## See also
157 | 
158 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
159 | - [friendnet.pl](friendnet.md)     - Social network analysis
160 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
161 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
162 | - [similarauth.pl](similarauth.md) - Find all similar authors
163 | - [search.pl](search.md)           - Sort books-search results by popularity or date published
164 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
165 | 
166 | 


--------------------------------------------------------------------------------
/help/recentrated.md:
--------------------------------------------------------------------------------
  1 | # recentrated.pl
  2 | 
  3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
  4 | 
  5 | 
  6 | ## Know when people rate or write reviews about a book
  7 | 
  8 | From the _Goodreads Feedback_ forum, 
  9 | [Scribble (2010)](https://web.archive.org/web/20190525012742/https://www.goodreads.com/topic/show/440170-follow-user-reviews----follow-the-book)
 10 | or [Jimmy (2011)](https://web.archive.org/web/20190525012635/https://www.goodreads.com/topic/show/563115-follow-a-book)
 11 | or [PetraX (2014)](https://web.archive.org/web/20190525012443/https://www.goodreads.com/topic/show/2136206-following-books)
 12 | or [Lucas (2018)](https://web.archive.org/web/20190525012344/https://www.goodreads.com/topic/show/19212816-follow-all-reviews-of-a-book)
 13 | or [Jason (2018)](https://web.archive.org/web/20190525012148/https://www.goodreads.com/topic/show/19540183-subscribe-to-book-reviews-of-certain-books)
 14 | or [Elizabeth (2016)](https://web.archive.org/web/20190525012253/https://www.goodreads.com/topic/show/18060629-follow-book):
 15 | > I know this has been requested before, but I'd really like the opportunity to
 16 | > follow a book. I'd like to know when people rate or write reviews about a
 17 | > book and to be notified of such. I have some favorites that are not
 18 | > particularly well known or often read, and I'd like to know about who chooses
 19 | > to read them. 
 20 | 
 21 | 
 22 | **Receive notification e-mails:**
 23 | ```
 24 | From: yourmail@example.com
 25 | To: yourmail@example.com
 26 | Subject: New ratings on Goodreads.com
 27 | Date: Wed, 10 Jan 2018 21:10:50 +0100
 28 | 
 29 | Recently rated books in your "watch-ratings" shelf:
 30 | 
 31 |   "The Machine Question"
 32 |    www.goodreads.com/user/show/54336239   [*****]
 33 | 
 34 |   "Spam: A Shadow History of the Internet"
 35 |    www.goodreads.com/book/show/16718273   [9 new]
 36 | 
 37 |   "Understanding Beliefs"
 38 |    www.goodreads.com/review/show/22346637 [TTTT ]
 39 |    www.goodreads.com/user/show/24850532   [**   ]
 40 | 
 41 | 
 42 | --
 43 |  [***  ] 3/5 stars rating without text
 44 |  [ttt  ] 3/5 stars rating with tweet-size text
 45 |  [TTT  ] 3/5 stars rating with text
 46 |  [9 new] ratings better viewed on book page
 47 |  ...   
 48 | ```
 49 | - low-bandwidth, distraction-free plaintext mail; HTML mail appeals to marketers because it's another place to stick their logo, nobody else needs it
 50 | - most mail-clients recognize the signature and the links and make the latter clickable
 51 | - changes are collected in periodic mails; individual mails would be annoying
 52 | - text-reviews in the mail are bloat, a click on a review-link is bearable - I would have checked the reviewer on the GR website anyway
 53 | - usernames in the mail are bloat - 99% are unknown/random letters to me and I would see it on the GR website anyway
 54 | 
 55 | 
 56 | ## How to "follow books" 
 57 | 
 58 | ### Installation-free:
 59 | 
 60 | 1. visit [https://andre-st.github.io/goodreads/](https://andre-st.github.io/goodreads/) 
 61 | 2. enter your e-mail and shelf address
 62 | 
 63 | 
 64 | ### Dos and don'ts:
 65 | 
 66 | - don't use the "All" or "Read" shelves; be picky, use a separate single purpose shelf
 67 | - don't run this on more than one of your shelves; it's feasible but better use a single purpose shelf
 68 | - don't use this program with well known fiction books that get a lot of reviews; 
 69 |   some books receive 300 ratings every day = no insights, readers too random; a separate single
 70 |   purpose shelf allows fine-tuning by dropping such books over time without affecting other shelves
 71 | - create and [populate](http://i0.wp.com/theeverscholar.com/wp-content/uploads/2015/03/goodreads3.jpg) 
 72 | 	a Goodreads shelf, e.g., "watch-ratings": You can add and remove books at any time. 
 73 | 	New books will be checked automatically. 
 74 | 	Such a shelf prevents unnecessary mails and eases manual checks if this system is discontinued someday
 75 | 
 76 | 
 77 | ### Installation on a server:
 78 | 
 79 | 1. open a GNU/Linux terminal and install the Goodreads Toolbox:
 80 | 	```console
 81 | 	$ git clone https://github.com/andre-st/goodreads-toolbox.git
 82 | 	$ cd goodreads-toolbox
 83 | 	$ sudo make     # Required Perl modules from CPAN etc.
 84 | 	```
 85 | 2. have a sendmail MTA set up. 
 86 | 	The simplest option is 
 87 | 	[ssmtp](https://wiki.debian.org/sSMTP) (deprecated) or 
 88 | 	[nullmailer](http://untroubled.org/nullmailer/) or 
 89 | 	[msmtp-mta](http://msmtp.sourceforge.net), 
 90 | 	with your original sendmail being renamed and symlinked to one of them:
 91 | 	```sh
 92 | 	$ vi ~/.msmtprc
 93 | 			# All accounts:
 94 | 			defaults
 95 | 			auth           on
 96 | 			tls            on
 97 | 			tls_starttls   on
 98 | 			tls_trust_file /etc/ssl/certs/ca-certificates.crt  # or .../ca-bundle.crt
 99 | 			logfile        ~/.msmtp.log
100 | 			
101 | 			# Gmail account:
102 | 			account        gmail
103 | 			host           smtp.gmail.com  # smtp-relay.gmail.com for G Suite users
104 | 			port           587
105 | 			from           XXXXXXXX@gmail.com
106 | 			user           XXXXXXXX@gmail.com
107 | 			password       XXXXXXXXXXXXXXXXXX
108 | 			
109 | 			# Default account:
110 | 			account default : gmail
111 | 			
112 | 	$ chmod u=rw,go= ~/.msmtprc
113 | 	$ echo "Test message"         | mail -s "Mailer Test" XXXXXXXX@gmail.com
114 | 	$ echo "Subject: Mailer Test" | sendmail -v XXXXXXXX@gmail.com
115 | 	```
116 | 
117 | 3. add a cron-job (I prefer 
118 | 	[anacron-like](https://en.wikipedia.org/wiki/Anacron "performs pending jobs if the computer was previously shut down") 
119 | 	daemons such as 
120 | 	[dcron](https://github.com/dubiousjim/dcron) or 
121 | 	[fcron](https://en.wikipedia.org/wiki/Fcron)):
122 | 	edit `/etc/cron.daily/goodratings` and replace ARGUMENTS:
123 | 	```sh
124 | 	#!/usr/bin/env sh
125 | 	# `ifne` is part of `moreutils`
126 | 	/path/to/recentrated.pl GOODUSERID SHELFNAME YOURMAIL@EXAMPLE.COM | ifne /usr/sbin/sendmail -t
127 | 	
128 | 	# Provide this self-hosted service to your Goodreads friends too!
129 | 	# ... HERUSERID HERSHELF HERMAIL@EXAMPLE.COM ADMIN@EXAMPLE.COM | ...
130 | 	# ...
131 | 	```
132 | 	```sh
133 | 	$ sudo chmod +x /etc/cron.daily/goodratings
134 | 	```
135 | 	See also [cron.daily/goodratings.example](cron.daily/goodratings.example)
136 | 
137 | 
138 | ## Feedback
139 | 
140 | If you like this project, you can "star" it on GitHub.
141 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
142 | or see the [AUTHORS.md](../AUTHORS.md) file.
143 | 
144 | 
145 | ## See also
146 | 
147 | - [likeminded.pl](likeminded.md)   - Find Goodreads members with similar book taste
148 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
149 | - [friendnet.pl](friendnet.md)     - Social network analysis
150 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
151 | - [similarauth.pl](similarauth.md) - Find all similar authors
152 | - [search.pl](search.md)           - Sort books-search results by popularity or date published
153 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
154 | 
155 | 


--------------------------------------------------------------------------------
/help/savreviews.md:
--------------------------------------------------------------------------------
  1 | # savreviews.pl
  2 | 
  3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
  4 | 
  5 | 
  6 | ## Download all reviews for a book, e.g., for sentiment analysis
  7 | 
  8 | From [r/goodreads (2018)](https://www.reddit.com/r/goodreads/comments/aail3f/is_there_any_way_website_or_api_to_see_all/) or the _Goodreads Developers_ forum, 
  9 | [Breslin (2018)](https://web.archive.org/web/20190525014427/https://www.goodreads.com/topic/show/19484417-increase-the-visible-number-of-ratings-of-a-book)
 10 | or [Giulia (2018)](https://web.archive.org/web/20190525014339/https://www.goodreads.com/topic/show/19477061-how-can-i-extract-all-reviews-full-text-for-a-specific-book):
 11 | 
 12 | > I simply need to obtain all (or as many) reviews for two books, namely
 13 | > Woolf's To the Lighthouse and Mrs Dalloway, so that i can then analyse
 14 | > the corpus obtained from them and see if readers define the two novels
 15 | > as "difficult".
 16 | 
 17 | 
 18 | ## Output format
 19 | ```console
 20 | $ cat savreviews-book12345-stars2.txt
 21 | 2018/12/29 #1234567
 22 | 
 23 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
 24 | eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
 25 | minim veniam, quis nostrud exercitation ullamco laboris nisi ut
 26 | aliquip ex ea <em>commodo consequat</em>. 
 27 | 
 28 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum 
 29 | dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non 
 30 | proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
 31 | 
 32 | -------------------------------------------------------------------------------
 33 | 2018/10/21 #7654321
 34 | 
 35 | Ut enim ad minim veniam, quis nostrud <b>exercitation</b> ullamco laboris nisi 
 36 | ut aliquip ex ea commodo consequat: <a href="https://example.com">example.com</a>
 37 | 
 38 | -------------------------------------------------------------------------------
 39 | 2018/04/01 #918273
 40 | 
 41 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
 42 | eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
 43 | minim veniam, quis nostrud exercitation ullamco laboris nisi
 44 | ```
 45 | 
 46 | **Note:**
 47 | 
 48 | The generated files (one per star-rating) contain review-texts, dates and the review-ID only. 
 49 | They do not contain any other information, e.g., user names.
 50 | If there is interest in these details or other output formats, just contact 
 51 | me or [add an issue](https://github.com/andre-st/goodreads-toolbox/issues).
 52 | 
 53 | 
 54 | 
 55 | ## How to generate this on a GNU/Linux operating system
 56 | 
 57 | 1. [Install the toolbox](../README.md#Getting-started)
 58 | 2. at the prompt, enter:
 59 | 
 60 | ```console
 61 | $ ./savreviews.pl --help
 62 | $ ./savreviews.pl 59716  # Goodreads Book-ID in URL
 63 | 
 64 | Loading reviews for "To the Lighthouse"... 5271 of 5860 [searching]
 65 | 
 66 | Number of reviews per year:
 67 | 2007 ################                           263
 68 | 2008 #####################                      343
 69 | 2009 ################                           266
 70 | 2010 #################                          276
 71 | 2011 ######################                     357
 72 | 2012 #############################              473
 73 | 2013 ##################################         565
 74 | 2014 ############################               456
 75 | 2015 ###########################                440
 76 | 2016 #############################              474
 77 | 2017 ####################################       599
 78 | 2018 ########################################   648
 79 | 2019 ######                                     111
 80 | 
 81 | Writing reviews to:
 82 | ./list-out/savreviews-book59716-stars0.txt
 83 | ./list-out/savreviews-book59716-stars1.txt
 84 | ./list-out/savreviews-book59716-stars2.txt
 85 | ./list-out/savreviews-book59716-stars3.txt
 86 | ./list-out/savreviews-book59716-stars4.txt
 87 | ./list-out/savreviews-book59716-stars5.txt
 88 | 
 89 | Total time: 36 minutes
 90 | ```
 91 | 
 92 | 
 93 | ## Observations and limitations
 94 | 
 95 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
 96 | - there's no way to load _all_ reviews of a book, but the program 
 97 |   tries different things to get as many fulltext reviews as 
 98 |   possible -- this can take very long (see `--rigor` parameter and [this](../list-in/))
 99 | - needs data cleansing on your side
100 | - review text might include user-entered (broken) HTML code and URLs
101 | - review text can be in any language, e.g., German or Russian
102 | - review text might include non-latin characters, e.g., Cyrillic
103 | - no duplicate reviewers, but could theoretically contain duplicate 
104 |   reviews posted by different members (statistically negligible?)
105 | 
106 | 
107 | ## Feedback
108 | 
109 | If you like this project, give it a star on GitHub.
110 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
111 | or see the [AUTHORS.md](../AUTHORS.md) file.
112 | 
113 | 
114 | ## See also
115 | 
116 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
117 | - [friendnet.pl](friendnet.md)     - Social network analysis
118 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
119 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
120 | - [similarauth.pl](similarauth.md) - Find all similar authors
121 | - [likeminded.pl](likeminded.md)   - Finding people based on the books they've read
122 |  
123 | 
124 | 


--------------------------------------------------------------------------------
/help/search.md:
--------------------------------------------------------------------------------
 1 | # search.pl
 2 | 
 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
 4 | 
 5 | 
 6 | ## Sort Goodreads search results by popularity or date published
 7 | 
 8 | From the _Goodreads Feedback_ forum,
 9 | [Pawel (2010)](https://web.archive.org/web/20190525015116/https://www.goodreads.com/topic/show/423469-sorting-search-results)
10 | or [obsessedwithbooks (2013)](https://web.archive.org/web/20190525015022/https://www.goodreads.com/topic/show/1188302-sort-search-results)
11 | or [Sonja (2016)](https://web.archive.org/web/20190525014930/https://www.goodreads.com/topic/show/18177911-advanced-search-for-books)
12 | or [Ferouk (2016)](https://web.archive.org/web/20190525014842/https://www.goodreads.com/topic/show/18084428-we-want-to-find-good-books-fast)
13 | or [David-Emmanuel (2017)](https://web.archive.org/web/20190525014755/https://www.goodreads.com/topic/show/18541118-better-search)
14 | or [Halordain (2017)](https://web.archive.org/web/20190525014643/https://www.goodreads.com/topic/show/18496984-sorting-by-average-rating)
15 | or [Kevin (2018)](https://web.archive.org/web/20190525014542/https://www.goodreads.com/topic/show/19464605-sort-search-results-by-rating):
16 | 
17 | > I am trying to explore and discover the *best* books. I am not looking
18 | > for the most relevant book. Probably all the books that contain
19 | > "Linux" in the title are relevant to what I'm looking for. I am not
20 | > interested in a particular book's algorithmically-determined
21 | > "relevance score" to my search query. I'm strictly interested in star
22 | > ratings.
23 | 
24 | In addition to [Em__Jay (2015)](https://web.archive.org/web/20190525015950/https://www.goodreads.com/topic/show/2279173-search-results)
25 | or [Carri (2016)](https://web.archive.org/web/20190525015857/https://www.goodreads.com/topic/show/18123885-search-functionality)
26 | or [G.H. (2016)](https://web.archive.org/web/20190525015818/https://www.goodreads.com/topic/show/18034964-search-results)
27 | or [Epper (2016)](https://web.archive.org/web/20190525015727/https://www.goodreads.com/topic/show/18223264-search-books-filter-results)
28 | or [Shanna_redwind (2016)](https://web.archive.org/web/20190525015634/https://www.goodreads.com/topic/show/18208444-search-very-frustrating)
29 | or [Lisa (2017)](https://web.archive.org/web/20190525015546/https://www.goodreads.com/topic/show/19114134-search-fundction-when-looking-for-books)
30 | or [Jenna (2017)](https://web.archive.org/web/20190525015501/https://www.goodreads.com/topic/show/18901296-please-improve-search-function)
31 | or [SL (2018)](https://web.archive.org/web/20190525020028/https://www.goodreads.com/topic/show/19387052-search-needs-improvement)
32 | or [Mimi (2018)](https://web.archive.org/web/20190525015405/https://www.goodreads.com/topic/show/19272652-refined-search)
33 | or [Ian (2016)](https://web.archive.org/web/20190525015312/https://www.goodreads.com/topic/show/18115612-search-prioritise-exact-matches):
34 | 
35 | >I kind of wonder if I'm the only one who finds this annoying. If you search
36 | >for a book and type in the title of the book, exact matches to what you type
37 | >are rarely the first listed. 
38 | 
39 | 
40 | ## This
41 | 
42 | [![Screenshot](img/search.png?raw=true "Search result for 'Linux'")](https://andre-st.github.io/search-linux.html)
43 | 
44 | 
45 | ## How to generate this on a GNU/Linux operating system
46 | 
47 | 1. [Install the toolbox](../README.md#Getting-started)
48 | 2. at the prompt, enter:
49 | 
50 | ```console
51 | $ ./search.pl --help
52 | $ ./search.pl YOURKEYWORD
53 | 
54 | Searching books:
55 | 
56 |  about..... YOURKEYWORD
57 |  rated by.. 5 members or more
58 |  order by.. stars, num_ratings, year
59 |  progress.. 100%
60 | 
61 | Writing search result (N=275) to "./list-out/search-YOURKEYWORD.html"... 
62 | Total time: 3 minutes
63 | ```
64 | 
65 | 
66 | ## Observations and limitations
67 | 
68 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
69 | - start the program with defaults and re-run to fine-tune with parameters later (previously downloaded resources are reused so it's faster than the first run); you might not know how many ratings actually exist; if `--ratings` is too high, you will not get any results (`N=0`)
70 | - [garbage in, garbage out](https://en.wikipedia.org/wiki/Garbage_in,_garbage_out)
71 | 
72 | 
73 | ## Feedback
74 | 
75 | If you like this project, give it a star on GitHub.
76 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
77 | or see the [AUTHORS.md](../AUTHORS.md) file.
78 | 
79 | 
80 | ## See also
81 | 
82 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
83 | - [friendnet.pl](friendnet.md)     - Social network analysis
84 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
85 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
86 | - [similarauth.pl](similarauth.md) - Find all similar authors
87 | - [likeminded.pl](likeminded.md)   - Finding people based on the books they've read
88 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
89 | 
90 | 
91 | 


--------------------------------------------------------------------------------
/help/similarauth.md:
--------------------------------------------------------------------------------
 1 | # similarauth.pl
 2 | 
 3 | ![Maintenance](https://img.shields.io/maintenance/no/2022.svg)
 4 | 
 5 | 
 6 | ## Finding all similar authors
 7 | 
 8 | From the _Goodreads Feedback_ forum, 
 9 | [Anne (2018)](https://web.archive.org/web/20190525014222/https://www.goodreads.com/topic/show/19438988-finding-similar-authors):
10 | > I like Laura Kinsale and Loretta Chase. If I do some digging, I discover that
11 | > I might like Judith Ivory too, because she is on the similar authors list of
12 | > both authors. And if I like Judith Ivory, too, I certainly should try Sherry
13 | > Thomas, because she is on all lists of those three authors
14 | 
15 | 
16 | 
17 | ## This
18 | 
19 | ![Screenshot](img/similarauth.png?raw=true "Screenshot")
20 | 
21 | 
22 | 
23 | ## How to generate this on a GNU/Linux operating system
24 | 
25 | 1. [Install the toolbox](../README.md#Getting-started)
26 | 2. at the prompt, enter:
27 | 
28 | ```console
29 | $ ./similarauth.pl --help
30 | $ ./similarauth.pl goodlogin@example.com
31 | 
32 | Enter GR password for goodlogin@example.com: ****************
33 | Signing in to Goodreads... OK
34 | Loading books from "ALL" may take a while... 108 books
35 | Loading similar authors for 96 authors:
36 | [  0%] Huhn, Willy               #17326001	  0 similar	  2.56s
37 | [  1%] Gse, Don Murdoch          #8506208	 24 similar	  2.13s
38 | [  2%] Foucault, Michel          #1260		 19 similar	  2.41s
39 | [  3%] Siedersleben, Johannes    #1878894	  0 similar	  1.11s
40 | [  4%] Mattheck, Claus           #1960		  0 similar	  3.27s
41 | [  5%] Dillmann, Renate          #9835498	  0 similar	  1.51s
42 | [  6%] Decker, Peter             #361391	  0 similar	  2.42s
43 | [  7%] Bockelmann, Eske          #6219827	  0 similar	  2.20s
44 | ...
45 | [100%] O'Neill, Ryan "Elfmaster" #15065556	  0 similar	  2.43s
46 | Done.
47 | Writing authors (N=360) to "./list-out/similarauth-18418712.html"...
48 | Total time: 8 minutes
49 | ```
50 | 
51 | 
52 | **Note:**
53 | 
54 | You can break the process with <kbd>CTRL</kbd>-<kbd>C</kbd> and continue later
55 | without having to re-read all online sources again, as reading from
56 | Goodreads.com is very time consuming.  The script internally uses a
57 | **file-cache** which is busted after 31 days and saves to /tmp/FileCache/.
58 | 
59 | 
60 | 
61 | ## Observations and limitations
62 | 
63 | - long runtime: Goodreads slows down all requests and we have to load a lot of data
64 | - many authors (in my shelves) have no "similar authors" data on Goodreads
65 | - actual value of this isn't the 'seen' part but just having a long list with
66 |   similar but yet unknown authors
67 | 
68 | 
69 | 
70 | ## Feedback
71 | 
72 | If you like this project, give it a star on GitHub.
73 | Report bugs or suggestions [via GitHub](https://github.com/andre-st/goodreads-toolbox/issues) 
74 | or see the [AUTHORS.md](../AUTHORS.md) file.
75 | 
76 | 
77 | ## See also
78 | 
79 | - [friendrated.pl](friendrated.md) - Books common among the people you follow
80 | - [friendnet.pl](friendnet.md)     - Social network analysis
81 | - [friendgroup.pl](friendgroup.md) - Groups common among the people you follow
82 | - [recentrated.pl](recentrated.md) - Know when people rate or write reviews about a book
83 | - [likeminded.pl](likeminded.md)   - Finding people based on the books they've read 
84 | - [search.pl](search.md)           - Sort books-search results by popularity or date published
85 | - [savreviews.pl](savreviews.md)   - Get all reviews of a book
86 | 
87 | 


--------------------------------------------------------------------------------
/list-in/README.md:
--------------------------------------------------------------------------------
 1 | # Dictionaries
 2 | 
 3 | ## Purpose
 4 | 
 5 | As far as the reviews are concerned, the official Goodreads API typically gets you a maximum of 300 
 6 | short _excerpts_ ([here](https://www.goodreads.com/topic/show/19512142-how-to-get-whole-body-of-book-review), 
 7 | [here](https://www.goodreads.com/topic/show/12070102-review-is-truncated?comment=130838734#comment_130838734)
 8 | or [here](https://www.goodreads.com/topic/show/19455087-unable-to-get-book-reviews-by-book-id?comment=182375978#comment_182375978)). Goodreads does not use this API on its own website; it is a side project. 
 9 | They use other mechanisms to display reviews on their website, mechanisms that
10 | are used by the _Toolbox_ programs too ([AJAX](https://en.wikipedia.org/wiki/Ajax_(programming)) endpoints in this case). 
11 | These mechanisms have their own limitations: you cannot see all reviews, 
12 | but you can search a book's reviews by a keyword and/or filter by the number of stars, age etc.
13 | Toolbox programs such as [savreviews.pl](../help/savreviews.md) or [likeminded.pl](../help/likeminded.md) 
14 | use filters and also run a dictionary against this search in order to collect reviews.
15 | 
16 | 
17 | ## Results
18 | 
19 | | Dictionary               | Lines | Minutes | "To the Lighthouse"<br>5514 text reviews | "Mrs Dalloway"<br>7376 text reviews |
20 | |:-------------------------|------:|--------:|-------------:|--------------:|
21 | | _none (filters only)_    |     - |         |  948 or 17%  |   _untested_
22 | | gram-en-l.lst            |  3349 |     111 | 3057 or 55%  |   _untested_
23 | | gram-en-s.lst            |   390 |         |   _untested_ |   _untested_
24 | | word-en-1k.lst           |  1000 |      33 | 4962 or 90%  | 6413 or 87%
25 | | word-en-s.lst            |   114 |         |   _untested_ |   _untested_
26 | | gram-en-s,word-en-1k.lst |  1390 |         |   _untested_ |   _untested_
27 | | gram-en-l,word-en-1k.lst |  4349 |     144 | 5127 or 93%  | 6715 or 91%
28 | 
29 | No duplicate reviewers, but could theoretically contain duplicate reviews
30 | posted by different members, which would be counted by Goodreads too.
31 | 
32 |     
33 | ## Naming Conventions
34 | 
35 | File names: `${TYPE4LETTERCODE}-${LANGUAGE2LETTERCODE}-${SIZE}.lst` with 
36 | size `l` meaning large dictionaries, `s` meaning small dictionaries,
37 | `1k` meaning 1000 lines, `3k` meaning 3000 lines,
38 | extension `lst` meaning "list". Lists are ASCII files with one word per line.
39 | Comma denotes combined dictionaries, e.g., `gram-en-l,word-en-1k.lst`.
40 | 
41 | Smaller dictionaries are usually a subset of the larger ones, so you should 
42 | start with the smaller ones to test. Since all Toolbox programs cache their 
43 | results for some days, switching to the larger dictionaries in addition 
44 | will not waste time with downloading already present results.
45 | 
46 | 
47 | ## File: gram-en-l.lst
48 | 
49 | most frequent English n-grams first
50 | 
51 | 
52 | ## File: gram-en-s.lst
53 | 
54 | most frequent English trigrams from `gram-en-l.lst` tested against
55 | Harry Potter reviews: I only saved trigrams which led to 10-30 unique(!) hits,
56 | best first.  Appended most frequent English trigrams which are not already
57 | present in the Harry Potter set.  Works better with a larger set of available
58 | reviews.  Randomization yielded no improvements (rather the opposite). 
59 | Seems often as good as the whole `gram-en-l.lst`.
60 | 
61 | 
62 | ## File: word-en-1k.lst
63 | 
64 | most frequent English words first.
65 | Performed better than the n-gram-based dictionaries.
66 | 
67 | 
68 | ## File: word-en-s.lst
69 | 
70 | [Parts of speech](https://en.wikipedia.org/wiki/Most_common_words_in_English#Parts_of_speech)
71 | 
72 | 
73 | ## File: gram-en-l,word-en-1k.lst
74 | 
75 | slightly more results than just `word-en-1k.lst`
76 | but way more search time (1000 vs 4349 lines)
77 | 
78 | ## File: dict.lst
79 | 
80 | A symlink to any of the other dictionary files. Toolbox programs default to this dictionary-symlink, so you can change it for all programs at once.
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/list-in/dict.lst:
--------------------------------------------------------------------------------
1 | word-en-1k.lst


--------------------------------------------------------------------------------
/list-in/gram-en-s.lst:
--------------------------------------------------------------------------------
  1 | 3
  2 | 4
  3 | 5
  4 | let
  5 | wit
  6 | ing
  7 | put
  8 | ten
  9 | met
 10 | ass
 11 | ini
 12 | bit
 13 | lit
 14 | men
 15 | job
 16 | get
 17 | rat
 18 | cut
 19 | mix
 20 | our
 21 | are
 22 | owe
 23 | win
 24 | all
 25 | con
 26 | hit
 27 | the
 28 | use
 29 | pre
 30 | ran
 31 | ist
 32 | ate
 33 | you
 34 | art
 35 | per
 36 | era
 37 | ton
 38 | her
 39 | end
 40 | ter
 41 | lot
 42 | old
 43 | one
 44 | and
 45 | low
 46 | fit
 47 | was
 48 | fan
 49 | too
 50 | ill
 51 | dec
 52 | add
 53 | tho
 54 | pay
 55 | row
 56 | tra
 57 | ver
 58 | act
 59 | mad
 60 | sat
 61 | awe
 62 | nor
 63 | ive
 64 | can
 65 | new
 66 | car
 67 | had
 68 | ish
 69 | for
 70 | tan
 71 | pro
 72 | she
 73 | lea
 74 | ice
 75 | not
 76 | age
 77 | two
 78 | cat
 79 | got
 80 | off
 81 | far
 82 | lay
 83 | wee
 84 | tea
 85 | try
 86 | day
 87 | kid
 88 | est
 89 | sin
 90 | way
 91 | red
 92 | etc
 93 | par
 94 | sit
 95 | ser
 96 | com
 97 | cos
 98 | led
 99 | sum
100 | fed
101 | see
102 | own
103 | son
104 | mum
105 | por
106 | out
107 | via
108 | saw
109 | fun
110 | rid
111 | ear
112 | ink
113 | now
114 | eat
115 | his
116 | hes
117 | mid
118 | but
119 | eye
120 | han
121 | ugh
122 | ron
123 | bar
124 | who
125 | ask
126 | dit
127 | yea
128 | fav
129 | how
130 | pop
131 | bad
132 | due
133 | bug
134 | don
135 | sci
136 | sad
137 | set
138 | ame
139 | hot
140 | man
141 | dry
142 | ago
143 | air
144 | lie
145 | fly
146 | run
147 | did
148 | bat
149 | law
150 | bed
151 | tip
152 | leg
153 | cry
154 | has
155 | mom
156 | tie
157 | bag
158 | yes
159 | boy
160 | top
161 | ese
162 | gem
163 | him
164 | bus
165 | map
166 | war
167 | fix
168 | amo
169 | odd
170 | wat
171 | its
172 | app
173 | tal
174 | owl
175 | mil
176 | dog
177 | las
178 | pun
179 | arc
180 | nth
181 | che
182 | buy
183 | egg
184 | fat
185 | der
186 | dia
187 | ler
188 | mal
189 | pig
190 | key
191 | tom
192 | mis
193 | pet
194 | sun
195 | beg
196 | big
197 | alt
198 | hid
199 | que
200 | dat
201 | any
202 | box
203 | eso
204 | sex
205 | del
206 | rip
207 | nos
208 | sea
209 | sky
210 | ama
211 | leo
212 | hog
213 | und
214 | ban
215 | sus
216 | lee
217 | aug
218 | mon
219 | mas
220 | til
221 | den
222 | ans
223 | hut
224 | yer
225 | aka
226 | itu
227 | bet
228 | pen
229 | dig
230 | net
231 | nov
232 | asi
233 | boa
234 | ele
235 | los
236 | eve
237 | lei
238 | dio
239 | una
240 | vas
241 | tak
242 | gap
243 | ale
244 | ont
245 | fue
246 | min
247 | tag
248 | les
249 | bow
250 | non
251 | hal
252 | sem
253 | imo
254 | rob
255 | uni
256 | sue
257 | ein
258 | ook
259 | dan
260 | aun
261 | boo
262 | fin
263 | tem
264 | qui
265 | ins
266 | arm
267 | nel
268 | ora
269 | ref
270 | tim
271 | ani
272 | hop
273 | pan
274 | sam
275 | chi
276 | hat
277 | ada
278 | lil
279 | esa
280 | nut
281 | poi
282 | inc
283 | sub
284 | api
285 | pat
286 | aid
287 | umm
288 | bin
289 | lad
290 | def
291 | uno
292 | doo
293 | oli
294 | oct
295 | nit
296 | mes
297 | vol
298 | lap
299 | bir
300 | din
301 | pra
302 | pie
303 | tha
304 | mit
305 | dis
306 | sis
307 | uit
308 | ect
309 | sur
310 | cap
311 | ben
312 | mai
313 | int
314 | ali
315 | ilk
316 | pub
317 | max
318 | dos
319 | mia
320 | eva
321 | dal
322 | raw
323 | flu
324 | wer
325 | ile
326 | des
327 | gue
328 | dar
329 | pot
330 | bon
331 | elf
332 | har
333 | ven
334 | dip
335 | log
336 | ide
337 | apa
338 | mud
339 | wel
340 | bom
341 | woo
342 | ray
343 | cup
344 | toe
345 | ant
346 | aim
347 | gar
348 | ero
349 | ion
350 | tio
351 | ati
352 | ent
353 | ess
354 | ine
355 | nce
356 | res
357 | cti
358 | tic
359 | sth
360 | sta
361 | ste
362 | ica
363 | tin
364 | str
365 | tor
366 | 			
367 | rea
368 | ite
369 | lin
370 | ble
371 | rin
372 | cal
373 | nte
374 | anc
375 | ity
376 | ure
377 | oun
378 | eri
379 | ain
380 | ers
381 | nal
382 | iti
383 | ted
384 | tur
385 | sti
386 | ons
387 | ort
388 | lan
389 | lat
390 | ell
391 | igh
392 | tri
393 | nes
394 | ial
395 | ous
396 | gra
397 | 


--------------------------------------------------------------------------------
/list-in/test.lst:
--------------------------------------------------------------------------------
1 | dict.lst


--------------------------------------------------------------------------------
/list-in/word-en-1k.lst:
--------------------------------------------------------------------------------
   1 | a
   2 | ability
   3 | able
   4 | about
   5 | above
   6 | accept
   7 | according
   8 | account
   9 | across
  10 | act
  11 | action
  12 | activity
  13 | actually
  14 | add
  15 | address
  16 | administration
  17 | admit
  18 | adult
  19 | affect
  20 | after
  21 | again
  22 | against
  23 | age
  24 | agency
  25 | agent
  26 | ago
  27 | agree
  28 | agreement
  29 | ahead
  30 | air
  31 | all
  32 | allow
  33 | almost
  34 | alone
  35 | along
  36 | already
  37 | also
  38 | although
  39 | always
  40 | American
  41 | among
  42 | amount
  43 | analysis
  44 | and
  45 | animal
  46 | another
  47 | answer
  48 | any
  49 | anyone
  50 | anything
  51 | appear
  52 | apply
  53 | approach
  54 | area
  55 | argue
  56 | arm
  57 | around
  58 | arrive
  59 | art
  60 | article
  61 | artist
  62 | as
  63 | ask
  64 | assume
  65 | at
  66 | attack
  67 | attention
  68 | attorney
  69 | audience
  70 | author
  71 | authority
  72 | available
  73 | avoid
  74 | away
  75 | baby
  76 | back
  77 | bad
  78 | bag
  79 | ball
  80 | bank
  81 | bar
  82 | base
  83 | be
  84 | beat
  85 | beautiful
  86 | because
  87 | become
  88 | bed
  89 | before
  90 | begin
  91 | behavior
  92 | behind
  93 | believe
  94 | benefit
  95 | best
  96 | better
  97 | between
  98 | beyond
  99 | big
 100 | bill
 101 | billion
 102 | bit
 103 | black
 104 | blood
 105 | blue
 106 | board
 107 | body
 108 | book
 109 | born
 110 | both
 111 | box
 112 | boy
 113 | break
 114 | bring
 115 | brother
 116 | budget
 117 | build
 118 | building
 119 | business
 120 | but
 121 | buy
 122 | by
 123 | call
 124 | camera
 125 | campaign
 126 | can
 127 | cancer
 128 | candidate
 129 | capital
 130 | car
 131 | card
 132 | care
 133 | career
 134 | carry
 135 | case
 136 | catch
 137 | cause
 138 | cell
 139 | center
 140 | central
 141 | century
 142 | certain
 143 | certainly
 144 | chair
 145 | challenge
 146 | chance
 147 | change
 148 | character
 149 | charge
 150 | check
 151 | child
 152 | choice
 153 | choose
 154 | church
 155 | citizen
 156 | city
 157 | civil
 158 | claim
 159 | class
 160 | clear
 161 | clearly
 162 | close
 163 | coach
 164 | cold
 165 | collection
 166 | college
 167 | color
 168 | come
 169 | commercial
 170 | common
 171 | community
 172 | company
 173 | compare
 174 | computer
 175 | concern
 176 | condition
 177 | conference
 178 | Congress
 179 | consider
 180 | consumer
 181 | contain
 182 | continue
 183 | control
 184 | cost
 185 | could
 186 | country
 187 | couple
 188 | course
 189 | court
 190 | cover
 191 | create
 192 | crime
 193 | cultural
 194 | culture
 195 | cup
 196 | current
 197 | customer
 198 | cut
 199 | dark
 200 | data
 201 | daughter
 202 | day
 203 | dead
 204 | deal
 205 | death
 206 | debate
 207 | decade
 208 | decide
 209 | decision
 210 | deep
 211 | defense
 212 | degree
 213 | Democrat
 214 | democratic
 215 | describe
 216 | design
 217 | despite
 218 | detail
 219 | determine
 220 | develop
 221 | development
 222 | die
 223 | difference
 224 | different
 225 | difficult
 226 | dinner
 227 | direction
 228 | director
 229 | discover
 230 | discuss
 231 | discussion
 232 | disease
 233 | do
 234 | doctor
 235 | dog
 236 | door
 237 | down
 238 | draw
 239 | dream
 240 | drive
 241 | drop
 242 | drug
 243 | during
 244 | each
 245 | early
 246 | east
 247 | easy
 248 | eat
 249 | economic
 250 | economy
 251 | edge
 252 | education
 253 | effect
 254 | effort
 255 | eight
 256 | either
 257 | election
 258 | else
 259 | employee
 260 | end
 261 | energy
 262 | enjoy
 263 | enough
 264 | enter
 265 | entire
 266 | environment
 267 | environmental
 268 | especially
 269 | establish
 270 | even
 271 | evening
 272 | event
 273 | ever
 274 | every
 275 | everybody
 276 | everyone
 277 | everything
 278 | evidence
 279 | exactly
 280 | example
 281 | executive
 282 | exist
 283 | expect
 284 | experience
 285 | expert
 286 | explain
 287 | eye
 288 | face
 289 | fact
 290 | factor
 291 | fail
 292 | fall
 293 | family
 294 | far
 295 | fast
 296 | father
 297 | fear
 298 | federal
 299 | feel
 300 | feeling
 301 | few
 302 | field
 303 | fight
 304 | figure
 305 | fill
 306 | film
 307 | final
 308 | finally
 309 | financial
 310 | find
 311 | fine
 312 | finger
 313 | finish
 314 | fire
 315 | firm
 316 | first
 317 | fish
 318 | five
 319 | floor
 320 | fly
 321 | focus
 322 | follow
 323 | food
 324 | foot
 325 | for
 326 | force
 327 | foreign
 328 | forget
 329 | form
 330 | former
 331 | forward
 332 | four
 333 | free
 334 | friend
 335 | from
 336 | front
 337 | full
 338 | fund
 339 | future
 340 | game
 341 | garden
 342 | gas
 343 | general
 344 | generation
 345 | get
 346 | girl
 347 | give
 348 | glass
 349 | go
 350 | goal
 351 | good
 352 | government
 353 | great
 354 | green
 355 | ground
 356 | group
 357 | grow
 358 | growth
 359 | guess
 360 | gun
 361 | guy
 362 | hair
 363 | half
 364 | hand
 365 | hang
 366 | happen
 367 | happy
 368 | hard
 369 | have
 370 | he
 371 | head
 372 | health
 373 | hear
 374 | heart
 375 | heat
 376 | heavy
 377 | help
 378 | her
 379 | here
 380 | herself
 381 | high
 382 | him
 383 | himself
 384 | his
 385 | history
 386 | hit
 387 | hold
 388 | home
 389 | hope
 390 | hospital
 391 | hot
 392 | hotel
 393 | hour
 394 | house
 395 | how
 396 | however
 397 | huge
 398 | human
 399 | hundred
 400 | husband
 401 | I
 402 | idea
 403 | identify
 404 | if
 405 | image
 406 | imagine
 407 | impact
 408 | important
 409 | improve
 410 | in
 411 | include
 412 | including
 413 | increase
 414 | indeed
 415 | indicate
 416 | individual
 417 | industry
 418 | information
 419 | inside
 420 | instead
 421 | institution
 422 | interest
 423 | interesting
 424 | international
 425 | interview
 426 | into
 427 | investment
 428 | involve
 429 | issue
 430 | it
 431 | item
 432 | its
 433 | itself
 434 | job
 435 | join
 436 | just
 437 | keep
 438 | key
 439 | kid
 440 | kill
 441 | kind
 442 | kitchen
 443 | know
 444 | knowledge
 445 | land
 446 | language
 447 | large
 448 | last
 449 | late
 450 | later
 451 | laugh
 452 | law
 453 | lawyer
 454 | lay
 455 | lead
 456 | leader
 457 | learn
 458 | least
 459 | leave
 460 | left
 461 | leg
 462 | legal
 463 | less
 464 | let
 465 | letter
 466 | level
 467 | lie
 468 | life
 469 | light
 470 | like
 471 | likely
 472 | line
 473 | list
 474 | listen
 475 | little
 476 | live
 477 | local
 478 | long
 479 | look
 480 | lose
 481 | loss
 482 | lot
 483 | love
 484 | low
 485 | machine
 486 | magazine
 487 | main
 488 | maintain
 489 | major
 490 | majority
 491 | make
 492 | man
 493 | manage
 494 | management
 495 | manager
 496 | many
 497 | market
 498 | marriage
 499 | material
 500 | matter
 501 | may
 502 | maybe
 503 | me
 504 | mean
 505 | measure
 506 | media
 507 | medical
 508 | meet
 509 | meeting
 510 | member
 511 | memory
 512 | mention
 513 | message
 514 | method
 515 | middle
 516 | might
 517 | military
 518 | million
 519 | mind
 520 | minute
 521 | miss
 522 | mission
 523 | model
 524 | modern
 525 | moment
 526 | money
 527 | month
 528 | more
 529 | morning
 530 | most
 531 | mother
 532 | mouth
 533 | move
 534 | movement
 535 | movie
 536 | Mr
 537 | Mrs
 538 | much
 539 | music
 540 | must
 541 | my
 542 | myself
 543 | name
 544 | nation
 545 | national
 546 | natural
 547 | nature
 548 | near
 549 | nearly
 550 | necessary
 551 | need
 552 | network
 553 | never
 554 | new
 555 | news
 556 | newspaper
 557 | next
 558 | nice
 559 | night
 560 | no
 561 | none
 562 | nor
 563 | north
 564 | not
 565 | note
 566 | nothing
 567 | notice
 568 | now
 569 | n't
 570 | number
 571 | occur
 572 | of
 573 | off
 574 | offer
 575 | office
 576 | officer
 577 | official
 578 | often
 579 | oh
 580 | oil
 581 | ok
 582 | old
 583 | on
 584 | once
 585 | one
 586 | only
 587 | onto
 588 | open
 589 | operation
 590 | opportunity
 591 | option
 592 | or
 593 | order
 594 | organization
 595 | other
 596 | others
 597 | our
 598 | out
 599 | outside
 600 | over
 601 | own
 602 | owner
 603 | page
 604 | pain
 605 | painting
 606 | paper
 607 | parent
 608 | part
 609 | participant
 610 | particular
 611 | particularly
 612 | partner
 613 | party
 614 | pass
 615 | past
 616 | patient
 617 | pattern
 618 | pay
 619 | peace
 620 | people
 621 | per
 622 | perform
 623 | performance
 624 | perhaps
 625 | period
 626 | person
 627 | personal
 628 | phone
 629 | physical
 630 | pick
 631 | picture
 632 | piece
 633 | place
 634 | plan
 635 | plant
 636 | play
 637 | player
 638 | PM
 639 | point
 640 | police
 641 | policy
 642 | political
 643 | politics
 644 | poor
 645 | popular
 646 | population
 647 | position
 648 | positive
 649 | possible
 650 | power
 651 | practice
 652 | prepare
 653 | present
 654 | president
 655 | pressure
 656 | pretty
 657 | prevent
 658 | price
 659 | private
 660 | probably
 661 | problem
 662 | process
 663 | produce
 664 | product
 665 | production
 666 | professional
 667 | professor
 668 | program
 669 | project
 670 | property
 671 | protect
 672 | prove
 673 | provide
 674 | public
 675 | pull
 676 | purpose
 677 | push
 678 | put
 679 | quality
 680 | question
 681 | quickly
 682 | quite
 683 | race
 684 | radio
 685 | raise
 686 | range
 687 | rate
 688 | rather
 689 | reach
 690 | read
 691 | ready
 692 | real
 693 | reality
 694 | realize
 695 | really
 696 | reason
 697 | receive
 698 | recent
 699 | recently
 700 | recognize
 701 | record
 702 | red
 703 | reduce
 704 | reflect
 705 | region
 706 | relate
 707 | relationship
 708 | religious
 709 | remain
 710 | remember
 711 | remove
 712 | report
 713 | represent
 714 | Republican
 715 | require
 716 | research
 717 | resource
 718 | respond
 719 | response
 720 | responsibility
 721 | rest
 722 | result
 723 | return
 724 | reveal
 725 | rich
 726 | right
 727 | rise
 728 | risk
 729 | road
 730 | rock
 731 | role
 732 | room
 733 | rule
 734 | run
 735 | safe
 736 | same
 737 | save
 738 | say
 739 | scene
 740 | school
 741 | science
 742 | scientist
 743 | score
 744 | sea
 745 | season
 746 | seat
 747 | second
 748 | section
 749 | security
 750 | see
 751 | seek
 752 | seem
 753 | sell
 754 | send
 755 | senior
 756 | sense
 757 | series
 758 | serious
 759 | serve
 760 | service
 761 | set
 762 | seven
 763 | several
 764 | sex
 765 | sexual
 766 | shake
 767 | share
 768 | she
 769 | shoot
 770 | short
 771 | shot
 772 | should
 773 | shoulder
 774 | show
 775 | side
 776 | sign
 777 | significant
 778 | similar
 779 | simple
 780 | simply
 781 | since
 782 | sing
 783 | single
 784 | sister
 785 | sit
 786 | site
 787 | situation
 788 | six
 789 | size
 790 | skill
 791 | skin
 792 | small
 793 | smile
 794 | so
 795 | social
 796 | society
 797 | soldier
 798 | some
 799 | somebody
 800 | someone
 801 | something
 802 | sometimes
 803 | son
 804 | song
 805 | soon
 806 | sort
 807 | sound
 808 | source
 809 | south
 810 | southern
 811 | space
 812 | speak
 813 | special
 814 | specific
 815 | speech
 816 | spend
 817 | sport
 818 | spring
 819 | staff
 820 | stage
 821 | stand
 822 | standard
 823 | star
 824 | start
 825 | state
 826 | statement
 827 | station
 828 | stay
 829 | step
 830 | still
 831 | stock
 832 | stop
 833 | store
 834 | story
 835 | strategy
 836 | street
 837 | strong
 838 | structure
 839 | student
 840 | study
 841 | stuff
 842 | style
 843 | subject
 844 | success
 845 | successful
 846 | such
 847 | suddenly
 848 | suffer
 849 | suggest
 850 | summer
 851 | support
 852 | sure
 853 | surface
 854 | system
 855 | table
 856 | take
 857 | talk
 858 | task
 859 | tax
 860 | teach
 861 | teacher
 862 | team
 863 | technology
 864 | television
 865 | tell
 866 | ten
 867 | tend
 868 | term
 869 | test
 870 | than
 871 | thank
 872 | that
 873 | the
 874 | their
 875 | them
 876 | themselves
 877 | then
 878 | theory
 879 | there
 880 | these
 881 | they
 882 | thing
 883 | think
 884 | third
 885 | this
 886 | those
 887 | though
 888 | thought
 889 | thousand
 890 | threat
 891 | three
 892 | through
 893 | throughout
 894 | throw
 895 | thus
 896 | time
 897 | to
 898 | today
 899 | together
 900 | tonight
 901 | too
 902 | top
 903 | total
 904 | tough
 905 | toward
 906 | town
 907 | trade
 908 | traditional
 909 | training
 910 | travel
 911 | treat
 912 | treatment
 913 | tree
 914 | trial
 915 | trip
 916 | trouble
 917 | true
 918 | truth
 919 | try
 920 | turn
 921 | TV
 922 | two
 923 | type
 924 | under
 925 | understand
 926 | unit
 927 | until
 928 | up
 929 | upon
 930 | us
 931 | use
 932 | usually
 933 | value
 934 | various
 935 | very
 936 | victim
 937 | view
 938 | violence
 939 | visit
 940 | voice
 941 | vote
 942 | wait
 943 | walk
 944 | wall
 945 | want
 946 | war
 947 | watch
 948 | water
 949 | way
 950 | we
 951 | weapon
 952 | wear
 953 | week
 954 | weight
 955 | well
 956 | west
 957 | western
 958 | what
 959 | whatever
 960 | when
 961 | where
 962 | whether
 963 | which
 964 | while
 965 | white
 966 | who
 967 | whole
 968 | whom
 969 | whose
 970 | why
 971 | wide
 972 | wife
 973 | will
 974 | win
 975 | wind
 976 | window
 977 | wish
 978 | with
 979 | within
 980 | without
 981 | woman
 982 | wonder
 983 | word
 984 | work
 985 | worker
 986 | world
 987 | worry
 988 | would
 989 | write
 990 | writer
 991 | wrong
 992 | yard
 993 | yeah
 994 | year
 995 | yes
 996 | yet
 997 | you
 998 | young
 999 | your
1000 | yourself
1001 | 
1002 | 


--------------------------------------------------------------------------------
/list-in/word-en-s.lst:
--------------------------------------------------------------------------------
  1 | time
  2 | person
  3 | year
  4 | way
  5 | day
  6 | thing
  7 | man
  8 | world
  9 | life
 10 | hand
 11 | part
 12 | child
 13 | eye
 14 | woman
 15 | place
 16 | work
 17 | week
 18 | case
 19 | point
 20 | government
 21 | company
 22 | number
 23 | group
 24 | problem
 25 | fact
 26 | be
 27 | have
 28 | do
 29 | say
 30 | get
 31 | make
 32 | go
 33 | know
 34 | take
 35 | see
 36 | come
 37 | think
 38 | look
 39 | want
 40 | give
 41 | use
 42 | find
 43 | tell
 44 | ask
 45 | work
 46 | seem
 47 | feel
 48 | try
 49 | leave
 50 | call
 51 | good
 52 | new
 53 | first
 54 | last
 55 | long
 56 | great
 57 | little
 58 | own
 59 | other
 60 | old
 61 | right
 62 | big
 63 | high
 64 | different
 65 | small
 66 | large
 67 | next
 68 | early
 69 | young
 70 | important
 71 | few
 72 | public
 73 | bad
 74 | same
 75 | able
 76 | to
 77 | of
 78 | in
 79 | for
 80 | on
 81 | with
 82 | at
 83 | by
 84 | from
 85 | up
 86 | about
 87 | into
 88 | over
 89 | after
 90 | the
 91 | and
 92 | a
 93 | that
 94 | I
 95 | it
 96 | not
 97 | he
 98 | as
 99 | you
100 | this
101 | but
102 | his
103 | they
104 | her
105 | she
106 | or
107 | an
108 | will
109 | my
110 | one
111 | all
112 | would
113 | there
114 | their
115 | 


--------------------------------------------------------------------------------
/list-out/README.md:
--------------------------------------------------------------------------------
1 | # List-out
2 | 
3 | ## Purpose
4 | 
5 | This directory stores all results that were generated by the Toolbox programs.
6 | 
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/recentrated.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- MAN PAGE --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | recentrated - know when people rate or write reviews about a book
 10 | 
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 | B<recentrated.pl> 
 15 | [B<-t> F<mailaddr>] 
 16 | [B<-f> F<mailaddr>] 
 17 | [B<-u> F<number>] 
 18 | [B<-s> F<shelfname>] 
 19 | [B<-q>] 
 20 | [I<goodloginmail>] 
 21 | [F<goodloginpass>]
 22 | 
 23 | 
 24 | =head1 OPTIONS
 25 | 
 26 | =over 4
 27 | 
 28 | 
 29 | =item I<goodloginmail>
 30 | 
 31 | add an unsubscribe email header and a contact address for
 32 | administrative issues to the program output.
 33 | This also appends a helpful email signature.
 34 | It limits the number of books in the mail, with the rest to be 
 35 | mailed the next time (if I<MAILTO> does not equal I<MAILFROM>).
 36 | Fewer books mean shorter program runtimes for each receiver
 37 | (GitHub #23).
 38 | 
 39 | 
 40 | =item I<goodloginpass>
 41 | 
 42 | the password that is required for the Goodreads website login
 43 | 
 44 | 
 45 | =item B<-t, --to>=F<emailaddr>
 46 | 
 47 | by default "TO:" mail header is set to I<goodloginmail>.
 48 | This tool does not send mails on its own.
 49 | You would have to pipe its output into a C<sendmail> program.
 50 | 
 51 | 
 52 | =item B<-f, --from>=F<emailaddr>
 53 | 
 54 | by default "FROM:" mail header is set to I<goodloginmail>.
 55 | This tool does not send mails on its own.
 56 | You would have to pipe its output into a C<sendmail> program.
 57 | 
 58 | 
 59 | =item B<-u, --userid>=F<number>
 60 | 
 61 | check another member instead of the one identified by the login-mail 
 62 | and password arguments. You find the ID by looking at the shelf URLs.
 63 | 
 64 | 
 65 | =item B<-s, --shelf>=F<shelfname>
 66 | 
 67 | name of the shelf with a selection of books, default is "#ALL#". 
 68 | If the name contains special characters use an URL-encoded name.
 69 | You can use this parameter multiple times if there is more than 1 shelf to
 70 | include (boolean OR operation), see the examples section of this man page.
 71 | Use B<--shelf>=shelf1,shelf2,shelf3 to intersect shelves (Intersection
 72 | requires password).
 73 | 
 74 | 
 75 | =item B<-q, --textonly>
 76 | 
 77 | output links to text-reviews only. This drops all non-text 
 78 | ratings (stars only) and the smaller text-reviews too ("Loved it so much!"). 
 79 | This option is useful if you have many books which get many ratings every day. 
 80 | But it shifts the use case from finding new people to mere reading 
 81 | new ideas about a book.
 82 | 
 83 | 
 84 | =item B<-?, --help>
 85 | 
 86 | show full man page
 87 | 
 88 | 
 89 | =back
 90 | 
 91 | 
 92 | =head1 EXAMPLES
 93 | 
 94 | $ ./recentrated.pl my@mail.com
 95 | 
 96 | $ ./recentrated.pl --shelf=read my@mail.com
 97 | 
 98 | $ ./recentrated.pl --userid=55554444 --shelf=read --to=my@mail.com
 99 | 
100 | $ ./recentrated.pl -u 55554444 -s read -t friend@mail.com admin@mail.com
101 | 
102 | 
103 | =head1 FILES
104 | 
105 | Log written to F<./list-out/recentrated/log.txt>
106 | 
107 | Database stored in F<./list-out/recentrated/>
108 | 
109 | 
110 | =head1 REPORTING BUGS
111 | 
112 | Report bugs to <datakadabra@gmail.com> or use Github's issue tracker
113 | <https://github.com/andre-st/goodreads-toolbox/issues>
114 | 
115 | 
116 | =head1 COPYRIGHT
117 | 
118 | This is free software. You may redistribute copies of it under the terms of
119 | the GNU General Public License <https://www.gnu.org/licenses/gpl.html>.
120 | There is NO WARRANTY, to the extent permitted by law.
121 | 
122 | 
123 | =head1 SEE ALSO
124 | 
125 | More info in ./help/recentrated.md
126 | 
127 | 
128 | =head1 VERSION
129 | 
130 | 2022-09-25 (Since 2018-01-09)
131 | 
132 | =cut
133 | 
134 | #<--------------------------------- 79 chars --------------------------------->|
135 | 
136 | 
137 | use strict;
138 | use warnings;
139 | use locale;
140 | use 5.18.0;
141 | 
142 | # Perl core:
143 | use FindBin;
144 | use local::lib "$FindBin::Bin/lib/local/";
145 | use        lib "$FindBin::Bin/lib/";
146 | use POSIX           qw( locale_h );
147 | use List::Util      qw( max      );
148 | use File::Spec;     # Platform indep. directory separator
149 | use Time::Piece;
150 | use Getopt::Long;
151 | use Pod::Usage;
152 | # Third party:
153 | use Log::Any        '$_log', default_adapter => [ 'File' => File::Spec->catfile( $FindBin::Bin, 'list-out', 'recentrated', 'log.txt' )];
154 | use Text::CSV       qw( csv );
155 | use List::MoreUtils qw( any );
156 | # Ours:
157 | use Goodscrapes;
158 | 
159 | 
160 | 
# ----------------------------------------------------------------------------
# Program configuration:
# 
# Shows the full man page when called without any argument, parses the
# command line into the globals below, logs in if a password was given, and
# derives the database path and the per-run limits from these values.
# 
pod2usage( -verbose => 2 ) if $#ARGV < 0;
setlocale( LC_CTYPE, "en_US" );  # GR dates all en_US

our $TEXTONLY = 0;  # Set by -q, scales the minimum review text length below
our @SHELVES;       # One entry per --shelf option
our $USERID;        # Member whose shelves are read
our $MAILTO;        # Value of the "To:" mail header
our $MAILFROM;      # Value of the "From:" and "List-Unsubscribe:" mail headers

# Abort with the usage text on unknown or malformed options (as the sibling
# scripts do) rather than running with a half-parsed command line:
GetOptions( 'userid|u=s' => \$USERID,
            'shelf|s=s'  => \@SHELVES,
            'to|t=s'     => \$MAILTO,
            'from|f=s'   => \$MAILFROM,
            'textonly|q' => \$TEXTONLY,
            'help|?'     => sub{ pod2usage( -verbose => 2 ) })
	or pod2usage( 1 );

gsetopt( ignore_errors => 1 );  # Don't get stuck, may get book or review next time

# Positional arguments left over after option parsing:
our $LOGINMAIL = $ARGV[0];
our $LOGINPASS = $ARGV[1];
    $MAILFROM  = $LOGINMAIL if !$MAILFROM;
    $MAILTO    = $LOGINMAIL if !$MAILTO;

# Login is attempted only if a password was passed on the command line.
# A reference to $USERID is handed over, presumably so that glogin() can fill
# in the ID of the logged-in member (verify against Goodscrapes).
glogin( usermail => $LOGINMAIL,  # Login required for reading private members
        userpass => $LOGINPASS,
        r_userid => \$USERID )
	if $LOGINPASS;


# Everything below needs a member ID. Stop with a message on STDERR and a
# non-zero exit status; merely printing the message to STDOUT would mix it
# into the mail output and continue with an undefined ID.
die( "[CRIT ] Missing --userid option or goodloginmail argument.\n" )
	if !$USERID;


# Path to the database files which contain last check states
my  $dbname = sprintf( "%s-%s.csv", $USERID, join( '-', @SHELVES ));
our $DBPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', 'recentrated', $dbname );


# The more URLs, the longer and untempting the mail.
# If number exceeded, we link to the book page with *all* reviews.
our $MAX_REVURLS_PER_BOOK = 3;


# Limit number of books in the mail and limit the program runtime for non-admins
# (a run counts as non-admin when sender and receiver addresses differ)
our $MAX_BOOKS_TO_CHECK = ($MAILFROM && $MAILTO && $MAILFROM ne $MAILTO) ? 50 : 999999;


# GR-URLs in mail padded to average length, with "https://" stripped
# (drops the first 8 characters, left-justifies in a 36 characters column)
sub prettyurl{ return sprintf '%-36s', substr( shift, 8 ); }
213 | 
214 | 
215 | 
# ----------------------------------------------------------------------------
# The shelf listing already carries the current number of ratings of every
# book. These numbers are compared with the ones saved by the previous run
# (CSV file at $DBPATH). The most recent ratings are only downloaded for
# books whose number went up; they tell us who rated the book, how it was
# rated and whether text was added.
# 
my $db = {};
   $db = csv( in => $DBPATH, key => 'id' ) if -e $DBPATH;

my $num_hits = 0;
my %books;

greadshelf( rh_into         => \%books,
            ra_from_shelves => \@SHELVES,
            from_user_id    => $USERID );


# Books new on the shelves are remembered for later,
# books which left the shelves are dropped from the database right away:
my @added = grep{ !exists $db->{$_} } keys %books;

delete @{$db}{ grep{ !exists $books{$_} } keys %{$db} };


# Books with the longest time since their last check come first:
my @by_age = sort{ $db->{$a}->{checked} <=> $db->{$b}->{checked} } keys %{$db};

my $budget = $MAX_BOOKS_TO_CHECK;

foreach my $id (@by_age)
{
	last if $budget-- == 0;  # Mail other books the next time
	
	my $shelved = $books{$id};
	my $known   = $db->{$id};
	my $delta   = $shelved->{num_ratings} - $known->{num_ratings};
	
	next if $delta <= 0;
	
	# Reviews are requested from 12 hours after the stored check time on:
	my %recent;
	my $since = Time::Piece->strptime( $known->{checked} + 12 * 60 * 60, '%s' );
	
	greadreviews( rh_for_book => $shelved,
	              rh_into     => \%recent,
	              since       => $since,
	              rigor       => 0,
	              text_minlen => $TEXTONLY * $GOOD_USEFUL_REVIEW_LEN );
	
	$known->{num_ratings} = $shelved->{num_ratings};
	$known->{checked}     = time;
	
	my $num_recent = scalar keys %recent;
	
	next if $num_recent == 0;
	
	$num_hits++;
	
	# Mail header and first body line, once, in front of the first hit:
	if( $MAILTO && $num_hits == 1 )
	{
		my @head = ( "To: ${MAILTO}\n" );
		
		push( @head, "From: ${MAILFROM}\n",
		             "List-Unsubscribe: <mailto:${MAILFROM}>\n" ) if $MAILFROM;
		
		push( @head, "Content-Type: text/plain; charset=utf-8\n",
		             "Subject: New ratings on Goodreads.com\n\n" );  # 2x \n hdr end
		
		print @head;
		printf( "Recently rated books in your \"%s\" shelf:\n", join( '" and "', @SHELVES ));
	}
	
	
	#  Plain text layout, one paragraph per book (fixed-width fonts assumed):
	#
	#<-------------------- 78 chars per line i.a.w. RFC 2822 --------------------->
	#
	#  "Book Title1"
	#   www.goodreads.com/book/show/609606     [9 new]
	#  
	#  "Book Title2"
	#   www.goodreads.com/review/show/1234567  [TTT  ]
	#   www.goodreads.com/user/show/2345       [*****]
	#
	printf( "\n  \"%s\"\n", $shelved->{title} );
	
	# Too many single links: one link to the book page with the count instead
	if( $num_recent > $MAX_REVURLS_PER_BOOK )
	{
		printf( "   %s  [%d new]\n", prettyurl( $shelved->{url} ), $num_recent );
		next;
	}
	
	# Link to the review if it has text, to the member otherwise:
	for my $rev (values %recent)
	{
		my $link = $rev->{text} ? $rev->{url} : $rev->{rh_user}->{url};
		
		printf( "   %s  %s\n", prettyurl( $link ), $rev->{rating_str} );
	}
}
306 | 
307 | 
# Help user to help himself.
#
# Experience has shown
# that users cannot be dissuaded from their shelf choice by giving advice.
# Therefore, the admin should use the program option `--textonly`
# for large shelves - especially "ALL", "read", "to-read" - by default
# to keep the mails small.
#
print "\n\n\nToo many ratings?\n"
    . ">> Create a shelf \"watch-ratings\" or similar on Goodreads.com "
    . "with 50-150 special but lesser-known books, "
    . "and drop or add books from time to time. "
    . "Reply \"shelf watch-ratings\" when ready. "
    . "You can also reply \"textonly\" to skip the ratings without text. "
	if $MAILFROM && $num_hits > 20;


# Without a hint, the user doesn't know whether there are simply no
# stars-only ratings or whether they were intentionally ignored.
#
# The hint is tied to $num_hits like the other footer parts: the mail header
# is only printed together with the first hit, so without any hit this line
# would be the sole program output, a mail body without headers.
#
print "\n\n\nRatings without text were ignored (Reply 'all' otherwise)." 
	if $TEXTONLY && $num_hits > 0;


# E-mail signature block if run for other users:
# (legend for the rating strings in the mail plus the supported reply commands)
if( $MAILFROM && $num_hits > 0 )
{
	print "\n\n-- \n"  # RFC 3676 sig delimiter (has space char)
	    . " [***  ] 3/5 stars rating without text           \n"
	    . " [ttt  ] 3/5 stars rating with tweet-size text   \n"
	    . " [TTT  ] 3/5 stars rating with text              \n"
	    . " [9 new] ratings better viewed on the book page  \n"
	    . "                                                 \n";
	print " Reply 'textonly'     to skip ratings w/o text   \n"  if !$TEXTONLY;
	print " Reply 'shelf NAME'   to check alternative shelf \n"
	#   . " Reply 'hateonly'     to see negative rat. only  \n"
	#   . " Reply 'weekly'       to avoid daily mails       \n"
	    . " Reply 'unsubscribe'  to unsubscribe             \n"
	    . " Via https://andre-st.github.io/goodreads/       \n\n";
}
348 | 
349 | 
# Books seen for the first time enter the database with their current
# number of ratings and the current time as their check time:
foreach my $id (@added)
{
	$db->{$id} = { id          => $id,
	               num_ratings => $books{$id}->{num_ratings},
	               checked     => time };
}


# One log line per run for cronjob audits:
my $num_books   = keys %books;
my $shelves_str = join( '" and "', @SHELVES );

$_log->infof( 'Recently rated: %d of %d books in %s\'s shelf "%s" (check limit %d)', 
		$num_hits, $num_books, $USERID, $shelves_str, $MAX_BOOKS_TO_CHECK );


# Write all records back to the CSV file for the next run:
csv( out     => $DBPATH,
     headers => [ 'id', 'num_ratings', 'checked' ],
     in      => [ values %{$db} ] );
370 | 
371 | 
372 | # Done.
373 | 
374 | 


--------------------------------------------------------------------------------
/savreviews.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- MAN PAGE --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | savreviews - Download reviews for a book
 10 | 
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 | B<savreviews.pl> 
 15 | [B<-x> F<numlevel>] 
 16 | [B<-d> F<filename>] 
 17 | [B<-c> F<numdays>] 
 18 | [B<-o> F<dirname>]
 19 | [B<-i>]
 20 | F<goodbookid>
 21 | 
 22 | You find the F<goodbookid> by looking at the book URL.
 23 | 
 24 | 
 25 | =head1 OPTIONS
 26 | 
 27 | Mandatory arguments to long options are mandatory for short options too.
 28 | 
 29 | =over 4
 30 | 
 31 | =item B<-x, --rigor>=F<numlevel>
 32 | 
 33 |  level 0 = search newest reviews only (max 300 ratings)
 34 |  level 1 = search with a combination of filters (max 5400 ratings)
 35 |  level 2 = like 1 plus dict-search if more than 3000 ratings with stall-time of 2 minutes
 36 |  level n = like 1 plus dict-search with stall-time of n minutes - default is 10
 37 | 
 38 | 
 39 | =item B<-d, --dict>=F<filename>
 40 | 
 41 | default see section FILES
 42 | 
 43 | 
 44 | =item B<-c, --cache>=F<numdays>
 45 | 
 46 | number of days to store and reuse downloaded data in F</tmp/FileCache/>,
 47 | default is 7 days. This helps on experimenting with parameters. 
 48 | Loading data from Goodreads is a time consuming process.
 49 | 
 50 | 
 51 | =item B<-o, --outdir>=F<path>
 52 | 
 53 | directory path where the final reports will be saved,
 54 | default see section FILES
 55 | 
 56 | 
 57 | =item B<-i, --ignore-errors>
 58 | 
 59 | Don't retry on errors, just keep going. 
 60 | Sometimes useful if a single Goodreads resource hangs over long periods 
 61 | and you're okay with some values missing in your result.
 62 | This option is not recommended when you run the program unattended.
 63 | 
 64 | 
 65 | =item B<-?, --help>
 66 | 
 67 | show full man page
 68 | 
 69 | =back
 70 | 
 71 | 
 72 | =head1 FILES
 73 | 
 74 | F<./list-in/dict.lst>
 75 | 
 76 | F<./list-out/savreviews-book*-stars{0..5}.txt>
 77 | 
 78 | F</tmp/FileCache/>
 79 | 
 80 | 
 81 | =head1 EXAMPLES
 82 | 
 83 | $ ./savreviews.pl 333222
 84 | 
 85 | 
 86 | =head1 REPORTING BUGS
 87 | 
 88 | Report bugs to <datakadabra@gmail.com> or use Github's issue tracker
 89 | L<https://github.com/andre-st/goodreads-toolbox/issues>
 90 | 
 91 | 
 92 | =head1 COPYRIGHT
 93 | 
 94 | This is free software. You may redistribute copies of it under the terms of
 95 | the GNU General Public License L<https://www.gnu.org/licenses/gpl.html>.
 96 | There is NO WARRANTY, to the extent permitted by law.
 97 | 
 98 | 
 99 | =head1 SEE ALSO
100 | 
101 | More info in ./help/savreviews.md
102 | 
103 | 
104 | =head1 VERSION
105 | 
106 | 2022-03-10 (Since 2018-08-13)
107 | 
108 | =cut
109 | 
110 | #<--------------------------------- 79 chars --------------------------------->|
111 | 
112 | 
113 | use strict;
114 | use warnings;
115 | use locale;
116 | use 5.18.0;
117 | 
118 | # Perl core:
119 | use FindBin;
120 | use local::lib "$FindBin::Bin/lib/local/";
121 | use        lib "$FindBin::Bin/lib/";
122 | use Time::HiRes qw( time tv_interval );
123 | use POSIX       qw( locale_h );
124 | use File::Spec; # Platform indep. directory separator
125 | use IO::File;
126 | use Getopt::Long;
127 | use Pod::Usage;
128 | # Third party:
129 | # Ours:
130 | use Goodscrapes;
131 | 
132 | 
133 | 
# ----------------------------------------------------------------------------
# Program configuration:
# 
# Defaults first, then the command line may override some of them.
# 
setlocale( LC_CTYPE, 'en_US' );  # GR dates all en_US
STDOUT->autoflush( 1 );
gsetopt( cache_days => 7 );

our $TSTART           = time();  # For the total runtime printed at the end
our $BOOKID;
our $RIGOR            = 10;
our $MAXPOSSIBLESTARS = 5;
our $OUTNAMEFMT       = 'savreviews-book%s-stars%d.txt';  # Book ID, stars
our $OUTDATEFMT       = "%Y/%m/%d";                       # man strptime
our $REVIEWSEPARATOR  = sprintf( "\n\n%s\n", '-' x 79 );  # long line
our $OUTDIR           = File::Spec->catfile( $FindBin::Bin, 'list-out' );
our $DICTPATH         = File::Spec->catfile( $FindBin::Bin, 'list-in', 'dict.lst' );

# Short usage text and exit on any option which cannot be parsed:
pod2usage( 1 ) unless GetOptions( 
		'rigor|x=i'       => \$RIGOR,
		'dict|d=s'        => \$DICTPATH,
		'outdir|o=s'      => \$OUTDIR,
		'ignore-errors|i' => sub{  gsetopt( ignore_errors => 1  );  },
		'cache|c=i'       => sub{  gsetopt( cache_days => $_[1] );  },
		'help|?'          => sub{  pod2usage( -verbose => 2 );      });

# The book ID is the first argument which is left after the options:
$BOOKID = $ARGV[0];
pod2usage( 1 ) unless $BOOKID;
160 | 
161 | 
162 | 
# ----------------------------------------------------------------------------
# Load the book first (its title and number of reviews go into the progress
# output), then its reviews, and print a histogram for the loaded reviews.
# 
print 'Loading reviews ';

my %reviews;
my %book = greadbook( $BOOKID );

printf 'for "%s"...', $book{title};

my $meter = gmeter( "of $book{num_reviews} [\033[38;5msearching\033[0m]" );

greadreviews( rh_for_book => \%book,
              rh_into     => \%reviews,
              rigor       => $RIGOR,
              dict_path   => $DICTPATH,
              text_minlen => 1,
              on_progress => $meter );

ghistogram( rh_from => \%reviews );
180 | 
181 | 
182 | 
# ----------------------------------------------------------------------------
# Write the reviews into one text file per star rating (0..$MAXPOSSIBLESTARS),
# so that the index into @files equals the rating of a review.
# 
print( "\n\nWriting reviews to:" );

my @files;

for my $n (0..$MAXPOSSIBLESTARS)
{
	my $fpath = File::Spec->catfile( $OUTDIR, sprintf( $OUTNAMEFMT, $BOOKID, $n ));
	
	print( "\n$fpath" );
	
	# Open first, push afterwards: in "push @files, IO::File->new(...) or die"
	# the "or" belongs to push(), which returns the new number of elements and
	# is always true, so a failed open would go unnoticed.
	my $fh = IO::File->new( $fpath, '>:utf8' ) 
		or die( "[FATAL] Cannot write to $fpath ($!)" );
	
	push @files, $fh;
}


# Each record: date, review ID, blank line, review text, separator line
print {$files[$_->{rating}]} 
		$_->{date}->strftime( $OUTDATEFMT ) . " #" .
		$_->{id  } . "\n\n" .
		$_->{text} .
		$REVIEWSEPARATOR 
	for (values %reviews);


# Buffered data is written out on close, report problems (e.g. disk full):
for my $fh (@files)
{
	$fh->close() or warn( "[WARN ] Cannot close report file ($!)\n" );
}



# ----------------------------------------------------------------------------
printf( "\n\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 );
210 | 
211 | 
212 | 


--------------------------------------------------------------------------------
/search.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- MAN PAGE --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | search - Search a book and sort result by popularity
 10 | 
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 | B<search.pl> 
 15 | [B<-z> F<columns>] 
 16 | [B<-r> F<number>] 
 17 | [B<-c> F<numdays>] 
 18 | [B<-o> F<filename>] 
 19 | [B<-i>]
 20 | F<keyword>...
 21 | 
 22 | Use quotes if you want exact matches (see examples section)
 23 | 
 24 | =head1 OPTIONS
 25 | 
 26 | Mandatory arguments to long options are mandatory for short options too.
 27 | 
 28 | =over 4
 29 | 
 30 | =item B<-z, --order>=F<columns>
 31 | 
 32 | sort order, all descending, comma-separated column names,
 33 | default is "stars,num_ratings,year"
 34 | (you're free to change the order but not the names)
 35 | 
 36 | 
 37 | =item B<-r, --ratings>=F<number>
 38 | 
 39 | only include books with N or more ratings:
 40 | a 4-stars book rated by 30 readers might be "better" than a 5-stars book rated
 41 | by 1 reader (perhaps the author). This also declutters our F<outfile>.
 42 | Use low values to cut away the nonsense, use high values only if you know
 43 | the available range otherwise you might get zero results.
 44 | Default is 5 or 0 if exact match. 
 45 | 
 46 | 
 47 | =item B<-c, --cache>=F<numdays>
 48 | 
 49 | number of days to store and reuse downloaded data in F</tmp/FileCache/>,
 50 | default is 7 days. This helps on experimenting with parameters. 
 51 | Loading data from Goodreads is a time consuming process.
 52 | 
 53 | 
 54 | =item B<-o, --outfile>=F<filename>
 55 | 
 56 | name of the HTML file where we write results to, 
 57 | default see section FILES
 58 | 
 59 | 
 60 | =item B<-i, --ignore-errors>
 61 | 
 62 | Don't retry on errors, just keep going. 
 63 | Sometimes useful if a single Goodreads resource hangs over long periods 
 64 | and you're okay with some values missing in your result.
 65 | This option is not recommended when you run the program unattended.
 66 | 
 67 | 
 68 | =item B<-?, --help>
 69 | 
 70 | show full man page
 71 | 
 72 | =back
 73 | 
 74 | 
 75 | =head1 FILES
 76 | 
 77 | F<./list-out/search-$KEYWORD.html>
 78 | 
 79 | F</tmp/FileCache/>
 80 | 
 81 | 
 82 | =head1 EXAMPLES
 83 | 
 84 | $ ./search.pl linux
 85 | 
 86 | $ ./search.pl --ratings=10 --order=stars,num_ratings linux kernel
 87 | 
 88 | $ ./search.pl --order=year,num_ratings linux kernel
 89 | 
 90 | $ ./search.pl -r 10 -z year "linux kernel"
 91 | 
 92 | 
 93 | =head1 REPORTING BUGS
 94 | 
 95 | Report bugs to <datakadabra@gmail.com> or use Github's issue tracker
 96 | L<https://github.com/andre-st/goodreads-toolbox/issues>
 97 | 
 98 | 
 99 | =head1 COPYRIGHT
100 | 
101 | This is free software. You may redistribute copies of it under the terms of
102 | the GNU General Public License L<https://www.gnu.org/licenses/gpl.html>.
103 | There is NO WARRANTY, to the extent permitted by law.
104 | 
105 | 
106 | =head1 SEE ALSO
107 | 
108 | More info in ./help/search.md
109 | 
110 | 
111 | =head1 VERSION
112 | 
113 | 2022-03-10 (Since 2018-07-29)
114 | 
115 | =cut
116 | 
117 | #<--------------------------------- 79 chars --------------------------------->|
118 | 
119 | 
120 | use strict;
121 | use warnings;
122 | use locale;
123 | use 5.18.0;
124 | 
125 | # Perl core:
126 | use FindBin;
127 | use local::lib "$FindBin::Bin/lib/local/";
128 | use        lib "$FindBin::Bin/lib/";
129 | use Time::HiRes qw( time tv_interval );
130 | use POSIX       qw( strftime locale_h );
131 | use File::Spec; # Platform indep. directory separator
132 | use IO::File;
133 | use Getopt::Long;
134 | use Pod::Usage;
135 | # Third party:
136 | use List::MoreUtils qw( uniq );
137 | # Ours:
138 | use Goodscrapes;
139 | 
140 | 
141 | 
142 | # ----------------------------------------------------------------------------
143 | # Program configuration:
144 | # 
145 | setlocale( LC_CTYPE, "en_US" );  # GR dates all en_US
146 | STDOUT->autoflush( 1 );
147 | gsetopt( cache_days => 7 );
148 | # Program-wide settings, filled from the command line below (see POD section OPTIONS):
149 | our $TSTART = time();
150 | our @ORDER;
151 | our $NUMRATINGS;
152 | our $PHRASE;
153 | our $OUTPATH;
154 | our $ISEXACT;
155 | my  $ordercsv = '';
156 | 
157 | GetOptions( 'ratings|r=i'     => \$NUMRATINGS,
158 |             'order|z=s'       => \$ordercsv,
159 |             'outfile|o=s'     => \$OUTPATH,
160 |             'ignore-errors|i' => sub{  gsetopt( ignore_errors => 1 );   },
161 |             'cache|c=i'       => sub{  gsetopt( cache_days => $_[1] );  },
162 |             'help|?'          => sub{  pod2usage( -verbose => 2 );      })
163 | 	or pod2usage( 1 );
164 | # Everything left in @ARGV is the search phrase; test its length so that a keyword like "0" is accepted:
165 | $PHRASE     = join( ' ', @ARGV );  length( $PHRASE ) or pod2usage( 1 );
166 | $OUTPATH    = File::Spec->catfile( $FindBin::Bin, 'list-out', sprintf( 'search-%s.html', $PHRASE =~ s{[/\\]}{_}gr )) if !$OUTPATH;  # No path separators in default file name
167 | $ISEXACT    = index( $ARGV[0], ' ' ) > -1;  # Quoted "aaa bbb" as single argument, otherwise 2 args
168 | $NUMRATINGS = $ISEXACT ? 0 : 5 if !defined $NUMRATINGS;
169 | $ordercsv   =~ s/\s+//g;  # Mistakenly added spaces
170 | @ORDER      = uniq(( split( ',', lc $ordercsv ), qw( stars num_ratings year )));  # Adds missing
171 | 
172 | 
173 | 
174 | # ----------------------------------------------------------------------------
175 | # Primary data structures:
176 | # Book records are hash references; this list is handed to gsearch() via ra_into
177 | my @books;
178 | 
179 | 
180 | 
181 | # ----------------------------------------------------------------------------
182 | # Load basic data:
183 | # Echoes the effective settings first, then runs the search with these settings
184 | printf( "Searching books:\n\n about..... %s\n rated by.. %d members or more\n order by.. %s\n progress.. ",
185 | 		$ISEXACT ? "$PHRASE (exact)" : $PHRASE, $NUMRATINGS, join( ', ', @ORDER ) );
186 | # The "progress.. " line printed above is left open (no newline) for the gmeter() callback
187 | gsearch( phrase      => $PHRASE,
188 |          ra_into     => \@books,
189 |          is_exact    => $ISEXACT,
190 |          ra_order_by => \@ORDER,
191 |          num_ratings => $NUMRATINGS,
192 |          on_progress => gmeter() );
193 | 
194 | 
195 | 
196 | # ----------------------------------------------------------------------------
197 | # Write results to HTML file
198 | # 
199 | printf( "\n\nWriting search result (N=%d) to \"%s\"... ", scalar @books, $OUTPATH );
200 | 
201 | my $fh  = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)";
202 | my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime );
203 | 
204 | print $fh ghtmlhead( "Query: \"$PHRASE\", $now", [ '!Cover', 'Title', 'Author', ">$ORDER[0]:", "$ORDER[1]:", "$ORDER[2]:" ]);
205 | 
206 | # One table row per book, in the order of @books; every scraped value goes through ghtmlsafe()
207 | for my $book (@books)
208 | {
209 | 	# The last three cells follow the first three entries of @ORDER, like the table header above
210 | 	print $fh qq{
211 | 			<tr>
212 | 			<td><img src="${\ghtmlsafe( $book->{img_url} )}" height="80" /></td>
213 | 			<td><a  href="${\ghtmlsafe( $book->{url    } )}" target="_blank">
214 | 			              ${\ghtmlsafe( $book->{title  } )}</a></td>
215 | 			<td>
216 | 				<a href="${\ghtmlsafe( $book->{rh_author}->{url } )}" target="_blank">
217 | 				         ${\ghtmlsafe( $book->{rh_author}->{name} )}</a>
218 | 			</td>
219 | 			<td>${\ghtmlsafe( $book->{$ORDER[0]} )}</td>
220 | 			<td>${\ghtmlsafe( $book->{$ORDER[1]} )}</td>
221 | 			<td>${\ghtmlsafe( $book->{$ORDER[2]} )}</td>
222 | 			</tr>
223 | 			};
224 | }
225 | 
226 | print $fh ghtmlfoot();
227 | $fh->close() or die "[FATAL] Cannot write to $OUTPATH ($!)";  # Buffered write errors surface on close
228 | 
229 | 
230 | 
231 | # ----------------------------------------------------------------------------
232 | # Done:
233 | #
234 | printf( "\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 );
235 | 
236 | 


--------------------------------------------------------------------------------
/similarauth.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | #<--------------------------------- MAN PAGE --------------------------------->|
  4 | 
  5 | =pod
  6 | 
  7 | =head1 NAME
  8 | 
  9 | similarauth - Finding all similar authors
 10 | 
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 | B<similarauth.pl> 
 15 | [B<-u> F<number>] [B<-m> F<number>] 
 16 | [B<-c> F<numdays>] 
 17 | [B<-o> F<filename>]
 18 | [B<-s> F<shelfname> ...] 
 19 | [B<-i>]
 20 | F<goodloginmail> [F<goodloginpass>]
 21 | 
 22 | 
 23 | =head1 OPTIONS
 24 | 
 25 | Mandatory arguments to long options are mandatory for short options too.
 26 | 
 27 | =over 4
 28 | 
 29 | =item B<-u, --userid>=F<number>
 30 | 
 31 | check another member instead of the one identified by the login-mail 
 32 | and password arguments. You find the ID by looking at the shelf URLs.
 33 | 
 34 | 
 35 | =item B<-m, --minseen>=F<number>
 36 | 
 37 | only report authors that were found similar to at least N of your authors; default is 1
 38 | 
 39 | 
 40 | =item B<-c, --cache>=F<numdays>
 41 | 
 42 | number of days to store and reuse downloaded data in F</tmp/FileCache/>,
 43 | default is 31 days. This helps with cheap recovery on a crash, power blackout 
 44 | or pause, and when experimenting with parameters. Loading data from Goodreads
 45 | is a very time consuming process.
 46 | 
 47 | 
 48 | =item B<-o, --outfile>=F<filename>
 49 | 
 50 | name of the HTML file where we write results to, 
 51 | default see section FILES
 52 | 
 53 | 
 54 | =item B<-s, --shelf>=F<shelfname>
 55 | 
 56 | name of the shelf with a selection of books to be considered, default is
 57 | "#ALL#". If the name contains special characters use a URL-encoded name.
 58 | You can use this parameter multiple times if there is more than 1 shelf to
 59 | include (boolean OR operation), see the examples section of this man page.
 60 | Use B<--shelf>=shelf1,shelf2,shelf3 to intersect shelves (Intersection
 61 | requires password).
 62 | 
 63 | 
 64 | =item B<-i, --ignore-errors>
 65 | 
 66 | Don't retry on errors, just keep going. 
 67 | Sometimes useful if a single Goodreads resource hangs over long periods 
 68 | and you're okay with some values missing in your result.
 69 | This option is not recommended when you run the program unattended.
 70 | 
 71 | 
 72 | =item B<-?, --help>
 73 | 
 74 | show full man page
 75 | 
 76 | =back
 77 | 
 78 | 
 79 | =head1 FILES
 80 | 
 81 | F<./list-out/similarauth-$USER-$SHELF.html>
 82 | 
 83 | F</tmp/FileCache/>
 84 | 
 85 | 
 86 | =head1 EXAMPLES
 87 | 
 88 | $ ./similarauth.pl login@gmail.com MyPASSword
 89 | 
 90 | $ ./similarauth.pl --shelf=science --shelf=music  login@gmail.com
 91 | 
 92 | $ ./similarauth.pl --shelf=read --outfile=./sub/myfile.html  login@gmail.com
 93 | 
 94 | $ ./similarauth.pl -c 31 -s science -s music -o myfile.html  login@gmail.com
 95 | 
 96 | 
 97 | =head1 REPORTING BUGS
 98 | 
 99 | Report bugs to <datakadabra@gmail.com> or use Github's issue tracker
100 | L<https://github.com/andre-st/goodreads-toolbox/issues>
101 | 
102 | 
103 | =head1 COPYRIGHT
104 | 
105 | This is free software. You may redistribute copies of it under the terms of
106 | the GNU General Public License L<https://www.gnu.org/licenses/gpl.html>.
107 | There is NO WARRANTY, to the extent permitted by law.
108 | 
109 | 
110 | =head1 SEE ALSO
111 | 
112 | More info in ./help/similarauth.md
113 | 
114 | 
115 | =head1 VERSION
116 | 
117 | 2022-03-08 (Since 2018-07-05)
118 | 
119 | =cut
120 | 
121 | #<--------------------------------- 79 chars --------------------------------->|
122 | 
123 | 
124 | use strict;
125 | use warnings;
126 | use 5.18.0;
127 | 
128 | # Perl core:
129 | use FindBin;
130 | use local::lib "$FindBin::Bin/lib/local/";
131 | use        lib "$FindBin::Bin/lib/";
132 | use Time::HiRes qw( time tv_interval );
133 | use POSIX       qw( strftime );
134 | use File::Spec; # Platform indep. directory separator
135 | use IO::File;
136 | use Getopt::Long;
137 | use Pod::Usage;
138 | # Third party:
139 | # Ours:
140 | use Goodscrapes;
141 | 
142 | 
143 | 
144 | # ----------------------------------------------------------------------------
145 | # Program configuration:
146 | # 
147 | STDOUT->autoflush( 1 );
148 | gsetopt( cache_days => 31 );
149 | 
150 | our $TSTART = time();
151 | our @SHELVES;
152 | our $OUTPATH;
153 | our $USERID;
154 | our $MINSEEN = 1;
155 | 
156 | GetOptions( 'shelf|s=s'       => \@SHELVES,
157 |             'minseen|m=i'     => \$MINSEEN,
158 |             'userid|u=s'      => \$USERID,
159 |             'outfile|o=s'     => \$OUTPATH,
160 |             'cache|c=i'       => sub{  gsetopt( cache_days => $_[1] );  },
161 |             'ignore-errors|i' => sub{  gsetopt( ignore_errors => 1 );   },
162 |             'help|?'          => sub{  pod2usage( -verbose => 2 );      })
163 | 	or pod2usage( 1 );
164 | # The first positional argument (login mail) is mandatory:
165 | pod2usage( 1 ) if !$ARGV[0];
166 | # $USERID is handed over by reference; presumably glogin fills it in when --userid was not given (verify in Goodscrapes)
167 | glogin( usermail => $ARGV[0],  # Login not really required at the moment
168 |         userpass => $ARGV[1],  # Asks pw if omitted
169 |         r_userid => \$USERID );
170 | # No --shelf given: use the documented default "#ALL#" in URL-encoded form (%23 is '#')
171 | @SHELVES = qw( %23ALL%23 ) 
172 | 	if !@SHELVES;
173 | # Default output file name is built from the user ID and all shelf names, see POD section FILES
174 | $OUTPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', sprintf( 'similarauth-%s-%s.html', $USERID, join( '-', @SHELVES )))
175 | 	if !$OUTPATH;
176 | 
177 | 
178 | 
179 | # ----------------------------------------------------------------------------
180 | # Primary data structures:
181 | # Both hashes are filled by Goodscrapes readers through their rh_into argument
182 | our %auknown;  # {$auid => %author}  target of greadauthors(): authors on the selected shelves
183 | our %aufound;  # {$auid => %author}  target of greadsimilaraut(): entries carry a '_seen' counter
184 | 
185 | 
186 | 
187 | # ----------------------------------------------------------------------------
188 | # Load basic data:
189 | # Reads the authors of the selected shelves of $USERID into %auknown
190 | printf( "Loading authors from \"%s\"...", join( '" and "', @SHELVES ) );
191 | 
192 | greadauthors( from_user_id    => $USERID, 
193 |               ra_from_shelves => \@SHELVES,
194 |               rh_into         => \%auknown, 
195 |               on_progress     => gmeter( 'authors' ));
196 | 
197 | 
198 | 
199 | # ----------------------------------------------------------------------------
200 | # Query similar authors:
201 | # TODO recurs_depth = n
202 | # 
203 | my $aucount = scalar keys %auknown;
204 | my $audone  = 0;
205 | # Stop early on an empty shelf selection; this also protects the division by $aucount below
206 | die( $GOOD_ERRMSG_NOBOOKS ) if $aucount == 0;  # Not declared in this script, presumably exported by Goodscrapes
207 | 
208 | printf( "\nLoading similar authors for %d authors:\n", $aucount );
209 | # One output line per known author: percent done, name, author ID, progress meter, elapsed seconds
210 | for my $auid (keys %auknown)
211 | {
212 | 	my $t0 = time();
213 | 	printf( "[%3d%%] %-25s #%-8s\t", ++$audone/$aucount*100, $auknown{$auid}->{name}, $auid );
214 | 	
215 | 	# Also increments each author's '_seen' counter if already in %aufound
216 | 	greadsimilaraut( author_id   => $auid,
217 | 	                 rh_into     => \%aufound,
218 | 	                 on_progress => gmeter( 'similar' ));
219 | 	
220 | 	printf( "\t%6.2fs\n", time()-$t0 );
221 | }
222 | say( "Done." );
223 | 
224 | 
225 | 
226 | # ----------------------------------------------------------------------------
227 | # Write results to HTML file
228 | # 
229 | printf( "Writing authors (N=%d) to \"%s\"... ", scalar keys %aufound, $OUTPATH );
230 | 
231 | my $fh  = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)";
232 | my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime );
233 | 
234 | print $fh ghtmlhead( "Similar Authors, $now", [ 'Author', '>Seen:' ]);
235 | 
236 | my $line;
237 | for my $auid (keys %aufound)
238 | {
239 | 	next if exists $auknown{$auid};
240 | 	next if $aufound{$auid}->{_seen} < $MINSEEN;
241 | 	
242 | 	$line++;
243 | 	print $fh qq{
244 | 			<tr>
245 | 			<td>
246 | 			<a  href="${\ghtmlsafe( $aufound{$auid}->{works_url} )}" target="_blank">
247 | 			<img src="${\ghtmlsafe( $aufound{$auid}->{img_url}   )}" height="80" />
248 | 			          ${\ghtmlsafe( $aufound{$auid}->{name}      )}
249 | 			</a></td>
250 | 			<td>$aufound{$auid}->{_seen}</td>
251 | 			</tr> 
252 | 			};
253 | }
254 | 
255 | print $fh ghtmlfoot();
256 | undef $fh;
257 | 
258 | 
259 | # ----------------------------------------------------------------------------
260 | # Done:
261 | #
262 | printf( "\nTotal time: %.0f minutes\n", (time()-$TSTART)/60 );
263 | 
264 | 


--------------------------------------------------------------------------------
/t/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andre-st/goodreads-toolbox/010bc724003890d67efb2c40cd3e98a3b81c4007/t/README.md


--------------------------------------------------------------------------------
/t/config.pl-example:
--------------------------------------------------------------------------------
 1 | use 5.18.0;
 2 | use warnings;
 3 | # Placeholder account data; t/glogin.t loads a file named 'config.pl' via require() and calls these getters
 4 | sub get_gooduser_mail { return 'yourgoodreadslogin@gmail.com'; }
 5 | sub get_gooduser_pass { return 'YOUR-GOODREADS-PASSWORD'; }
 6 | sub get_gooduser_id   { return '123456789'; }  # Use number in your shelf-URLs
 7 | # A file loaded with require() must end with a true value:
 8 | 1;
 9 | 
10 | 


--------------------------------------------------------------------------------
/t/ghtmlxxx.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] HTML safe strings
 5 | #   [ ] 
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | use diagnostics;  # More debugging info
10 | use warnings;
11 | use strict;
12 | use FindBin;
13 | use local::lib "$FindBin::Bin/../lib/local";
14 | use        lib "$FindBin::Bin/../lib/";
15 | use Test::More qw( no_plan );
16 | 
17 | 
18 | use_ok( 'Goodscrapes' );
19 | 
20 | 
21 | my %user;
22 | my %book;
23 | # Fixture: text fields and URL fields carry markup that must not reach the output unescaped
24 | $user{name}      = '<script>alert("User Name XSS");</script>';
25 | $user{num_books} = 100;
26 | $user{url}       = '"><script>alert("User URL XSS");</script>';
27 | $book{title}     = '<script>alert("Book Title XSS");</script>';
28 | $book{stars}     = 4;
29 | $book{url}       = '"><script>alert("Book URL XSS");</script>';
30 | $book{rh_author} = \%user;
31 | 
32 | # ${\ func() } interpolates the function result into the string
33 | # Also example of functions inside string interpolations:
34 | my $test = qq{
35 | 	<html>
36 | 	<body>
37 | 		${\ghtmlsafe( $book{title}                  )}
38 | 		${\ghtmlsafe( $book{stars}                  )}
39 | 		${\ghtmlsafe( $book{url}                    )}
40 | 		${\ghtmlsafe( $book{rh_author}->{name}      )}
41 | 		${\ghtmlsafe( $book{rh_author}->{num_books} )}
42 | 		${\ghtmlsafe( $book{rh_author}->{url}       )}
43 | 	</body>
44 | 	</html>
45 | };
46 | 
47 | my $expected = qq{
48 | 	<html>
49 | 	<body>
50 | 		&lt;script&gt;alert(&quot;Book Title XSS&quot;);&lt;/script&gt;
51 | 		4
52 | 		&quot;&gt;&lt;script&gt;alert(&quot;Book URL XSS&quot;);&lt;/script&gt;
53 | 		&lt;script&gt;alert(&quot;User Name XSS&quot;);&lt;/script&gt;
54 | 		100
55 | 		&quot;&gt;&lt;script&gt;alert(&quot;User URL XSS&quot;);&lt;/script&gt;
56 | 	</body>
57 | 	</html>
58 | };
59 | 
60 | # Exact string comparison: whitespace inside both qq{} blocks must match, numbers pass through unchanged
61 | is( $test, $expected );
62 | 
63 | 


--------------------------------------------------------------------------------
/t/gisxxx.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] Good profiles, bad profiles
 5 | #   [x] unexpected values (undef etc)
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict; 
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More qw( no_plan );
17 | 
18 | 
19 | use_ok( 'Goodscrapes' );
20 | 
21 | 
22 | ok( !gisbaduser( '1'       ), 'Otis Chandler (GR founder)' );  # The first two IDs must not be flagged
23 | ok( !gisbaduser( '2'       ), 'Goodreads employee'         );
24 | ok(  gisbaduser( '1000834' ), '"NOT A BOOK" author'        );  # The remaining cases must be flagged as bad
25 | ok(  gisbaduser( '5158478' ), '"Anonymous" author'         );
26 | ok(  gisbaduser( undef     ), 'Invalid value is bad'       );  # undef counts as a bad user
27 | 
28 | 
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/t/glogin.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] login and get correct user-id
 5 | #   [ ] 
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use lib        "$FindBin::Bin/../lib/";
16 | use lib        "$FindBin::Bin/../t/";
17 | use Test::More      qw( no_plan      );
18 | use List::MoreUtils qw( any firstval );
19 | 
20 | 
21 | use_ok( 'Goodscrapes' );
22 | require( 'config.pl' );
23 | 
24 | 
25 | my $userid_extracted;
26 | my $userid_expected = get_gooduser_id();
27 | # Credentials and the expected ID come from the getters in config.pl; glogin gets a reference for the ID
28 | glogin( usermail => get_gooduser_mail(),
29 |         userpass => get_gooduser_pass(),
30 |         r_userid => \$userid_extracted );
31 | 
32 | is( $userid_extracted, $userid_expected, 'Got correct user ID after login' );
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/t/gmeter.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] additive absolute progress with custom unit
 5 | #   [x] additive percent progress
 6 | #   [ ] invalid arguments
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More      qw( no_plan      );
17 | use List::MoreUtils qw( any firstval );
18 | 
19 | 
20 | use_ok( 'Goodscrapes' );
21 | 
22 | 
23 | my $stdout;
24 | {
25 | 	local *STDOUT;  # Redirection ends with this block
26 | 	open( STDOUT, ">", \$stdout ) or die "Cannot redirect STDOUT to a string: $!";  # Capture the meter output in $stdout
27 | 	my $meter;
28 | 	
29 | 	
30 | 	# Absolute progress with custom unit:
31 | 	$meter = gmeter( 'test unit' );
32 | 	
33 | 	$stdout = '';
34 | 	$meter->( 1 );
35 | 	like( $stdout, qr/1 test unit/, 'Prints number with custom unit' );
36 | 	
37 | 	$stdout = '';
38 | 	$meter->( 20 );  # Adds 20 to previous value 1
39 | 	like( $stdout, qr/21 test unit/, 'Prints sum with custom unit' );
40 | 	
41 | 	
42 | 	# Percent progress is enabled by using a second number with a known maximum
43 | 	# Any custom unit is ignored.
44 | 	$meter = gmeter();
45 | 	
46 | 	$stdout = '';
47 | 	$meter->( 1, 10 );
48 | 	like( $stdout, qr/10%/, 'Prints percent number' );
49 | 	
50 | 	$stdout = '';
51 | 	$meter->( 5, 10 );  # Adds another 5 to prev value 1; You must not read this as "5 of 10" or 50%
52 | 	like( $stdout, qr/60%/, 'Prints percent number for sum' );
53 | }
54 | 
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/t/greadauthorbk.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] read books and check attributes (detects changed markup)
 5 | #   [ ] invalid arguments
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | 
11 | use diagnostics;  # More debugging info
12 | use warnings;
13 | use strict;
14 | use FindBin;
15 | use local::lib "$FindBin::Bin/../lib/local/";
16 | use        lib "$FindBin::Bin/../lib/";
17 | use Test::More      qw( no_plan      );
18 | use List::MoreUtils qw( any firstval );
19 | 
20 | 
21 | use_ok( 'Goodscrapes' );
22 | 
23 | 
24 | # We should never use caching during real tests:
25 | # We need to test against the most up-to-date markup from Goodreads.com
26 | # Having no cache during development is annoying, tho. 
27 | # So we leave a small window:
28 | gsetopt( cache_days => 1 );
29 | 
30 | 
31 | print( 'Reading books of author...' );
32 | 
33 | my %books;
34 | my $LIMIT = 10;
35 | my $AUTID = '2546';  # Palahniuk, Chuck (Fight Club)
36 | 
37 | greadauthorbk( author_id   => $AUTID,  
38 |                limit       => $LIMIT,
39 |                rh_into     => \%books, 
40 |                #on_book    => sub{}
41 |                on_progress => gmeter( 'books' ));
42 | 
43 | print( "\n" );
44 | 
45 | # is() instead of ok( a == b ): a failure then reports the actual number of books
46 | is( scalar( keys( %books )), $LIMIT, "$LIMIT books read from author" );
47 | 
48 | # Every book record is checked with $_ as the current book (plain loop, no list is built)
49 | for (values( %books )) {
50 | 	ok  ( $_->{title},                                                             'Book has title'               );
51 | 	like( $_->{url},        qr/^https:\/\/www\.goodreads\.com\/book\/show\//,      'Book has URL'                 );
52 | 	like( $_->{img_url},    qr/^https:\/\/[a-z0-9]+\.gr-assets\.com/,              'Book has image URL'           );
53 | 	like( $_->{id},         qr/^\d+$/,                                             'Book has Goodreads ID'        );
54 | 	ok  ( $_->{num_ratings} > 0,                                                   'Book has number of ratings'   );
55 | 	ok  ( $_->{avg_rating}  > 0,                                                   'Book has average rating'      );
56 | 	ok  ( $_->{rh_author}->{name},                                                 'Book author has name'         );
57 | 	ok  ( $_->{rh_author}->{name_lf},                                              'Book author has name_lf'      );
58 | 	is  ( $_->{rh_author}->{id},            $AUTID,                                'Book author has Goodreads ID' );
59 | 	like( $_->{rh_author}->{img_url},       qr/^https:\/\/images\.gr-assets\.com/, 'Book author has image URL'    );
60 | 	like( $_->{rh_author}->{url},           qr/^https:\/\/www\.goodreads\.com/,    'Book author has URL'          );
61 | 	like( $_->{rh_author}->{works_url},     qr/^https:\/\/www\.goodreads\.com/,    'Book author has works URL'    );
62 | 	is  ( $_->{rh_author}->{is_author},     1,                                     'Book author is author'        );
63 | 	is  ( $_->{rh_author}->{is_private},    0,                                     'Book author not private'      );
64 | 	is  ( $_->{rh_author}->{is_mainstream}, 1,                                     'Is a mainstream author'       );
65 | 	
66 | 	
67 | 	# Not available or scraped yet, otherwise one of the following
68 | 	# tests will fail and remind me of implementing a correct test:
69 | 	is  ( $_->{isbn},                        undef, 'N/A: Book ISBN'                );
70 | 	is  ( $_->{isbn13},                      undef, 'N/A: Book ISBN13'              );
71 | 	is  ( $_->{format},                      undef, 'N/A: Book format'              );
72 | 	is  ( $_->{user_rating},                 undef, 'N/A: User rating'              );
73 | 	is  ( $_->{user_read_count},             undef, 'N/A: User read count'          );
74 | 	is  ( $_->{user_num_owned},              undef, 'N/A: Number user-owned books'  );
75 | 	is  ( $_->{user_date_read},              undef, 'N/A: User reading-date'        );
76 | 	is  ( $_->{user_date_added},             undef, 'N/A: User addition-date'       );
77 | 	is  ( $_->{ra_user_shelves},             undef, 'N/A: User shelves'             );
78 | 	is  ( $_->{stars},                       undef, 'N/A: Book average rating'      );
79 | 	is  ( $_->{num_pages},                   undef, 'N/A: Book number of pages'     );
80 | 	is  ( $_->{num_reviews},                 undef, 'N/A: Book number of reviews'   );
81 | 	is  ( $_->{review_id},                   undef, 'N/A: User review id'           );
82 | 	is  ( $_->{year},                        undef, 'N/A: Book pub-year'            );
83 | 	is  ( $_->{year_edit},                   undef, 'N/A: Book edition pub-year'    );
84 | 	is  ( $_->{rh_author}->{residence},      undef, 'N/A: Author residence'         );
85 | 	is  ( $_->{rh_author}->{age},            undef, 'N/A: Author age'               );
86 | 	is  ( $_->{rh_author}->{is_staff},       undef, 'N/A: Is Goodreads author'      );
87 | 	is  ( $_->{rh_author}->{is_female},      undef, 'N/A: Author gender'            );
88 | 	is  ( $_->{rh_author}->{is_friend},      undef, 'N/A: Author friend status'     );
89 | 	is  ( $_->{rh_author}->{num_books},      undef, 'N/A: Number of author books'   );
90 | 	
91 | }
92 | 
93 | 


--------------------------------------------------------------------------------
/t/greadauthors.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] Read authors and check attributes (detects changed markup) 
 5 | #   [ ] Invalid arguments
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | 
11 | use diagnostics;  # More debugging info
12 | use warnings;
13 | use strict;
14 | use FindBin;
15 | use local::lib "$FindBin::Bin/../lib/local/";
16 | use        lib "$FindBin::Bin/../lib/";
17 | use Test::More      qw( no_plan      );
18 | use List::MoreUtils qw( any firstval );
19 | 
20 | 
21 | use_ok( 'Goodscrapes' );
22 | 
23 | 
24 | # We should never use caching during real tests:
25 | # We need to test against the most up-to-date markup from Goodreads.com
26 | # Having no cache during development is annoying, tho. 
27 | # So we leave a small window:
28 | gsetopt( cache_days => 1 );
29 | 
30 | 
31 | print( 'Reading authors from book shelf... ');
32 | 
33 | my %authors;
34 | 
35 | greadauthors( from_user_id    => 2, 
36 |               ra_from_shelves => [ 'read' ],
37 |               rh_into         => \%authors, 
38 |               on_progress     => gmeter( 'authors' ));
39 | 
40 | print( "\n" );
41 | 
42 | # '>=' matches the "at least 30" wording; cmp_ok() reports the actual count on failure
43 | cmp_ok( scalar( keys( %authors )), '>=', 30, 'At least 30 authors read from shelf' );
44 | 
45 | ok( exists( $authors{2546} ), 'Expected author found via hash-key = Goodreads author ID' )
46 | 	or BAIL_OUT( "Cannot test author attributes when expected author is missing." );
47 | 
48 | # All attribute checks below look at this single, well-known author record
49 | my $a = $authors{2546};
50 | 
51 | isa_ok( $a, 'HASH', 'Author datatype' );
52 | is  ( $a->{id},            '2546',                                                 'Author has ID'          );
53 | is  ( $a->{name_lf},       'Palahniuk, Chuck',                                     'Author has name'        );
54 | is  ( $a->{url},           'https://www.goodreads.com/author/show/2546',           'Author has URL'         );
55 | like( $a->{works_url},     qr/^https:\/\/www\.goodreads\.com\/author\/list\/2546/, 'Author has works URL'   );
56 | is  ( $a->{is_author},     1,                                                      'Author has author flag' );
57 | is  ( $a->{is_private},    0,                                                      'Author not private'     );
58 | is  ( $a->{is_mainstream}, 1,                                                      'Author is mainstream'   );
59 | 
60 | 
61 | # Not available or scraped yet, otherwise one of the following
62 | # tests will fail and remind me of implementing a correct test:
63 | is  ( $a->{name},       $a->{name_lf}, 'N/A: author name != name_lf' );  # "Chuck Palahniuk"
64 | is  ( $a->{residence},        undef,         'N/A: author residence'       );
65 | is  ( $a->{img_url},          undef,         'N/A: author image URL'       );
66 | is  ( $a->{age},              undef,         'N/A: author age'             );
67 | is  ( $a->{num_books},        undef,         'N/A: number of author books' );
68 | is  ( $a->{is_friend},        undef,         'N/A: author friend status'   );
69 | is  ( $a->{is_female},        undef,         'N/A: author gender status'   );
70 | is  ( $a->{user_avg_rating},  undef,         'N/A: user avg rating'        );
71 | is  ( $a->{user_min_rating},  undef,         'N/A: user min rating'        );
72 | is  ( $a->{user_max_rating},  undef,         'N/A: user max rating'        );
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/t/greadbook.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] read book and check attributes (detects changed markup)
 5 | #   [x] wrong book ID
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More      qw( no_plan      );
17 | use List::MoreUtils qw( any firstval );
18 | 
19 | 
20 | use_ok( 'Goodscrapes' );
21 | 
22 | 
23 | # We should never use caching during real tests:
24 | # We need to test against the most up-to-date markup from Goodreads.com
25 | # Having no cache during development is annoying, tho. 
26 | # So we leave a small window:
27 | gsetopt( cache_days => 1 );
28 | 
29 | 
30 | my %nob = greadbook( 'TEST_INVALID_BOOK_ID' );
31 | # The result is assigned to a hash; an unknown book ID must leave that hash empty
32 | ok( !%nob, 'Book not found' );
33 | 
34 | 
35 | my $test_book_id = 5759;  # 5759 legacy id, 36236124 new id
36 | my %b            = greadbook( $test_book_id );
37 | 
38 | ok( %b, 'Book read' )
39 | 	or BAIL_OUT( "Cannot test book attributes when expected book is missing." );
40 | 
41 | # Fixed attributes are compared exactly; rating and review numbers are only range-checked
42 | is  ( $b{id},          $test_book_id,                              'Book has Goodreads ID'      );
43 | is  ( $b{isbn},        '0393327345',                               'Book has ISBN'              );
44 | is  ( $b{isbn13},      '9780393327342',                            'Book has ISBN13'            );
45 | is  ( $b{num_pages},   218,                                        'Book has number of pages'   );
46 | ok  ( $b{num_ratings}  > 190000  &&  $b{num_ratings} < 1000000,    'Book has number of ratings' );
47 | ok  ( $b{num_reviews}  > 18000   &&  $b{num_reviews} < 50000,      'Book has number of reviews' );
48 | is  ( $b{title},       'Fight Club',                               'Book has title'             );
49 | ok  ( $b{avg_rating}   >= 4      &&  $b{avg_rating} < 5,           'Book has average rating'    );
50 | ok  ( $b{stars}        >= 4      &&  $b{stars}      < 5,           'Book has stars rating'      );
51 | like( $b{img_url},     qr/\.jpg$/,                                 'Book has image URL'         );
52 | is  ( $b{url},         'https://www.goodreads.com/book/show/5759', 'Book has URL'               );
53 | is  ( $b{format},      'Paperback',                                'Book has format'            );
55 | 
56 | # Not available or scraped yet, otherwise one of the following
57 | # tests will fail and remind me of implementing a correct test:
58 | #   is( $b{year},        1996,             'Book has pub-year'          );
59 | #   is( $b{year_edit},   2005,             'Book edition has pub-year'  );
60 | #   user_xxx
61 | #   ra_user_shelves
62 | #   review_id
63 | #   is  ( $b{rh_author}->{id},         '2546',                                                'Book has author ID'    );
64 | #   is  ( $b{rh_author}->{name_lf},    'Palahniuk, Chuck',                                    'Book has author name'  );
65 | #   is  ( $b{rh_author}->{url},        'https://www.goodreads.com/author/show/2546',          'Book has author URL'         );
66 | #   like( $b{rh_author}->{works_url},  qr/https:\/\/www.goodreads.com\/author\/list\/2546.*/, 'Book has author works URL'   );
67 | #   is  ( $b{rh_author}->{residence},
68 | #   is  ( $b{rh_author}->{num_books},
69 | #   is  ( $b{rh_author}->{age},
70 | #   is  ( $b{rh_author}->{is_author},  1,                                                     'Book author has author flag' );
71 | #   is  ( $b{rh_author}->{is_private}, 0,                                                     'Book author not private'     );
72 | #   is  ( $b{rh_author}->{is_staff},
73 | #   is  ( $b{rh_author}->{is_female},
74 | #   is  ( $b{rh_author}->{is_friend},
75 | #   is  ( $b{rh_author}->{is_mainstream},
76 | 
77 | 
78 | 
79 | 


--------------------------------------------------------------------------------
/t/greadcomments.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | # [ ] ...
 5 | 
 6 | 
 7 | use diagnostics;  # More debugging info
 8 | use warnings;
 9 | use strict;
10 | use FindBin;
11 | use local::lib "$FindBin::Bin/../lib/local/";
12 | use        lib "$FindBin::Bin/../lib/";
13 | use        lib "$FindBin::Bin/../t/";
14 | use Test::More qw( no_plan );
15 | 
16 | 
17 | use_ok( 'Goodscrapes' );
18 | require( 'config.pl' );
19 | # Live test of greadcomments(): reads the comments written by one member and checks each comment's attributes.
20 | # NOTE(review): config.pl presumably defines get_gooduser_mail() / get_gooduser_pass(), which are called below.
21 | # We should never use caching during real tests:
22 | # We need to test against the most up-to-date markup from Goodreads.com
23 | # Having no cache during development is annoying, tho. 
24 | # So we leave a small window:
25 | gsetopt( cache_days => 1 );
26 | 
27 | 
28 | # Info is only available to authenticated users:
29 | glogin( usermail => get_gooduser_mail(),
30 |         userpass => get_gooduser_pass() );
31 | 
32 | 
33 | print( 'Reading comments... ');
34 | 
35 | # Receives one hash reference per comment:
36 | my @comments;
37 | 
38 | greadcomments( from_user_id  => 18418712, # 1036726,
39 |                ra_into       => \@comments,
40 |                on_progress   => gmeter( 'comments' ));
41 | 
42 | 
43 | print( "\n" );
44 | 
45 | 
46 | # The label names what is actually counted here (comments, not shelf books):
47 | ok( scalar( @comments ) >= 10, 'At least 10 comments read from user' );
48 | 
49 | for my $c ( @comments )
50 | {
51 | 	ok( $c->{text}, 'Comment has text' );
52 | 	
53 | 	if( $c->{rh_to_user} )  # No user info if comment on a group
54 | 	{
55 | 		ok  ( $c->{rh_to_user},         'Comment has an addressee'      );
56 | 		ok  ( $c->{rh_to_user}->{name}, 'Addressee of comment has name' );
57 | 	}
58 | 	
59 | 	if( $c->{rh_book} )  # No book info if comment on a group or a quote or a user status
60 | 	{
61 | 		ok  ( $c->{rh_book}->{title},                                        'Commented book has title'       );
62 | 		like( $c->{rh_book}->{img_url}, qr/^https:.*\.(jpg|png)$/,           'Commented book has image URL'   );
63 | 		like( $c->{rh_book}->{url},     qr/^https:\/\/www.goodreads.com\//,  'Commented book has an URL'      );  # Not real URL but search-URL due to missing book ID
64 | 		# Value and label must be separated by a comma: with '.' they were concatenated into one always-true string.
65 | 		ok  ( $c->{rh_review},                                               'Comment addressed a review'     );
66 | 		ok  ( $c->{rh_review}->{id},                                         'Commented review has an ID'     );
67 | 		like( $c->{rh_review}->{url},   qr/^https:\/\/www.goodreads.com\//,  'Commented review has an URL'    ); 
68 | 		ok  ( $c->{rh_review}->{rh_user},                                    'Commented review has an author' );
69 | 		ok  ( $c->{rh_review}->{rh_user}->{name},                            'Author of commented review has a name' );
70 | 		
71 | 		
72 | 		# Not available or scraped yet, otherwise one of the following
73 | 		# tests will fail and remind me of implementing a correct test:
74 | 		
75 | 		is  ( $c->{rh_book}->{id},  undef,  'N/A: Book ID' );
76 | 	}
77 | 
78 | }
79 | 
80 | 
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/t/greadfolls.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl -w
  2 | 
  3 | # Test cases realized:
  4 | #   [x] get friends and followees
  5 | #   [x] get friends only
  6 | #   [x] get followees only
  7 | #   [ ] get only friends who are authors
  8 | #   [ ] get only followees who are authors
  9 | #   [x] discard threshold
 10 | #   [ ] check member attributes
 11 | 
 12 | 
 13 | 
 14 | use diagnostics;  # More debugging info
 15 | use warnings;
 16 | use strict;
 17 | use FindBin;
 18 | use local::lib "$FindBin::Bin/../lib/local/";
 19 | use        lib "$FindBin::Bin/../lib/";
 20 | use        lib "$FindBin::Bin/../t/";
 21 | use Test::More      qw( no_plan    );
 22 | use List::MoreUtils qw( duplicates );
 23 | 
 24 | 
 25 | use_ok( 'Goodscrapes' );
 26 | require( 'config.pl' );
 27 | # Live test of greadfolls(): runs five queries for one member and compares the returned ID sets.
 28 | # NOTE(review): config.pl presumably defines get_gooduser_mail() / get_gooduser_pass(), which are called below.
 29 | # We should never use caching during real tests:
 30 | # We need to test against the most up-to-date markup from Goodreads.com
 31 | # Having no cache during development is annoying, tho. 
 32 | # So we leave a small window:
 33 | gsetopt( cache_days => 1 );
 34 | 
 35 | 
 36 | # Access to member lists needs some privileges:
 37 | glogin( usermail => get_gooduser_mail(),
 38 |         userpass => get_gooduser_pass() );
 39 | 
 40 | # Fixtures: the queried member, the threshold used by the discard queries, and one result hash per query.
 41 | my $userid            = '2'; 
 42 | my $discard_threshold = 3;
 43 | my %friends;
 44 | my %followees;
 45 | my %all;
 46 | my %discarded_friends;
 47 | my %discarded_followees;
 48 | 
 49 | # Query 1: friends only.
 50 | greadfolls( from_user_id      => $userid,
 51 |             rh_into           => \%friends, 
 52 |             incl_followees    => 0,
 53 |             incl_friends      => 1,
 54 |             incl_authors      => 1 );
 55 | # Query 2: followees only.
 56 | greadfolls( from_user_id      => $userid,
 57 |             rh_into           => \%followees,
 58 |             incl_followees    => 1,
 59 |             incl_friends      => 0,
 60 |             incl_authors      => 1 );
 61 | # Query 3: friends and followees together.
 62 | greadfolls( from_user_id      => $userid,
 63 |             rh_into           => \%all,
 64 |             incl_followees    => 1,
 65 |             incl_friends      => 1,
 66 |             incl_authors      => 1 );
 67 | # Query 4: friends only, with discard_threshold set (expected to come back empty, see assertions below).
 68 | greadfolls( from_user_id      => $userid,
 69 |             rh_into           => \%discarded_friends,
 70 |             discard_threshold => $discard_threshold,
 71 |             incl_followees    => 0,
 72 |             incl_friends      => 1,
 73 |             incl_authors      => 1 );
 74 | # Query 5: followees only, with discard_threshold set (expected to come back empty, see assertions below).
 75 | greadfolls( from_user_id      => $userid,
 76 |             rh_into           => \%discarded_followees,
 77 |             discard_threshold => $discard_threshold,
 78 |             incl_followees    => 1,
 79 |             incl_friends      => 0,
 80 |             incl_authors      => 1 );
 81 | 
 82 | # Membership is checked by hash key; the labels identify key 1 as Otis Chandler and key 21269 as Guy Kawasaki.
 83 | ok( exists $friends{1},                             "Member $userid and Otis Chandler are friends" );
 84 | ok( exists $followees{21269},                       "Member $userid is following Guy Kawasaki (author)" );
 85 | ok( exists $friends{1} && exists $followees{21269}, "Member $userid is friends with Otis Chandler and is following Guy Kawasaki (author)" );
 86 | ok( !%discarded_friends,                            "No friends returned if there are more than $discard_threshold" );
 87 | ok( !%discarded_followees,                          "No followees returned if there are more than $discard_threshold" );
 88 | 
 89 | # Key lists of the three unrestricted queries, used for the set comparisons below:
 90 | my @kfriends   = keys %friends;
 91 | my @kfollowees = keys %followees;
 92 | my @kall       = keys %all;
 93 | # Passes only if no key occurs in both the friends list and the followees list:
 94 | ok( !duplicates(( @kfriends, @kfollowees )), 'Friends and followees lists expected to be exclusive' );
 95 | # Expects the joined list to contain exactly as many repeated keys as %all has keys:
 96 | is( scalar(@kall), scalar(duplicates(( @kfriends, @kfollowees, @kall ))), 'Friends and followees in all-list expected' );
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/t/greadreviews.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl -w
  2 | 
  3 | # Test cases realized:
  4 | #   [x] latest and check attributes (detects changed markup)
  5 | #   [x] text only
  6 | #   [x] date range
  7 | #   [x] dict
  8 | #   [ ] 
  9 | #   [ ] invalid arguments
 10 | 
 11 | use diagnostics;  # More debugging info
 12 | use warnings;
 13 | use strict;
 14 | use FindBin;
 15 | use local::lib "$FindBin::Bin/../lib/local/";
 16 | use        lib "$FindBin::Bin/../lib/";
 17 | use Time::Piece;
 18 | use Test::More      qw( no_plan          );
 19 | use List::MoreUtils qw( any all firstval );
 20 | 
 21 | 
 22 | use_ok( 'Goodscrapes' );
 23 | 
 24 | # Live test of greadreviews(): loads reviews of one book with three different option sets and compares the results.
 25 | # We should never use caching during real tests:
 26 | # We need to test against the most up-to-date markup from Goodreads.com
 27 | # Having no cache during development is annoying, tho. 
 28 | # So we leave a small window:
 29 | gsetopt( cache_days    => 1 );
 30 | gsetopt( ignore_errors => 1 );
 31 | gsetopt( maxretries    => 0 );
 32 | 
 33 | 
 34 | diag( 'takes ~1 minute' );
 35 | 
 36 | 
 37 | print( 'Loading reviews...' );
 38 | # One result hash per greadreviews() query below:
 39 | my %reviews;
 40 | my %reviews_textonly;
 41 | my %reviews_by_dict;
 42 | # Book specimen and the cut-off date passed as 'since' to the first query:
 43 | my %book;
 44 | $book{id}          = '984394';  # "Hacking the Xbox"
 45 | $book{num_ratings} = 253;       # This value can be obtained using greadbook() or ignored, it helps optimizing; TODO: constant might break test
 46 | $book{num_reviews} =  28;       #     "      "                                                                  TODO: constant might break test
 47 | my $since          = Time::Piece->strptime( '2016-01-01', '%Y-%m-%d' );
 48 | 
 49 | # Query 1: text_minlen = 0 and a 'since' date; its result also feeds the per-review attribute checks further down.
 50 | greadreviews( rh_for_book => \%book,
 51 |               rigor       => 0,  # 0 = 300 reviews only (latest)
 52 |               rh_into     => \%reviews,
 53 |               text_minlen => 0,
 54 |               since       => $since,
 55 |               on_progress => gmeter());
 56 | # Query 2: text_minlen = 1 and no 'since' argument.
 57 | greadreviews( rh_for_book => \%book,  # Uses some cached values from query above, which is fine for this test
 58 |               rigor       => 0,       # 0 = 300 reviews only (latest)
 59 |               rh_into     => \%reviews_textonly,
 60 |               text_minlen => 1,
 61 |               on_progress => gmeter());
 62 | # Query 3: rigor = 3 with a dictionary file from list-in/.
 63 | greadreviews( rh_for_book => \%book,  # Uses some cached values from query above, which is fine for this test
 64 |               rigor       => 3,       # Include dict in every case
 65 |               rh_into     => \%reviews_by_dict,
 66 |               dict_path   => "$FindBin::Bin/../list-in/test.lst",
 67 |               text_minlen => 1,
 68 |               on_progress => gmeter());
 69 | 
 70 | print( "\n" );
 71 | 
 72 | 
 73 | # Check numbers:
 74 | my $num_reviews          = scalar( keys( %reviews          ));
 75 | my $num_reviews_textonly = scalar( keys( %reviews_textonly ));
 76 | my $num_reviews_by_dict  = scalar( keys( %reviews_by_dict  ));
 77 | # Each count check aborts the whole run via BAIL_OUT, because the later checks need non-empty results:
 78 | ok( $num_reviews > 0, 'Load some reviews' )
 79 | 	or BAIL_OUT( "Cannot test review attributes when there are no reviews." );
 80 | 
 81 | ok( $num_reviews_textonly > 0, 'Load some text reviews' )
 82 | 	or BAIL_OUT( "Cannot test text reviews when there are no text reviews." );
 83 | 
 84 | ok( $num_reviews_by_dict >= $num_reviews_textonly, 'Load more or equal number of reviews compared to rigor-level 0' )
 85 | 	or BAIL_OUT( "Book specimen might not sufficient for this test anymore or adjust book's num_reviews constant in this testfile. Expected #reviews from dict ($num_reviews_by_dict) >= #reviews from latest ($num_reviews_textonly)" );
 86 | 
 87 | # The first check is negated: query 1 must contain at least one review without text.
 88 | # Check contents:
 89 | ok(( !all { $_->{text} } values( %reviews          )), 'Reviews include text and non-text ratings');
 90 | ok((  all { $_->{text} } values( %reviews_textonly )), 'All reviews include text');
 91 | ok((  all { $_->{text} } values( %reviews_by_dict  )), 'All dict-searched reviews include text');
 92 | 
 93 | 
 94 | # Check contents in detail, one review hash ($r) at a time; a for-loop is used because the checks are pure side effects (no list is built):
 95 | for my $r ( values( %reviews )) {
 96 | 	ok  ( $r->{rating} >= 0,           "Review $r->{id} has rating"            );
 97 | 	ok  ( $r->{rating_str},            "Review $r->{id} has rating code"       );
 98 | 	#ok ( $r->{text},                  "Review $r->{id} has text"              );  # Often no text but just stars
 99 | 	#ok ( $r->{date}->year > 2005,     "Review $r->{id} has date > 2006 (got date: '$r->{date}')" );  # GR was founded 2007, but there are reviews from 2006, e.g., #454926175
100 | 	ok  ( $r->{date} >= $since,        "Review $r->{id} isn't older than ".$since->strftime( "%Y-%m-%d" ));
101 | 	is  ( $r->{book_id},               $book{id},                                          "Review $r->{id} has Goodreads book ID" );
102 | 	like( $r->{id},                    qr/^\d+$/,                                          "Review $r->{id} has ID"                );
103 | 	like( $r->{url},                   qr/^https:\/\/www\.goodreads\.com\/review\/show\//, "Review $r->{id} has URL"               );
104 | 	like( $r->{rh_user}->{url},        qr/^https:\/\/www\.goodreads\.com\/user\/show\//,   "Review $r->{id} has author URL"        );
105 | 	like( $r->{rh_user}->{id},         qr/^\d+$/,                                          "Review $r->{id} has author ID"         );
106 | 	like( $r->{rh_user}->{img_url},    qr/^https:\/\/[a-z0-9]+\.gr-assets\.com\//,         "Review $r->{id} has author image URL"  );
107 | 	ok  ( $r->{rh_user}->{name},       "Review $r->{id} has author name: $r->{rh_user}->{name}" );
108 | 	ok  ( $r->{rh_user}->{name_lf},    "Review $r->{id} has author lastname, firstname"  );
109 | 	
110 | 	
111 | 	# Not available or scraped yet, otherwise one of the following
112 | 	# tests will fail and remind me of implementing a correct test:
113 | 	is  ( $r->{rh_user}->{is_private},     undef, 'N/A: User is private'            );
114 | 	is  ( $r->{rh_user}->{is_female},      undef, 'N/A: User gender'                );
115 | 	is  ( $r->{rh_user}->{is_author},      undef, 'N/A: User is author'             );
116 | 	is  ( $r->{rh_user}->{is_staff},       undef, 'N/A: User is Goodreads employee' );
117 | 	is  ( $r->{rh_user}->{is_friend},      undef, 'N/A: User friend status'         );
118 | 	is  ( $r->{rh_user}->{is_mainstream},  undef, 'N/A: User mainstream status'     );
119 | 	is  ( $r->{rh_user}->{residence},      undef, 'N/A: User residence'             );
120 | 	is  ( $r->{rh_user}->{age},            undef, 'N/A: User age'                   );
121 | 	is  ( $r->{rh_user}->{num_books},      undef, 'N/A: Number of books'            );  # Works or books read?
122 | 	is  ( $r->{rh_user}->{works_url},      undef, 'N/A: Works URL if author'        );
123 | }
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/t/greadshelf.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] Read shelf, find specific book, check all attributes (detects changed markup)
 5 | #   [ ] Reading from multiple shelves
 6 | #   [ ] invalid arguments
 7 | #   [ ] 
 8 | 
 9 | 
10 | 
11 | use diagnostics;  # More debugging info
12 | use warnings;
13 | use strict;
14 | use FindBin;
15 | use local::lib "$FindBin::Bin/../lib/local/";
16 | use        lib "$FindBin::Bin/../lib/";
17 | use Test::More qw( no_plan );
18 | 
19 | 
20 | use_ok( 'Goodscrapes' );
21 | 
22 | # Live test of greadshelf(): reads the 'read' shelf of one member and checks every attribute of one known book (ID 5759).
23 | # We should never use caching during real tests:
24 | # We need to test against the most up-to-date markup from Goodreads.com
25 | # Having no cache during development is annoying, tho. 
26 | # So we leave a small window:
27 | gsetopt( cache_days => 1 );
28 | 
29 | 
30 | print( 'Reading book shelf... ');
31 | # Result hash; the assertions below look books up by Goodreads book ID:
32 | my %books;
33 | 
34 | greadshelf( from_user_id    => 2,  # "Odawg" (GR employee; 1 is GR founder Otis Chandler, but too many books = test too long)
35 |             ra_from_shelves => [ 'read' ],
36 |             rh_into         => \%books,
37 |             # on_book       => sub{},
38 |             on_progress     => gmeter( 'books' ) );
39 | 
40 | print( "\n" );
41 | 
42 | 
43 | ok( scalar( keys( %books )) > 50, 'At least 50 books read from shelf' );
44 | # Abort the whole run if the specimen is missing; every assertion below dereferences it:
45 | ok( exists( $books{5759} ), 'Expected book found via hash-key = Goodreads book ID' )
46 | 	or BAIL_OUT( "Cannot test book attributes when expected book is missing." );
47 | 
48 | # NOTE(review): `my $b` lexicalizes the variable name that sort blocks use; harmless while this script never sorts, but a longer name would be safer.
49 | my $b = $books{5759};
50 | 
51 | isa_ok( $b, 'HASH', 'Book datatype' );
52 | # Book attributes, followed by the shelf owner's own data for this book (user_* keys and review_id):
53 | is  ( $b->{id},          '5759',           'Book has Goodreads ID'      );
54 | is  ( $b->{year},        1996,             'Book has pub-year'          );
55 | is  ( $b->{year_edit},   2005,             'Book edition has pub-year'  );
56 | is  ( $b->{isbn},        '0393327345',     'Book has ISBN'              );
57 | is  ( $b->{isbn13},      '9780393327342',  'Book has ISBN13'            );
58 | ok  ( $b->{avg_rating}   > 2,              'Book has average rating'    );
59 | is  ( $b->{num_pages},   218,              'Book has number of pages'   );
60 | ok  ( $b->{num_ratings}  > 190000,         'Book has number of ratings' );
61 | is  ( $b->{format},      'Paperback',      'Book has format'            );
62 | is  ( $b->{title},       'Fight Club',     'Book has title'             );
63 | ok  ( $b->{stars}        > 2,              'Book has stars rating'      );
64 | is  ( $b->{url},         'https://www.goodreads.com/book/show/5759',    'Book has URL'            );
65 | like( $b->{img_url},     qr/^https:.*\.jpg$/,                           'Book has image URL'      );
66 | like( $b->{review_id},   qr/^\d+$/,                                     'Book has user review ID' );
67 | ok  ( $b->{user_rating}           > 2,     'User rating'                );
68 | ok  ( $b->{user_read_count}       > 0,     'User read count'            );
69 | ok  ( $b->{user_date_added}->year > 2006,  'User addition-date > 2006'  );  # GR was founded in 2007
70 | is  ( $b->{user_num_owned},       0,       'Number of user-owned books' ); 
71 | # Author attributes, stored in the nested hash under rh_author:
72 | is  ( $b->{rh_author}->{id},               '2546',                                                 'Book has author ID'          );
73 | is  ( $b->{rh_author}->{name_lf},          'Palahniuk, Chuck',                                     'Book has author name'        );
74 | is  ( $b->{rh_author}->{url},              'https://www.goodreads.com/author/show/2546',           'Book has author URL'         );
75 | like( $b->{rh_author}->{works_url},        qr/^https:\/\/www\.goodreads\.com\/author\/list\/2546/, 'Book has author works URL'   );
76 | is  ( $b->{rh_author}->{is_author},        1,                                                      'Book author has author flag' );
77 | is  ( $b->{rh_author}->{is_private},       0,                                                      'Book author not private'     );
78 | is  ( $b->{rh_author}->{is_mainstream},    1,                                                      'Author is mainstream'        );
79 | is  ( $b->{rh_author}->{user_avg_rating},  5,                                                      'user avg rating'             );
80 | is  ( $b->{rh_author}->{user_min_rating},  5,                                                      'user min rating'             );
81 | is  ( $b->{rh_author}->{user_max_rating},  5,                                                      'user max rating'             );
82 | 
83 | 
84 | # Not available or scraped yet, otherwise one of the following
85 | # tests will fail and remind me of implementing a correct test:
86 | is  ( $b->{rh_author}->{residence},        undef,  'N/A: Author residence'         );
87 | is  ( $b->{rh_author}->{img_url},          undef,  'N/A: Author image URL'         );
88 | is  ( $b->{rh_author}->{is_staff},         undef,  'N/A: Is Goodreads author'      );
89 | is  ( $b->{rh_author}->{is_female},        undef,  'N/A: Author gender'            );
90 | is  ( $b->{rh_author}->{is_friend},        undef,  'N/A: Author friend status'     );
91 | is  ( $b->{rh_author}->{num_books},        undef,  'N/A: Number of author books'   );
92 | is  ( $b->{rh_author}->{num_reviews},      undef,  'N/A: Number of book reviews'   );
93 | #is  ( $b->{user_date_read},                undef,  'N/A: User reading-date'        );  # TODO
94 | is  ( scalar( @{$b->{ra_user_shelves}} ),  0,      'N/A: User shelves for book'    );
95 | 
96 | 


--------------------------------------------------------------------------------
/t/greadshelfnames.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] read all shelf names of another Goodreads member, exclude some shelves
 5 | 
 6 | 
 7 | use diagnostics;  # More debugging info
 8 | use warnings;
 9 | use strict;
10 | use FindBin;
11 | use local::lib "$FindBin::Bin/../lib/local/";
12 | use        lib "$FindBin::Bin/../lib/";
13 | use        lib "$FindBin::Bin/../t/";
14 | use Test::More      qw( no_plan  );
15 | use List::MoreUtils qw( any none );
16 | 
17 | 
18 | use_ok( 'Goodscrapes' );
19 | require( 'config.pl' );
20 | 
21 | 
22 | # Real tests must see current Goodreads.com markup, so caching should be off;
23 | # a one-day window is kept only to make repeated runs during development bearable:
24 | gsetopt( 'cache_days', 1 );
25 | 
26 | 
27 | # For now, shelf names can only be read by a signed-in user:
28 | my $usermail = get_gooduser_mail();
29 | my $userpass = get_gooduser_pass();
30 | glogin( usermail => $usermail, userpass => $userpass );
31 | 
32 | 
33 | # Scraping *all* shelf names has its own command (see the function in Goodscrapes.pm).
34 | # Otis Chandler's shelves are paginated, so the expected names include some from page 2:
35 | my @excluded_shelves = ( 'to-read', 'nonfiction' );
36 | my @expected_shelves = ( 'read', 'currently-reading', 'health', 'submarine', 'travel' );
37 | my @shelfnames;
38 | 
39 | greadshelfnames( from_user_id => '1',     # Otis Chandler
40 |                  ra_into      => \@shelfnames,
41 |                  ra_exclude   => [ @excluded_shelves ]);
42 | 
43 | 
44 | for my $wanted ( @expected_shelves )
45 | {
46 | 	ok( (any { $_ eq $wanted } @shelfnames),  'User has shelf' );
47 | }
48 | for my $unwanted ( @excluded_shelves )
49 | {
50 | 	ok( (none { $_ eq $unwanted } @shelfnames),  'User shelf was excluded' );
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/t/greadsimilaraut.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] 
 5 | #   [ ] 
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More      qw( no_plan      );
17 | use List::MoreUtils qw( any firstval );
18 | 
19 | 
20 | use_ok( 'Goodscrapes' );
21 | 
22 | # Live test of greadsimilaraut(): reads the authors similar to one author and checks the attributes of one expected hit.
23 | # We should never use caching during real tests:
24 | # We need to test against the most up-to-date markup from Goodreads.com
25 | # Having no cache during development is annoying, tho. 
26 | # So we leave a small window:
27 | gsetopt( cache_days => 1 );
28 | 
29 | 
30 | print( 'Reading similar authors... ');
31 | # Result hash keyed by Goodreads author ID, and the ID expected to be among the results:
32 | my %authors;
33 | my $SIMILAR_AUTHOR_ID = '9876';  # John Milton
34 | 
35 | greadsimilaraut( author_id   => '1734373',  # Karl Held
36 |                  rh_into     => \%authors,
37 |                  on_progress => gmeter( 'similar' ));
38 | 
39 | 
40 | 
41 | print( "\n" );
42 | 
43 | 
44 | ok( scalar( keys( %authors )) >= 4, 'At least 4 similar authors' );
45 | # Abort the whole run if the specimen is missing; every assertion below dereferences it:
46 | ok( exists( $authors{$SIMILAR_AUTHOR_ID} ), 'Expected author found via hash-key = Goodreads author ID' )
47 | 	or BAIL_OUT( "Cannot test author attributes when expected author is missing." );
48 | 
49 | # Named $author rather than $a: $a and $b are reserved for sort blocks and should not be declared with my.
50 | my $author = $authors{$SIMILAR_AUTHOR_ID};
51 | 
52 | isa_ok( $author, 'HASH', 'Author datatype' );
53 | is  ( $author->{id},            $SIMILAR_AUTHOR_ID,                                         'Author has ID'          );
54 | is  ( $author->{name},          'John Milton',                                              'Author has name'        );
55 | is  ( $author->{url},           "https://www.goodreads.com/author/show/$SIMILAR_AUTHOR_ID", 'Author has URL'         );
56 | like( $author->{works_url},     qr/^https:\/\/www\.goodreads\.com\/author\/list\/$SIMILAR_AUTHOR_ID/, 'Author has works URL' );
57 | like( $author->{img_url},       qr/\.jpg$/,                                                 'Author has image URL'   );
58 | is  ( $author->{is_author},     1,                                                          'Author has author flag' );
59 | is  ( $author->{is_private},    0,                                                          'Author not private'     );
60 | ok  ( $author->{is_mainstream},                                                             'is mainstream author'   );
61 | 
62 | # Not available or scraped yet, otherwise one of the following
63 | # tests will fail and remind me of implementing a correct test:
64 | is  ( $author->{name_lf},   $author->{name}, 'N/A: author name != name_lf' );  # "Dick, Philip K."
65 | is  ( $author->{residence}, undef,           'N/A: author residence'       );
66 | is  ( $author->{age},       undef,           'N/A: author age'             );
67 | is  ( $author->{num_books}, undef,           'N/A: number of author books' );
68 | is  ( $author->{is_friend}, undef,           'N/A: author friend status'   );
69 | is  ( $author->{is_female}, undef,           'N/A: author gender status'   );
70 | is  ( $author->{is_staff},  undef,           'N/A: is Goodreads author'    );
71 | 
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/t/greaduser.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] read normal user info and check attributes (detects changed markup)
 5 | #   [ ] read author user info and check attributes (detects changed markup)
 6 | #   [ ] private users
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use        lib "$FindBin::Bin/../t/";
17 | use Test::More      qw( no_plan      );
18 | use List::MoreUtils qw( any firstval );
19 | 
20 | 
21 | use_ok( 'Goodscrapes' );
22 | require( 'config.pl' );
23 | # Live test of greaduser(): reads one regular member and one author and checks the attributes of each returned hash.
24 | # NOTE(review): config.pl presumably defines get_gooduser_mail() / get_gooduser_pass(), which are called below.
25 | # We should never use caching during real tests:
26 | # We need to test against the most up-to-date markup from Goodreads.com
27 | # Having no cache during development is annoying, tho. 
28 | # So we leave a small window:
29 | gsetopt( cache_days => 1 );
30 | 
31 | 
32 | # Some info is only available to authenticated users:
33 | glogin( usermail => get_gooduser_mail(),
34 |         userpass => get_gooduser_pass() );
35 | 
36 | 
37 | # Normal user:
38 | my $user_id = '62730330';
39 | my %u       = greaduser( $user_id );
40 | # Lines marked "# login" concern attributes that need the authenticated session established above:
41 | is  ( $u{id},         $user_id,                                           'User has Goodreads ID'    );
42 | is  ( $u{name},       'Paola Quiros (pbarrant)',                          'User has name'            );
43 | is  ( $u{is_female},  1,                                                  'User is female'           );
44 | ok  ( $u{num_books}   >  10,                                              'User has number of books' );
45 | ok  ( $u{age}         >= 38,                                              'User has age'             );   # login
46 | #is ( $u{residence},  '',                                                 'User has residence'       );   # login
47 | #is ( $u{is_private}, 0,                                                  'User is not private'      );   # login
48 | is  ( $u{is_author},  0,                                                  'User not an author'       );
49 | is  ( $u{is_staff},   1,                                                  'User is GR employee'      );
50 | is  ( $u{url},        'https://www.goodreads.com/user/show/' . $user_id,  'User has URL'             );
51 | like( $u{img_url},    qr/^https:\/\/[a-z0-9]+\.gr-assets\.com\//,         'User has image URL'       );
52 | is  ( $u{works_url},  undef,                                              'User has no works URL (not an author) ' );
53 | 
54 | # Not available or scraped yet, otherwise one of the following
55 | # tests will fail and remind me of implementing a correct test:
56 | is  ( $u{is_friend},  undef, 'Not avail: user friend status' );
57 | 
58 | 
59 | # Author user:
60 | # NOTE(review): the second argument (1) presumably selects the author profile - confirm against greaduser() in Goodscrapes.pm.
61 | my %au = greaduser( '2546', 1 );
62 | 
63 | is  ( $au{id},             '2546',                                                 'Author has ID'            );
64 | is  ( $au{name},           'Chuck Palahniuk',                                      'Author has name'          );
65 | is  ( $au{url},            'https://www.goodreads.com/author/show/2546',           'Author has URL'           );
66 | like( $au{works_url},      qr/^https:\/\/www\.goodreads\.com\/author\/list\/2546/, 'Author has works URL'     );
67 | like( $au{img_url},        qr/^https:\/\/images.gr-assets.com/,                    'Author has image URL'     );
68 | is  ( $au{is_author},      1,                                                      'Author has author flag'   );
69 | is  ( $au{is_private},     0,                                                      'Author not private'       );
70 | is  ( $au{is_staff},       1,                                                      'Goodreads author'         );
71 | ok  ( $au{num_books}       > 10,                                                   'Author > 10 books'        );
72 | ok  (!$au{is_mainstream},                                                          'Author is not mainstream' );
73 | 
74 | # Not available or scraped yet, otherwise one of the following
75 | # tests will fail and remind me of implementing a correct test:
76 | is  ( $au{is_friend},      undef, 'N/A: author friend status' );
77 | is  ( $au{is_female},      undef, 'N/A: author gender status' );
78 | is  ( $au{residence},      undef, 'N/A: author residence'     );
79 | 
80 | 
81 | #use Data::Dumper;
82 | #print Dumper(%u);
83 | 
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/t/greadusergrp.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] Read groups and check attributes (detects changed markup)
 5 | #   [ ] 
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More      qw( no_plan      );
17 | use List::MoreUtils qw( any firstval );
18 | 
19 | 
20 | use_ok( 'Goodscrapes' );
21 | 
22 | # Live test of greadusergp(): reads the groups of one member and checks the attributes of one known group (ID 8095).
23 | # We should never use caching during real tests:
24 | # We need to test against the most up-to-date markup from Goodreads.com
25 | # Having no cache during development is annoying, tho. 
26 | # So we leave a small window:
27 | gsetopt( cache_days => 1 );
28 | 
29 | 
30 | print( "Getting groups... " );
31 | # Result hash; the assertions below look groups up by Goodreads group ID:
32 | my %groups;
33 | 	
34 | greadusergp( from_user_id => '1',  # "Otis Chandler" (GR founder)
35 |              rh_into      => \%groups,
36 | 		   # on_group   => sub{},
37 |              on_progress  => gmeter( 'groups' ));
38 | 
39 | print( "\n" );
40 | 
41 | ok( scalar( keys( %groups )) > 70, 'At least 70 groups (3 pages)' );  # Chandler had 127
42 | # Abort the whole run if the specimen is missing; every assertion below dereferences it:
43 | ok( exists( $groups{8095} ), 'Expected group found via hash-key = Goodreads group ID' )
44 | 	or BAIL_OUT( "Cannot test group attributes when expected group is missing." );
45 | 
46 | my $g = $groups{8095};
47 | # NOTE(review): the image URL below is compared literally, so the test presumably breaks if the group image changes - confirm this is intended.
48 | is( $g->{id},         '8095',                                      'Group has Goodreads ID'      );
49 | is( $g->{name},       'Goodreads Developers',                      'Group has name'              );
50 | ok( $g->{num_members} > 1000,                                      'Group has number of members' );
51 | is( $g->{url},        'https://www.goodreads.com/group/show/8095', 'Group has URL'               );
52 | is( $g->{img_url},    'https://images.gr-assets.com/groups/1220414390p2/8095.jpg', 'Group has image URL' );
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/t/gsearch.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl -w
  2 | 
  3 | # Test cases realized:
  4 | #   [x] getting books with expected attributes (detects changes in markup)
  5 | #   [ ] order
  6 | #   [ ] num_ratings
  7 | #   [ ] exact matches
  8 | #   [ ] invalid arguments
  9 | 
 10 | use diagnostics;  # More debugging info
 11 | use warnings;
 12 | use strict;
 13 | use FindBin;
 14 | use local::lib "$FindBin::Bin/../lib/local/";
 15 | use        lib "$FindBin::Bin/../lib/";
 16 | use Test::More      qw( no_plan  );
 17 | use List::MoreUtils qw( firstval );
 18 | 
 19 | 
 20 | use_ok( 'Goodscrapes' );
 21 | 
 22 | 
 23 | # We should never use caching during real tests:
 24 | # We need to test against the most up-to-date markup from Goodreads.com
 25 | # Having no cache during development is annoying, though.
 26 | # So we leave a small window:
 27 | gsetopt( cache_days    => 1 );
 28 | gsetopt( ignore_errors => 1 );
 29 | gsetopt( maxretries    => 0 );
 30 | 
 31 | 
 32 | diag( 'takes ~8 minutes' );
 33 | 
 34 | 
 35 | print( 'Searching books... ' );
 36 | 
 37 | my @books;
 38 | gsearch( phrase      => 'Linux',
 39 |          ra_into     => \@books,
 40 |          is_exact    => 0,
 41 |          ra_order_by => [ 'stars', 'num_ratings', 'year' ],
 42 |          num_ratings => 5,
 43 |          on_progress => gmeter());
 44 | 
 45 | print( "\n" );
 46 | 
 47 | my $numbooks = scalar( @books );
 48 | ok( $numbooks > 450, "More than 450 results, got $numbooks" );  # was 500, later 480
 49 | 
 50 | my $BOOK_ID = '8474434';
 51 | # Named $book, not $b: a lexical $b would mask the package variable used by sort()
 52 | my $book = firstval{ $_->{id} eq $BOOK_ID } @books;
 53 | 
 54 | isa_ok( $book, 'HASH', 'Book datatype' )
 55 | 	or BAIL_OUT( "Cannot test book attributes when expected book is missing." );
 56 | 
 57 | # Attributes the search result is expected to carry:
 58 | is  ( $book->{id},                         $BOOK_ID,                                         'Book has Goodreads ID'      );
 59 | is  ( $book->{title},                      'Linux Kernel Development',                       'Book has title'             );
 60 | is  ( $book->{url},                        'https://www.goodreads.com/book/show/'.$BOOK_ID,  'Book has URL'               );
 61 | like( $book->{img_url},                    qr/^https:.*\.(jpg|png)$/,                        'Book has image URL'         );
 62 | ok  ( $book->{stars}                       > 0,                                              'Book has stars rating'      );
 63 | ok  ( $book->{avg_rating}                  > 0,                                              'Book has average rating'    );
 64 | ok  ( $book->{num_ratings}                 > 0,                                              'Book has number of ratings' );
 65 | is  ( $book->{year},                       2003,                                             'Book has year published'    );
 66 | is  ( $book->{rh_author}->{id},            '13609144',                                       'Book has author ID'         );
 67 | is  ( $book->{rh_author}->{name},          'Robert   Love',                                  'Book has author name'       );
 68 | is  ( $book->{rh_author}->{url},           'https://www.goodreads.com/author/show/13609144', 'Book has author URL'        );
 69 | like( $book->{rh_author}->{works_url},     qr/^https:\/\/www\.goodreads\.com\/author\/list\/13609144/, 'Book has author works URL' );
 70 | is  ( $book->{rh_author}->{is_author},     1,                                                'Book author has author flag' );
 71 | is  ( $book->{rh_author}->{is_private},    0,                                                'Book author not private'     );
 72 | ok  (!$book->{rh_author}->{is_mainstream},                                                   'Book author not mainstream author' );
 73 | 
 74 | 
 75 | 
 76 | # Not available or not scraped yet. As soon as one of these attributes gets
 77 | # filled, the matching test fails and reminds me to implement a proper test:
 78 | is  ( $book->{rh_author}->{name_lf},          $book->{rh_author}->{name},  'N/A: Author name_lf != name' );
 79 | is  ( $book->{rh_author}->{residence},        undef,        'N/A: Author residence'        );
 80 | like( $book->{rh_author}->{img_url},          qr/nophoto/,  'N/A: Author real image URL'   );
 81 | is  ( $book->{rh_author}->{is_staff},         undef,        'N/A: Is Goodreads author'     );
 82 | is  ( $book->{rh_author}->{is_female},        undef,        'N/A: Author gender'           );
 83 | is  ( $book->{rh_author}->{is_friend},        undef,        'N/A: Author friend status'    );
 84 | is  ( $book->{rh_author}->{num_books},        undef,        'N/A: Number of author books'  );
 85 | is  ( $book->{year_edit},                     undef,        'N/A: Book edition pub-year'   );
 86 | is  ( $book->{isbn},                          undef,        'N/A: Book ISBN'               );
 87 | is  ( $book->{isbn13},                        undef,        'N/A: Book ISBN13'             );
 88 | is  ( $book->{num_pages},                     undef,        'N/A: Book number of pages'    );
 89 | is  ( $book->{format},                        undef,        'N/A: Book format'             );
 90 | is  ( $book->{review_id},                     undef,        'N/A: User book review ID'     );
 91 | is  ( $book->{user_rating},                   undef,        'N/A: User book rating'        );
 92 | is  ( $book->{user_read_count},               undef,        'N/A: User read count'         );
 93 | is  ( $book->{user_date_added},               undef,        'N/A: User addition-date'      );
 94 | is  ( $book->{num_reviews},                   undef,        'N/A: Number of book reviews'  );
 95 | is  ( $book->{user_num_owned},                undef,        'N/A: Number user-owned books' );
 96 | is  ( $book->{user_date_read},                undef,        'N/A: User reading-date'       );
 97 | is  ( scalar( @{$book->{ra_user_shelves}} ),  0,            'N/A: User shelves for book'   );
 98 | 
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/t/gsocialnet.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] 
 5 | #   [ ] 
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | 
11 | use diagnostics;  # More debugging info
12 | use warnings;
13 | use strict;
14 | use FindBin;
15 | use local::lib "$FindBin::Bin/../lib/local/";
16 | use        lib "$FindBin::Bin/../lib/";
17 | use Test::More      qw( no_plan      );
18 | use List::MoreUtils qw( any firstval );
19 | 
20 | 
21 | use_ok( 'Goodscrapes' );  # Checks that the module can be loaded
22 | 
23 | # Placeholder until real tests exist: announce the gap, record one passing check
24 | diag( 'Tests TODO' );
25 | pass();
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/t/gverifyshelf.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] shelf name corrections (displayed name vs real ids)
 5 | #   [ ] invalid shelves
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict; 
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More qw( no_plan );
17 | 
18 | 
19 | use_ok( 'Goodscrapes' );
20 | 
21 | # Internal vs displayed shelf names for default GR shelves.
22 | # One row per default shelf:
23 | #   [ canonical ID, [ spellings corrected to it ], [ look-alikes left alone ] ]
24 | my @default_shelves = (
25 |     [ '#ALL#',             [ 'AlL', '#AlL' ],             [ 'all-x', 'x-all', 'x-all-x' ] ],
26 |     [ 'read',              [ 'ReAd' ],                    [ 'x-read', 'read-x', 'x-read-x' ] ],
27 |     [ 'currently-reading', [ 'CurrEntly_ReAding' ],       [ 'x-currently-reading', 'currently-reading-x', 'x-currently-reading-x' ] ],
28 |     # 'Want-To_ReAd': you could have such a shelf but misspelling more likely
29 |     [ 'to-read',           [ 'tO_ReaD', 'Want-To_ReAd' ], [ 'x-to-read', 'to-read-x', 'x-to-read-x' ] ],
30 | );
31 | 
32 | foreach my $row ( @default_shelves ) {
33 |     my( $shelf, $ra_corrected, $ra_untouched ) = @$row;
34 | 
35 |     is( gverifyshelf( $shelf ), $shelf, 'Shelf valid' );
36 | 
37 |     foreach my $typo ( @$ra_corrected ) {
38 |         is( gverifyshelf( $typo ), $shelf, 'Shelf corrected' );
39 |     }
40 |     foreach my $other ( @$ra_untouched ) {
41 |         isnt( gverifyshelf( $other ), $shelf, 'Shelf not corrected' );
42 |     }
43 | }
44 | 
45 | 
46 | # User created shelves:
47 | 
48 | is( gverifyshelf( 'UsEr_CreaTed-shElf' ), 'user_created-shelf', 'Shelf "UsEr_CreaTed-shElf" corrected to lowercase' );
49 | 
50 | 
51 | # Invalid shelves:
52 | 
53 | # @TODO
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/t/gverifyxxx.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Test cases realized:
 4 | #   [x] sanitization
 5 | #   [x] invalid/empty/missing argument -> die
 6 | #   [ ] 
 7 | #   [ ] 
 8 | 
 9 | 
10 | use diagnostics;  # More debugging info
11 | use warnings;
12 | use strict;
13 | use FindBin;
14 | use local::lib "$FindBin::Bin/../lib/local/";
15 | use        lib "$FindBin::Bin/../lib/";
16 | use Test::More qw( no_plan );
17 | use Test::Exception;
18 | 
19 | 
20 | use_ok( 'Goodscrapes' );
21 | 
22 | # gverifyuser(): returns the numeric ID, dies on anything unusable
23 | is( gverifyuser( '123'          ), '123', 'Valid user ID'     );
24 | is( gverifyuser( '123-username' ), '123', 'Sanitized user ID' );
25 | 
26 | dies_ok { gverifyuser( 'username' ) } 'Invalid user ID';
27 | dies_ok { gverifyuser( ''         ) } 'Empty user ID';
28 | dies_ok { gverifyuser( undef      ) } 'Missing user ID';
29 | 
30 | # gverifyshelf(): returns a valid name as is, dies on anything unusable
31 | is( gverifyshelf( 'myshelf' ), 'myshelf', 'Valid shelf name' );
32 | 
33 | dies_ok { gverifyshelf( '^@#' ) } 'Invalid shelf name';
34 | dies_ok { gverifyshelf( ''    ) } 'Empty shelf name';
35 | dies_ok { gverifyshelf( undef ) } 'Missing shelf name';
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------